diff --git a/models/audio/speech_recognition/conformer/igie/README.md b/models/audio/speech_recognition/conformer/igie/README.md index 1596f260f849630df77b19e23e8210e345d0ad16..a7770ff53ba86c234735aef8a7855122e9cdadef 100644 --- a/models/audio/speech_recognition/conformer/igie/README.md +++ b/models/audio/speech_recognition/conformer/igie/README.md @@ -18,10 +18,19 @@ Conformer applies convolution to the Encoder layer of Transformer, enhancing the ### Prepare Resources -Pretrained model: - Dataset: to download the Aishell dataset. +```bash +# Download and put model in conformer_checkpoints +wget http://files.deepspark.org.cn:880/deepspark/conformer_checkpoints.tar +tar xf conformer_checkpoints.tar + +# Prepare AISHELL Data +DATA_DIR=/PATH/to/aishell_test_data +TOOL_DIR="$(pwd)/tools" +bash scripts/aishell_data_prepare.sh ${DATA_DIR} ${TOOL_DIR} +``` + ### Install Dependencies ```bash @@ -32,41 +41,10 @@ yum install sox sox-devel -y apt install sox libsox-fmt-all -y pip3 install -r requirements.txt -cd ctc_decoder/swig && bash setup.sh -cd ../../ -``` - -### Model Conversion - -```bash -tar -zxvf 20211025_conformer_exp.tar.gz - -export PYTHONPATH=`pwd`/wenet:$PYTHONPATH - -# Get Onnx Model -cd wenet -python3 wenet/bin/export_onnx_gpu.py \ - --config ../20211025_conformer_exp/train.yaml \ - --checkpoint ../20211025_conformer_exp/final.pt \ - --batch_size 24 \ - --seq_len 384 \ - --beam 4 \ - --cmvn_file ../20211025_conformer_exp/global_cmvn \ - --output_onnx_dir ../ -cd .. - -# Use onnxsim optimize onnx model -onnxsim encoder_bs24_seq384_static.onnx encoder_bs24_seq384_static_opt.onnx -python3 alter_onnx.py --batch_size 24 --path encoder_bs24_seq384_static_opt.onnx ``` ## Model Inference -```bash -# Need to unzip aishell to the current directory. For details, refer to data.list -tar -zxvf aishell.tar.gz -``` - ### FP16 ```bash @@ -78,6 +56,6 @@ bash scripts/infer_conformer_fp16_performance.sh ## Model Results -| Model | BatchSize | Precision | FPS | ACC | -| :----: | :----: | :----: | :----: | :----: | -| Conformer | 32 | FP16 | 1940.759 | 95.29 | +| Model | BatchSize | Precision | QPS | CER | +| --------- | --------- | --------- | ------- | ------ | +| Conformer | 24 | FP16 | 1408.352 | 0.0497 | diff --git a/models/audio/speech_recognition/conformer/igie/alter_onnx.py b/models/audio/speech_recognition/conformer/igie/alter_onnx.py deleted file mode 100644 index ad1b380d7058d947449c44881d4c4b5e0be53568..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/alter_onnx.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -from onnx import numpy_helper -import numpy as np -import os -import argparse - -def get_args_parser(add_help=True): - parser = argparse.ArgumentParser(description='alter onnx model', add_help=add_help) - - parser.add_argument('--batch_size', type=int, default=24, help='Model batch size.') - parser.add_argument('--path', type=str, required=True, help='ONNX model path.') - return parser - - -args = get_args_parser().parse_args() - -encoder_onnx_path=args.path -batch_size = args.batch_size -onnx_model = onnx.load(encoder_onnx_path) - - -graph = onnx_model.graph -node = graph.node - -matmul_input_node = [] -for i in range(len(node)): - if node[i].op_type == 'MatMul': - for name in node[i].input: - matmul_input_node.append(name) - -## alter node -for initializer in graph.initializer: - if initializer.name in matmul_input_node: - if initializer.dims[0] == 1: - W = numpy_helper.to_array(initializer) - W_new = [] - for i in range(batch_size): - W_new.append(W[0]) - W_new = np.array(W_new) - tensor = numpy_helper.from_array(W_new, initializer.name) - initializer.CopyFrom(tensor) - initializer.dims[0] = batch_size - -## print node -for initializer in graph.initializer: - if initializer.name in matmul_input_node: - if initializer.dims[0] == 24: - W = numpy_helper.to_array(initializer) - weights_map = {} - weights_map[initializer.name] = W - -onnx_model = onnx.shape_inference.infer_shapes(onnx_model) -onnx.checker.check_model(onnx_model) - -file_name, file_ext = os.path.splitext(encoder_onnx_path) -print("Save New Model to ", file_name + "_matmul.onnx") -onnx.save(onnx_model, file_name + "_matmul.onnx") diff --git a/models/audio/speech_recognition/conformer/igie/build_engine.py b/models/audio/speech_recognition/conformer/igie/build_engine.py index aee72f0ce3ac1cb01e4d9b0e5dabbaf59d120bb3..554d94c9db3e435d20838ce1d165b37405c51cb8 100644 --- a/models/audio/speech_recognition/conformer/igie/build_engine.py +++ b/models/audio/speech_recognition/conformer/igie/build_engine.py @@ -1,83 +1,85 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. +import os +import json +import onnx +import logging +import argparse +import tensorrt +from tensorrt import Dims import tvm -import argparse from tvm import relay from tvm.relay.import_model import import_model_to_igie +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + def parse_args(): - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(description="Build tensorrt engine of conformer") + parser.add_argument("--onnx_model", type=str, required=True, help="onnx model path") + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--input_size", type=tuple, default=(-1, 80), help="inference size") + parser.add_argument("--engine_path", type=str, required=True, help="path to save the engine") + parser.add_argument( "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4") - parser.add_argument("--model_path", - type=str, - required=True, - help="original model path.") - - parser.add_argument("--engine_path", - type=str, - required=True, - help="igie export engine path.") - - parser.add_argument("--input", - type=str, - nargs='+', - required=True, - help=""" - input info of the model, format should be: - input_name:input_shape - eg: --input input:1,3,224,224. - """) - - parser.add_argument("--precision", - type=str, - choices=["fp32", "fp16", "int8"], - required=True, - help="model inference precision.") - args = parser.parse_args() - return args -def main(): - args = parse_args() - # get input valueinfo - input_dict = {} - for input_info in args.input: - input_name, input_shape = input_info.split(":") - shape = tuple([int(s) for s in input_shape.split(",")]) - input_dict[input_name] = shape +def build_engine_trtapi_dynamicshape(args): + onnx_model = args.onnx_model + assert os.path.isfile(onnx_model), f"The onnx model{onnx_model} must be existed!" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + profile = builder.create_optimization_profile() + + profile.set_shape( + "input", Dims([1,1,80]),Dims([16,800,80]),Dims([128,1500,80]) + ) + profile.set_shape( + "seq_lengths", Dims([1]), Dims([16]), Dims([128]) + ) - target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + build_config.add_optimization_profile(profile) - mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - func = mod["main"] - body = func.body - new_body = relay.Tuple([body[0], body[1], body[2]]) - func = relay.Function(relay.analysis.free_vars(new_body), new_body) - encoder_mod = tvm.IRModule.from_expr(func) - encoder_mod = relay.transform.InferType()(encoder_mod) + parser.parse_from_file(onnx_model) + build_config.set_flag(tensorrt.BuilderFlag.FP16) - # build engine - lib = tvm.relay.build(encoder_mod, target=target, params=params, precision=args.precision) + # set dynamic + input_tensor = network.get_input(0) + input_tensor.shape = Dims([-1, -1, 80]) + + seq_lengths_tensor = network.get_input(1) + seq_lengths_tensor.shape = Dims([-1]) - # export engine - lib.export_library(args.engine_path) + plan = builder.build_serialized_network(network, build_config) + with open(args.engine_path, "wb") as f: + f.write(plan) + print("Build dynamic shape engine done!") + + +def build_engine_igieapi_dynamicshape(args): + onnx_model = args.onnx_model + assert os.path.isfile(onnx_model), f"The onnx model{onnx_model} must be existed!" 
+ + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + inputs_info = {'input': ([128, 1500, 80], 'float16'), 'seq_lengths': ([128], 'int32')} + precision = "fp16" + + mod, params = import_model_to_igie(onnx_model, inputs_info, outputs_info=None, precision=precision, backend="tensorrt") + lib = relay.build(mod, target=target, params=params, precision=precision, device=device) + lib.export_library(args.engine_path) + + print("Build dynamic shape engine done!") + if __name__ == "__main__": - main() \ No newline at end of file + args = parse_args() + build_engine_trtapi_dynamicshape(args) + diff --git a/models/audio/speech_recognition/conformer/igie/ci/prepare.sh b/models/audio/speech_recognition/conformer/igie/ci/prepare.sh index 4ad9e36177af9375c777c8fa104563d1445a4627..49ec77a0ed67c0a5218b117f8f6145aac817797e 100644 --- a/models/audio/speech_recognition/conformer/igie/ci/prepare.sh +++ b/models/audio/speech_recognition/conformer/igie/ci/prepare.sh @@ -13,13 +13,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - set -x ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') if [[ ${ID} == "ubuntu" ]]; then - apt-get update - apt install sox libsox-fmt-all + apt install sox libsox-fmt-all -y elif [[ ${ID} == "centos" ]]; then yum install sox sox-devel -y else @@ -27,26 +25,9 @@ else fi pip3 install -r requirements.txt -cd ctc_decoder/swig && bash setup.sh -cd ../../ - -# tar -zxvf 20211025_conformer_exp.tar.gz - -# Get Onnx Model -cd wenet -python3 wenet/bin/export_onnx_gpu.py \ - --config ../20211025_conformer_exp/train.yaml \ - --checkpoint ../20211025_conformer_exp/final.pt \ - --batch_size 24 \ - --seq_len 384 \ - --beam 4 \ - --cmvn_file ../20211025_conformer_exp/global_cmvn \ - --output_onnx_dir ../ -cd .. - -# Use onnxsim optimize onnx model -onnxsim encoder_bs24_seq384_static.onnx encoder_bs24_seq384_static_opt.onnx -python3 alter_onnx.py --batch_size 24 --path encoder_bs24_seq384_static_opt.onnx -# Need to unzip aishell to the current directory. 
For details, refer to data.list -# tar -zxvf aishell.tar.gz +ln -s /mnt/deepspark/data/checkpoints/conformer_checkpoints.tar ./ +tar xf conformer_checkpoints.tar +cp /mnt/deepspark/data/datasets/aishell_test_data.tar ./ +tar xf aishell_test_data.tar +bash scripts/aishell_data_prepare.sh ./aishell_test_data ./tools \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/common.py b/models/audio/speech_recognition/conformer/igie/common.py new file mode 100644 index 0000000000000000000000000000000000000000..6081f807c3a709e8d73f1c1a6bc62185ddcdfc09 --- /dev/null +++ b/models/audio/speech_recognition/conformer/igie/common.py @@ -0,0 +1,107 @@ +import os +import cv2 +import glob +import torch +import tensorrt +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + + +def trtapi(engine_file): + datatype = tensorrt.DataType.FLOAT + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + with open(engine_file, "rb") as f, tensorrt.Runtime(logger) as runtime: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations + + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = context.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/config.yaml 
b/models/audio/speech_recognition/conformer/igie/config.yaml deleted file mode 100644 index a5bdda4d400d7d42ac88beb2c95d9c612db5a8dd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/config.yaml +++ /dev/null @@ -1,4 +0,0 @@ -beam_size: 4 -ctc_weight: -1.0 -fp16: false -reverse_weight: -1.0 diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/README.md b/models/audio/speech_recognition/conformer/igie/ctc_decoder/README.md deleted file mode 100644 index d722d30e57770e9b135ae72dbc0b9d90ae62933a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/README.md +++ /dev/null @@ -1,57 +0,0 @@ -## Installation - -We adapted this ctc decoder from [here](https://github.com/PaddlePaddle/DeepSpeech/tree/develop/deepspeech/decoders/swig). -This decoder can only run on cpu. - -* continuous decoding for streaming asr -* support kenlm language model -* multiprocessing - -To install the decoder: -```bash -git clone https://github.com/Slyne/ctc_decoder.git -apt-get update -apt-get install swig -apt-get install python3-dev -cd ctc_decoder/swig && bash setup.sh -``` - -## Usage - -Please refer to ```swig/test/test_en.py``` and ```swig/test/test_zh.py``` for how to do streaming decoding and offline decoding w/o language model. - -### Adding language model -How to build the language model ? -You may refer to [kenlm](https://github.com/kpu/kenlm). -For Mandarin, the input text for language model should be like: -``` -好 好 学 习 ,天 天 向 上 ! -再 接 再 厉 -... -``` -There's a space between two characters. - -For English, the input text is just like the normal text. -``` -Share Market Today - Stock Market and Share Market Live Updates -``` - -How to add language model: -``` -alpha = 0.5 -beta = 0.5 -lm_path = '../kenlm/lm/test.arpa' -scorer = decoder.Scorer(alpha, beta, lm_path, vocab_list) -...... -result1 = decoder.ctc_beam_search_decoder_batch(batch_chunk_log_prob_seq, - batch_chunk_log_probs_idx, - batch_root_trie, - batch_start, - beam_size, num_processes, - blank_id, space_id, - cutoff_prob, scorer) -``` -How language model in called in this implementation of ctc prefix beam search ? - -If the language model is char based (like the Mandarin lm), it will call the language model scorer all the times. -If the language model is word based (like the English lm), it will only call the scorer whenever `space_id` is detected. diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/ctc_beam_search_decoder.cpp b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/ctc_beam_search_decoder.cpp deleted file mode 100644 index 4e85faebd75f23134aa628aa4a236cc3297d58d4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/ctc_beam_search_decoder.cpp +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ctc_beam_search_decoder.h" -#include -#include -#include -#include -#include -#include -#include -#include "ThreadPool/ThreadPool.h" -#include "decoder_utils.h" -#include "fst/fstlib.h" -#include "path_trie.h" - -using FSTMATCH = fst::SortedMatcher; -std::vector>> ctc_beam_search_decoder( - const std::vector> &log_probs_seq, - const std::vector> &log_probs_idx, PathTrie &root, - const bool start, size_t beam_size, int blank_id, int space_id, - double cutoff_prob, Scorer *ext_scorer) { - if (start) { - if (ext_scorer != nullptr && !ext_scorer->is_character_based()) { - auto fst_dict = static_cast(ext_scorer->dictionary); - fst::StdVectorFst *dict_ptr = fst_dict->Copy(true); - root.set_dictionary(dict_ptr); - auto matcher = std::make_shared(*dict_ptr, fst::MATCH_INPUT); - root.set_matcher(matcher); - } - } - int timesteps = log_probs_seq.size(); - - std::vector prefixes; - - // update log probs - if (root.log_prob_b_prev == -NUM_FLT_INF && start) { - root.score = root.log_prob_b_prev = 0.0; - } - root.iterate_to_vec_only(prefixes); - int prev_id = -1; - // prefix search over time - for (size_t time_step = 0; time_step < timesteps; ++time_step) { - float min_cutoff = -NUM_FLT_INF; - bool full_beam = false; - - auto &log_prob = log_probs_seq[time_step]; - auto &log_prob_idx = log_probs_idx[time_step]; - - double top_prob = exp(log_prob[0]); - auto top_id = log_prob_idx[0]; - if (top_prob >= cutoff_prob && top_id == blank_id) - if (prev_id == blank_id) { - continue; // skip this round - } else - prev_id = top_id; - else - prev_id = -1; - - // loop over chars - double cur_acc_prob = 0.0; - for (size_t index = 0; index < log_prob.size(); index++) { - auto c = log_prob_idx[index]; - float log_prob_c = log_prob[index]; - cur_acc_prob += exp(log_prob_c); - if (cur_acc_prob > cutoff_prob && index >= 1) break; - for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) { - auto prefix = prefixes[i]; - if (full_beam && log_prob_c + prefix->score < min_cutoff) { - break; - } - // blank - if (c == blank_id) { - prefix->log_prob_b_cur = - log_sum_exp(prefix->log_prob_b_cur, log_prob_c + prefix->score); - continue; - } - // repeated character - if (c == prefix->character) { - prefix->log_prob_nb_cur = log_sum_exp( - prefix->log_prob_nb_cur, log_prob_c + prefix->log_prob_nb_prev); - } - // get new prefix - auto prefix_new = prefix->get_path_trie(c); - if (prefix_new != nullptr) { - float log_p = -NUM_FLT_INF; - - if (c == prefix->character && - prefix->log_prob_b_prev > -NUM_FLT_INF) { - log_p = log_prob_c + prefix->log_prob_b_prev; - } else if (c != prefix->character) { - log_p = log_prob_c + prefix->score; - } - - // language model scoring - if (ext_scorer != nullptr && - (c == space_id || ext_scorer->is_character_based())) { - PathTrie *prefix_to_score = nullptr; - // skip scoring the space - if (ext_scorer->is_character_based()) { - prefix_to_score = prefix_new; - } 
else { - prefix_to_score = prefix; - } - float score = 0.0; - std::vector ngram; - ngram = ext_scorer->make_ngram(prefix_to_score); - score = ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha; - log_p += score; - log_p += ext_scorer->beta; - } - - prefix_new->log_prob_nb_cur = - log_sum_exp(prefix_new->log_prob_nb_cur, log_p); - } - } // end of loop over prefix - } // end of loop over vocabulary - prefixes.clear(); - // update log probs - root.iterate_to_vec(prefixes); - // only preserve top beam_size prefixes - if (prefixes.size() >= beam_size) { - std::nth_element(prefixes.begin(), prefixes.begin() + beam_size, - prefixes.end(), prefix_compare); - for (size_t i = beam_size; i < prefixes.size(); ++i) { - prefixes[i]->remove(); - } - } - } // end of loop over time - size_t num_prefixes = std::min(prefixes.size(), beam_size); - std::sort(prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare); - return get_beam_search_result(prefixes, beam_size); -} - -std::string map_sent(const std::vector &sent, - const std::vector &vocabulary, bool greedy, - int blank_id) { - std::string output_str; - - if (!greedy) { - for (size_t j = 0; j < sent.size(); j++) { - output_str += vocabulary[sent[j]]; - } - } else { - // greedy search - int prev = -1; - for (size_t i = 0; i < sent.size(); i++) { - int cur = sent[i]; - if (cur != prev && cur != blank_id) output_str += vocabulary[cur]; - prev = cur; - } - } - return output_str; -} - -std::vector map_batch( - const std::vector> &batch_sents, - const std::vector &vocabulary, size_t num_processes, - bool greedy, int blank_id) { - ThreadPool pool(num_processes); - size_t batch_size = batch_sents.size(); - std::vector> res; - for (size_t i = 0; i < batch_size; ++i) { - res.emplace_back(pool.enqueue(map_sent, std::ref(batch_sents[i]), - std::ref(vocabulary), greedy, blank_id)); - } - // get decoding results - std::vector batch_results; - for (size_t i = 0; i < batch_size; ++i) { - batch_results.emplace_back(res[i].get()); - } - return batch_results; -} - -std::vector>>> -ctc_beam_search_decoder_batch( - const std::vector>> &batch_log_probs_seq, - const std::vector>> &batch_log_probs_idx, - std::vector &batch_root_trie, - const std::vector &batch_start, size_t beam_size, - size_t num_processes, int blank_id, int space_id, double cutoff_prob, - Scorer *ext_scorer) { - // thread pool - ThreadPool pool(num_processes); - // number of samples - size_t batch_size = batch_log_probs_seq.size(); - - // enqueue the tasks of decoding - - std::vector>>>> - res; - - for (size_t i = 0; i < batch_size; ++i) { - res.emplace_back( - pool.enqueue(ctc_beam_search_decoder, std::ref(batch_log_probs_seq[i]), - std::ref(batch_log_probs_idx[i]), - std::ref(*batch_root_trie[i]), batch_start[i], beam_size, - blank_id, space_id, cutoff_prob, ext_scorer)); - } - - // get decoding results - std::vector>>> batch_results; - for (size_t i = 0; i < batch_size; ++i) { - batch_results.emplace_back(res[i].get()); - } - return batch_results; -} diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/ctc_beam_search_decoder.h b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/ctc_beam_search_decoder.h deleted file mode 100644 index 8b3921456a73843b1ce9a9936e4fb083c9ae12a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/ctc_beam_search_decoder.h +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef CTC_BEAM_SEARCH_DECODER_H_ -#define CTC_BEAM_SEARCH_DECODER_H_ - -#include -#include -#include -#include "path_trie.h" -#include "scorer.h" - -/* CTC Beam Search Decoder - - * Parameters: - * log_probs_seq: 2-D vector that each element is a vector of log - probabilities - * for one time step, it is sorted (topk) - * log_probs_idx: 2-D vector that the index of every element in - log_probs_seq - * topk index - * root: A PathTrie root - * start: whether this the first chunk of this sequence - * beam_size: The width of beam search. - * blank_id: default is 0 - * space_id: default is -1 - * cutoff_prob: Cutoff probability for pruning. - * ext_scorer: External scorer to evaluate a prefix, which consists of - * n-gram language model scoring and word insertion term. - * Default null, decoding the input sample without scorer. - * Return: - * A vector that each element is a pair of score and decoding result, - * in desending order. -*/ -std::vector>> ctc_beam_search_decoder( - const std::vector> &log_probs_seq, - const std::vector> &log_probs_idx, PathTrie &root, - const bool start, size_t beam_size, int blank_id = 0, int space_id = -1, - double cutoff_prob = 0.999, Scorer *ext_scorer = nullptr); - -/* CTC Beam Search Decoder for batch data - - * Parameters: - * batch_log_probs_seq: 3-D vector that each element is a 2-D vector that - can be used - * by ctc_beam_search_decoder(). - * batch_log_probs_idx: 3-D vector that each element is a 2-D vector that - can be used - * by ctc_beam_search_decoder(). - * batch_root_trie: a batch of Path trie for each sequence - * batch_start: a batch of boolean value to indicate whether this is the - first - * chunk of each sequence - * beam_size: The width of beam search. - * num_processes: Number of threads for beam search. - * blank_id: default blank_id is 0 - * space_id: default space_id is -1, this is for word based scorer - * cutoff_prob: Cutoff probability for pruning. - * ext_scorer: External scorer to evaluate a prefix, which consists of - * n-gram language model scoring and word insertion term. - * Default null, decoding the input sample without scorer. - * Return: - * A 2-D vector that each element is a vector of beam search decoding - * result for one audio sample. 
-*/ -std::vector>>> -ctc_beam_search_decoder_batch( - const std::vector>> &batch_log_probs_seq, - const std::vector>> &batch_log_probs_idx, - std::vector &batch_root_trie, - const std::vector &batch_start, size_t beam_size, - size_t num_processes, int blank_id = 0, int space_id = -1, - double cutoff_prob = 0.999, Scorer *ext_scorer = nullptr); - -/* Map vector of int to string - - * Parameters: - * sent: a vector of int ids - * vocabulary: vocabulary - * Return: - * A decoded string -*/ -std::string map_sent(const std::vector &sent, - const std::vector &vocabulary, - bool greedy = false, int blank_id = 0); - -/* Map batch vector of int to string - - * Parameters: - * batch_sents: a batch of vector of int ids - * vocabulary: vocabulary - * num_processes: number of processes to use - * Return: - * A vector decoded string -*/ -std::vector map_batch( - const std::vector> &batch_sents, - const std::vector &vocabulary, size_t num_processes, - bool greedy = false, int blank_id = 0); - -#endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoder_utils.cpp b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoder_utils.cpp deleted file mode 100644 index 404fefda7188cfdd967a6b1d8a4b733e898a3a79..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoder_utils.cpp +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "decoder_utils.h" - -#include -#include -#include - -std::vector> get_pruned_log_probs( - const std::vector &prob_step, double cutoff_prob, - size_t cutoff_top_n) { - std::vector> prob_idx; - for (size_t i = 0; i < prob_step.size(); ++i) { - prob_idx.push_back(std::pair(i, prob_step[i])); - } - // pruning of vacobulary - size_t cutoff_len = prob_step.size(); - if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) { - std::sort(prob_idx.begin(), prob_idx.end(), - pair_comp_second_rev); - if (cutoff_prob < 1.0) { - double cum_prob = 0.0; - cutoff_len = 0; - for (size_t i = 0; i < prob_idx.size(); ++i) { - cum_prob += prob_idx[i].second; - cutoff_len += 1; - if (cum_prob >= cutoff_prob || cutoff_len >= cutoff_top_n) break; - } - } - prob_idx = std::vector>( - prob_idx.begin(), prob_idx.begin() + cutoff_len); - } - std::vector> log_prob_idx; - for (size_t i = 0; i < cutoff_len; ++i) { - log_prob_idx.push_back(std::pair( - prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN))); - } - return log_prob_idx; -} - -std::vector>> get_beam_search_result( - const std::vector &prefixes, size_t beam_size) { - // allow for the post processing - std::vector space_prefixes; - if (space_prefixes.empty()) { - for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { - space_prefixes.push_back(prefixes[i]); - } - } - - std::sort(space_prefixes.begin(), space_prefixes.end(), prefix_compare); - std::vector>> output_vecs; - for (size_t i = 0; i < beam_size && i < space_prefixes.size(); ++i) { - std::vector output; - space_prefixes[i]->get_path_vec(output); - // convert index to string - - std::pair> output_pair(space_prefixes[i]->score, - output); - output_vecs.emplace_back(output_pair); - } - return output_vecs; -} - -size_t get_utf8_str_len(const std::string &str) { - size_t str_len = 0; - for (char c : str) { - str_len += ((c & 0xc0) != 0x80); - } - return str_len; -} - -std::vector split_utf8_str(const std::string &str) { - std::vector result; - std::string out_str; - - for (char c : str) { - if ((c & 0xc0) != 0x80) // new UTF-8 character - { - if (!out_str.empty()) { - result.push_back(out_str); - out_str.clear(); - } - } - - out_str.append(1, c); - } - result.push_back(out_str); - return result; -} - -std::vector split_str(const std::string &s, - const std::string &delim) { - std::vector result; - std::size_t start = 0, delim_len = delim.size(); - while (true) { - std::size_t end = s.find(delim, start); - if (end == std::string::npos) { - if (start < s.size()) { - result.push_back(s.substr(start)); - } - break; - } - if (end > start) { - result.push_back(s.substr(start, end - start)); - } - start = end + delim_len; - } - return result; -} - -bool prefix_compare(const PathTrie *x, const PathTrie *y) { - if (x->score == y->score) { - if (x->character == y->character) { - return false; - } else { - return (x->character < y->character); - } - } else { - return x->score > y->score; - } -} - -void add_word_to_fst(const std::vector &word, - fst::StdVectorFst *dictionary) { - if (dictionary->NumStates() == 0) { - fst::StdVectorFst::StateId start = dictionary->AddState(); - assert(start == 0); - dictionary->SetStart(start); - } - fst::StdVectorFst::StateId src = dictionary->Start(); - fst::StdVectorFst::StateId dst; - for (auto c : word) { - dst = dictionary->AddState(); - dictionary->AddArc(src, fst::StdArc(c, c, 0, dst)); - src = dst; - } - dictionary->SetFinal(dst, fst::StdArc::Weight::One()); -} - -bool add_word_to_dictionary( - const std::string &word, - const std::unordered_map &char_map, 
bool add_space, - int SPACE_ID, fst::StdVectorFst *dictionary) { - auto characters = split_utf8_str(word); - - std::vector int_word; - - for (auto &c : characters) { - if (c == " ") { - int_word.push_back(SPACE_ID); - } else { - auto int_c = char_map.find(c); - if (int_c != char_map.end()) { - int_word.push_back(int_c->second); - } else { - return false; // return without adding - } - } - } - - if (add_space) { - int_word.push_back(SPACE_ID); - } - - add_word_to_fst(int_word, dictionary); - return true; // return with successful adding -} diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoder_utils.h b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoder_utils.h deleted file mode 100644 index 4d100fb3af5835ec3e08e9f53407948180c4cc66..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoder_utils.h +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef DECODER_UTILS_H_ -#define DECODER_UTILS_H_ - -#include -#include "fst/log.h" -#include "path_trie.h" - -const float NUM_FLT_INF = std::numeric_limits::max(); -const float NUM_FLT_MIN = std::numeric_limits::min(); - -// inline function for validation check -inline void check(bool x, const char *expr, const char *file, int line, - const char *err) { - if (!x) { - std::cout << "[" << file << ":" << line << "] "; - LOG(FATAL) << "\"" << expr << "\" check failed. 
" << err; - } -} - -#define VALID_CHECK(x, info) \ - check(static_cast(x), #x, __FILE__, __LINE__, info) -#define VALID_CHECK_EQ(x, y, info) VALID_CHECK((x) == (y), info) -#define VALID_CHECK_GT(x, y, info) VALID_CHECK((x) > (y), info) -#define VALID_CHECK_LT(x, y, info) VALID_CHECK((x) < (y), info) - -// Function template for comparing two pairs -template -bool pair_comp_first_rev(const std::pair &a, - const std::pair &b) { - return a.first > b.first; -} - -// Function template for comparing two pairs -template -bool pair_comp_second_rev(const std::pair &a, - const std::pair &b) { - return a.second > b.second; -} - -// Return the sum of two probabilities in log scale -template -T log_sum_exp(const T &x, const T &y) { - static T num_min = -std::numeric_limits::max(); - if (x <= num_min) return y; - if (y <= num_min) return x; - T xmax = std::max(x, y); - return std::log(std::exp(x - xmax) + std::exp(y - xmax)) + xmax; -} - -// Get pruned probability vector for each time step's beam search -std::vector> get_pruned_log_probs( - const std::vector &prob_step, double cutoff_prob, - size_t cutoff_top_n); - -// Get beam search result from prefixes in trie tree -std::vector> get_beam_search_result( - const std::vector &prefixes, - const std::vector &vocabulary, size_t beam_size); - -std::vector>> get_beam_search_result( - const std::vector &prefixes, size_t beam_size); - -// Functor for prefix comparsion -bool prefix_compare(const PathTrie *x, const PathTrie *y); - -/* Get length of utf8 encoding string - * See: http://stackoverflow.com/a/4063229 - */ -size_t get_utf8_str_len(const std::string &str); - -/* Split a string into a list of strings on a given string - * delimiter. NB: delimiters on beginning / end of string are - * trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"]. 
- */ -std::vector split_str(const std::string &s, - const std::string &delim); - -/* Splits string into vector of strings representing - * UTF-8 characters (not same as chars) - */ -std::vector split_utf8_str(const std::string &str); - -// Add a word in index to the dicionary of fst -void add_word_to_fst(const std::vector &word, - fst::StdVectorFst *dictionary); - -// Add a word in string to dictionary -bool add_word_to_dictionary( - const std::string &word, - const std::unordered_map &char_map, bool add_space, - int SPACE_ID, fst::StdVectorFst *dictionary); -#endif // DECODER_UTILS_H diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoders.i b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoders.i deleted file mode 100644 index a53ab46ab1aa32607d7b71dbb252e13c478d9dc6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoders.i +++ /dev/null @@ -1,37 +0,0 @@ -%module swig_decoders -%{ -#include "scorer.h" -#include "ctc_beam_search_decoder.h" -#include "decoder_utils.h" -#include "path_trie.h" -%} - -%include "std_vector.i" -%include "std_pair.i" -%include "std_string.i" -%include "path_trie.h" -%import "decoder_utils.h" - -namespace std { - %template(DoubleVector) std::vector; - %template(IntVector) std::vector; - %template(StringVector) std::vector; - %template(VectorOfStructVectorDouble) std::vector >; - %template(VectorOfStructVectorInt) std::vector>; - %template(FloatVector) std::vector; - %template(Pair) std::pair>; - %template(PairFloatVectorVector) std::vector>>; - %template(PairDoubleVectorVector) std::vector>>; - %template(PairDoubleVectorVector2) std::vector>>>; - %template(DoubleVector3) std::vector>>; - %template(IntVector3) std::vector>>; - %template(TrieVector) std::vector; - %template(BoolVector) std::vector; -} -%template(IntDoublePairCompSecondRev) pair_comp_second_rev; -%template(StringDoublePairCompSecondRev) pair_comp_second_rev; -%template(DoubleStringPairCompFirstRev) pair_comp_first_rev; - -%include "scorer.h" -%include "path_trie.h" -%include "ctc_beam_search_decoder.h" diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoders_wrap.cxx b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoders_wrap.cxx deleted file mode 100644 index b9c3c8aea63dd6efc29f5b937adc615b8835e0ae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/decoders_wrap.cxx +++ /dev/null @@ -1,38204 +0,0 @@ -/* ---------------------------------------------------------------------------- - * This file was automatically generated by SWIG (http://www.swig.org). - * Version 3.0.12 - * - * This file is not intended to be easily readable and contains a number of - * coding conventions designed to improve portability and efficiency. Do not make - * changes to this file unless you know what you are doing--modify the SWIG - * interface file instead. 
- * ----------------------------------------------------------------------------- */ - - -#ifndef SWIGPYTHON -#define SWIGPYTHON -#endif - -#define SWIG_PYTHON_DIRECTOR_NO_VTABLE - - -#ifdef __cplusplus -/* SwigValueWrapper is described in swig.swg */ -template class SwigValueWrapper { - struct SwigMovePointer { - T *ptr; - SwigMovePointer(T *p) : ptr(p) { } - ~SwigMovePointer() { delete ptr; } - SwigMovePointer& operator=(SwigMovePointer& rhs) { T* oldptr = ptr; ptr = 0; delete oldptr; ptr = rhs.ptr; rhs.ptr = 0; return *this; } - } pointer; - SwigValueWrapper& operator=(const SwigValueWrapper& rhs); - SwigValueWrapper(const SwigValueWrapper& rhs); -public: - SwigValueWrapper() : pointer(0) { } - SwigValueWrapper& operator=(const T& t) { SwigMovePointer tmp(new T(t)); pointer = tmp; return *this; } - operator T&() const { return *pointer.ptr; } - T *operator&() { return pointer.ptr; } -}; - -template T SwigValueInit() { - return T(); -} -#endif - -/* ----------------------------------------------------------------------------- - * This section contains generic SWIG labels for method/variable - * declarations/attributes, and other compiler dependent labels. - * ----------------------------------------------------------------------------- */ - -/* template workaround for compilers that cannot correctly implement the C++ standard */ -#ifndef SWIGTEMPLATEDISAMBIGUATOR -# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560) -# define SWIGTEMPLATEDISAMBIGUATOR template -# elif defined(__HP_aCC) -/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */ -/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */ -# define SWIGTEMPLATEDISAMBIGUATOR template -# else -# define SWIGTEMPLATEDISAMBIGUATOR -# endif -#endif - -/* inline attribute */ -#ifndef SWIGINLINE -# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__)) -# define SWIGINLINE inline -# else -# define SWIGINLINE -# endif -#endif - -/* attribute recognised by some compilers to avoid 'unused' warnings */ -#ifndef SWIGUNUSED -# if defined(__GNUC__) -# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) -# define SWIGUNUSED __attribute__ ((__unused__)) -# else -# define SWIGUNUSED -# endif -# elif defined(__ICC) -# define SWIGUNUSED __attribute__ ((__unused__)) -# else -# define SWIGUNUSED -# endif -#endif - -#ifndef SWIG_MSC_UNSUPPRESS_4505 -# if defined(_MSC_VER) -# pragma warning(disable : 4505) /* unreferenced local function has been removed */ -# endif -#endif - -#ifndef SWIGUNUSEDPARM -# ifdef __cplusplus -# define SWIGUNUSEDPARM(p) -# else -# define SWIGUNUSEDPARM(p) p SWIGUNUSED -# endif -#endif - -/* internal SWIG method */ -#ifndef SWIGINTERN -# define SWIGINTERN static SWIGUNUSED -#endif - -/* internal inline SWIG method */ -#ifndef SWIGINTERNINLINE -# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE -#endif - -/* exporting methods */ -#if defined(__GNUC__) -# if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) -# ifndef GCC_HASCLASSVISIBILITY -# define GCC_HASCLASSVISIBILITY -# endif -# endif -#endif - -#ifndef SWIGEXPORT -# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) -# if defined(STATIC_LINKED) -# define SWIGEXPORT -# else -# define SWIGEXPORT __declspec(dllexport) -# endif -# else -# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY) -# define SWIGEXPORT __attribute__ ((visibility("default"))) -# else -# define SWIGEXPORT -# endif -# endif -#endif - -/* calling conventions 
for Windows */ -#ifndef SWIGSTDCALL -# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) -# define SWIGSTDCALL __stdcall -# else -# define SWIGSTDCALL -# endif -#endif - -/* Deal with Microsoft's attempt at deprecating C standard runtime functions */ -#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE) -# define _CRT_SECURE_NO_DEPRECATE -#endif - -/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */ -#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE) -# define _SCL_SECURE_NO_DEPRECATE -#endif - -/* Deal with Apple's deprecated 'AssertMacros.h' from Carbon-framework */ -#if defined(__APPLE__) && !defined(__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES) -# define __ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES 0 -#endif - -/* Intel's compiler complains if a variable which was never initialised is - * cast to void, which is a common idiom which we use to indicate that we - * are aware a variable isn't used. So we just silence that warning. - * See: https://github.com/swig/swig/issues/192 for more discussion. - */ -#ifdef __INTEL_COMPILER -# pragma warning disable 592 -#endif - - -#if defined(_DEBUG) && defined(SWIG_PYTHON_INTERPRETER_NO_DEBUG) -/* Use debug wrappers with the Python release dll */ -# undef _DEBUG -# include -# define _DEBUG -#else -# include -#endif - -/* ----------------------------------------------------------------------------- - * swigrun.swg - * - * This file contains generic C API SWIG runtime support for pointer - * type checking. - * ----------------------------------------------------------------------------- */ - -/* This should only be incremented when either the layout of swig_type_info changes, - or for whatever reason, the runtime changes incompatibly */ -#define SWIG_RUNTIME_VERSION "4" - -/* define SWIG_TYPE_TABLE_NAME as "SWIG_TYPE_TABLE" */ -#ifdef SWIG_TYPE_TABLE -# define SWIG_QUOTE_STRING(x) #x -# define SWIG_EXPAND_AND_QUOTE_STRING(x) SWIG_QUOTE_STRING(x) -# define SWIG_TYPE_TABLE_NAME SWIG_EXPAND_AND_QUOTE_STRING(SWIG_TYPE_TABLE) -#else -# define SWIG_TYPE_TABLE_NAME -#endif - -/* - You can use the SWIGRUNTIME and SWIGRUNTIMEINLINE macros for - creating a static or dynamic library from the SWIG runtime code. - In 99.9% of the cases, SWIG just needs to declare them as 'static'. - - But only do this if strictly necessary, ie, if you have problems - with your compiler or suchlike. -*/ - -#ifndef SWIGRUNTIME -# define SWIGRUNTIME SWIGINTERN -#endif - -#ifndef SWIGRUNTIMEINLINE -# define SWIGRUNTIMEINLINE SWIGRUNTIME SWIGINLINE -#endif - -/* Generic buffer size */ -#ifndef SWIG_BUFFER_SIZE -# define SWIG_BUFFER_SIZE 1024 -#endif - -/* Flags for pointer conversions */ -#define SWIG_POINTER_DISOWN 0x1 -#define SWIG_CAST_NEW_MEMORY 0x2 - -/* Flags for new pointer objects */ -#define SWIG_POINTER_OWN 0x1 - - -/* - Flags/methods for returning states. - - The SWIG conversion methods, as ConvertPtr, return an integer - that tells if the conversion was successful or not. And if not, - an error code can be returned (see swigerrors.swg for the codes). - - Use the following macros/flags to set or process the returning - states. 
- - In old versions of SWIG, code such as the following was usually written: - - if (SWIG_ConvertPtr(obj,vptr,ty.flags) != -1) { - // success code - } else { - //fail code - } - - Now you can be more explicit: - - int res = SWIG_ConvertPtr(obj,vptr,ty.flags); - if (SWIG_IsOK(res)) { - // success code - } else { - // fail code - } - - which is the same really, but now you can also do - - Type *ptr; - int res = SWIG_ConvertPtr(obj,(void **)(&ptr),ty.flags); - if (SWIG_IsOK(res)) { - // success code - if (SWIG_IsNewObj(res) { - ... - delete *ptr; - } else { - ... - } - } else { - // fail code - } - - I.e., now SWIG_ConvertPtr can return new objects and you can - identify the case and take care of the deallocation. Of course that - also requires SWIG_ConvertPtr to return new result values, such as - - int SWIG_ConvertPtr(obj, ptr,...) { - if () { - if () { - *ptr = ; - return SWIG_NEWOBJ; - } else { - *ptr = ; - return SWIG_OLDOBJ; - } - } else { - return SWIG_BADOBJ; - } - } - - Of course, returning the plain '0(success)/-1(fail)' still works, but you can be - more explicit by returning SWIG_BADOBJ, SWIG_ERROR or any of the - SWIG errors code. - - Finally, if the SWIG_CASTRANK_MODE is enabled, the result code - allows to return the 'cast rank', for example, if you have this - - int food(double) - int fooi(int); - - and you call - - food(1) // cast rank '1' (1 -> 1.0) - fooi(1) // cast rank '0' - - just use the SWIG_AddCast()/SWIG_CheckState() -*/ - -#define SWIG_OK (0) -#define SWIG_ERROR (-1) -#define SWIG_IsOK(r) (r >= 0) -#define SWIG_ArgError(r) ((r != SWIG_ERROR) ? r : SWIG_TypeError) - -/* The CastRankLimit says how many bits are used for the cast rank */ -#define SWIG_CASTRANKLIMIT (1 << 8) -/* The NewMask denotes the object was created (using new/malloc) */ -#define SWIG_NEWOBJMASK (SWIG_CASTRANKLIMIT << 1) -/* The TmpMask is for in/out typemaps that use temporal objects */ -#define SWIG_TMPOBJMASK (SWIG_NEWOBJMASK << 1) -/* Simple returning values */ -#define SWIG_BADOBJ (SWIG_ERROR) -#define SWIG_OLDOBJ (SWIG_OK) -#define SWIG_NEWOBJ (SWIG_OK | SWIG_NEWOBJMASK) -#define SWIG_TMPOBJ (SWIG_OK | SWIG_TMPOBJMASK) -/* Check, add and del mask methods */ -#define SWIG_AddNewMask(r) (SWIG_IsOK(r) ? (r | SWIG_NEWOBJMASK) : r) -#define SWIG_DelNewMask(r) (SWIG_IsOK(r) ? (r & ~SWIG_NEWOBJMASK) : r) -#define SWIG_IsNewObj(r) (SWIG_IsOK(r) && (r & SWIG_NEWOBJMASK)) -#define SWIG_AddTmpMask(r) (SWIG_IsOK(r) ? (r | SWIG_TMPOBJMASK) : r) -#define SWIG_DelTmpMask(r) (SWIG_IsOK(r) ? (r & ~SWIG_TMPOBJMASK) : r) -#define SWIG_IsTmpObj(r) (SWIG_IsOK(r) && (r & SWIG_TMPOBJMASK)) - -/* Cast-Rank Mode */ -#if defined(SWIG_CASTRANK_MODE) -# ifndef SWIG_TypeRank -# define SWIG_TypeRank unsigned long -# endif -# ifndef SWIG_MAXCASTRANK /* Default cast allowed */ -# define SWIG_MAXCASTRANK (2) -# endif -# define SWIG_CASTRANKMASK ((SWIG_CASTRANKLIMIT) -1) -# define SWIG_CastRank(r) (r & SWIG_CASTRANKMASK) -SWIGINTERNINLINE int SWIG_AddCast(int r) { - return SWIG_IsOK(r) ? ((SWIG_CastRank(r) < SWIG_MAXCASTRANK) ? (r + 1) : SWIG_ERROR) : r; -} -SWIGINTERNINLINE int SWIG_CheckState(int r) { - return SWIG_IsOK(r) ? SWIG_CastRank(r) + 1 : 0; -} -#else /* no cast-rank mode */ -# define SWIG_AddCast(r) (r) -# define SWIG_CheckState(r) (SWIG_IsOK(r) ? 
[Remainder of this hunk: deletion of the SWIG-generated Python wrapper source for the CTC beam-search decoder bindings (PathTrie, Scorer, StringPiece, fst::StdVectorFst, lm::WordIndex, and the associated std::vector/std::pair containers). The deleted file is standard SWIG runtime boilerplate: swig_type_info / swig_cast_info tables with move-to-front type checking and binary search over mangled type names, hex packing/unpacking of raw pointers, Python 2/3 compatibility macros, error-code-to-exception mapping, the SwigPyObject and SwigPyPacked proxy types, pointer conversion and shadow-instance creation, capsule-based module registration, and the generated SWIGTYPE_* type table. The original generated code is not reproduced here.]
swig_types[41] -#define SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t swig_types[42] -#define SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t swig_types[43] -#define SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t swig_types[44] -#define SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t swig_types[45] -#define SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t swig_types[46] -#define SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t swig_types[47] -#define SWIGTYPE_p_swig__SwigPyIterator swig_types[48] -#define SWIGTYPE_p_value_type swig_types[49] -#define SWIGTYPE_p_void swig_types[50] -static swig_type_info *swig_types[52]; -static swig_module_info swig_module = {swig_types, 51, 0, 0, 0, 0}; -#define SWIG_TypeQuery(name) SWIG_TypeQueryModule(&swig_module, &swig_module, name) -#define SWIG_MangledTypeQuery(name) SWIG_MangledTypeQueryModule(&swig_module, &swig_module, name) - -/* -------- TYPES TABLE (END) -------- */ - -#if (PY_VERSION_HEX <= 0x02000000) -# if !defined(SWIG_PYTHON_CLASSIC) -# error "This python version requires swig to be run with the '-classic' option" -# endif -#endif - -/*----------------------------------------------- - @(target):= _swig_decoders.so - ------------------------------------------------*/ -#if PY_VERSION_HEX >= 0x03000000 -# define SWIG_init PyInit__swig_decoders - -#else -# define SWIG_init init_swig_decoders - -#endif -#define SWIG_name "_swig_decoders" - -#define SWIGVERSION 0x030012 -#define SWIG_VERSION SWIGVERSION - - -#define SWIG_as_voidptr(a) const_cast< void * >(static_cast< const void * >(a)) -#define SWIG_as_voidptrptr(a) ((void)SWIG_as_voidptr(*a),reinterpret_cast< void** >(a)) - - -#include - - -namespace swig { - class SwigPtr_PyObject { - protected: - PyObject *_obj; - - public: - SwigPtr_PyObject() :_obj(0) - { - } - - SwigPtr_PyObject(const SwigPtr_PyObject& item) : _obj(item._obj) - { - SWIG_PYTHON_THREAD_BEGIN_BLOCK; - Py_XINCREF(_obj); - SWIG_PYTHON_THREAD_END_BLOCK; - } - - SwigPtr_PyObject(PyObject *obj, bool initial_ref = true) :_obj(obj) - { - if (initial_ref) { - SWIG_PYTHON_THREAD_BEGIN_BLOCK; - Py_XINCREF(_obj); - SWIG_PYTHON_THREAD_END_BLOCK; - } - } - - SwigPtr_PyObject & operator=(const SwigPtr_PyObject& item) - { - SWIG_PYTHON_THREAD_BEGIN_BLOCK; - Py_XINCREF(item._obj); - Py_XDECREF(_obj); - _obj = item._obj; - SWIG_PYTHON_THREAD_END_BLOCK; - return *this; - } - - ~SwigPtr_PyObject() - { - SWIG_PYTHON_THREAD_BEGIN_BLOCK; - Py_XDECREF(_obj); - SWIG_PYTHON_THREAD_END_BLOCK; - } - - operator PyObject *() const - { - return _obj; - } - - PyObject *operator->() const - { - return 
_obj; - } - }; -} - - -namespace swig { - struct SwigVar_PyObject : SwigPtr_PyObject { - SwigVar_PyObject(PyObject* obj = 0) : SwigPtr_PyObject(obj, false) { } - - SwigVar_PyObject & operator = (PyObject* obj) - { - Py_XDECREF(_obj); - _obj = obj; - return *this; - } - }; -} - - -#include "scorer.h" -#include "ctc_beam_search_decoder.h" -#include "decoder_utils.h" -#include "path_trie.h" - - -#include - -#if PY_VERSION_HEX >= 0x03020000 -# define SWIGPY_SLICE_ARG(obj) ((PyObject*) (obj)) -#else -# define SWIGPY_SLICE_ARG(obj) ((PySliceObject*) (obj)) -#endif - - -#include -#include - - -#if defined(__GNUC__) -# if __GNUC__ == 2 && __GNUC_MINOR <= 96 -# define SWIG_STD_NOMODERN_STL -# endif -#endif - - -#include - - -#include - - -namespace swig { - struct stop_iteration { - }; - - struct SwigPyIterator { - private: - SwigPtr_PyObject _seq; - - protected: - SwigPyIterator(PyObject *seq) : _seq(seq) - { - } - - public: - virtual ~SwigPyIterator() {} - - // Access iterator method, required by Python - virtual PyObject *value() const = 0; - - // Forward iterator method, required by Python - virtual SwigPyIterator *incr(size_t n = 1) = 0; - - // Backward iterator method, very common in C++, but not required in Python - virtual SwigPyIterator *decr(size_t /*n*/ = 1) - { - throw stop_iteration(); - } - - // Random access iterator methods, but not required in Python - virtual ptrdiff_t distance(const SwigPyIterator &/*x*/) const - { - throw std::invalid_argument("operation not supported"); - } - - virtual bool equal (const SwigPyIterator &/*x*/) const - { - throw std::invalid_argument("operation not supported"); - } - - // C++ common/needed methods - virtual SwigPyIterator *copy() const = 0; - - PyObject *next() - { - SWIG_PYTHON_THREAD_BEGIN_BLOCK; // disable threads - PyObject *obj = value(); - incr(); - SWIG_PYTHON_THREAD_END_BLOCK; // re-enable threads - return obj; - } - - /* Make an alias for Python 3.x */ - PyObject *__next__() - { - return next(); - } - - PyObject *previous() - { - SWIG_PYTHON_THREAD_BEGIN_BLOCK; // disable threads - decr(); - PyObject *obj = value(); - SWIG_PYTHON_THREAD_END_BLOCK; // re-enable threads - return obj; - } - - SwigPyIterator *advance(ptrdiff_t n) - { - return (n > 0) ? incr(n) : decr(-n); - } - - bool operator == (const SwigPyIterator& x) const - { - return equal(x); - } - - bool operator != (const SwigPyIterator& x) const - { - return ! 
operator==(x); - } - - SwigPyIterator& operator += (ptrdiff_t n) - { - return *advance(n); - } - - SwigPyIterator& operator -= (ptrdiff_t n) - { - return *advance(-n); - } - - SwigPyIterator* operator + (ptrdiff_t n) const - { - return copy()->advance(n); - } - - SwigPyIterator* operator - (ptrdiff_t n) const - { - return copy()->advance(-n); - } - - ptrdiff_t operator - (const SwigPyIterator& x) const - { - return x.distance(*this); - } - - static swig_type_info* descriptor() { - static int init = 0; - static swig_type_info* desc = 0; - if (!init) { - desc = SWIG_TypeQuery("swig::SwigPyIterator *"); - init = 1; - } - return desc; - } - }; - -#if defined(SWIGPYTHON_BUILTIN) - inline PyObject* make_output_iterator_builtin (PyObject *pyself) - { - Py_INCREF(pyself); - return pyself; - } -#endif -} - - -SWIGINTERN int -SWIG_AsVal_double (PyObject *obj, double *val) -{ - int res = SWIG_TypeError; - if (PyFloat_Check(obj)) { - if (val) *val = PyFloat_AsDouble(obj); - return SWIG_OK; -#if PY_VERSION_HEX < 0x03000000 - } else if (PyInt_Check(obj)) { - if (val) *val = (double) PyInt_AsLong(obj); - return SWIG_OK; -#endif - } else if (PyLong_Check(obj)) { - double v = PyLong_AsDouble(obj); - if (!PyErr_Occurred()) { - if (val) *val = v; - return SWIG_OK; - } else { - PyErr_Clear(); - } - } -#ifdef SWIG_PYTHON_CAST_MODE - { - int dispatch = 0; - double d = PyFloat_AsDouble(obj); - if (!PyErr_Occurred()) { - if (val) *val = d; - return SWIG_AddCast(SWIG_OK); - } else { - PyErr_Clear(); - } - if (!dispatch) { - long v = PyLong_AsLong(obj); - if (!PyErr_Occurred()) { - if (val) *val = v; - return SWIG_AddCast(SWIG_AddCast(SWIG_OK)); - } else { - PyErr_Clear(); - } - } - } -#endif - return res; -} - - -#include - - -#include - - -SWIGINTERNINLINE int -SWIG_CanCastAsInteger(double *d, double min, double max) { - double x = *d; - if ((min <= x && x <= max)) { - double fx = floor(x); - double cx = ceil(x); - double rd = ((x - fx) < 0.5) ? 
fx : cx; /* simple rint */ - if ((errno == EDOM) || (errno == ERANGE)) { - errno = 0; - } else { - double summ, reps, diff; - if (rd < x) { - diff = x - rd; - } else if (rd > x) { - diff = rd - x; - } else { - return 1; - } - summ = rd + x; - reps = diff/summ; - if (reps < 8*DBL_EPSILON) { - *d = rd; - return 1; - } - } - } - return 0; -} - - -SWIGINTERN int -SWIG_AsVal_unsigned_SS_long (PyObject *obj, unsigned long *val) -{ -#if PY_VERSION_HEX < 0x03000000 - if (PyInt_Check(obj)) { - long v = PyInt_AsLong(obj); - if (v >= 0) { - if (val) *val = v; - return SWIG_OK; - } else { - return SWIG_OverflowError; - } - } else -#endif - if (PyLong_Check(obj)) { - unsigned long v = PyLong_AsUnsignedLong(obj); - if (!PyErr_Occurred()) { - if (val) *val = v; - return SWIG_OK; - } else { - PyErr_Clear(); - return SWIG_OverflowError; - } - } -#ifdef SWIG_PYTHON_CAST_MODE - { - int dispatch = 0; - unsigned long v = PyLong_AsUnsignedLong(obj); - if (!PyErr_Occurred()) { - if (val) *val = v; - return SWIG_AddCast(SWIG_OK); - } else { - PyErr_Clear(); - } - if (!dispatch) { - double d; - int res = SWIG_AddCast(SWIG_AsVal_double (obj,&d)); - if (SWIG_IsOK(res) && SWIG_CanCastAsInteger(&d, 0, ULONG_MAX)) { - if (val) *val = (unsigned long)(d); - return res; - } - } - } -#endif - return SWIG_TypeError; -} - - -#include -#if !defined(SWIG_NO_LLONG_MAX) -# if !defined(LLONG_MAX) && defined(__GNUC__) && defined (__LONG_LONG_MAX__) -# define LLONG_MAX __LONG_LONG_MAX__ -# define LLONG_MIN (-LLONG_MAX - 1LL) -# define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL) -# endif -#endif - - -#if defined(LLONG_MAX) && !defined(SWIG_LONG_LONG_AVAILABLE) -# define SWIG_LONG_LONG_AVAILABLE -#endif - - -#ifdef SWIG_LONG_LONG_AVAILABLE -SWIGINTERN int -SWIG_AsVal_unsigned_SS_long_SS_long (PyObject *obj, unsigned long long *val) -{ - int res = SWIG_TypeError; - if (PyLong_Check(obj)) { - unsigned long long v = PyLong_AsUnsignedLongLong(obj); - if (!PyErr_Occurred()) { - if (val) *val = v; - return SWIG_OK; - } else { - PyErr_Clear(); - res = SWIG_OverflowError; - } - } else { - unsigned long v; - res = SWIG_AsVal_unsigned_SS_long (obj,&v); - if (SWIG_IsOK(res)) { - if (val) *val = v; - return res; - } - } -#ifdef SWIG_PYTHON_CAST_MODE - { - const double mant_max = 1LL << DBL_MANT_DIG; - double d; - res = SWIG_AsVal_double (obj,&d); - if (SWIG_IsOK(res) && !SWIG_CanCastAsInteger(&d, 0, mant_max)) - return SWIG_OverflowError; - if (SWIG_IsOK(res) && SWIG_CanCastAsInteger(&d, 0, mant_max)) { - if (val) *val = (unsigned long long)(d); - return SWIG_AddCast(res); - } - res = SWIG_TypeError; - } -#endif - return res; -} -#endif - - -SWIGINTERNINLINE int -SWIG_AsVal_size_t (PyObject * obj, size_t *val) -{ - int res = SWIG_TypeError; -#ifdef SWIG_LONG_LONG_AVAILABLE - if (sizeof(size_t) <= sizeof(unsigned long)) { -#endif - unsigned long v; - res = SWIG_AsVal_unsigned_SS_long (obj, val ? &v : 0); - if (SWIG_IsOK(res) && val) *val = static_cast< size_t >(v); -#ifdef SWIG_LONG_LONG_AVAILABLE - } else if (sizeof(size_t) <= sizeof(unsigned long long)) { - unsigned long long v; - res = SWIG_AsVal_unsigned_SS_long_SS_long (obj, val ? &v : 0); - if (SWIG_IsOK(res) && val) *val = static_cast< size_t >(v); - } -#endif - return res; -} - - - #define SWIG_From_long PyInt_FromLong - - -#ifdef SWIG_LONG_LONG_AVAILABLE -SWIGINTERNINLINE PyObject* -SWIG_From_long_SS_long (long long value) -{ - return ((value < LONG_MIN) || (value > LONG_MAX)) ? 
- PyLong_FromLongLong(value) : PyInt_FromLong(static_cast< long >(value)); -} -#endif - - -SWIGINTERNINLINE PyObject * -SWIG_From_ptrdiff_t (ptrdiff_t value) -{ -#ifdef SWIG_LONG_LONG_AVAILABLE - if (sizeof(ptrdiff_t) <= sizeof(long)) { -#endif - return SWIG_From_long (static_cast< long >(value)); -#ifdef SWIG_LONG_LONG_AVAILABLE - } else { - /* assume sizeof(ptrdiff_t) <= sizeof(long long) */ - return SWIG_From_long_SS_long (static_cast< long long >(value)); - } -#endif -} - - -SWIGINTERNINLINE PyObject* - SWIG_From_bool (bool value) -{ - return PyBool_FromLong(value ? 1 : 0); -} - - -SWIGINTERN int -SWIG_AsVal_long (PyObject *obj, long* val) -{ -#if PY_VERSION_HEX < 0x03000000 - if (PyInt_Check(obj)) { - if (val) *val = PyInt_AsLong(obj); - return SWIG_OK; - } else -#endif - if (PyLong_Check(obj)) { - long v = PyLong_AsLong(obj); - if (!PyErr_Occurred()) { - if (val) *val = v; - return SWIG_OK; - } else { - PyErr_Clear(); - return SWIG_OverflowError; - } - } -#ifdef SWIG_PYTHON_CAST_MODE - { - int dispatch = 0; - long v = PyInt_AsLong(obj); - if (!PyErr_Occurred()) { - if (val) *val = v; - return SWIG_AddCast(SWIG_OK); - } else { - PyErr_Clear(); - } - if (!dispatch) { - double d; - int res = SWIG_AddCast(SWIG_AsVal_double (obj,&d)); - if (SWIG_IsOK(res) && SWIG_CanCastAsInteger(&d, LONG_MIN, LONG_MAX)) { - if (val) *val = (long)(d); - return res; - } - } - } -#endif - return SWIG_TypeError; -} - - -#ifdef SWIG_LONG_LONG_AVAILABLE -SWIGINTERN int -SWIG_AsVal_long_SS_long (PyObject *obj, long long *val) -{ - int res = SWIG_TypeError; - if (PyLong_Check(obj)) { - long long v = PyLong_AsLongLong(obj); - if (!PyErr_Occurred()) { - if (val) *val = v; - return SWIG_OK; - } else { - PyErr_Clear(); - res = SWIG_OverflowError; - } - } else { - long v; - res = SWIG_AsVal_long (obj,&v); - if (SWIG_IsOK(res)) { - if (val) *val = v; - return res; - } - } -#ifdef SWIG_PYTHON_CAST_MODE - { - const double mant_max = 1LL << DBL_MANT_DIG; - const double mant_min = -mant_max; - double d; - res = SWIG_AsVal_double (obj,&d); - if (SWIG_IsOK(res) && !SWIG_CanCastAsInteger(&d, mant_min, mant_max)) - return SWIG_OverflowError; - if (SWIG_IsOK(res) && SWIG_CanCastAsInteger(&d, mant_min, mant_max)) { - if (val) *val = (long long)(d); - return SWIG_AddCast(res); - } - res = SWIG_TypeError; - } -#endif - return res; -} -#endif - - -SWIGINTERNINLINE int -SWIG_AsVal_ptrdiff_t (PyObject * obj, ptrdiff_t *val) -{ - int res = SWIG_TypeError; -#ifdef SWIG_LONG_LONG_AVAILABLE - if (sizeof(ptrdiff_t) <= sizeof(long)) { -#endif - long v; - res = SWIG_AsVal_long (obj, val ? &v : 0); - if (SWIG_IsOK(res) && val) *val = static_cast< ptrdiff_t >(v); -#ifdef SWIG_LONG_LONG_AVAILABLE - } else if (sizeof(ptrdiff_t) <= sizeof(long long)) { - long long v; - res = SWIG_AsVal_long_SS_long (obj, val ? &v : 0); - if (SWIG_IsOK(res) && val) *val = static_cast< ptrdiff_t >(v); - } -#endif - return res; -} - - -#include - - -#include - - -#include - - -SWIGINTERN int -SWIG_AsVal_int (PyObject * obj, int *val) -{ - long v; - int res = SWIG_AsVal_long (obj, &v); - if (SWIG_IsOK(res)) { - if ((v < INT_MIN || v > INT_MAX)) { - return SWIG_OverflowError; - } else { - if (val) *val = static_cast< int >(v); - } - } - return res; -} - - -SWIGINTERN int -SWIG_AsVal_bool (PyObject *obj, bool *val) -{ - int r; - if (!PyBool_Check(obj)) - return SWIG_ERROR; - r = PyObject_IsTrue(obj); - if (r == -1) - return SWIG_ERROR; - if (val) *val = r ? 
true : false; - return SWIG_OK; -} - - -/* Getting isfinite working pre C99 across multiple platforms is non-trivial. Users can provide SWIG_isfinite on older platforms. */ -#ifndef SWIG_isfinite -/* isfinite() is a macro for C99 */ -# if defined(isfinite) -# define SWIG_isfinite(X) (isfinite(X)) -# elif defined __cplusplus && __cplusplus >= 201103L -/* Use a template so that this works whether isfinite() is std::isfinite() or - * in the global namespace. The reality seems to vary between compiler - * versions. - * - * Make sure namespace std exists to avoid compiler warnings. - * - * extern "C++" is required as this fragment can end up inside an extern "C" { } block - */ -namespace std { } -extern "C++" template -inline int SWIG_isfinite_func(T x) { - using namespace std; - return isfinite(x); -} -# define SWIG_isfinite(X) (SWIG_isfinite_func(X)) -# elif defined(_MSC_VER) -# define SWIG_isfinite(X) (_finite(X)) -# elif defined(__sun) && defined(__SVR4) -# include -# define SWIG_isfinite(X) (finite(X)) -# endif -#endif - - -/* Accept infinite as a valid float value unless we are unable to check if a value is finite */ -#ifdef SWIG_isfinite -# define SWIG_Float_Overflow_Check(X) ((X < -FLT_MAX || X > FLT_MAX) && SWIG_isfinite(X)) -#else -# define SWIG_Float_Overflow_Check(X) ((X < -FLT_MAX || X > FLT_MAX)) -#endif - - -SWIGINTERN int -SWIG_AsVal_float (PyObject * obj, float *val) -{ - double v; - int res = SWIG_AsVal_double (obj, &v); - if (SWIG_IsOK(res)) { - if (SWIG_Float_Overflow_Check(v)) { - return SWIG_OverflowError; - } else { - if (val) *val = static_cast< float >(v); - } - } - return res; -} - - - #define SWIG_From_double PyFloat_FromDouble - - -SWIGINTERNINLINE PyObject * -SWIG_From_float (float value) -{ - return SWIG_From_double (value); -} - - -SWIGINTERNINLINE PyObject* - SWIG_From_int (int value) -{ - return PyInt_FromLong((long) value); -} - - -namespace swig { - template - struct noconst_traits { - typedef Type noconst_type; - }; - - template - struct noconst_traits { - typedef Type noconst_type; - }; - - /* - type categories - */ - struct pointer_category { }; - struct value_category { }; - - /* - General traits that provides type_name and type_info - */ - template struct traits { }; - - template - inline const char* type_name() { - return traits::noconst_type >::type_name(); - } - - template struct traits_info { - static swig_type_info *type_query(std::string name) { - name += " *"; - return SWIG_TypeQuery(name.c_str()); - } - static swig_type_info *type_info() { - static swig_type_info *info = type_query(type_name()); - return info; - } - }; - - /* - Partial specialization for pointers (traits_info) - */ - template struct traits_info { - static swig_type_info *type_query(std::string name) { - name += " *"; - return SWIG_TypeQuery(name.c_str()); - } - static swig_type_info *type_info() { - static swig_type_info *info = type_query(type_name()); - return info; - } - }; - - template - inline swig_type_info *type_info() { - return traits_info::type_info(); - } - - /* - Partial specialization for pointers (traits) - */ - template struct traits { - typedef pointer_category category; - static std::string make_ptr_name(const char* name) { - std::string ptrname = name; - ptrname += " *"; - return ptrname; - } - static const char* type_name() { - static std::string name = make_ptr_name(swig::type_name()); - return name.c_str(); - } - }; - - template - struct traits_as { }; - - template - struct traits_check { }; - -} - - -namespace swig { - /* - Traits that provides the from 
method - */ - template struct traits_from_ptr { - static PyObject *from(Type *val, int owner = 0) { - return SWIG_InternalNewPointerObj(val, type_info(), owner); - } - }; - - template struct traits_from { - static PyObject *from(const Type& val) { - return traits_from_ptr::from(new Type(val), 1); - } - }; - - template struct traits_from { - static PyObject *from(Type* val) { - return traits_from_ptr::from(val, 0); - } - }; - - template struct traits_from { - static PyObject *from(const Type* val) { - return traits_from_ptr::from(const_cast(val), 0); - } - }; - - - template - inline PyObject *from(const Type& val) { - return traits_from::from(val); - } - - template - inline PyObject *from_ptr(Type* val, int owner) { - return traits_from_ptr::from(val, owner); - } - - /* - Traits that provides the asval/as/check method - */ - template - struct traits_asptr { - static int asptr(PyObject *obj, Type **val) { - Type *p; - swig_type_info *descriptor = type_info(); - int res = descriptor ? SWIG_ConvertPtr(obj, (void **)&p, descriptor, 0) : SWIG_ERROR; - if (SWIG_IsOK(res)) { - if (val) *val = p; - } - return res; - } - }; - - template - inline int asptr(PyObject *obj, Type **vptr) { - return traits_asptr::asptr(obj, vptr); - } - - template - struct traits_asval { - static int asval(PyObject *obj, Type *val) { - if (val) { - Type *p = 0; - int res = traits_asptr::asptr(obj, &p); - if (!SWIG_IsOK(res)) return res; - if (p) { - typedef typename noconst_traits::noconst_type noconst_type; - *(const_cast(val)) = *p; - if (SWIG_IsNewObj(res)){ - delete p; - res = SWIG_DelNewMask(res); - } - return res; - } else { - return SWIG_ERROR; - } - } else { - return traits_asptr::asptr(obj, (Type **)(0)); - } - } - }; - - template struct traits_asval { - static int asval(PyObject *obj, Type **val) { - if (val) { - typedef typename noconst_traits::noconst_type noconst_type; - noconst_type *p = 0; - int res = traits_asptr::asptr(obj, &p); - if (SWIG_IsOK(res)) { - *(const_cast(val)) = p; - } - return res; - } else { - return traits_asptr::asptr(obj, (Type **)(0)); - } - } - }; - - template - inline int asval(PyObject *obj, Type *val) { - return traits_asval::asval(obj, val); - } - - template - struct traits_as { - static Type as(PyObject *obj, bool throw_error) { - Type v; - int res = asval(obj, &v); - if (!obj || !SWIG_IsOK(res)) { - if (!PyErr_Occurred()) { - ::SWIG_Error(SWIG_TypeError, swig::type_name()); - } - if (throw_error) throw std::invalid_argument("bad type"); - } - return v; - } - }; - - template - struct traits_as { - static Type as(PyObject *obj, bool throw_error) { - Type *v = 0; - int res = (obj ? traits_asptr::asptr(obj, &v) : SWIG_ERROR); - if (SWIG_IsOK(res) && v) { - if (SWIG_IsNewObj(res)) { - Type r(*v); - delete v; - return r; - } else { - return *v; - } - } else { - // Uninitialized return value, no Type() constructor required. - static Type *v_def = (Type*) malloc(sizeof(Type)); - if (!PyErr_Occurred()) { - SWIG_Error(SWIG_TypeError, swig::type_name()); - } - if (throw_error) throw std::invalid_argument("bad type"); - memset(v_def,0,sizeof(Type)); - return *v_def; - } - } - }; - - template - struct traits_as { - static Type* as(PyObject *obj, bool throw_error) { - Type *v = 0; - int res = (obj ? 
traits_asptr::asptr(obj, &v) : SWIG_ERROR); - if (SWIG_IsOK(res)) { - return v; - } else { - if (!PyErr_Occurred()) { - SWIG_Error(SWIG_TypeError, swig::type_name()); - } - if (throw_error) throw std::invalid_argument("bad type"); - return 0; - } - } - }; - - template - inline Type as(PyObject *obj, bool te = false) { - return traits_as::category>::as(obj, te); - } - - template - struct traits_check { - static bool check(PyObject *obj) { - int res = obj ? asval(obj, (Type *)(0)) : SWIG_ERROR; - return SWIG_IsOK(res) ? true : false; - } - }; - - template - struct traits_check { - static bool check(PyObject *obj) { - int res = obj ? asptr(obj, (Type **)(0)) : SWIG_ERROR; - return SWIG_IsOK(res) ? true : false; - } - }; - - template - inline bool check(PyObject *obj) { - return traits_check::category>::check(obj); - } -} - - -#include - -namespace std { - template <> - struct less - { - bool - operator()(PyObject * v, PyObject *w) const - { - bool res; - SWIG_PYTHON_THREAD_BEGIN_BLOCK; - res = PyObject_RichCompareBool(v, w, Py_LT) ? true : false; - /* This may fall into a case of inconsistent - eg. ObjA > ObjX > ObjB - but ObjA < ObjB - */ - if( PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_TypeError) ) - { - /* Objects can't be compared, this mostly occurred in Python 3.0 */ - /* Compare their ptr directly for a workaround */ - res = (v < w); - PyErr_Clear(); - } - SWIG_PYTHON_THREAD_END_BLOCK; - return res; - } - }; - - template <> - struct less - { - bool - operator()(const swig::SwigPtr_PyObject& v, const swig::SwigPtr_PyObject& w) const - { - return std::less()(v, w); - } - }; - - template <> - struct less - { - bool - operator()(const swig::SwigVar_PyObject& v, const swig::SwigVar_PyObject& w) const - { - return std::less()(v, w); - } - }; - -} - -namespace swig { - template <> struct traits { - typedef value_category category; - static const char* type_name() { return "PyObject *"; } - }; - - template <> struct traits_asval { - typedef PyObject * value_type; - static int asval(PyObject *obj, value_type *val) { - if (val) *val = obj; - return SWIG_OK; - } - }; - - template <> - struct traits_check { - static bool check(PyObject *) { - return true; - } - }; - - template <> struct traits_from { - typedef PyObject * value_type; - static PyObject *from(const value_type& val) { - Py_XINCREF(val); - return val; - } - }; - -} - -namespace swig { - template - inline size_t - check_index(Difference i, size_t size, bool insert = false) { - if ( i < 0 ) { - if ((size_t) (-i) <= size) - return (size_t) (i + size); - } else if ( (size_t) i < size ) { - return (size_t) i; - } else if (insert && ((size_t) i == size)) { - return size; - } - throw std::out_of_range("index out of range"); - } - - template - void - slice_adjust(Difference i, Difference j, Py_ssize_t step, size_t size, Difference &ii, Difference &jj, bool insert = false) { - if (step == 0) { - throw std::invalid_argument("slice step cannot be zero"); - } else if (step > 0) { - // Required range: 0 <= i < size, 0 <= j < size, i <= j - if (i < 0) { - ii = 0; - } else if (i < (Difference)size) { - ii = i; - } else if (insert && (i >= (Difference)size)) { - ii = (Difference)size; - } - if (j < 0) { - jj = 0; - } else { - jj = (j < (Difference)size) ? 
j : (Difference)size; - } - if (jj < ii) - jj = ii; - } else { - // Required range: -1 <= i < size-1, -1 <= j < size-1, i >= j - if (i < -1) { - ii = -1; - } else if (i < (Difference) size) { - ii = i; - } else if (i >= (Difference)(size-1)) { - ii = (Difference)(size-1); - } - if (j < -1) { - jj = -1; - } else { - jj = (j < (Difference)size ) ? j : (Difference)(size-1); - } - if (ii < jj) - ii = jj; - } - } - - template - inline typename Sequence::iterator - getpos(Sequence* self, Difference i) { - typename Sequence::iterator pos = self->begin(); - std::advance(pos, check_index(i,self->size())); - return pos; - } - - template - inline typename Sequence::const_iterator - cgetpos(const Sequence* self, Difference i) { - typename Sequence::const_iterator pos = self->begin(); - std::advance(pos, check_index(i,self->size())); - return pos; - } - - template - inline void - erase(Sequence* seq, const typename Sequence::iterator& position) { - seq->erase(position); - } - - template - struct traits_reserve { - static void reserve(Sequence & /*seq*/, typename Sequence::size_type /*n*/) { - // This should be specialized for types that support reserve - } - }; - - template - inline Sequence* - getslice(const Sequence* self, Difference i, Difference j, Py_ssize_t step) { - typename Sequence::size_type size = self->size(); - Difference ii = 0; - Difference jj = 0; - swig::slice_adjust(i, j, step, size, ii, jj); - - if (step > 0) { - typename Sequence::const_iterator sb = self->begin(); - typename Sequence::const_iterator se = self->begin(); - std::advance(sb,ii); - std::advance(se,jj); - if (step == 1) { - return new Sequence(sb, se); - } else { - Sequence *sequence = new Sequence(); - swig::traits_reserve::reserve(*sequence, (jj - ii + step - 1) / step); - typename Sequence::const_iterator it = sb; - while (it!=se) { - sequence->push_back(*it); - for (Py_ssize_t c=0; c::reserve(*sequence, (ii - jj - step - 1) / -step); - typename Sequence::const_reverse_iterator sb = self->rbegin(); - typename Sequence::const_reverse_iterator se = self->rbegin(); - std::advance(sb,size-ii-1); - std::advance(se,size-jj-1); - typename Sequence::const_reverse_iterator it = sb; - while (it!=se) { - sequence->push_back(*it); - for (Py_ssize_t c=0; c<-step && it!=se; ++c) - it++; - } - return sequence; - } - } - - template - inline void - setslice(Sequence* self, Difference i, Difference j, Py_ssize_t step, const InputSeq& is = InputSeq()) { - typename Sequence::size_type size = self->size(); - Difference ii = 0; - Difference jj = 0; - swig::slice_adjust(i, j, step, size, ii, jj, true); - if (step > 0) { - if (step == 1) { - size_t ssize = jj - ii; - if (ssize <= is.size()) { - // expanding/staying the same size - swig::traits_reserve::reserve(*self, self->size() - ssize + is.size()); - typename Sequence::iterator sb = self->begin(); - typename InputSeq::const_iterator isit = is.begin(); - std::advance(sb,ii); - std::advance(isit, jj - ii); - self->insert(std::copy(is.begin(), isit, sb), isit, is.end()); - } else { - // shrinking - typename Sequence::iterator sb = self->begin(); - typename Sequence::iterator se = self->begin(); - std::advance(sb,ii); - std::advance(se,jj); - self->erase(sb,se); - sb = self->begin(); - std::advance(sb,ii); - self->insert(sb, is.begin(), is.end()); - } - } else { - size_t replacecount = (jj - ii + step - 1) / step; - if (is.size() != replacecount) { - char msg[1024]; - sprintf(msg, "attempt to assign sequence of size %lu to extended slice of size %lu", (unsigned long)is.size(), (unsigned 
long)replacecount); - throw std::invalid_argument(msg); - } - typename Sequence::const_iterator isit = is.begin(); - typename Sequence::iterator it = self->begin(); - std::advance(it,ii); - for (size_t rc=0; rcend(); ++rc) { - *it++ = *isit++; - for (Py_ssize_t c=0; c<(step-1) && it != self->end(); ++c) - it++; - } - } - } else { - size_t replacecount = (ii - jj - step - 1) / -step; - if (is.size() != replacecount) { - char msg[1024]; - sprintf(msg, "attempt to assign sequence of size %lu to extended slice of size %lu", (unsigned long)is.size(), (unsigned long)replacecount); - throw std::invalid_argument(msg); - } - typename Sequence::const_iterator isit = is.begin(); - typename Sequence::reverse_iterator it = self->rbegin(); - std::advance(it,size-ii-1); - for (size_t rc=0; rcrend(); ++rc) { - *it++ = *isit++; - for (Py_ssize_t c=0; c<(-step-1) && it != self->rend(); ++c) - it++; - } - } - } - - template - inline void - delslice(Sequence* self, Difference i, Difference j, Py_ssize_t step) { - typename Sequence::size_type size = self->size(); - Difference ii = 0; - Difference jj = 0; - swig::slice_adjust(i, j, step, size, ii, jj, true); - if (step > 0) { - typename Sequence::iterator sb = self->begin(); - std::advance(sb,ii); - if (step == 1) { - typename Sequence::iterator se = self->begin(); - std::advance(se,jj); - self->erase(sb,se); - } else { - typename Sequence::iterator it = sb; - size_t delcount = (jj - ii + step - 1) / step; - while (delcount) { - it = self->erase(it); - for (Py_ssize_t c=0; c<(step-1) && it != self->end(); ++c) - it++; - delcount--; - } - } - } else { - typename Sequence::reverse_iterator sb = self->rbegin(); - std::advance(sb,size-ii-1); - typename Sequence::reverse_iterator it = sb; - size_t delcount = (ii - jj - step - 1) / -step; - while (delcount) { - it = typename Sequence::reverse_iterator(self->erase((++it).base())); - for (Py_ssize_t c=0; c<(-step-1) && it != self->rend(); ++c) - it++; - delcount--; - } - } - } -} - - -#if defined(__SUNPRO_CC) && defined(_RWSTD_VER) -# if !defined(SWIG_NO_STD_NOITERATOR_TRAITS_STL) -# define SWIG_STD_NOITERATOR_TRAITS_STL -# endif -#endif - -#if !defined(SWIG_STD_NOITERATOR_TRAITS_STL) -#include -#else -namespace std { - template - struct iterator_traits { - typedef ptrdiff_t difference_type; - typedef typename Iterator::value_type value_type; - }; - - template - struct iterator_traits<__reverse_bi_iterator > { - typedef Distance difference_type; - typedef T value_type; - }; - - template - struct iterator_traits { - typedef T value_type; - typedef ptrdiff_t difference_type; - }; - - template - inline typename iterator_traits<_InputIterator>::difference_type - distance(_InputIterator __first, _InputIterator __last) - { - typename iterator_traits<_InputIterator>::difference_type __n = 0; - while (__first != __last) { - ++__first; ++__n; - } - return __n; - } -} -#endif - - -namespace swig { - template - class SwigPyIterator_T : public SwigPyIterator - { - public: - typedef OutIterator out_iterator; - typedef typename std::iterator_traits::value_type value_type; - typedef SwigPyIterator_T self_type; - - SwigPyIterator_T(out_iterator curr, PyObject *seq) - : SwigPyIterator(seq), current(curr) - { - } - - const out_iterator& get_current() const - { - return current; - } - - - bool equal (const SwigPyIterator &iter) const - { - const self_type *iters = dynamic_cast(&iter); - if (iters) { - return (current == iters->get_current()); - } else { - throw std::invalid_argument("bad iterator type"); - } - } - - ptrdiff_t 
distance(const SwigPyIterator &iter) const - { - const self_type *iters = dynamic_cast(&iter); - if (iters) { - return std::distance(current, iters->get_current()); - } else { - throw std::invalid_argument("bad iterator type"); - } - } - - protected: - out_iterator current; - }; - - template - struct from_oper - { - typedef const ValueType& argument_type; - typedef PyObject *result_type; - result_type operator()(argument_type v) const - { - return swig::from(v); - } - }; - - template::value_type, - typename FromOper = from_oper > - class SwigPyIteratorOpen_T : public SwigPyIterator_T - { - public: - FromOper from; - typedef OutIterator out_iterator; - typedef ValueType value_type; - typedef SwigPyIterator_T base; - typedef SwigPyIteratorOpen_T self_type; - - SwigPyIteratorOpen_T(out_iterator curr, PyObject *seq) - : SwigPyIterator_T(curr, seq) - { - } - - PyObject *value() const { - return from(static_cast(*(base::current))); - } - - SwigPyIterator *copy() const - { - return new self_type(*this); - } - - SwigPyIterator *incr(size_t n = 1) - { - while (n--) { - ++base::current; - } - return this; - } - - SwigPyIterator *decr(size_t n = 1) - { - while (n--) { - --base::current; - } - return this; - } - }; - - template::value_type, - typename FromOper = from_oper > - class SwigPyIteratorClosed_T : public SwigPyIterator_T - { - public: - FromOper from; - typedef OutIterator out_iterator; - typedef ValueType value_type; - typedef SwigPyIterator_T base; - typedef SwigPyIteratorClosed_T self_type; - - SwigPyIteratorClosed_T(out_iterator curr, out_iterator first, out_iterator last, PyObject *seq) - : SwigPyIterator_T(curr, seq), begin(first), end(last) - { - } - - PyObject *value() const { - if (base::current == end) { - throw stop_iteration(); - } else { - return from(static_cast(*(base::current))); - } - } - - SwigPyIterator *copy() const - { - return new self_type(*this); - } - - SwigPyIterator *incr(size_t n = 1) - { - while (n--) { - if (base::current == end) { - throw stop_iteration(); - } else { - ++base::current; - } - } - return this; - } - - SwigPyIterator *decr(size_t n = 1) - { - while (n--) { - if (base::current == begin) { - throw stop_iteration(); - } else { - --base::current; - } - } - return this; - } - - private: - out_iterator begin; - out_iterator end; - }; - - template - inline SwigPyIterator* - make_output_iterator(const OutIter& current, const OutIter& begin,const OutIter& end, PyObject *seq = 0) - { - return new SwigPyIteratorClosed_T(current, begin, end, seq); - } - - template - inline SwigPyIterator* - make_output_iterator(const OutIter& current, PyObject *seq = 0) - { - return new SwigPyIteratorOpen_T(current, seq); - } - -} - - -namespace swig -{ - template - struct SwigPySequence_Ref - { - SwigPySequence_Ref(PyObject* seq, Py_ssize_t index) - : _seq(seq), _index(index) - { - } - - operator T () const - { - swig::SwigVar_PyObject item = PySequence_GetItem(_seq, _index); - try { - return swig::as(item, true); - } catch (std::exception& e) { - char msg[1024]; - sprintf(msg, "in sequence element %d ", (int)_index); - if (!PyErr_Occurred()) { - ::SWIG_Error(SWIG_TypeError, swig::type_name()); - } - SWIG_Python_AddErrorMsg(msg); - SWIG_Python_AddErrorMsg(e.what()); - throw; - } - } - - SwigPySequence_Ref& operator=(const T& v) - { - PySequence_SetItem(_seq, _index, swig::from(v)); - return *this; - } - - private: - PyObject* _seq; - Py_ssize_t _index; - }; - - template - struct SwigPySequence_ArrowProxy - { - SwigPySequence_ArrowProxy(const T& x): m_value(x) {} - const T* 
operator->() const { return &m_value; } - operator const T*() const { return &m_value; } - T m_value; - }; - - template - struct SwigPySequence_InputIterator - { - typedef SwigPySequence_InputIterator self; - - typedef std::random_access_iterator_tag iterator_category; - typedef Reference reference; - typedef T value_type; - typedef T* pointer; - typedef Py_ssize_t difference_type; - - SwigPySequence_InputIterator() - { - } - - SwigPySequence_InputIterator(PyObject* seq, Py_ssize_t index) - : _seq(seq), _index(index) - { - } - - reference operator*() const - { - return reference(_seq, _index); - } - - SwigPySequence_ArrowProxy - operator->() const { - return SwigPySequence_ArrowProxy(operator*()); - } - - bool operator==(const self& ri) const - { - return (_index == ri._index) && (_seq == ri._seq); - } - - bool operator!=(const self& ri) const - { - return !(operator==(ri)); - } - - self& operator ++ () - { - ++_index; - return *this; - } - - self& operator -- () - { - --_index; - return *this; - } - - self& operator += (difference_type n) - { - _index += n; - return *this; - } - - self operator +(difference_type n) const - { - return self(_seq, _index + n); - } - - self& operator -= (difference_type n) - { - _index -= n; - return *this; - } - - self operator -(difference_type n) const - { - return self(_seq, _index - n); - } - - difference_type operator - (const self& ri) const - { - return _index - ri._index; - } - - bool operator < (const self& ri) const - { - return _index < ri._index; - } - - reference - operator[](difference_type n) const - { - return reference(_seq, _index + n); - } - - private: - PyObject* _seq; - difference_type _index; - }; - - // STL container wrapper around a Python sequence - template - struct SwigPySequence_Cont - { - typedef SwigPySequence_Ref reference; - typedef const SwigPySequence_Ref const_reference; - typedef T value_type; - typedef T* pointer; - typedef Py_ssize_t difference_type; - typedef size_t size_type; - typedef const pointer const_pointer; - typedef SwigPySequence_InputIterator iterator; - typedef SwigPySequence_InputIterator const_iterator; - - SwigPySequence_Cont(PyObject* seq) : _seq(0) - { - if (!PySequence_Check(seq)) { - throw std::invalid_argument("a sequence is expected"); - } - _seq = seq; - Py_INCREF(_seq); - } - - ~SwigPySequence_Cont() - { - Py_XDECREF(_seq); - } - - size_type size() const - { - return static_cast(PySequence_Size(_seq)); - } - - bool empty() const - { - return size() == 0; - } - - iterator begin() - { - return iterator(_seq, 0); - } - - const_iterator begin() const - { - return const_iterator(_seq, 0); - } - - iterator end() - { - return iterator(_seq, size()); - } - - const_iterator end() const - { - return const_iterator(_seq, size()); - } - - reference operator[](difference_type n) - { - return reference(_seq, n); - } - - const_reference operator[](difference_type n) const - { - return const_reference(_seq, n); - } - - bool check(bool set_err = true) const - { - Py_ssize_t s = size(); - for (Py_ssize_t i = 0; i < s; ++i) { - swig::SwigVar_PyObject item = PySequence_GetItem(_seq, i); - if (!swig::check(item)) { - if (set_err) { - char msg[1024]; - sprintf(msg, "in sequence element %d", (int)i); - SWIG_Error(SWIG_RuntimeError, msg); - } - return false; - } - } - return true; - } - - private: - PyObject* _seq; - }; - -} - - -namespace swig { - template <> struct traits< double > { - typedef value_category category; - static const char* type_name() { return"double"; } - }; - template <> struct traits_asval< double > 
{ - typedef double value_type; - static int asval(PyObject *obj, value_type *val) { - return SWIG_AsVal_double (obj, val); - } - }; - template <> struct traits_from< double > { - typedef double value_type; - static PyObject *from(const value_type& val) { - return SWIG_From_double (val); - } - }; -} - - -namespace swig { - template - inline void - assign(const SwigPySeq& swigpyseq, Seq* seq) { - // seq->assign(swigpyseq.begin(), swigpyseq.end()); // not used as not always implemented - typedef typename SwigPySeq::value_type value_type; - typename SwigPySeq::const_iterator it = swigpyseq.begin(); - for (;it != swigpyseq.end(); ++it) { - seq->insert(seq->end(),(value_type)(*it)); - } - } - - template - struct traits_asptr_stdseq { - typedef Seq sequence; - typedef T value_type; - - static int asptr(PyObject *obj, sequence **seq) { - if (obj == Py_None || SWIG_Python_GetSwigThis(obj)) { - sequence *p; - swig_type_info *descriptor = swig::type_info(); - if (descriptor && SWIG_IsOK(::SWIG_ConvertPtr(obj, (void **)&p, descriptor, 0))) { - if (seq) *seq = p; - return SWIG_OLDOBJ; - } - } else if (PySequence_Check(obj)) { - try { - SwigPySequence_Cont swigpyseq(obj); - if (seq) { - sequence *pseq = new sequence(); - assign(swigpyseq, pseq); - *seq = pseq; - return SWIG_NEWOBJ; - } else { - return swigpyseq.check() ? SWIG_OK : SWIG_ERROR; - } - } catch (std::exception& e) { - if (seq) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, e.what()); - } - } - return SWIG_ERROR; - } - } - return SWIG_ERROR; - } - }; - - template - struct traits_from_stdseq { - typedef Seq sequence; - typedef T value_type; - typedef typename Seq::size_type size_type; - typedef typename sequence::const_iterator const_iterator; - - static PyObject *from(const sequence& seq) { -#ifdef SWIG_PYTHON_EXTRA_NATIVE_CONTAINERS - swig_type_info *desc = swig::type_info(); - if (desc && desc->clientdata) { - return SWIG_InternalNewPointerObj(new sequence(seq), desc, SWIG_POINTER_OWN); - } -#endif - size_type size = seq.size(); - if (size <= (size_type)INT_MAX) { - PyObject *obj = PyTuple_New((Py_ssize_t)size); - Py_ssize_t i = 0; - for (const_iterator it = seq.begin(); it != seq.end(); ++it, ++i) { - PyTuple_SetItem(obj,i,swig::from(*it)); - } - return obj; - } else { - PyErr_SetString(PyExc_OverflowError,"sequence size not valid in python"); - return NULL; - } - } - }; -} - - - namespace swig { - template - struct traits_reserve > { - static void reserve(std::vector &seq, typename std::vector::size_type n) { - seq.reserve(n); - } - }; - - template - struct traits_asptr > { - static int asptr(PyObject *obj, std::vector **vec) { - return traits_asptr_stdseq >::asptr(obj, vec); - } - }; - - template - struct traits_from > { - static PyObject *from(const std::vector& vec) { - return traits_from_stdseq >::from(vec); - } - }; - } - - - namespace swig { - template <> struct traits > > { - typedef pointer_category category; - static const char* type_name() { - return "std::vector<" "double" "," "std::allocator< double >" " >"; - } - }; - } - -SWIGINTERN swig::SwigPyIterator *std_vector_Sl_double_Sg__iterator(std::vector< double > *self,PyObject **PYTHON_SELF){ - return swig::make_output_iterator(self->begin(), self->begin(), self->end(), *PYTHON_SELF); - } -SWIGINTERN bool std_vector_Sl_double_Sg____nonzero__(std::vector< double > const *self){ - return !(self->empty()); - } -SWIGINTERN bool std_vector_Sl_double_Sg____bool__(std::vector< double > const *self){ - return !(self->empty()); - } -SWIGINTERN std::vector< double 
>::size_type std_vector_Sl_double_Sg____len__(std::vector< double > const *self){ - return self->size(); - } - -SWIGINTERNINLINE PyObject* -SWIG_From_unsigned_SS_long (unsigned long value) -{ - return (value > LONG_MAX) ? - PyLong_FromUnsignedLong(value) : PyInt_FromLong(static_cast< long >(value)); -} - - -#ifdef SWIG_LONG_LONG_AVAILABLE -SWIGINTERNINLINE PyObject* -SWIG_From_unsigned_SS_long_SS_long (unsigned long long value) -{ - return (value > LONG_MAX) ? - PyLong_FromUnsignedLongLong(value) : PyInt_FromLong(static_cast< long >(value)); -} -#endif - - -SWIGINTERNINLINE PyObject * -SWIG_From_size_t (size_t value) -{ -#ifdef SWIG_LONG_LONG_AVAILABLE - if (sizeof(size_t) <= sizeof(unsigned long)) { -#endif - return SWIG_From_unsigned_SS_long (static_cast< unsigned long >(value)); -#ifdef SWIG_LONG_LONG_AVAILABLE - } else { - /* assume sizeof(size_t) <= sizeof(unsigned long long) */ - return SWIG_From_unsigned_SS_long_SS_long (static_cast< unsigned long long >(value)); - } -#endif -} - -SWIGINTERN std::vector< double,std::allocator< double > > *std_vector_Sl_double_Sg____getslice__(std::vector< double > *self,std::vector< double >::difference_type i,std::vector< double >::difference_type j){ - return swig::getslice(self, i, j, 1); - } -SWIGINTERN void std_vector_Sl_double_Sg____setslice____SWIG_0(std::vector< double > *self,std::vector< double >::difference_type i,std::vector< double >::difference_type j){ - swig::setslice(self, i, j, 1, std::vector< double,std::allocator< double > >()); - } -SWIGINTERN void std_vector_Sl_double_Sg____setslice____SWIG_1(std::vector< double > *self,std::vector< double >::difference_type i,std::vector< double >::difference_type j,std::vector< double,std::allocator< double > > const &v){ - swig::setslice(self, i, j, 1, v); - } -SWIGINTERN void std_vector_Sl_double_Sg____delslice__(std::vector< double > *self,std::vector< double >::difference_type i,std::vector< double >::difference_type j){ - swig::delslice(self, i, j, 1); - } -SWIGINTERN void std_vector_Sl_double_Sg____delitem____SWIG_0(std::vector< double > *self,std::vector< double >::difference_type i){ - swig::erase(self, swig::getpos(self, i)); - } -SWIGINTERN std::vector< double,std::allocator< double > > *std_vector_Sl_double_Sg____getitem____SWIG_0(std::vector< double > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return NULL; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< double,std::allocator< double > >::difference_type id = i; - std::vector< double,std::allocator< double > >::difference_type jd = j; - return swig::getslice(self, id, jd, step); - } -SWIGINTERN void std_vector_Sl_double_Sg____setitem____SWIG_0(std::vector< double > *self,PySliceObject *slice,std::vector< double,std::allocator< double > > const &v){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< double,std::allocator< double > >::difference_type id = i; - std::vector< double,std::allocator< double > >::difference_type jd = j; - swig::setslice(self, id, jd, step, v); - } -SWIGINTERN void std_vector_Sl_double_Sg____setitem____SWIG_1(std::vector< double > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - 
double,std::vector< int,std::allocator< int > > > > > > >::difference_type id = i; - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > >::difference_type jd = j; - swig::setslice(self, id, jd, step, v); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg____setitem____SWIG_1(std::vector< std::vector< std::pair< double,std::vector< int > > > > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > >::difference_type id = i; - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > >::difference_type jd = j; - swig::delslice(self, id, jd, step); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg____delitem____SWIG_1(std::vector< std::vector< std::pair< double,std::vector< int > > > > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > >::difference_type id = i; - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > >::difference_type jd = j; - swig::delslice(self, id, jd, step); - } -SWIGINTERN std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg____getitem____SWIG_1(std::vector< std::vector< std::pair< double,std::vector< int > > > > const *self,std::vector< std::vector< std::pair< double,std::vector< int > > > >::difference_type i){ - return *(swig::cgetpos(self, i)); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg____setitem____SWIG_2(std::vector< std::vector< std::pair< double,std::vector< int > > > > *self,std::vector< 
std::vector< std::pair< double,std::vector< int > > > >::difference_type i,std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &x){ - *(swig::getpos(self,i)) = x; - } -SWIGINTERN std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg__pop(std::vector< std::vector< std::pair< double,std::vector< int > > > > *self){ - if (self->size() == 0) - throw std::out_of_range("pop from empty container"); - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > >::value_type x = self->back(); - self->pop_back(); - return x; - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg__append(std::vector< std::vector< std::pair< double,std::vector< int > > > > *self,std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &x){ - self->push_back(x); - } -SWIGINTERN std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg__erase__SWIG_0(std::vector< std::vector< std::pair< double,std::vector< int > > > > *self,std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator pos){ return self->erase(pos); } -SWIGINTERN std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg__erase__SWIG_1(std::vector< std::vector< std::pair< double,std::vector< int > > > > *self,std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator first,std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator last){ return self->erase(first, last); } -SWIGINTERN std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg__insert__SWIG_0(std::vector< std::vector< std::pair< double,std::vector< int > > > > *self,std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator pos,std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &x){ return self->insert(pos, x); } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg__insert__SWIG_1(std::vector< std::vector< std::pair< double,std::vector< int > > > > *self,std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator pos,std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type n,std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &x){ self->insert(pos, n, x); } - - namespace swig { - template <> struct traits >,std::allocator< std::vector< double,std::allocator< double > > > >, std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > > { - typedef pointer_category category; - static const char* type_name() { - return "std::vector<" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >" "," 
"std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > >" " >"; - } - }; - } - -SWIGINTERN swig::SwigPyIterator *std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg__iterator(std::vector< std::vector< std::vector< double > > > *self,PyObject **PYTHON_SELF){ - return swig::make_output_iterator(self->begin(), self->begin(), self->end(), *PYTHON_SELF); - } -SWIGINTERN bool std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____nonzero__(std::vector< std::vector< std::vector< double > > > const *self){ - return !(self->empty()); - } -SWIGINTERN bool std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____bool__(std::vector< std::vector< std::vector< double > > > const *self){ - return !(self->empty()); - } -SWIGINTERN std::vector< std::vector< std::vector< double > > >::size_type std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____len__(std::vector< std::vector< std::vector< double > > > const *self){ - return self->size(); - } -SWIGINTERN std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____getslice__(std::vector< std::vector< std::vector< double > > > *self,std::vector< std::vector< std::vector< double > > >::difference_type i,std::vector< std::vector< std::vector< double > > >::difference_type j){ - return swig::getslice(self, i, j, 1); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____setslice____SWIG_0(std::vector< std::vector< std::vector< double > > > *self,std::vector< std::vector< std::vector< double > > >::difference_type i,std::vector< std::vector< std::vector< double > > >::difference_type j){ - swig::setslice(self, i, j, 1, std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >()); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____setslice____SWIG_1(std::vector< std::vector< std::vector< double > > > *self,std::vector< std::vector< std::vector< double > > >::difference_type i,std::vector< std::vector< std::vector< double > > >::difference_type j,std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &v){ - swig::setslice(self, i, j, 1, v); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____delslice__(std::vector< std::vector< std::vector< double > > > *self,std::vector< std::vector< std::vector< double > > >::difference_type i,std::vector< std::vector< std::vector< double > > >::difference_type j){ - swig::delslice(self, i, j, 1); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____delitem____SWIG_0(std::vector< std::vector< std::vector< double > > > *self,std::vector< std::vector< std::vector< double > > >::difference_type i){ - swig::erase(self, swig::getpos(self, i)); - } 
-SWIGINTERN std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____getitem____SWIG_0(std::vector< std::vector< std::vector< double > > > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return NULL; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >::difference_type id = i; - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >::difference_type jd = j; - return swig::getslice(self, id, jd, step); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____setitem____SWIG_0(std::vector< std::vector< std::vector< double > > > *self,PySliceObject *slice,std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &v){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >::difference_type id = i; - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >::difference_type jd = j; - swig::setslice(self, id, jd, step, v); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____setitem____SWIG_1(std::vector< std::vector< std::vector< double > > > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >::difference_type id = i; - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< 
double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >::difference_type jd = j; - swig::delslice(self, id, jd, step); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____delitem____SWIG_1(std::vector< std::vector< std::vector< double > > > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >::difference_type id = i; - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >::difference_type jd = j; - swig::delslice(self, id, jd, step); - } -SWIGINTERN std::vector< std::vector< std::vector< double > > >::value_type const &std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____getitem____SWIG_1(std::vector< std::vector< std::vector< double > > > const *self,std::vector< std::vector< std::vector< double > > >::difference_type i){ - return *(swig::cgetpos(self, i)); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg____setitem____SWIG_2(std::vector< std::vector< std::vector< double > > > *self,std::vector< std::vector< std::vector< double > > >::difference_type i,std::vector< std::vector< std::vector< double > > >::value_type const &x){ - *(swig::getpos(self,i)) = x; - } -SWIGINTERN std::vector< std::vector< std::vector< double > > >::value_type std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg__pop(std::vector< std::vector< std::vector< double > > > *self){ - if (self->size() == 0) - throw std::out_of_range("pop from empty container"); - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >::value_type x = self->back(); - self->pop_back(); - return x; - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg__append(std::vector< std::vector< std::vector< double > > > *self,std::vector< std::vector< std::vector< double > > >::value_type const &x){ - self->push_back(x); - } -SWIGINTERN std::vector< std::vector< std::vector< double > > >::iterator std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg__erase__SWIG_0(std::vector< std::vector< std::vector< double > > > *self,std::vector< std::vector< std::vector< double > > >::iterator pos){ return self->erase(pos); } -SWIGINTERN std::vector< std::vector< std::vector< double > > >::iterator std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg__erase__SWIG_1(std::vector< std::vector< std::vector< double > > > *self,std::vector< std::vector< std::vector< double > > >::iterator first,std::vector< std::vector< std::vector< double > > >::iterator last){ return self->erase(first, last); } 
-SWIGINTERN std::vector< std::vector< std::vector< double > > >::iterator std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg__insert__SWIG_0(std::vector< std::vector< std::vector< double > > > *self,std::vector< std::vector< std::vector< double > > >::iterator pos,std::vector< std::vector< std::vector< double > > >::value_type const &x){ return self->insert(pos, x); } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg__insert__SWIG_1(std::vector< std::vector< std::vector< double > > > *self,std::vector< std::vector< std::vector< double > > >::iterator pos,std::vector< std::vector< std::vector< double > > >::size_type n,std::vector< std::vector< std::vector< double > > >::value_type const &x){ self->insert(pos, n, x); } - - namespace swig { - template <> struct traits >,std::allocator< std::vector< int,std::allocator< int > > > >, std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > > { - typedef pointer_category category; - static const char* type_name() { - return "std::vector<" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >" "," "std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > >" " >"; - } - }; - } - -SWIGINTERN swig::SwigPyIterator *std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__iterator(std::vector< std::vector< std::vector< int > > > *self,PyObject **PYTHON_SELF){ - return swig::make_output_iterator(self->begin(), self->begin(), self->end(), *PYTHON_SELF); - } -SWIGINTERN bool std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____nonzero__(std::vector< std::vector< std::vector< int > > > const *self){ - return !(self->empty()); - } -SWIGINTERN bool std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____bool__(std::vector< std::vector< std::vector< int > > > const *self){ - return !(self->empty()); - } -SWIGINTERN std::vector< std::vector< std::vector< int > > >::size_type std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____len__(std::vector< std::vector< std::vector< int > > > const *self){ - return self->size(); - } -SWIGINTERN std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____getslice__(std::vector< std::vector< std::vector< int > > > *self,std::vector< std::vector< std::vector< int > > >::difference_type i,std::vector< std::vector< std::vector< int > > >::difference_type j){ - return swig::getslice(self, i, j, 1); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____setslice____SWIG_0(std::vector< std::vector< std::vector< int > > > *self,std::vector< std::vector< std::vector< int > > >::difference_type i,std::vector< std::vector< std::vector< int > > >::difference_type j){ - swig::setslice(self, i, j, 1, std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >()); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____setslice____SWIG_1(std::vector< std::vector< 
std::vector< int > > > *self,std::vector< std::vector< std::vector< int > > >::difference_type i,std::vector< std::vector< std::vector< int > > >::difference_type j,std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &v){ - swig::setslice(self, i, j, 1, v); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____delslice__(std::vector< std::vector< std::vector< int > > > *self,std::vector< std::vector< std::vector< int > > >::difference_type i,std::vector< std::vector< std::vector< int > > >::difference_type j){ - swig::delslice(self, i, j, 1); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____delitem____SWIG_0(std::vector< std::vector< std::vector< int > > > *self,std::vector< std::vector< std::vector< int > > >::difference_type i){ - swig::erase(self, swig::getpos(self, i)); - } -SWIGINTERN std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____getitem____SWIG_0(std::vector< std::vector< std::vector< int > > > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return NULL; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >::difference_type id = i; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >::difference_type jd = j; - return swig::getslice(self, id, jd, step); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____setitem____SWIG_0(std::vector< std::vector< std::vector< int > > > *self,PySliceObject *slice,std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &v){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >::difference_type id = i; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > 
> > >::difference_type jd = j; - swig::setslice(self, id, jd, step, v); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____setitem____SWIG_1(std::vector< std::vector< std::vector< int > > > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >::difference_type id = i; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >::difference_type jd = j; - swig::delslice(self, id, jd, step); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____delitem____SWIG_1(std::vector< std::vector< std::vector< int > > > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >::difference_type id = i; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >::difference_type jd = j; - swig::delslice(self, id, jd, step); - } -SWIGINTERN std::vector< std::vector< std::vector< int > > >::value_type const &std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____getitem____SWIG_1(std::vector< std::vector< std::vector< int > > > const *self,std::vector< std::vector< std::vector< int > > >::difference_type i){ - return *(swig::cgetpos(self, i)); - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____setitem____SWIG_2(std::vector< std::vector< std::vector< int > > > *self,std::vector< std::vector< std::vector< int > > >::difference_type i,std::vector< std::vector< std::vector< int > > >::value_type const &x){ - *(swig::getpos(self,i)) = x; - } -SWIGINTERN std::vector< std::vector< std::vector< int > > >::value_type std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__pop(std::vector< std::vector< std::vector< int > > > *self){ - if (self->size() == 0) - throw std::out_of_range("pop from empty container"); - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >::value_type x = self->back(); - self->pop_back(); - return x; - } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__append(std::vector< std::vector< std::vector< int > > > *self,std::vector< std::vector< std::vector< int > > >::value_type 
const &x){ - self->push_back(x); - } -SWIGINTERN std::vector< std::vector< std::vector< int > > >::iterator std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__erase__SWIG_0(std::vector< std::vector< std::vector< int > > > *self,std::vector< std::vector< std::vector< int > > >::iterator pos){ return self->erase(pos); } -SWIGINTERN std::vector< std::vector< std::vector< int > > >::iterator std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__erase__SWIG_1(std::vector< std::vector< std::vector< int > > > *self,std::vector< std::vector< std::vector< int > > >::iterator first,std::vector< std::vector< std::vector< int > > >::iterator last){ return self->erase(first, last); } -SWIGINTERN std::vector< std::vector< std::vector< int > > >::iterator std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__insert__SWIG_0(std::vector< std::vector< std::vector< int > > > *self,std::vector< std::vector< std::vector< int > > >::iterator pos,std::vector< std::vector< std::vector< int > > >::value_type const &x){ return self->insert(pos, x); } -SWIGINTERN void std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__insert__SWIG_1(std::vector< std::vector< std::vector< int > > > *self,std::vector< std::vector< std::vector< int > > >::iterator pos,std::vector< std::vector< std::vector< int > > >::size_type n,std::vector< std::vector< std::vector< int > > >::value_type const &x){ self->insert(pos, n, x); } - - namespace swig { - template <> struct traits< PathTrie > { - typedef pointer_category category; - static const char* type_name() { return"PathTrie"; } - }; - } - - - namespace swig { - template <> struct traits > > { - typedef value_category category; - static const char* type_name() { - return "std::vector<" "PathTrie" " *," "std::allocator< PathTrie * >" " >"; - } - }; - } - -SWIGINTERN swig::SwigPyIterator *std_vector_Sl_PathTrie_Sm__Sg__iterator(std::vector< PathTrie * > *self,PyObject **PYTHON_SELF){ - return swig::make_output_iterator(self->begin(), self->begin(), self->end(), *PYTHON_SELF); - } -SWIGINTERN bool std_vector_Sl_PathTrie_Sm__Sg____nonzero__(std::vector< PathTrie * > const *self){ - return !(self->empty()); - } -SWIGINTERN bool std_vector_Sl_PathTrie_Sm__Sg____bool__(std::vector< PathTrie * > const *self){ - return !(self->empty()); - } -SWIGINTERN std::vector< PathTrie * >::size_type std_vector_Sl_PathTrie_Sm__Sg____len__(std::vector< PathTrie * > const *self){ - return self->size(); - } -SWIGINTERN std::vector< PathTrie *,std::allocator< PathTrie * > > *std_vector_Sl_PathTrie_Sm__Sg____getslice__(std::vector< PathTrie * > *self,std::vector< PathTrie * >::difference_type i,std::vector< PathTrie * >::difference_type j){ - return swig::getslice(self, i, j, 1); - } -SWIGINTERN void std_vector_Sl_PathTrie_Sm__Sg____setslice____SWIG_0(std::vector< PathTrie * > *self,std::vector< PathTrie * >::difference_type i,std::vector< PathTrie * >::difference_type j){ - swig::setslice(self, i, j, 1, std::vector< PathTrie*,std::allocator< PathTrie * > >()); - } -SWIGINTERN void std_vector_Sl_PathTrie_Sm__Sg____setslice____SWIG_1(std::vector< PathTrie * > *self,std::vector< PathTrie * >::difference_type i,std::vector< PathTrie * >::difference_type j,std::vector< PathTrie *,std::allocator< PathTrie * > > const &v){ - swig::setslice(self, i, j, 1, v); - } -SWIGINTERN void std_vector_Sl_PathTrie_Sm__Sg____delslice__(std::vector< PathTrie * > *self,std::vector< PathTrie * >::difference_type i,std::vector< PathTrie * >::difference_type j){ - swig::delslice(self, i, j, 1); - } -SWIGINTERN void 
std_vector_Sl_PathTrie_Sm__Sg____delitem____SWIG_0(std::vector< PathTrie * > *self,std::vector< PathTrie * >::difference_type i){ - swig::erase(self, swig::getpos(self, i)); - } -SWIGINTERN std::vector< PathTrie *,std::allocator< PathTrie * > > *std_vector_Sl_PathTrie_Sm__Sg____getitem____SWIG_0(std::vector< PathTrie * > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return NULL; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< PathTrie*,std::allocator< PathTrie * > >::difference_type id = i; - std::vector< PathTrie*,std::allocator< PathTrie * > >::difference_type jd = j; - return swig::getslice(self, id, jd, step); - } -SWIGINTERN void std_vector_Sl_PathTrie_Sm__Sg____setitem____SWIG_0(std::vector< PathTrie * > *self,PySliceObject *slice,std::vector< PathTrie *,std::allocator< PathTrie * > > const &v){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< PathTrie*,std::allocator< PathTrie * > >::difference_type id = i; - std::vector< PathTrie*,std::allocator< PathTrie * > >::difference_type jd = j; - swig::setslice(self, id, jd, step, v); - } -SWIGINTERN void std_vector_Sl_PathTrie_Sm__Sg____setitem____SWIG_1(std::vector< PathTrie * > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< PathTrie*,std::allocator< PathTrie * > >::difference_type id = i; - std::vector< PathTrie*,std::allocator< PathTrie * > >::difference_type jd = j; - swig::delslice(self, id, jd, step); - } -SWIGINTERN void std_vector_Sl_PathTrie_Sm__Sg____delitem____SWIG_1(std::vector< PathTrie * > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector< PathTrie*,std::allocator< PathTrie * > >::difference_type id = i; - std::vector< PathTrie*,std::allocator< PathTrie * > >::difference_type jd = j; - swig::delslice(self, id, jd, step); - } -SWIGINTERN std::vector< PathTrie * >::value_type std_vector_Sl_PathTrie_Sm__Sg____getitem____SWIG_1(std::vector< PathTrie * > *self,std::vector< PathTrie * >::difference_type i){ - return *(swig::cgetpos(self, i)); - } -SWIGINTERN void std_vector_Sl_PathTrie_Sm__Sg____setitem____SWIG_2(std::vector< PathTrie * > *self,std::vector< PathTrie * >::difference_type i,std::vector< PathTrie * >::value_type x){ - *(swig::getpos(self,i)) = x; - } -SWIGINTERN std::vector< PathTrie * >::value_type std_vector_Sl_PathTrie_Sm__Sg__pop(std::vector< PathTrie * > *self){ - if (self->size() == 0) - throw std::out_of_range("pop from empty container"); - std::vector< PathTrie*,std::allocator< PathTrie * > >::value_type x = self->back(); - self->pop_back(); - return x; - } -SWIGINTERN void std_vector_Sl_PathTrie_Sm__Sg__append(std::vector< PathTrie * > *self,std::vector< PathTrie * >::value_type x){ - self->push_back(x); - } -SWIGINTERN std::vector< PathTrie * >::iterator std_vector_Sl_PathTrie_Sm__Sg__erase__SWIG_0(std::vector< PathTrie * > *self,std::vector< PathTrie * >::iterator 
pos){ return self->erase(pos); } -SWIGINTERN std::vector< PathTrie * >::iterator std_vector_Sl_PathTrie_Sm__Sg__erase__SWIG_1(std::vector< PathTrie * > *self,std::vector< PathTrie * >::iterator first,std::vector< PathTrie * >::iterator last){ return self->erase(first, last); } -SWIGINTERN std::vector< PathTrie * >::iterator std_vector_Sl_PathTrie_Sm__Sg__insert__SWIG_0(std::vector< PathTrie * > *self,std::vector< PathTrie * >::iterator pos,std::vector< PathTrie * >::value_type x){ return self->insert(pos, x); } -SWIGINTERN void std_vector_Sl_PathTrie_Sm__Sg__insert__SWIG_1(std::vector< PathTrie * > *self,std::vector< PathTrie * >::iterator pos,std::vector< PathTrie * >::size_type n,std::vector< PathTrie * >::value_type x){ self->insert(pos, n, x); } - -namespace swig { - template <> struct traits< bool > { - typedef value_category category; - static const char* type_name() { return"bool"; } - }; - template <> struct traits_asval< bool > { - typedef bool value_type; - static int asval(PyObject *obj, value_type *val) { - return SWIG_AsVal_bool (obj, val); - } - }; - template <> struct traits_from< bool > { - typedef bool value_type; - static PyObject *from(const value_type& val) { - return SWIG_From_bool (val); - } - }; -} - - - namespace swig { - template <> struct traits > > { - typedef value_category category; - static const char* type_name() { - return "std::vector >"; - } - }; - } - -SWIGINTERN swig::SwigPyIterator *std_vector_Sl_bool_Sg__iterator(std::vector< bool > *self,PyObject **PYTHON_SELF){ - return swig::make_output_iterator(self->begin(), self->begin(), self->end(), *PYTHON_SELF); - } -SWIGINTERN bool std_vector_Sl_bool_Sg____nonzero__(std::vector< bool > const *self){ - return !(self->empty()); - } -SWIGINTERN bool std_vector_Sl_bool_Sg____bool__(std::vector< bool > const *self){ - return !(self->empty()); - } -SWIGINTERN std::vector< bool >::size_type std_vector_Sl_bool_Sg____len__(std::vector< bool > const *self){ - return self->size(); - } -SWIGINTERN std::vector< bool,std::allocator< bool > > *std_vector_Sl_bool_Sg____getslice__(std::vector< bool > *self,std::vector< bool >::difference_type i,std::vector< bool >::difference_type j){ - return swig::getslice(self, i, j, 1); - } -SWIGINTERN void std_vector_Sl_bool_Sg____setslice____SWIG_0(std::vector< bool > *self,std::vector< bool >::difference_type i,std::vector< bool >::difference_type j){ - swig::setslice(self, i, j, 1, std::vector >()); - } -SWIGINTERN void std_vector_Sl_bool_Sg____setslice____SWIG_1(std::vector< bool > *self,std::vector< bool >::difference_type i,std::vector< bool >::difference_type j,std::vector< bool,std::allocator< bool > > const &v){ - swig::setslice(self, i, j, 1, v); - } -SWIGINTERN void std_vector_Sl_bool_Sg____delslice__(std::vector< bool > *self,std::vector< bool >::difference_type i,std::vector< bool >::difference_type j){ - swig::delslice(self, i, j, 1); - } -SWIGINTERN void std_vector_Sl_bool_Sg____delitem____SWIG_0(std::vector< bool > *self,std::vector< bool >::difference_type i){ - swig::erase(self, swig::getpos(self, i)); - } -SWIGINTERN std::vector< bool,std::allocator< bool > > *std_vector_Sl_bool_Sg____getitem____SWIG_0(std::vector< bool > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return NULL; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector >::difference_type id = i; - std::vector >::difference_type jd = j; - return 
swig::getslice(self, id, jd, step); - } -SWIGINTERN void std_vector_Sl_bool_Sg____setitem____SWIG_0(std::vector< bool > *self,PySliceObject *slice,std::vector< bool,std::allocator< bool > > const &v){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector >::difference_type id = i; - std::vector >::difference_type jd = j; - swig::setslice(self, id, jd, step, v); - } -SWIGINTERN void std_vector_Sl_bool_Sg____setitem____SWIG_1(std::vector< bool > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector >::difference_type id = i; - std::vector >::difference_type jd = j; - swig::delslice(self, id, jd, step); - } -SWIGINTERN void std_vector_Sl_bool_Sg____delitem____SWIG_1(std::vector< bool > *self,PySliceObject *slice){ - Py_ssize_t i, j, step; - if( !PySlice_Check(slice) ) { - SWIG_Error(SWIG_TypeError, "Slice object expected."); - return; - } - PySlice_GetIndices(SWIGPY_SLICE_ARG(slice), (Py_ssize_t)self->size(), &i, &j, &step); - std::vector >::difference_type id = i; - std::vector >::difference_type jd = j; - swig::delslice(self, id, jd, step); - } -SWIGINTERN std::vector< bool >::value_type std_vector_Sl_bool_Sg____getitem____SWIG_1(std::vector< bool > *self,std::vector< bool >::difference_type i){ - return *(swig::cgetpos(self, i)); - } -SWIGINTERN void std_vector_Sl_bool_Sg____setitem____SWIG_2(std::vector< bool > *self,std::vector< bool >::difference_type i,std::vector< bool >::value_type x){ - *(swig::getpos(self,i)) = x; - } -SWIGINTERN std::vector< bool >::value_type std_vector_Sl_bool_Sg__pop(std::vector< bool > *self){ - if (self->size() == 0) - throw std::out_of_range("pop from empty container"); - std::vector >::value_type x = self->back(); - self->pop_back(); - return x; - } -SWIGINTERN void std_vector_Sl_bool_Sg__append(std::vector< bool > *self,std::vector< bool >::value_type x){ - self->push_back(x); - } -SWIGINTERN std::vector< bool >::iterator std_vector_Sl_bool_Sg__erase__SWIG_0(std::vector< bool > *self,std::vector< bool >::iterator pos){ return self->erase(pos); } -SWIGINTERN std::vector< bool >::iterator std_vector_Sl_bool_Sg__erase__SWIG_1(std::vector< bool > *self,std::vector< bool >::iterator first,std::vector< bool >::iterator last){ return self->erase(first, last); } -SWIGINTERN std::vector< bool >::iterator std_vector_Sl_bool_Sg__insert__SWIG_0(std::vector< bool > *self,std::vector< bool >::iterator pos,std::vector< bool >::value_type x){ return self->insert(pos, x); } -SWIGINTERN void std_vector_Sl_bool_Sg__insert__SWIG_1(std::vector< bool > *self,std::vector< bool >::iterator pos,std::vector< bool >::size_type n,std::vector< bool >::value_type x){ self->insert(pos, n, x); } -#ifdef __cplusplus -extern "C" { -#endif -SWIGINTERN PyObject *_wrap_delete_SwigPyIterator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - swig::SwigPyIterator *arg1 = (swig::SwigPyIterator *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:delete_SwigPyIterator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_swig__SwigPyIterator, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - 
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_SwigPyIterator" "', argument " "1"" of type '" "swig::SwigPyIterator *""'"); - } - arg1 = reinterpret_cast< swig::SwigPyIterator * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_SwigPyIterator_value(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - swig::SwigPyIterator *arg1 = (swig::SwigPyIterator *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:SwigPyIterator_value",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_swig__SwigPyIterator, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SwigPyIterator_value" "', argument " "1"" of type '" "swig::SwigPyIterator const *""'"); - } - arg1 = reinterpret_cast< swig::SwigPyIterator * >(argp1); - try { - result = (PyObject *)((swig::SwigPyIterator const *)arg1)->value(); - } - catch(swig::stop_iteration &_e) { - { - (void)_e; - SWIG_SetErrorObj(PyExc_StopIteration, SWIG_Py_Void()); - SWIG_fail; - } - } - - resultobj = result; - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_SwigPyIterator_incr__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - swig::SwigPyIterator *arg1 = (swig::SwigPyIterator *) 0 ; - size_t arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - swig::SwigPyIterator *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:SwigPyIterator_incr",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_swig__SwigPyIterator, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SwigPyIterator_incr" "', argument " "1"" of type '" "swig::SwigPyIterator *""'"); - } - arg1 = reinterpret_cast< swig::SwigPyIterator * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "SwigPyIterator_incr" "', argument " "2"" of type '" "size_t""'"); - } - arg2 = static_cast< size_t >(val2); - try { - result = (swig::SwigPyIterator *)(arg1)->incr(arg2); - } - catch(swig::stop_iteration &_e) { - { - (void)_e; - SWIG_SetErrorObj(PyExc_StopIteration, SWIG_Py_Void()); - SWIG_fail; - } - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_swig__SwigPyIterator, 0 | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_SwigPyIterator_incr__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - swig::SwigPyIterator *arg1 = (swig::SwigPyIterator *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - swig::SwigPyIterator *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:SwigPyIterator_incr",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_swig__SwigPyIterator, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SwigPyIterator_incr" "', argument " "1"" of type '" "swig::SwigPyIterator *""'"); - } - arg1 = reinterpret_cast< swig::SwigPyIterator * >(argp1); - try { - result = (swig::SwigPyIterator *)(arg1)->incr(); - } - catch(swig::stop_iteration &_e) { - { - (void)_e; - SWIG_SetErrorObj(PyExc_StopIteration, SWIG_Py_Void()); - SWIG_fail; - } - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), 
[Deleted SWIG-generated wrapper code omitted (ctc_decoder swig Python bindings). The removed hunks cover the auto-generated wrappers for swig::SwigPyIterator (incr, decr, distance, equal, copy, next, __next__, previous, advance, and the ==, !=, +=, -=, +, - operator overloads plus their overload-dispatch helpers), for PathTrie (constructor/destructor, get_path_trie, get_path_vec, iterate_to_vec, iterate_to_vec_only, set_dictionary, set_matcher, is_empty, remove, and the log_prob_b_prev / log_prob_nb_prev / log_prob_b_cur / log_prob_nb_cur / score / approx_ctc / character / parent accessors), and for DoubleVector, i.e. std::vector<double> (iterator, __nonzero__, __bool__, __len__, __getslice__, __setslice__), together with the corresponding swigregister entries.]
- int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[3], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_DoubleVector___setslice____SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'DoubleVector___setslice__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< double >::__setslice__(std::vector< double >::difference_type,std::vector< double >::difference_type)\n" - " std::vector< double >::__setslice__(std::vector< double >::difference_type,std::vector< double >::difference_type,std::vector< double,std::allocator< double > > const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector___delslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::difference_type arg2 ; - std::vector< double >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:DoubleVector___delslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector___delslice__" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector___delslice__" "', argument " "2"" of type '" "std::vector< double >::difference_type""'"); - } - arg2 = static_cast< std::vector< double >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "DoubleVector___delslice__" "', argument " "3"" of type '" "std::vector< double >::difference_type""'"); - } - arg3 = static_cast< std::vector< double >::difference_type >(val3); - try { - std_vector_Sl_double_Sg____delslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector___delitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector___delitem__" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = 
reinterpret_cast< std::vector< double > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector___delitem__" "', argument " "2"" of type '" "std::vector< double >::difference_type""'"); - } - arg2 = static_cast< std::vector< double >::difference_type >(val2); - try { - std_vector_Sl_double_Sg____delitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector___getitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< double,std::allocator< double > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector___getitem__" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector___getitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - result = (std::vector< double,std::allocator< double > > *)std_vector_Sl_double_Sg____getitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector___setitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - std::vector< double,std::allocator< double > > *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:DoubleVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector___setitem__" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - { - std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - 
SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "DoubleVector___setitem__" "', argument " "3"" of type '" "std::vector< double,std::allocator< double > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "DoubleVector___setitem__" "', argument " "3"" of type '" "std::vector< double,std::allocator< double > > const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_double_Sg____setitem____SWIG_0(arg1,arg2,(std::vector< double,std::allocator< double > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector___setitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector___setitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector___setitem__" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_double_Sg____setitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector___delitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector___delitem__" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector___delitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_double_Sg____delitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} 
- - -SWIGINTERN PyObject *_wrap_DoubleVector___delitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_DoubleVector___delitem____SWIG_1(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_DoubleVector___delitem____SWIG_0(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'DoubleVector___delitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< double >::__delitem__(std::vector< double >::difference_type)\n" - " std::vector< double >::__delitem__(PySliceObject *)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector___getitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< double >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector___getitem__" "', argument " "1"" of type '" "std::vector< double > const *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector___getitem__" "', argument " "2"" of type '" "std::vector< double >::difference_type""'"); - } - arg2 = static_cast< std::vector< double >::difference_type >(val2); - try { - result = (std::vector< double >::value_type *) &std_vector_Sl_double_Sg____getitem____SWIG_1((std::vector< double > const *)arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_From_double(static_cast< double >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector___getitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_DoubleVector___getitem____SWIG_0(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = 
SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_DoubleVector___getitem____SWIG_1(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'DoubleVector___getitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< double >::__getitem__(PySliceObject *)\n" - " std::vector< double >::__getitem__(std::vector< double >::difference_type) const\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector___setitem____SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::difference_type arg2 ; - std::vector< double >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - std::vector< double >::value_type temp3 ; - double val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:DoubleVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector___setitem__" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector___setitem__" "', argument " "2"" of type '" "std::vector< double >::difference_type""'"); - } - arg2 = static_cast< std::vector< double >::difference_type >(val2); - ecode3 = SWIG_AsVal_double(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "DoubleVector___setitem__" "', argument " "3"" of type '" "std::vector< double >::value_type""'"); - } - temp3 = static_cast< std::vector< double >::value_type >(val3); - arg3 = &temp3; - try { - std_vector_Sl_double_Sg____setitem____SWIG_2(arg1,arg2,(double const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector___setitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_DoubleVector___setitem____SWIG_1(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_DoubleVector___setitem____SWIG_0(self, args); - } - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - 
_v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_double(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_DoubleVector___setitem____SWIG_2(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'DoubleVector___setitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< double >::__setitem__(PySliceObject *,std::vector< double,std::allocator< double > > const &)\n" - " std::vector< double >::__setitem__(PySliceObject *)\n" - " std::vector< double >::__setitem__(std::vector< double >::difference_type,std::vector< double >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_pop(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< double >::value_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_pop",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_pop" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - try { - result = (std::vector< double >::value_type)std_vector_Sl_double_Sg__pop(arg1); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_From_double(static_cast< double >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_append(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - std::vector< double >::value_type temp2 ; - double val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector_append",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_append" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - ecode2 = SWIG_AsVal_double(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector_append" "', argument " "2"" of type '" "std::vector< double >::value_type""'"); - } - temp2 = static_cast< std::vector< double >::value_type >(val2); - arg2 = &temp2; - std_vector_Sl_double_Sg__append(arg1,(double const &)*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_DoubleVector__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)":new_DoubleVector")) SWIG_fail; - result = (std::vector< double > *)new std::vector< double >(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, SWIG_POINTER_NEW | 0 ); - return 
resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_DoubleVector__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = 0 ; - int res1 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::vector< double > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_DoubleVector",&obj0)) SWIG_fail; - { - std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_DoubleVector" "', argument " "1"" of type '" "std::vector< double > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_DoubleVector" "', argument " "1"" of type '" "std::vector< double > const &""'"); - } - arg1 = ptr; - } - result = (std::vector< double > *)new std::vector< double >((std::vector< double > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_empty(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_empty",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_empty" "', argument " "1"" of type '" "std::vector< double > const *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - result = (bool)((std::vector< double > const *)arg1)->empty(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< double >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_size" "', argument " "1"" of type '" "std::vector< double > const *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - result = ((std::vector< double > const *)arg1)->size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_swap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector_swap",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if 
(!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_swap" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "DoubleVector_swap" "', argument " "2"" of type '" "std::vector< double > &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "DoubleVector_swap" "', argument " "2"" of type '" "std::vector< double > &""'"); - } - arg2 = reinterpret_cast< std::vector< double > * >(argp2); - (arg1)->swap(*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_begin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< double >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_begin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_begin" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - result = (arg1)->begin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< double >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< double >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_end",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_end" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - result = (arg1)->end(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< double >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_rbegin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< double >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_rbegin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_rbegin" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - result = (arg1)->rbegin(); - resultobj = 
SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< double >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_rend(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< double >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_rend",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_rend" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - result = (arg1)->rend(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< double >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_clear(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_clear",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_clear" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - (arg1)->clear(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_get_allocator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< std::allocator< double > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_get_allocator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_get_allocator" "', argument " "1"" of type '" "std::vector< double > const *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - result = ((std::vector< double > const *)arg1)->get_allocator(); - resultobj = SWIG_NewPointerObj((new std::vector< double >::allocator_type(static_cast< const std::vector< double >::allocator_type& >(result))), SWIGTYPE_p_std__allocatorT_double_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_DoubleVector__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double >::size_type arg1 ; - size_t val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< double > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_DoubleVector",&obj0)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_DoubleVector" "', argument " "1"" 
of type '" "std::vector< double >::size_type""'"); - } - arg1 = static_cast< std::vector< double >::size_type >(val1); - result = (std::vector< double > *)new std::vector< double >(arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_pop_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_pop_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_pop_back" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - (arg1)->pop_back(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_resize__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector_resize",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_resize" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector_resize" "', argument " "2"" of type '" "std::vector< double >::size_type""'"); - } - arg2 = static_cast< std::vector< double >::size_type >(val2); - (arg1)->resize(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_erase__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::iterator arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< double >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector_erase",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_erase" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector_erase" "', argument " "2"" of type '" "std::vector< double >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); 
- if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector_erase" "', argument " "2"" of type '" "std::vector< double >::iterator""'"); - } - } - result = std_vector_Sl_double_Sg__erase__SWIG_0(arg1,arg2); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< double >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_erase__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::iterator arg2 ; - std::vector< double >::iterator arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - swig::SwigPyIterator *iter3 = 0 ; - int res3 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< double >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:DoubleVector_erase",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_erase" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector_erase" "', argument " "2"" of type '" "std::vector< double >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector_erase" "', argument " "2"" of type '" "std::vector< double >::iterator""'"); - } - } - res3 = SWIG_ConvertPtr(obj2, SWIG_as_voidptrptr(&iter3), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res3) || !iter3) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector_erase" "', argument " "3"" of type '" "std::vector< double >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter3); - if (iter_t) { - arg3 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector_erase" "', argument " "3"" of type '" "std::vector< double >::iterator""'"); - } - } - result = std_vector_Sl_double_Sg__erase__SWIG_1(arg1,arg2,arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< double >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_erase(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], 
SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_DoubleVector_erase__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[2], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_DoubleVector_erase__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'DoubleVector_erase'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< double >::erase(std::vector< double >::iterator)\n" - " std::vector< double >::erase(std::vector< double >::iterator,std::vector< double >::iterator)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_new_DoubleVector__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double >::size_type arg1 ; - std::vector< double >::value_type *arg2 = 0 ; - size_t val1 ; - int ecode1 = 0 ; - std::vector< double >::value_type temp2 ; - double val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< double > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:new_DoubleVector",&obj0,&obj1)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_DoubleVector" "', argument " "1"" of type '" "std::vector< double >::size_type""'"); - } - arg1 = static_cast< std::vector< double >::size_type >(val1); - ecode2 = SWIG_AsVal_double(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "new_DoubleVector" "', argument " "2"" of type '" "std::vector< double >::value_type""'"); - } - temp2 = static_cast< std::vector< double >::value_type >(val2); - arg2 = &temp2; - result = (std::vector< double > *)new std::vector< double >(arg1,(std::vector< double >::value_type const &)*arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_DoubleVector(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 0) { - return _wrap_new_DoubleVector__SWIG_0(self, args); - } - if (argc == 1) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_DoubleVector__SWIG_2(self, args); - } - } - if (argc == 1) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_DoubleVector__SWIG_1(self, args); - } - } - if (argc == 2) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], 
NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_double(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_DoubleVector__SWIG_3(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'new_DoubleVector'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< double >::vector()\n" - " std::vector< double >::vector(std::vector< double > const &)\n" - " std::vector< double >::vector(std::vector< double >::size_type)\n" - " std::vector< double >::vector(std::vector< double >::size_type,std::vector< double >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_push_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - std::vector< double >::value_type temp2 ; - double val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector_push_back",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_push_back" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - ecode2 = SWIG_AsVal_double(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector_push_back" "', argument " "2"" of type '" "std::vector< double >::value_type""'"); - } - temp2 = static_cast< std::vector< double >::value_type >(val2); - arg2 = &temp2; - (arg1)->push_back((std::vector< double >::value_type const &)*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_front(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< double >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_front",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_front" "', argument " "1"" of type '" "std::vector< double > const *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - result = (std::vector< double >::value_type *) &((std::vector< double > const *)arg1)->front(); - resultobj = SWIG_From_double(static_cast< double >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< double >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_back" "', argument " "1"" of type '" 
"std::vector< double > const *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - result = (std::vector< double >::value_type *) &((std::vector< double > const *)arg1)->back(); - resultobj = SWIG_From_double(static_cast< double >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_assign(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::size_type arg2 ; - std::vector< double >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - std::vector< double >::value_type temp3 ; - double val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:DoubleVector_assign",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_assign" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector_assign" "', argument " "2"" of type '" "std::vector< double >::size_type""'"); - } - arg2 = static_cast< std::vector< double >::size_type >(val2); - ecode3 = SWIG_AsVal_double(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "DoubleVector_assign" "', argument " "3"" of type '" "std::vector< double >::value_type""'"); - } - temp3 = static_cast< std::vector< double >::value_type >(val3); - arg3 = &temp3; - (arg1)->assign(arg2,(std::vector< double >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_resize__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::size_type arg2 ; - std::vector< double >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - std::vector< double >::value_type temp3 ; - double val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:DoubleVector_resize",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_resize" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector_resize" "', argument " "2"" of type '" "std::vector< double >::size_type""'"); - } - arg2 = static_cast< std::vector< double >::size_type >(val2); - ecode3 = SWIG_AsVal_double(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "DoubleVector_resize" "', argument " "3"" of type '" "std::vector< double >::value_type""'"); - } - temp3 = static_cast< std::vector< double >::value_type >(val3); - arg3 = &temp3; - 
(arg1)->resize(arg2,(std::vector< double >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_resize(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_DoubleVector_resize__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_double(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_DoubleVector_resize__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'DoubleVector_resize'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< double >::resize(std::vector< double >::size_type)\n" - " std::vector< double >::resize(std::vector< double >::size_type,std::vector< double >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_insert__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::iterator arg2 ; - std::vector< double >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - std::vector< double >::value_type temp3 ; - double val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< double >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:DoubleVector_insert",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_insert" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector_insert" "', argument " "2"" of type '" "std::vector< double >::iterator""'"); - } else { - swig::SwigPyIterator_T<std::vector< double >::iterator > *iter_t = dynamic_cast<swig::SwigPyIterator_T<std::vector< double >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector_insert" "', argument " "2"" of type '" "std::vector< double >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_double(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "DoubleVector_insert" "', argument " "3"" of type '" "std::vector< double >::value_type""'"); - } - temp3 = static_cast< std::vector< double >::value_type >(val3); - arg3 = &temp3; - result = 
std_vector_Sl_double_Sg__insert__SWIG_0(arg1,arg2,(double const &)*arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< double >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_insert__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::iterator arg2 ; - std::vector< double >::size_type arg3 ; - std::vector< double >::value_type *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - size_t val3 ; - int ecode3 = 0 ; - std::vector< double >::value_type temp4 ; - double val4 ; - int ecode4 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:DoubleVector_insert",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_insert" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector_insert" "', argument " "2"" of type '" "std::vector< double >::iterator""'"); - } else { - swig::SwigPyIterator_T<std::vector< double >::iterator > *iter_t = dynamic_cast<swig::SwigPyIterator_T<std::vector< double >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector_insert" "', argument " "2"" of type '" "std::vector< double >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "DoubleVector_insert" "', argument " "3"" of type '" "std::vector< double >::size_type""'"); - } - arg3 = static_cast< std::vector< double >::size_type >(val3); - ecode4 = SWIG_AsVal_double(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "DoubleVector_insert" "', argument " "4"" of type '" "std::vector< double >::value_type""'"); - } - temp4 = static_cast< std::vector< double >::value_type >(val4); - arg4 = &temp4; - std_vector_Sl_double_Sg__insert__SWIG_1(arg1,arg2,arg3,(double const &)*arg4); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_insert(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast<swig::SwigPyIterator_T<std::vector< double >::iterator > *>(iter) != 0)); - if (_v) { - { - int res = SWIG_AsVal_double(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return 
_wrap_DoubleVector_insert__SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast<swig::SwigPyIterator_T<std::vector< double >::iterator > *>(iter) != 0)); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_double(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_DoubleVector_insert__SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'DoubleVector_insert'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< double >::insert(std::vector< double >::iterator,std::vector< double >::value_type const &)\n" - " std::vector< double >::insert(std::vector< double >::iterator,std::vector< double >::size_type,std::vector< double >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_reserve(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - std::vector< double >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector_reserve",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_reserve" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector_reserve" "', argument " "2"" of type '" "std::vector< double >::size_type""'"); - } - arg2 = static_cast< std::vector< double >::size_type >(val2); - (arg1)->reserve(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector_capacity(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< double >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector_capacity",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector_capacity" "', argument " "1"" of type '" "std::vector< double > const *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - result = ((std::vector< double > const *)arg1)->capacity(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_delete_DoubleVector(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< double > *arg1 = (std::vector< double > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char 
*)"O:delete_DoubleVector",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_DoubleVector" "', argument " "1"" of type '" "std::vector< double > *""'"); - } - arg1 = reinterpret_cast< std::vector< double > * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *DoubleVector_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *obj; - if (!PyArg_ParseTuple(args,(char *)"O:swigregister", &obj)) return NULL; - SWIG_TypeNewClientData(SWIGTYPE_p_std__vectorT_double_std__allocatorT_double_t_t, SWIG_NewClientData(obj)); - return SWIG_Py_Void(); -} - -SWIGINTERN PyObject *_wrap_IntVector_iterator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - PyObject **arg2 = (PyObject **) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - swig::SwigPyIterator *result = 0 ; - - arg2 = &obj0; - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_iterator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_iterator" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = (swig::SwigPyIterator *)std_vector_Sl_int_Sg__iterator(arg1,arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_swig__SwigPyIterator, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___nonzero__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector___nonzero__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___nonzero__" "', argument " "1"" of type '" "std::vector< int > const *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = (bool)std_vector_Sl_int_Sg____nonzero__((std::vector< int > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___bool__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector___bool__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___bool__" "', argument " "1"" of type '" "std::vector< int > const *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = (bool)std_vector_Sl_int_Sg____bool__((std::vector< int > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN 
PyObject *_wrap_IntVector___len__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector___len__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___len__" "', argument " "1"" of type '" "std::vector< int > const *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = std_vector_Sl_int_Sg____len__((std::vector< int > const *)arg1); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___getslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::difference_type arg2 ; - std::vector< int >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< int,std::allocator< int > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector___getslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___getslice__" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector___getslice__" "', argument " "2"" of type '" "std::vector< int >::difference_type""'"); - } - arg2 = static_cast< std::vector< int >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector___getslice__" "', argument " "3"" of type '" "std::vector< int >::difference_type""'"); - } - arg3 = static_cast< std::vector< int >::difference_type >(val3); - try { - result = (std::vector< int,std::allocator< int > > *)std_vector_Sl_int_Sg____getslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___setslice____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::difference_type arg2 ; - std::vector< int >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector___setslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___setslice__" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector___setslice__" "', argument " "2"" of type '" "std::vector< int >::difference_type""'"); - } - arg2 = static_cast< std::vector< int >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector___setslice__" "', argument " "3"" of type '" "std::vector< int >::difference_type""'"); - } - arg3 = static_cast< std::vector< int >::difference_type >(val3); - try { - std_vector_Sl_int_Sg____setslice____SWIG_0(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___setslice____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::difference_type arg2 ; - std::vector< int >::difference_type arg3 ; - std::vector< int,std::allocator< int > > *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:IntVector___setslice__",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___setslice__" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector___setslice__" "', argument " "2"" of type '" "std::vector< int >::difference_type""'"); - } - arg2 = static_cast< std::vector< int >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector___setslice__" "', argument " "3"" of type '" "std::vector< int >::difference_type""'"); - } - arg3 = static_cast< std::vector< int >::difference_type >(val3); - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "IntVector___setslice__" "', argument " "4"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector___setslice__" "', argument " "4"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - arg4 = ptr; - } - try { - std_vector_Sl_int_Sg____setslice____SWIG_1(arg1,arg2,arg3,(std::vector< int,std::allocator< int > > const &)*arg4); - } - 
catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___setslice__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector___setslice____SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[3], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_IntVector___setslice____SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector___setslice__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< int >::__setslice__(std::vector< int >::difference_type,std::vector< int >::difference_type)\n" - " std::vector< int >::__setslice__(std::vector< int >::difference_type,std::vector< int >::difference_type,std::vector< int,std::allocator< int > > const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector___delslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::difference_type arg2 ; - std::vector< int >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector___delslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___delslice__" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector___delslice__" "', argument " "2"" of type '" "std::vector< int >::difference_type""'"); - } - arg2 = static_cast< std::vector< int >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector___delslice__" "', argument " "3"" of type '" "std::vector< int >::difference_type""'"); - } - arg3 = 
static_cast< std::vector< int >::difference_type >(val3); - try { - std_vector_Sl_int_Sg____delslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___delitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___delitem__" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector___delitem__" "', argument " "2"" of type '" "std::vector< int >::difference_type""'"); - } - arg2 = static_cast< std::vector< int >::difference_type >(val2); - try { - std_vector_Sl_int_Sg____delitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___getitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< int,std::allocator< int > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___getitem__" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector___getitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - result = (std::vector< int,std::allocator< int > > *)std_vector_Sl_int_Sg____getitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___setitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - PySliceObject *arg2 = (PySliceObject 
*) 0 ; - std::vector< int,std::allocator< int > > *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___setitem__" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "IntVector___setitem__" "', argument " "3"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector___setitem__" "', argument " "3"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_int_Sg____setitem____SWIG_0(arg1,arg2,(std::vector< int,std::allocator< int > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___setitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector___setitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___setitem__" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_int_Sg____setitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___delitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char 
*)"OO:IntVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___delitem__" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector___delitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_int_Sg____delitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___delitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_IntVector___delitem____SWIG_1(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector___delitem____SWIG_0(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector___delitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< int >::__delitem__(std::vector< int >::difference_type)\n" - " std::vector< int >::__delitem__(PySliceObject *)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector___getitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< int >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___getitem__" "', argument " "1"" of type '" "std::vector< int > const *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector___getitem__" "', argument " "2"" of type '" "std::vector< int >::difference_type""'"); - } - arg2 = static_cast< std::vector< int >::difference_type >(val2); - try { - result = (std::vector< int >::value_type *) &std_vector_Sl_int_Sg____getitem____SWIG_1((std::vector< int > const *)arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - 
resultobj = SWIG_From_int(static_cast< int >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___getitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_IntVector___getitem____SWIG_0(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector___getitem____SWIG_1(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector___getitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< int >::__getitem__(PySliceObject *)\n" - " std::vector< int >::__getitem__(std::vector< int >::difference_type) const\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector___setitem____SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::difference_type arg2 ; - std::vector< int >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - std::vector< int >::value_type temp3 ; - int val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector___setitem__" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector___setitem__" "', argument " "2"" of type '" "std::vector< int >::difference_type""'"); - } - arg2 = static_cast< std::vector< int >::difference_type >(val2); - ecode3 = SWIG_AsVal_int(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector___setitem__" "', argument " "3"" of type '" "std::vector< int >::value_type""'"); - } - temp3 = static_cast< std::vector< int >::value_type >(val3); - arg3 = &temp3; - try { - std_vector_Sl_int_Sg____setitem____SWIG_2(arg1,arg2,(int const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector___setitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< 
int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_IntVector___setitem____SWIG_1(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_IntVector___setitem____SWIG_0(self, args); - } - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector___setitem____SWIG_2(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector___setitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< int >::__setitem__(PySliceObject *,std::vector< int,std::allocator< int > > const &)\n" - " std::vector< int >::__setitem__(PySliceObject *)\n" - " std::vector< int >::__setitem__(std::vector< int >::difference_type,std::vector< int >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector_pop(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int >::value_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_pop",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_pop" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - try { - result = (std::vector< int >::value_type)std_vector_Sl_int_Sg__pop(arg1); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_From_int(static_cast< int >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_append(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - std::vector< int >::value_type temp2 ; - int val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector_append",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_append" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_int(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector_append" "', argument " "2"" of type '" "std::vector< int >::value_type""'"); - } - temp2 = static_cast< std::vector< int >::value_type >(val2); - arg2 = &temp2; - 
std_vector_Sl_int_Sg__append(arg1,(int const &)*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_IntVector__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)":new_IntVector")) SWIG_fail; - result = (std::vector< int > *)new std::vector< int >(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_IntVector__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = 0 ; - int res1 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::vector< int > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_IntVector",&obj0)) SWIG_fail; - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_IntVector" "', argument " "1"" of type '" "std::vector< int > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_IntVector" "', argument " "1"" of type '" "std::vector< int > const &""'"); - } - arg1 = ptr; - } - result = (std::vector< int > *)new std::vector< int >((std::vector< int > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_empty(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_empty",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_empty" "', argument " "1"" of type '" "std::vector< int > const *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = (bool)((std::vector< int > const *)arg1)->empty(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_size" "', argument " "1"" of type '" "std::vector< int > const *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = ((std::vector< int > const *)arg1)->size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_swap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { 
- PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector_swap",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_swap" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "IntVector_swap" "', argument " "2"" of type '" "std::vector< int > &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector_swap" "', argument " "2"" of type '" "std::vector< int > &""'"); - } - arg2 = reinterpret_cast< std::vector< int > * >(argp2); - (arg1)->swap(*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_begin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_begin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_begin" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = (arg1)->begin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< int >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_end",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_end" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = (arg1)->end(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< int >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_rbegin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_rbegin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 
0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_rbegin" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = (arg1)->rbegin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< int >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_rend(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_rend",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_rend" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = (arg1)->rend(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< int >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_clear(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_clear",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_clear" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - (arg1)->clear(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_get_allocator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< std::allocator< int > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_get_allocator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_get_allocator" "', argument " "1"" of type '" "std::vector< int > const *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = ((std::vector< int > const *)arg1)->get_allocator(); - resultobj = SWIG_NewPointerObj((new std::vector< int >::allocator_type(static_cast< const std::vector< int >::allocator_type& >(result))), SWIGTYPE_p_std__allocatorT_int_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_IntVector__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int >::size_type arg1 ; - size_t val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_IntVector",&obj0)) SWIG_fail; - ecode1 = 
SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_IntVector" "', argument " "1"" of type '" "std::vector< int >::size_type""'"); - } - arg1 = static_cast< std::vector< int >::size_type >(val1); - result = (std::vector< int > *)new std::vector< int >(arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_pop_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_pop_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_pop_back" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - (arg1)->pop_back(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_resize__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector_resize",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_resize" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector_resize" "', argument " "2"" of type '" "std::vector< int >::size_type""'"); - } - arg2 = static_cast< std::vector< int >::size_type >(val2); - (arg1)->resize(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_erase__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::iterator arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< int >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector_erase",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_erase" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector_erase" "', argument " "2"" of type '" "std::vector< int >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = 
dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector_erase" "', argument " "2"" of type '" "std::vector< int >::iterator""'"); - } - } - result = std_vector_Sl_int_Sg__erase__SWIG_0(arg1,arg2); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< int >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_erase__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::iterator arg2 ; - std::vector< int >::iterator arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - swig::SwigPyIterator *iter3 = 0 ; - int res3 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< int >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector_erase",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_erase" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector_erase" "', argument " "2"" of type '" "std::vector< int >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector_erase" "', argument " "2"" of type '" "std::vector< int >::iterator""'"); - } - } - res3 = SWIG_ConvertPtr(obj2, SWIG_as_voidptrptr(&iter3), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res3) || !iter3) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector_erase" "', argument " "3"" of type '" "std::vector< int >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter3); - if (iter_t) { - arg3 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector_erase" "', argument " "3"" of type '" "std::vector< int >::iterator""'"); - } - } - result = std_vector_Sl_int_Sg__erase__SWIG_1(arg1,arg2,arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< int >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_erase(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 
0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_IntVector_erase__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[2], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_IntVector_erase__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector_erase'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< int >::erase(std::vector< int >::iterator)\n" - " std::vector< int >::erase(std::vector< int >::iterator,std::vector< int >::iterator)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_new_IntVector__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int >::size_type arg1 ; - std::vector< int >::value_type *arg2 = 0 ; - size_t val1 ; - int ecode1 = 0 ; - std::vector< int >::value_type temp2 ; - int val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< int > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:new_IntVector",&obj0,&obj1)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_IntVector" "', argument " "1"" of type '" "std::vector< int >::size_type""'"); - } - arg1 = static_cast< std::vector< int >::size_type >(val1); - ecode2 = SWIG_AsVal_int(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "new_IntVector" "', argument " "2"" of type '" "std::vector< int >::value_type""'"); - } - temp2 = static_cast< std::vector< int >::value_type >(val2); - arg2 = &temp2; - result = (std::vector< int > *)new std::vector< int >(arg1,(std::vector< int >::value_type const &)*arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_IntVector(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 0) { - return _wrap_new_IntVector__SWIG_0(self, args); - } - if (argc == 1) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_IntVector__SWIG_2(self, args); - } - } - if (argc == 1) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_IntVector__SWIG_1(self, args); - } - } - if (argc == 2) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return 
_wrap_new_IntVector__SWIG_3(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'new_IntVector'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< int >::vector()\n" - " std::vector< int >::vector(std::vector< int > const &)\n" - " std::vector< int >::vector(std::vector< int >::size_type)\n" - " std::vector< int >::vector(std::vector< int >::size_type,std::vector< int >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector_push_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - std::vector< int >::value_type temp2 ; - int val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector_push_back",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_push_back" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_int(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector_push_back" "', argument " "2"" of type '" "std::vector< int >::value_type""'"); - } - temp2 = static_cast< std::vector< int >::value_type >(val2); - arg2 = &temp2; - (arg1)->push_back((std::vector< int >::value_type const &)*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_front(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_front",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_front" "', argument " "1"" of type '" "std::vector< int > const *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = (std::vector< int >::value_type *) &((std::vector< int > const *)arg1)->front(); - resultobj = SWIG_From_int(static_cast< int >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_back" "', argument " "1"" of type '" "std::vector< int > const *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = (std::vector< int >::value_type *) &((std::vector< int > const *)arg1)->back(); - resultobj = SWIG_From_int(static_cast< int >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN 
PyObject *_wrap_IntVector_assign(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::size_type arg2 ; - std::vector< int >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - std::vector< int >::value_type temp3 ; - int val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector_assign",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_assign" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector_assign" "', argument " "2"" of type '" "std::vector< int >::size_type""'"); - } - arg2 = static_cast< std::vector< int >::size_type >(val2); - ecode3 = SWIG_AsVal_int(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector_assign" "', argument " "3"" of type '" "std::vector< int >::value_type""'"); - } - temp3 = static_cast< std::vector< int >::value_type >(val3); - arg3 = &temp3; - (arg1)->assign(arg2,(std::vector< int >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_resize__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::size_type arg2 ; - std::vector< int >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - std::vector< int >::value_type temp3 ; - int val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector_resize",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_resize" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector_resize" "', argument " "2"" of type '" "std::vector< int >::size_type""'"); - } - arg2 = static_cast< std::vector< int >::size_type >(val2); - ecode3 = SWIG_AsVal_int(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector_resize" "', argument " "3"" of type '" "std::vector< int >::value_type""'"); - } - temp3 = static_cast< std::vector< int >::value_type >(val3); - arg3 = &temp3; - (arg1)->resize(arg2,(std::vector< int >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_resize(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); 
- } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector_resize__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector_resize__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector_resize'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< int >::resize(std::vector< int >::size_type)\n" - " std::vector< int >::resize(std::vector< int >::size_type,std::vector< int >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector_insert__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::iterator arg2 ; - std::vector< int >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - std::vector< int >::value_type temp3 ; - int val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< int >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector_insert",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_insert" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector_insert" "', argument " "2"" of type '" "std::vector< int >::iterator""'"); - } else { - swig::SwigPyIterator_T<std::vector< int >::iterator > *iter_t = dynamic_cast<swig::SwigPyIterator_T<std::vector< int >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector_insert" "', argument " "2"" of type '" "std::vector< int >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_int(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector_insert" "', argument " "3"" of type '" "std::vector< int >::value_type""'"); - } - temp3 = static_cast< std::vector< int >::value_type >(val3); - arg3 = &temp3; - result = std_vector_Sl_int_Sg__insert__SWIG_0(arg1,arg2,(int const &)*arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< int >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_insert__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::iterator arg2 ; - std::vector< int >::size_type arg3 ; - std::vector< int >::value_type *arg4 
= 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - size_t val3 ; - int ecode3 = 0 ; - std::vector< int >::value_type temp4 ; - int val4 ; - int ecode4 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:IntVector_insert",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_insert" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector_insert" "', argument " "2"" of type '" "std::vector< int >::iterator""'"); - } else { - swig::SwigPyIterator_T<std::vector< int >::iterator > *iter_t = dynamic_cast<swig::SwigPyIterator_T<std::vector< int >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector_insert" "', argument " "2"" of type '" "std::vector< int >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector_insert" "', argument " "3"" of type '" "std::vector< int >::size_type""'"); - } - arg3 = static_cast< std::vector< int >::size_type >(val3); - ecode4 = SWIG_AsVal_int(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "IntVector_insert" "', argument " "4"" of type '" "std::vector< int >::value_type""'"); - } - temp4 = static_cast< std::vector< int >::value_type >(val4); - arg4 = &temp4; - std_vector_Sl_int_Sg__insert__SWIG_1(arg1,arg2,arg3,(int const &)*arg4); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_insert(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast<swig::SwigPyIterator_T<std::vector< int >::iterator > *>(iter) != 0)); - if (_v) { - { - int res = SWIG_AsVal_int(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector_insert__SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast<swig::SwigPyIterator_T<std::vector< int >::iterator > *>(iter) != 0)); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector_insert__SWIG_1(self, args); - } - } - } - } - } - 
-fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector_insert'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< int >::insert(std::vector< int >::iterator,std::vector< int >::value_type const &)\n" - " std::vector< int >::insert(std::vector< int >::iterator,std::vector< int >::size_type,std::vector< int >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector_reserve(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - std::vector< int >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector_reserve",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_reserve" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector_reserve" "', argument " "2"" of type '" "std::vector< int >::size_type""'"); - } - arg2 = static_cast< std::vector< int >::size_type >(val2); - (arg1)->reserve(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector_capacity(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector_capacity",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector_capacity" "', argument " "1"" of type '" "std::vector< int > const *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - result = ((std::vector< int > const *)arg1)->capacity(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_delete_IntVector(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int > *arg1 = (std::vector< int > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:delete_IntVector",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_IntVector" "', argument " "1"" of type '" "std::vector< int > *""'"); - } - arg1 = reinterpret_cast< std::vector< int > * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *IntVector_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *obj; - if (!PyArg_ParseTuple(args,(char *)"O:swigregister", &obj)) return NULL; - SWIG_TypeNewClientData(SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, SWIG_NewClientData(obj)); - return SWIG_Py_Void(); -} - -SWIGINTERN PyObject 
*_wrap_StringVector_iterator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - PyObject **arg2 = (PyObject **) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - swig::SwigPyIterator *result = 0 ; - - arg2 = &obj0; - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_iterator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_iterator" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = (swig::SwigPyIterator *)std_vector_Sl_std_string_Sg__iterator(arg1,arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_swig__SwigPyIterator, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___nonzero__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector___nonzero__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___nonzero__" "', argument " "1"" of type '" "std::vector< std::string > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = (bool)std_vector_Sl_std_string_Sg____nonzero__((std::vector< std::string > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___bool__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector___bool__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___bool__" "', argument " "1"" of type '" "std::vector< std::string > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = (bool)std_vector_Sl_std_string_Sg____bool__((std::vector< std::string > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___len__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector___len__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___len__" "', argument " "1"" of type '" "std::vector< 
std::string > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = std_vector_Sl_std_string_Sg____len__((std::vector< std::string > const *)arg1); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___getslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::difference_type arg2 ; - std::vector< std::string >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::string,std::allocator< std::string > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:StringVector___getslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___getslice__" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "StringVector___getslice__" "', argument " "2"" of type '" "std::vector< std::string >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::string >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "StringVector___getslice__" "', argument " "3"" of type '" "std::vector< std::string >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::string >::difference_type >(val3); - try { - result = (std::vector< std::string,std::allocator< std::string > > *)std_vector_Sl_std_string_Sg____getslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___setslice____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::difference_type arg2 ; - std::vector< std::string >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:StringVector___setslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___setslice__" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - 
SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "StringVector___setslice__" "', argument " "2"" of type '" "std::vector< std::string >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::string >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "StringVector___setslice__" "', argument " "3"" of type '" "std::vector< std::string >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::string >::difference_type >(val3); - try { - std_vector_Sl_std_string_Sg____setslice____SWIG_0(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___setslice____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::difference_type arg2 ; - std::vector< std::string >::difference_type arg3 ; - std::vector< std::string,std::allocator< std::string > > *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:StringVector___setslice__",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___setslice__" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "StringVector___setslice__" "', argument " "2"" of type '" "std::vector< std::string >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::string >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "StringVector___setslice__" "', argument " "3"" of type '" "std::vector< std::string >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::string >::difference_type >(val3); - { - std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "StringVector___setslice__" "', argument " "4"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringVector___setslice__" "', argument " "4"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - arg4 = ptr; - } - try { - std_vector_Sl_std_string_Sg____setslice____SWIG_1(arg1,arg2,arg3,(std::vector< std::string,std::allocator< std::string > > const &)*arg4); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - 
catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___setslice__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_StringVector___setslice____SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[3], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_StringVector___setslice____SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'StringVector___setslice__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::string >::__setslice__(std::vector< std::string >::difference_type,std::vector< std::string >::difference_type)\n" - " std::vector< std::string >::__setslice__(std::vector< std::string >::difference_type,std::vector< std::string >::difference_type,std::vector< std::string,std::allocator< std::string > > const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_StringVector___delslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::difference_type arg2 ; - std::vector< std::string >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:StringVector___delslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___delslice__" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "StringVector___delslice__" "', argument " "2"" of type '" "std::vector< std::string >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::string >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in 
method '" "StringVector___delslice__" "', argument " "3"" of type '" "std::vector< std::string >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::string >::difference_type >(val3); - try { - std_vector_Sl_std_string_Sg____delslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___delitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___delitem__" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "StringVector___delitem__" "', argument " "2"" of type '" "std::vector< std::string >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::string >::difference_type >(val2); - try { - std_vector_Sl_std_string_Sg____delitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___getitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::string,std::allocator< std::string > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___getitem__" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector___getitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - result = (std::vector< std::string,std::allocator< std::string > > *)std_vector_Sl_std_string_Sg____getitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), 
SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___setitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - std::vector< std::string,std::allocator< std::string > > *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:StringVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___setitem__" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - { - std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "StringVector___setitem__" "', argument " "3"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringVector___setitem__" "', argument " "3"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_std_string_Sg____setitem____SWIG_0(arg1,arg2,(std::vector< std::string,std::allocator< std::string > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___setitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringVector___setitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___setitem__" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_std_string_Sg____setitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - 
SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___delitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___delitem__" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector___delitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_std_string_Sg____delitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___delitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_StringVector___delitem____SWIG_1(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_StringVector___delitem____SWIG_0(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'StringVector___delitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::string >::__delitem__(std::vector< std::string >::difference_type)\n" - " std::vector< std::string >::__delitem__(PySliceObject *)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_StringVector___getitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::string >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 
0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___getitem__" "', argument " "1"" of type '" "std::vector< std::string > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "StringVector___getitem__" "', argument " "2"" of type '" "std::vector< std::string >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::string >::difference_type >(val2); - try { - result = (std::vector< std::string >::value_type *) &std_vector_Sl_std_string_Sg____getitem____SWIG_1((std::vector< std::string > const *)arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_From_std_string(static_cast< std::string >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___getitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_StringVector___getitem____SWIG_0(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_StringVector___getitem____SWIG_1(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'StringVector___getitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::string >::__getitem__(PySliceObject *)\n" - " std::vector< std::string >::__getitem__(std::vector< std::string >::difference_type) const\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_StringVector___setitem____SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::difference_type arg2 ; - std::vector< std::string >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:StringVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector___setitem__" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "StringVector___setitem__" "', argument " "2"" of type '" "std::vector< std::string >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::string >::difference_type 
>(val2); - { - std::string *ptr = (std::string *)0; - res3 = SWIG_AsPtr_std_string(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "StringVector___setitem__" "', argument " "3"" of type '" "std::vector< std::string >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringVector___setitem__" "', argument " "3"" of type '" "std::vector< std::string >::value_type const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_std_string_Sg____setitem____SWIG_2(arg1,arg2,(std::string const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector___setitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_StringVector___setitem____SWIG_1(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_StringVector___setitem____SWIG_0(self, args); - } - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = SWIG_AsPtr_std_string(argv[2], (std::string**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_StringVector___setitem____SWIG_2(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'StringVector___setitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::string >::__setitem__(PySliceObject *,std::vector< std::string,std::allocator< std::string > > const &)\n" - " std::vector< std::string >::__setitem__(PySliceObject *)\n" - " std::vector< std::string >::__setitem__(std::vector< std::string >::difference_type,std::vector< std::string >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_StringVector_pop(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string >::value_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_pop",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_pop" "', argument " "1"" of type '" 
"std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - try { - result = std_vector_Sl_std_string_Sg__pop(arg1); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_From_std_string(static_cast< std::string >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_append(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringVector_append",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_append" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - { - std::string *ptr = (std::string *)0; - res2 = SWIG_AsPtr_std_string(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "StringVector_append" "', argument " "2"" of type '" "std::vector< std::string >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringVector_append" "', argument " "2"" of type '" "std::vector< std::string >::value_type const &""'"); - } - arg2 = ptr; - } - std_vector_Sl_std_string_Sg__append(arg1,(std::string const &)*arg2); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_StringVector__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)":new_StringVector")) SWIG_fail; - result = (std::vector< std::string > *)new std::vector< std::string >(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_StringVector__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = 0 ; - int res1 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::vector< std::string > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_StringVector",&obj0)) SWIG_fail; - { - std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_StringVector" "', argument " "1"" of type '" "std::vector< std::string > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_StringVector" "', argument " "1"" of type '" "std::vector< std::string > const &""'"); - } - arg1 = ptr; - } - result = (std::vector< std::string > *)new std::vector< std::string >((std::vector< std::string > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), 
SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_empty(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_empty",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_empty" "', argument " "1"" of type '" "std::vector< std::string > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = (bool)((std::vector< std::string > const *)arg1)->empty(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_size" "', argument " "1"" of type '" "std::vector< std::string > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = ((std::vector< std::string > const *)arg1)->size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_swap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringVector_swap",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_swap" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "StringVector_swap" "', argument " "2"" of type '" "std::vector< std::string > &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringVector_swap" "', argument " "2"" of type '" "std::vector< std::string > &""'"); - } - arg2 = reinterpret_cast< std::vector< std::string > * >(argp2); - (arg1)->swap(*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_begin(PyObject 
*SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_begin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_begin" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = (arg1)->begin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::string >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_end",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_end" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = (arg1)->end(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::string >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_rbegin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_rbegin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_rbegin" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = (arg1)->rbegin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::string >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_rend(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_rend",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in 
method '" "StringVector_rend" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = (arg1)->rend(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::string >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_clear(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_clear",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_clear" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - (arg1)->clear(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_get_allocator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< std::allocator< std::string > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_get_allocator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_get_allocator" "', argument " "1"" of type '" "std::vector< std::string > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = ((std::vector< std::string > const *)arg1)->get_allocator(); - resultobj = SWIG_NewPointerObj((new std::vector< std::string >::allocator_type(static_cast< const std::vector< std::string >::allocator_type& >(result))), SWIGTYPE_p_std__allocatorT_std__string_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_StringVector__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string >::size_type arg1 ; - size_t val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_StringVector",&obj0)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_StringVector" "', argument " "1"" of type '" "std::vector< std::string >::size_type""'"); - } - arg1 = static_cast< std::vector< std::string >::size_type >(val1); - result = (std::vector< std::string > *)new std::vector< std::string >(arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_pop_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; 
- - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_pop_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_pop_back" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - (arg1)->pop_back(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_resize__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringVector_resize",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_resize" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "StringVector_resize" "', argument " "2"" of type '" "std::vector< std::string >::size_type""'"); - } - arg2 = static_cast< std::vector< std::string >::size_type >(val2); - (arg1)->resize(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_erase__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::iterator arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::string >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringVector_erase",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_erase" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector_erase" "', argument " "2"" of type '" "std::vector< std::string >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector_erase" "', argument " "2"" of type '" "std::vector< std::string >::iterator""'"); - } - } - result = std_vector_Sl_std_string_Sg__erase__SWIG_0(arg1,arg2); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::string >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return 
NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_erase__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::iterator arg2 ; - std::vector< std::string >::iterator arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - swig::SwigPyIterator *iter3 = 0 ; - int res3 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::string >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:StringVector_erase",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_erase" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector_erase" "', argument " "2"" of type '" "std::vector< std::string >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector_erase" "', argument " "2"" of type '" "std::vector< std::string >::iterator""'"); - } - } - res3 = SWIG_ConvertPtr(obj2, SWIG_as_voidptrptr(&iter3), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res3) || !iter3) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector_erase" "', argument " "3"" of type '" "std::vector< std::string >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter3); - if (iter_t) { - arg3 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector_erase" "', argument " "3"" of type '" "std::vector< std::string >::iterator""'"); - } - } - result = std_vector_Sl_std_string_Sg__erase__SWIG_1(arg1,arg2,arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::string >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_erase(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_StringVector_erase__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int 
res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[2], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_StringVector_erase__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'StringVector_erase'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::string >::erase(std::vector< std::string >::iterator)\n" - " std::vector< std::string >::erase(std::vector< std::string >::iterator,std::vector< std::string >::iterator)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_new_StringVector__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string >::size_type arg1 ; - std::vector< std::string >::value_type *arg2 = 0 ; - size_t val1 ; - int ecode1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::string > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:new_StringVector",&obj0,&obj1)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_StringVector" "', argument " "1"" of type '" "std::vector< std::string >::size_type""'"); - } - arg1 = static_cast< std::vector< std::string >::size_type >(val1); - { - std::string *ptr = (std::string *)0; - res2 = SWIG_AsPtr_std_string(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "new_StringVector" "', argument " "2"" of type '" "std::vector< std::string >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_StringVector" "', argument " "2"" of type '" "std::vector< std::string >::value_type const &""'"); - } - arg2 = ptr; - } - result = (std::vector< std::string > *)new std::vector< std::string >(arg1,(std::vector< std::string >::value_type const &)*arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_StringVector(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 0) { - return _wrap_new_StringVector__SWIG_0(self, args); - } - if (argc == 1) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_StringVector__SWIG_2(self, args); - } - } - if (argc == 1) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_StringVector__SWIG_1(self, args); - } - } - if (argc == 2) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = SWIG_AsPtr_std_string(argv[1], (std::string**)(0)); - _v = 
SWIG_CheckState(res); - if (_v) { - return _wrap_new_StringVector__SWIG_3(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'new_StringVector'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::string >::vector()\n" - " std::vector< std::string >::vector(std::vector< std::string > const &)\n" - " std::vector< std::string >::vector(std::vector< std::string >::size_type)\n" - " std::vector< std::string >::vector(std::vector< std::string >::size_type,std::vector< std::string >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_StringVector_push_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringVector_push_back",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_push_back" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - { - std::string *ptr = (std::string *)0; - res2 = SWIG_AsPtr_std_string(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "StringVector_push_back" "', argument " "2"" of type '" "std::vector< std::string >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringVector_push_back" "', argument " "2"" of type '" "std::vector< std::string >::value_type const &""'"); - } - arg2 = ptr; - } - (arg1)->push_back((std::vector< std::string >::value_type const &)*arg2); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_front(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_front",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_front" "', argument " "1"" of type '" "std::vector< std::string > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = (std::vector< std::string >::value_type *) &((std::vector< std::string > const *)arg1)->front(); - resultobj = SWIG_From_std_string(static_cast< std::string >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_back",&obj0)) 
SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_back" "', argument " "1"" of type '" "std::vector< std::string > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = (std::vector< std::string >::value_type *) &((std::vector< std::string > const *)arg1)->back(); - resultobj = SWIG_From_std_string(static_cast< std::string >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_assign(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::size_type arg2 ; - std::vector< std::string >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:StringVector_assign",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_assign" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "StringVector_assign" "', argument " "2"" of type '" "std::vector< std::string >::size_type""'"); - } - arg2 = static_cast< std::vector< std::string >::size_type >(val2); - { - std::string *ptr = (std::string *)0; - res3 = SWIG_AsPtr_std_string(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "StringVector_assign" "', argument " "3"" of type '" "std::vector< std::string >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringVector_assign" "', argument " "3"" of type '" "std::vector< std::string >::value_type const &""'"); - } - arg3 = ptr; - } - (arg1)->assign(arg2,(std::vector< std::string >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_resize__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::size_type arg2 ; - std::vector< std::string >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:StringVector_resize",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_resize" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if 
(!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "StringVector_resize" "', argument " "2"" of type '" "std::vector< std::string >::size_type""'"); - } - arg2 = static_cast< std::vector< std::string >::size_type >(val2); - { - std::string *ptr = (std::string *)0; - res3 = SWIG_AsPtr_std_string(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "StringVector_resize" "', argument " "3"" of type '" "std::vector< std::string >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringVector_resize" "', argument " "3"" of type '" "std::vector< std::string >::value_type const &""'"); - } - arg3 = ptr; - } - (arg1)->resize(arg2,(std::vector< std::string >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_resize(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_StringVector_resize__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = SWIG_AsPtr_std_string(argv[2], (std::string**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_StringVector_resize__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'StringVector_resize'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::string >::resize(std::vector< std::string >::size_type)\n" - " std::vector< std::string >::resize(std::vector< std::string >::size_type,std::vector< std::string >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_StringVector_insert__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::iterator arg2 ; - std::vector< std::string >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::string >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:StringVector_insert",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_insert" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), 
swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector_insert" "', argument " "2"" of type '" "std::vector< std::string >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector_insert" "', argument " "2"" of type '" "std::vector< std::string >::iterator""'"); - } - } - { - std::string *ptr = (std::string *)0; - res3 = SWIG_AsPtr_std_string(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "StringVector_insert" "', argument " "3"" of type '" "std::vector< std::string >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringVector_insert" "', argument " "3"" of type '" "std::vector< std::string >::value_type const &""'"); - } - arg3 = ptr; - } - result = std_vector_Sl_std_string_Sg__insert__SWIG_0(arg1,arg2,(std::string const &)*arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::string >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_insert__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::iterator arg2 ; - std::vector< std::string >::size_type arg3 ; - std::vector< std::string >::value_type *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - size_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:StringVector_insert",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_insert" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector_insert" "', argument " "2"" of type '" "std::vector< std::string >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "StringVector_insert" "', argument " "2"" of type '" "std::vector< std::string >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "StringVector_insert" "', argument " "3"" of type '" "std::vector< std::string >::size_type""'"); - } - arg3 = static_cast< std::vector< std::string >::size_type >(val3); - { - std::string *ptr = (std::string *)0; - res4 = SWIG_AsPtr_std_string(obj3, &ptr); - if 
(!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "StringVector_insert" "', argument " "4"" of type '" "std::vector< std::string >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringVector_insert" "', argument " "4"" of type '" "std::vector< std::string >::value_type const &""'"); - } - arg4 = ptr; - } - std_vector_Sl_std_string_Sg__insert__SWIG_1(arg1,arg2,arg3,(std::string const &)*arg4); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_insert(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - int res = SWIG_AsPtr_std_string(argv[2], (std::string**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_StringVector_insert__SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = SWIG_AsPtr_std_string(argv[3], (std::string**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_StringVector_insert__SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'StringVector_insert'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::string >::insert(std::vector< std::string >::iterator,std::vector< std::string >::value_type const &)\n" - " std::vector< std::string >::insert(std::vector< std::string >::iterator,std::vector< std::string >::size_type,std::vector< std::string >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_StringVector_reserve(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - std::vector< std::string >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringVector_reserve",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_reserve" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - ecode2 = 
SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "StringVector_reserve" "', argument " "2"" of type '" "std::vector< std::string >::size_type""'"); - } - arg2 = static_cast< std::vector< std::string >::size_type >(val2); - (arg1)->reserve(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringVector_capacity(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:StringVector_capacity",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringVector_capacity" "', argument " "1"" of type '" "std::vector< std::string > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - result = ((std::vector< std::string > const *)arg1)->capacity(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_delete_StringVector(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::string > *arg1 = (std::vector< std::string > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:delete_StringVector",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_StringVector" "', argument " "1"" of type '" "std::vector< std::string > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::string > * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *StringVector_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *obj; - if (!PyArg_ParseTuple(args,(char *)"O:swigregister", &obj)) return NULL; - SWIG_TypeNewClientData(SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, SWIG_NewClientData(obj)); - return SWIG_Py_Void(); -} - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_iterator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - PyObject **arg2 = (PyObject **) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - swig::SwigPyIterator *result = 0 ; - - arg2 = &obj0; - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble_iterator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_iterator" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = (swig::SwigPyIterator *)std_vector_Sl_std_vector_Sl_double_Sg__Sg__iterator(arg1,arg2); - resultobj = 
SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_swig__SwigPyIterator, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___nonzero__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble___nonzero__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___nonzero__" "', argument " "1"" of type '" "std::vector< std::vector< double > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = (bool)std_vector_Sl_std_vector_Sl_double_Sg__Sg____nonzero__((std::vector< std::vector< double > > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___bool__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble___bool__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___bool__" "', argument " "1"" of type '" "std::vector< std::vector< double > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = (bool)std_vector_Sl_std_vector_Sl_double_Sg__Sg____bool__((std::vector< std::vector< double > > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___len__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< double > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble___len__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___len__" "', argument " "1"" of type '" "std::vector< std::vector< double > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = std_vector_Sl_std_vector_Sl_double_Sg__Sg____len__((std::vector< std::vector< double > > const *)arg1); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject 
*_wrap_VectorOfStructVectorDouble___getslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::difference_type arg2 ; - std::vector< std::vector< double > >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorDouble___getslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___getslice__" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorDouble___getslice__" "', argument " "2"" of type '" "std::vector< std::vector< double > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< double > >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "VectorOfStructVectorDouble___getslice__" "', argument " "3"" of type '" "std::vector< std::vector< double > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::vector< double > >::difference_type >(val3); - try { - result = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)std_vector_Sl_std_vector_Sl_double_Sg__Sg____getslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___setslice____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::difference_type arg2 ; - std::vector< std::vector< double > >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorDouble___setslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - 
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___setslice__" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorDouble___setslice__" "', argument " "2"" of type '" "std::vector< std::vector< double > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< double > >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "VectorOfStructVectorDouble___setslice__" "', argument " "3"" of type '" "std::vector< std::vector< double > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::vector< double > >::difference_type >(val3); - try { - std_vector_Sl_std_vector_Sl_double_Sg__Sg____setslice____SWIG_0(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___setslice____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::difference_type arg2 ; - std::vector< std::vector< double > >::difference_type arg3 ; - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:VectorOfStructVectorDouble___setslice__",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___setslice__" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorDouble___setslice__" "', argument " "2"" of type '" "std::vector< std::vector< double > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< double > >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "VectorOfStructVectorDouble___setslice__" "', argument " "3"" of type '" "std::vector< std::vector< double > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::vector< double > >::difference_type >(val3); - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< 
double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "VectorOfStructVectorDouble___setslice__" "', argument " "4"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorDouble___setslice__" "', argument " "4"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - arg4 = ptr; - } - try { - std_vector_Sl_std_vector_Sl_double_Sg__Sg____setslice____SWIG_1(arg1,arg2,arg3,(std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &)*arg4); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___setslice__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_VectorOfStructVectorDouble___setslice____SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[3], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_VectorOfStructVectorDouble___setslice____SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'VectorOfStructVectorDouble___setslice__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< double > >::__setslice__(std::vector< std::vector< double > >::difference_type,std::vector< std::vector< double > >::difference_type)\n" - " std::vector< std::vector< double > >::__setslice__(std::vector< std::vector< double > >::difference_type,std::vector< std::vector< double > >::difference_type,std::vector< std::vector< double,std::allocator< double > 
>,std::allocator< std::vector< double,std::allocator< double > > > > const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___delslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::difference_type arg2 ; - std::vector< std::vector< double > >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorDouble___delslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___delslice__" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorDouble___delslice__" "', argument " "2"" of type '" "std::vector< std::vector< double > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< double > >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "VectorOfStructVectorDouble___delslice__" "', argument " "3"" of type '" "std::vector< std::vector< double > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::vector< double > >::difference_type >(val3); - try { - std_vector_Sl_std_vector_Sl_double_Sg__Sg____delslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___delitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorDouble___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___delitem__" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorDouble___delitem__" "', argument " "2"" of type '" "std::vector< std::vector< double > 
>::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< double > >::difference_type >(val2); - try { - std_vector_Sl_std_vector_Sl_double_Sg__Sg____delitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___getitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorDouble___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___getitem__" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble___getitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - result = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)std_vector_Sl_std_vector_Sl_double_Sg__Sg____getitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___setitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorDouble___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___setitem__" "', argument " "1"" of type '" "std::vector< std::vector< double > > 
*""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "VectorOfStructVectorDouble___setitem__" "', argument " "3"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorDouble___setitem__" "', argument " "3"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_std_vector_Sl_double_Sg__Sg____setitem____SWIG_0(arg1,arg2,(std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___setitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorDouble___setitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___setitem__" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_std_vector_Sl_double_Sg__Sg____setitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___delitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< 
std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorDouble___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___delitem__" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble___delitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_std_vector_Sl_double_Sg__Sg____delitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___delitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_VectorOfStructVectorDouble___delitem____SWIG_1(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_VectorOfStructVectorDouble___delitem____SWIG_0(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'VectorOfStructVectorDouble___delitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< double > >::__delitem__(std::vector< std::vector< double > >::difference_type)\n" - " std::vector< std::vector< double > >::__delitem__(PySliceObject *)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___getitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::vector< double > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorDouble___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___getitem__" "', argument " "1"" of type '" "std::vector< std::vector< double > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorDouble___getitem__" "', argument " "2"" of type '" "std::vector< std::vector< double > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< double > >::difference_type >(val2); - try { - result = (std::vector< std::vector< double > >::value_type *) &std_vector_Sl_std_vector_Sl_double_Sg__Sg____getitem____SWIG_1((std::vector< std::vector< double > > const *)arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = swig::from(static_cast< std::vector< double,std::allocator< double > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___getitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_VectorOfStructVectorDouble___getitem____SWIG_0(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_VectorOfStructVectorDouble___getitem____SWIG_1(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'VectorOfStructVectorDouble___getitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< double > >::__getitem__(PySliceObject *)\n" - " std::vector< std::vector< double > >::__getitem__(std::vector< std::vector< double > >::difference_type) const\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___setitem____SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::difference_type arg2 ; - std::vector< std::vector< double > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorDouble___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble___setitem__" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorDouble___setitem__" "', argument " "2"" of type '" "std::vector< std::vector< double > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< double > >::difference_type >(val2); - { - std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "VectorOfStructVectorDouble___setitem__" "', argument " "3"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorDouble___setitem__" "', argument " "3"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_std_vector_Sl_double_Sg__Sg____setitem____SWIG_2(arg1,arg2,(std::vector< double,std::allocator< double > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble___setitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_VectorOfStructVectorDouble___setitem____SWIG_1(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_VectorOfStructVectorDouble___setitem____SWIG_0(self, args); - } - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< double,std::allocator< double > >**)(0)); - _v = 
SWIG_CheckState(res); - if (_v) { - return _wrap_VectorOfStructVectorDouble___setitem____SWIG_2(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'VectorOfStructVectorDouble___setitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< double > >::__setitem__(PySliceObject *,std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &)\n" - " std::vector< std::vector< double > >::__setitem__(PySliceObject *)\n" - " std::vector< std::vector< double > >::__setitem__(std::vector< std::vector< double > >::difference_type,std::vector< std::vector< double > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_pop(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< double > >::value_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble_pop",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_pop" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - try { - result = std_vector_Sl_std_vector_Sl_double_Sg__Sg__pop(arg1); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = swig::from(static_cast< std::vector< double,std::allocator< double > > >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_append(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorDouble_append",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_append" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - { - std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "VectorOfStructVectorDouble_append" "', argument " "2"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorDouble_append" "', argument " "2"" of type '" "std::vector< std::vector< double > 
>::value_type const &""'"); - } - arg2 = ptr; - } - std_vector_Sl_std_vector_Sl_double_Sg__Sg__append(arg1,(std::vector< double,std::allocator< double > > const &)*arg2); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_VectorOfStructVectorDouble__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)":new_VectorOfStructVectorDouble")) SWIG_fail; - result = (std::vector< std::vector< double > > *)new std::vector< std::vector< double > >(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_VectorOfStructVectorDouble__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double,std::allocator< double > > > *arg1 = 0 ; - int res1 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::vector< std::vector< double > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_VectorOfStructVectorDouble",&obj0)) SWIG_fail; - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_VectorOfStructVectorDouble" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_VectorOfStructVectorDouble" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > > > const &""'"); - } - arg1 = ptr; - } - result = (std::vector< std::vector< double > > *)new std::vector< std::vector< double > >((std::vector< std::vector< double,std::allocator< double > > > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_empty(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble_empty",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_empty" "', argument " "1"" of type '" "std::vector< std::vector< double > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< 
double > > * >(argp1); - result = (bool)((std::vector< std::vector< double > > const *)arg1)->empty(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< double > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_size" "', argument " "1"" of type '" "std::vector< std::vector< double > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = ((std::vector< std::vector< double > > const *)arg1)->size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_swap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double,std::allocator< double > > > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorDouble_swap",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_swap" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "VectorOfStructVectorDouble_swap" "', argument " "2"" of type '" "std::vector< std::vector< double,std::allocator< double > > > &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorDouble_swap" "', argument " "2"" of type '" "std::vector< std::vector< double,std::allocator< double > > > &""'"); - } - arg2 = reinterpret_cast< std::vector< std::vector< double,std::allocator< double > > > * >(argp2); - (arg1)->swap(*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_begin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< double > >::iterator result; - - if (!PyArg_ParseTuple(args,(char 
*)"O:VectorOfStructVectorDouble_begin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_begin" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = (arg1)->begin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< double > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< double > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble_end",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_end" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = (arg1)->end(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< double > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_rbegin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< double > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble_rbegin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_rbegin" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = (arg1)->rbegin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< double > >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_rend(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< double > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char 
*)"O:VectorOfStructVectorDouble_rend",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_rend" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = (arg1)->rend(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< double > >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_clear(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble_clear",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_clear" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - (arg1)->clear(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_get_allocator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< std::allocator< std::vector< double,std::allocator< double > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble_get_allocator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_get_allocator" "', argument " "1"" of type '" "std::vector< std::vector< double > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = ((std::vector< std::vector< double > > const *)arg1)->get_allocator(); - resultobj = SWIG_NewPointerObj((new std::vector< std::vector< double > >::allocator_type(static_cast< const std::vector< std::vector< double > >::allocator_type& >(result))), SWIGTYPE_p_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_VectorOfStructVectorDouble__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > >::size_type arg1 ; - size_t val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< double > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_VectorOfStructVectorDouble",&obj0)) SWIG_fail; - ecode1 = 
SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_VectorOfStructVectorDouble" "', argument " "1"" of type '" "std::vector< std::vector< double > >::size_type""'"); - } - arg1 = static_cast< std::vector< std::vector< double > >::size_type >(val1); - result = (std::vector< std::vector< double > > *)new std::vector< std::vector< double > >(arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_pop_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble_pop_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_pop_back" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - (arg1)->pop_back(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_resize__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorDouble_resize",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_resize" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorDouble_resize" "', argument " "2"" of type '" "std::vector< std::vector< double > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< double > >::size_type >(val2); - (arg1)->resize(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_erase__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::iterator arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::vector< double > >::iterator result; - - 
if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorDouble_erase",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_erase" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble_erase" "', argument " "2"" of type '" "std::vector< std::vector< double > >::iterator""'"); - } else { - swig::SwigPyIterator_T >::iterator > *iter_t = dynamic_cast >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble_erase" "', argument " "2"" of type '" "std::vector< std::vector< double > >::iterator""'"); - } - } - result = std_vector_Sl_std_vector_Sl_double_Sg__Sg__erase__SWIG_0(arg1,arg2); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< double > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_erase__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::iterator arg2 ; - std::vector< std::vector< double > >::iterator arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - swig::SwigPyIterator *iter3 = 0 ; - int res3 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::vector< double > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorDouble_erase",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_erase" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble_erase" "', argument " "2"" of type '" "std::vector< std::vector< double > >::iterator""'"); - } else { - swig::SwigPyIterator_T >::iterator > *iter_t = dynamic_cast >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble_erase" "', argument " "2"" of type '" "std::vector< std::vector< double > >::iterator""'"); - } - } - res3 = SWIG_ConvertPtr(obj2, SWIG_as_voidptrptr(&iter3), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res3) || !iter3) { - 
SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble_erase" "', argument " "3"" of type '" "std::vector< std::vector< double > >::iterator""'"); - } else { - swig::SwigPyIterator_T >::iterator > *iter_t = dynamic_cast >::iterator > *>(iter3); - if (iter_t) { - arg3 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble_erase" "', argument " "3"" of type '" "std::vector< std::vector< double > >::iterator""'"); - } - } - result = std_vector_Sl_std_vector_Sl_double_Sg__Sg__erase__SWIG_1(arg1,arg2,arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< double > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_erase(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast >::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_VectorOfStructVectorDouble_erase__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast >::iterator > *>(iter) != 0)); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[2], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast >::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_VectorOfStructVectorDouble_erase__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'VectorOfStructVectorDouble_erase'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< double > >::erase(std::vector< std::vector< double > >::iterator)\n" - " std::vector< std::vector< double > >::erase(std::vector< std::vector< double > >::iterator,std::vector< std::vector< double > >::iterator)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_new_VectorOfStructVectorDouble__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > >::size_type arg1 ; - std::vector< std::vector< double > >::value_type *arg2 = 0 ; - size_t val1 ; - int ecode1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::vector< double > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:new_VectorOfStructVectorDouble",&obj0,&obj1)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - 
if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_VectorOfStructVectorDouble" "', argument " "1"" of type '" "std::vector< std::vector< double > >::size_type""'"); - } - arg1 = static_cast< std::vector< std::vector< double > >::size_type >(val1); - { - std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "new_VectorOfStructVectorDouble" "', argument " "2"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_VectorOfStructVectorDouble" "', argument " "2"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - arg2 = ptr; - } - result = (std::vector< std::vector< double > > *)new std::vector< std::vector< double > >(arg1,(std::vector< std::vector< double > >::value_type const &)*arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_VectorOfStructVectorDouble(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 0) { - return _wrap_new_VectorOfStructVectorDouble__SWIG_0(self, args); - } - if (argc == 1) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_VectorOfStructVectorDouble__SWIG_2(self, args); - } - } - if (argc == 1) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_VectorOfStructVectorDouble__SWIG_1(self, args); - } - } - if (argc == 2) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[1], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_VectorOfStructVectorDouble__SWIG_3(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'new_VectorOfStructVectorDouble'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< double > >::vector()\n" - " std::vector< std::vector< double > >::vector(std::vector< std::vector< double,std::allocator< double > > > const &)\n" - " std::vector< std::vector< double > >::vector(std::vector< std::vector< double > >::size_type)\n" - " std::vector< std::vector< double > >::vector(std::vector< std::vector< double > >::size_type,std::vector< std::vector< double > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_push_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - 
std::vector< std::vector< double > >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorDouble_push_back",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_push_back" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - { - std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "VectorOfStructVectorDouble_push_back" "', argument " "2"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorDouble_push_back" "', argument " "2"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - arg2 = ptr; - } - (arg1)->push_back((std::vector< std::vector< double > >::value_type const &)*arg2); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_front(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< double > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble_front",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_front" "', argument " "1"" of type '" "std::vector< std::vector< double > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = (std::vector< std::vector< double > >::value_type *) &((std::vector< std::vector< double > > const *)arg1)->front(); - resultobj = swig::from(static_cast< std::vector< double,std::allocator< double > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< double > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorDouble_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_back" "', 
argument " "1"" of type '" "std::vector< std::vector< double > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - result = (std::vector< std::vector< double > >::value_type *) &((std::vector< std::vector< double > > const *)arg1)->back(); - resultobj = swig::from(static_cast< std::vector< double,std::allocator< double > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_assign(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::size_type arg2 ; - std::vector< std::vector< double > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorDouble_assign",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_assign" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorDouble_assign" "', argument " "2"" of type '" "std::vector< std::vector< double > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< double > >::size_type >(val2); - { - std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "VectorOfStructVectorDouble_assign" "', argument " "3"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorDouble_assign" "', argument " "3"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - arg3 = ptr; - } - (arg1)->assign(arg2,(std::vector< std::vector< double > >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_resize__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::size_type arg2 ; - std::vector< std::vector< double > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorDouble_resize",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if 
(!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_resize" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorDouble_resize" "', argument " "2"" of type '" "std::vector< std::vector< double > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< double > >::size_type >(val2); - { - std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "VectorOfStructVectorDouble_resize" "', argument " "3"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorDouble_resize" "', argument " "3"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - arg3 = ptr; - } - (arg1)->resize(arg2,(std::vector< std::vector< double > >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_resize(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_VectorOfStructVectorDouble_resize__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_VectorOfStructVectorDouble_resize__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'VectorOfStructVectorDouble_resize'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< double > >::resize(std::vector< std::vector< double > >::size_type)\n" - " std::vector< std::vector< double > >::resize(std::vector< std::vector< double > >::size_type,std::vector< std::vector< double > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_insert__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::iterator arg2 ; - std::vector< std::vector< 
double > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::vector< double > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorDouble_insert",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_insert" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble_insert" "', argument " "2"" of type '" "std::vector< std::vector< double > >::iterator""'"); - } else { - swig::SwigPyIterator_T >::iterator > *iter_t = dynamic_cast >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble_insert" "', argument " "2"" of type '" "std::vector< std::vector< double > >::iterator""'"); - } - } - { - std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "VectorOfStructVectorDouble_insert" "', argument " "3"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorDouble_insert" "', argument " "3"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - arg3 = ptr; - } - result = std_vector_Sl_std_vector_Sl_double_Sg__Sg__insert__SWIG_0(arg1,arg2,(std::vector< double,std::allocator< double > > const &)*arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< double > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_insert__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double > > *arg1 = (std::vector< std::vector< double > > *) 0 ; - std::vector< std::vector< double > >::iterator arg2 ; - std::vector< std::vector< double > >::size_type arg3 ; - std::vector< std::vector< double > >::value_type *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - size_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:VectorOfStructVectorDouble_insert",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorDouble_insert" "', argument " "1"" of type '" "std::vector< std::vector< double > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< double > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble_insert" "', argument " "2"" of type '" "std::vector< std::vector< double > >::iterator""'"); - } else { - swig::SwigPyIterator_T >::iterator > *iter_t = dynamic_cast >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorDouble_insert" "', argument " "2"" of type '" "std::vector< std::vector< double > >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "VectorOfStructVectorDouble_insert" "', argument " "3"" of type '" "std::vector< std::vector< double > >::size_type""'"); - } - arg3 = static_cast< std::vector< std::vector< double > >::size_type >(val3); - { - std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "VectorOfStructVectorDouble_insert" "', argument " "4"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorDouble_insert" "', argument " "4"" of type '" "std::vector< std::vector< double > >::value_type const &""'"); - } - arg4 = ptr; - } - std_vector_Sl_std_vector_Sl_double_Sg__Sg__insert__SWIG_1(arg1,arg2,arg3,(std::vector< double,std::allocator< double > > const &)*arg4); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorDouble_insert(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast >::iterator > *>(iter) != 0)); - if (_v) { - int res = swig::asptr(argv[2], (std::vector< double,std::allocator< double > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_VectorOfStructVectorDouble_insert__SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > 
[Deleted hunk continues: auto-generated SWIG Python wrapper code, heavily mangled during extraction (diff lines run together, template arguments stripped from angle brackets). The recoverable content is the binding layer for two nested-vector classes, VectorOfStructVectorDouble (std::vector<std::vector<double>>) and VectorOfStructVectorInt (std::vector<std::vector<int>>): the tail of the VectorOfStructVectorDouble wrappers (insert overload dispatch, reserve, capacity, destructor, swigregister) followed by the full VectorOfStructVectorInt surface — iterator/__nonzero__/__bool__/__len__, __getslice__/__setslice__/__delslice__, __getitem__/__setitem__/__delitem__ with their overload dispatchers, pop, append, empty, size, swap, begin/end/rbegin/rend, clear, get_allocator, pop_back, resize, erase (single-iterator and range overloads plus dispatcher), and the constructor overloads with their dispatcher.]
const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorInt_push_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int > > *arg1 = (std::vector< std::vector< int > > *) 0 ; - std::vector< std::vector< int > >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorInt_push_back",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorInt_push_back" "', argument " "1"" of type '" "std::vector< std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< int > > * >(argp1); - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "VectorOfStructVectorInt_push_back" "', argument " "2"" of type '" "std::vector< std::vector< int > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorInt_push_back" "', argument " "2"" of type '" "std::vector< std::vector< int > >::value_type const &""'"); - } - arg2 = ptr; - } - (arg1)->push_back((std::vector< std::vector< int > >::value_type const &)*arg2); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorInt_front(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int > > *arg1 = (std::vector< std::vector< int > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< int > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorInt_front",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorInt_front" "', argument " "1"" of type '" "std::vector< std::vector< int > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< int > > * >(argp1); - result = (std::vector< std::vector< int > >::value_type *) &((std::vector< std::vector< int > > const *)arg1)->front(); - resultobj = swig::from(static_cast< std::vector< int,std::allocator< int > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorInt_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int > > *arg1 = (std::vector< std::vector< int > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< int > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorInt_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorInt_back" "', argument " "1"" of type '" "std::vector< std::vector< int > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< int > > * >(argp1); - result = (std::vector< std::vector< int > >::value_type *) &((std::vector< std::vector< int > > const *)arg1)->back(); - resultobj = swig::from(static_cast< std::vector< int,std::allocator< int > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorInt_assign(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int > > *arg1 = (std::vector< std::vector< int > > *) 0 ; - std::vector< std::vector< int > >::size_type arg2 ; - std::vector< std::vector< int > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorInt_assign",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorInt_assign" "', argument " "1"" of type '" "std::vector< std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< int > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorInt_assign" "', argument " "2"" of type '" "std::vector< std::vector< int > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< int > >::size_type >(val2); - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "VectorOfStructVectorInt_assign" "', argument " "3"" of type '" "std::vector< std::vector< int > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorInt_assign" "', argument " "3"" of type '" "std::vector< std::vector< int > >::value_type const &""'"); - } - arg3 = ptr; - } - (arg1)->assign(arg2,(std::vector< std::vector< int > >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorInt_resize__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int > > *arg1 = (std::vector< std::vector< int > > *) 0 ; - std::vector< std::vector< int > >::size_type arg2 ; - std::vector< std::vector< int > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorInt_resize",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorInt_resize" "', argument " "1"" of type '" "std::vector< std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< int > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorInt_resize" "', argument " "2"" of type '" "std::vector< std::vector< int > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< int > >::size_type >(val2); - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "VectorOfStructVectorInt_resize" "', argument " "3"" of type '" "std::vector< std::vector< int > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorInt_resize" "', argument " "3"" of type '" "std::vector< std::vector< int > >::value_type const &""'"); - } - arg3 = ptr; - } - (arg1)->resize(arg2,(std::vector< std::vector< int > >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorInt_resize(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_VectorOfStructVectorInt_resize__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_VectorOfStructVectorInt_resize__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'VectorOfStructVectorInt_resize'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< int > >::resize(std::vector< std::vector< int > >::size_type)\n" - " std::vector< std::vector< int > >::resize(std::vector< std::vector< int > >::size_type,std::vector< std::vector< int > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorInt_insert__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int > > *arg1 = (std::vector< std::vector< int > > *) 0 ; - std::vector< std::vector< int > >::iterator 
arg2 ; - std::vector< std::vector< int > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::vector< int > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:VectorOfStructVectorInt_insert",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorInt_insert" "', argument " "1"" of type '" "std::vector< std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< int > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorInt_insert" "', argument " "2"" of type '" "std::vector< std::vector< int > >::iterator""'"); - } else { - swig::SwigPyIterator_T >::iterator > *iter_t = dynamic_cast >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorInt_insert" "', argument " "2"" of type '" "std::vector< std::vector< int > >::iterator""'"); - } - } - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "VectorOfStructVectorInt_insert" "', argument " "3"" of type '" "std::vector< std::vector< int > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorInt_insert" "', argument " "3"" of type '" "std::vector< std::vector< int > >::value_type const &""'"); - } - arg3 = ptr; - } - result = std_vector_Sl_std_vector_Sl_int_Sg__Sg__insert__SWIG_0(arg1,arg2,(std::vector< int,std::allocator< int > > const &)*arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< int > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorInt_insert__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int > > *arg1 = (std::vector< std::vector< int > > *) 0 ; - std::vector< std::vector< int > >::iterator arg2 ; - std::vector< std::vector< int > >::size_type arg3 ; - std::vector< std::vector< int > >::value_type *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - size_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:VectorOfStructVectorInt_insert",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - 
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorInt_insert" "', argument " "1"" of type '" "std::vector< std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< int > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorInt_insert" "', argument " "2"" of type '" "std::vector< std::vector< int > >::iterator""'"); - } else { - swig::SwigPyIterator_T >::iterator > *iter_t = dynamic_cast >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "VectorOfStructVectorInt_insert" "', argument " "2"" of type '" "std::vector< std::vector< int > >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "VectorOfStructVectorInt_insert" "', argument " "3"" of type '" "std::vector< std::vector< int > >::size_type""'"); - } - arg3 = static_cast< std::vector< std::vector< int > >::size_type >(val3); - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "VectorOfStructVectorInt_insert" "', argument " "4"" of type '" "std::vector< std::vector< int > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "VectorOfStructVectorInt_insert" "', argument " "4"" of type '" "std::vector< std::vector< int > >::value_type const &""'"); - } - arg4 = ptr; - } - std_vector_Sl_std_vector_Sl_int_Sg__Sg__insert__SWIG_1(arg1,arg2,arg3,(std::vector< int,std::allocator< int > > const &)*arg4); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorInt_insert(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast >::iterator > *>(iter) != 0)); - if (_v) { - int res = swig::asptr(argv[2], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_VectorOfStructVectorInt_insert__SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast >::iterator > *>(iter) != 0)); - if (_v) { - { 
- int res = SWIG_AsVal_size_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[3], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_VectorOfStructVectorInt_insert__SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'VectorOfStructVectorInt_insert'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< int > >::insert(std::vector< std::vector< int > >::iterator,std::vector< std::vector< int > >::value_type const &)\n" - " std::vector< std::vector< int > >::insert(std::vector< std::vector< int > >::iterator,std::vector< std::vector< int > >::size_type,std::vector< std::vector< int > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorInt_reserve(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int > > *arg1 = (std::vector< std::vector< int > > *) 0 ; - std::vector< std::vector< int > >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:VectorOfStructVectorInt_reserve",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorInt_reserve" "', argument " "1"" of type '" "std::vector< std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< int > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "VectorOfStructVectorInt_reserve" "', argument " "2"" of type '" "std::vector< std::vector< int > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< int > >::size_type >(val2); - (arg1)->reserve(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_VectorOfStructVectorInt_capacity(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int > > *arg1 = (std::vector< std::vector< int > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< int > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:VectorOfStructVectorInt_capacity",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "VectorOfStructVectorInt_capacity" "', argument " "1"" of type '" "std::vector< std::vector< int > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< int > > * >(argp1); - result = ((std::vector< std::vector< int > > const *)arg1)->capacity(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_delete_VectorOfStructVectorInt(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int > > *arg1 = (std::vector< std::vector< int > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - 
PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:delete_VectorOfStructVectorInt",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_VectorOfStructVectorInt" "', argument " "1"" of type '" "std::vector< std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< int > > * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *VectorOfStructVectorInt_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *obj; - if (!PyArg_ParseTuple(args,(char *)"O:swigregister", &obj)) return NULL; - SWIG_TypeNewClientData(SWIGTYPE_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, SWIG_NewClientData(obj)); - return SWIG_Py_Void(); -} - -SWIGINTERN PyObject *_wrap_FloatVector_iterator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - PyObject **arg2 = (PyObject **) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - swig::SwigPyIterator *result = 0 ; - - arg2 = &obj0; - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_iterator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_iterator" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = (swig::SwigPyIterator *)std_vector_Sl_float_Sg__iterator(arg1,arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_swig__SwigPyIterator, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___nonzero__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector___nonzero__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___nonzero__" "', argument " "1"" of type '" "std::vector< float > const *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = (bool)std_vector_Sl_float_Sg____nonzero__((std::vector< float > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___bool__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector___bool__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___bool__" "', argument " "1"" of type 
'" "std::vector< float > const *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = (bool)std_vector_Sl_float_Sg____bool__((std::vector< float > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___len__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< float >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector___len__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___len__" "', argument " "1"" of type '" "std::vector< float > const *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = std_vector_Sl_float_Sg____len__((std::vector< float > const *)arg1); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___getslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::difference_type arg2 ; - std::vector< float >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< float,std::allocator< float > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:FloatVector___getslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___getslice__" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector___getslice__" "', argument " "2"" of type '" "std::vector< float >::difference_type""'"); - } - arg2 = static_cast< std::vector< float >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "FloatVector___getslice__" "', argument " "3"" of type '" "std::vector< float >::difference_type""'"); - } - arg3 = static_cast< std::vector< float >::difference_type >(val3); - try { - result = (std::vector< float,std::allocator< float > > *)std_vector_Sl_float_Sg____getslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___setslice____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::difference_type arg2 ; - std::vector< 
float >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:FloatVector___setslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___setslice__" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector___setslice__" "', argument " "2"" of type '" "std::vector< float >::difference_type""'"); - } - arg2 = static_cast< std::vector< float >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "FloatVector___setslice__" "', argument " "3"" of type '" "std::vector< float >::difference_type""'"); - } - arg3 = static_cast< std::vector< float >::difference_type >(val3); - try { - std_vector_Sl_float_Sg____setslice____SWIG_0(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___setslice____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::difference_type arg2 ; - std::vector< float >::difference_type arg3 ; - std::vector< float,std::allocator< float > > *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:FloatVector___setslice__",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___setslice__" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector___setslice__" "', argument " "2"" of type '" "std::vector< float >::difference_type""'"); - } - arg2 = static_cast< std::vector< float >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "FloatVector___setslice__" "', argument " "3"" of type '" "std::vector< float >::difference_type""'"); - } - arg3 = static_cast< std::vector< float >::difference_type >(val3); - { - std::vector< float,std::allocator< float > > *ptr = (std::vector< float,std::allocator< float > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "FloatVector___setslice__" "', argument " "4"" of type 
'" "std::vector< float,std::allocator< float > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "FloatVector___setslice__" "', argument " "4"" of type '" "std::vector< float,std::allocator< float > > const &""'"); - } - arg4 = ptr; - } - try { - std_vector_Sl_float_Sg____setslice____SWIG_1(arg1,arg2,arg3,(std::vector< float,std::allocator< float > > const &)*arg4); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___setslice__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_FloatVector___setslice____SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[3], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_FloatVector___setslice____SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'FloatVector___setslice__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< float >::__setslice__(std::vector< float >::difference_type,std::vector< float >::difference_type)\n" - " std::vector< float >::__setslice__(std::vector< float >::difference_type,std::vector< float >::difference_type,std::vector< float,std::allocator< float > > const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___delslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::difference_type arg2 ; - std::vector< float >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:FloatVector___delslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___delslice__" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if 
(!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector___delslice__" "', argument " "2"" of type '" "std::vector< float >::difference_type""'"); - } - arg2 = static_cast< std::vector< float >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "FloatVector___delslice__" "', argument " "3"" of type '" "std::vector< float >::difference_type""'"); - } - arg3 = static_cast< std::vector< float >::difference_type >(val3); - try { - std_vector_Sl_float_Sg____delslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___delitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:FloatVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___delitem__" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector___delitem__" "', argument " "2"" of type '" "std::vector< float >::difference_type""'"); - } - arg2 = static_cast< std::vector< float >::difference_type >(val2); - try { - std_vector_Sl_float_Sg____delitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___getitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< float,std::allocator< float > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:FloatVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___getitem__" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector___getitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - result = (std::vector< float,std::allocator< float > > *)std_vector_Sl_float_Sg____getitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - 
SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___setitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - std::vector< float,std::allocator< float > > *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:FloatVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___setitem__" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - { - std::vector< float,std::allocator< float > > *ptr = (std::vector< float,std::allocator< float > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "FloatVector___setitem__" "', argument " "3"" of type '" "std::vector< float,std::allocator< float > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "FloatVector___setitem__" "', argument " "3"" of type '" "std::vector< float,std::allocator< float > > const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_float_Sg____setitem____SWIG_0(arg1,arg2,(std::vector< float,std::allocator< float > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___setitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:FloatVector___setitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___setitem__" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_float_Sg____setitem____SWIG_1(arg1,arg2); - } - 
catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___delitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:FloatVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___delitem__" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector___delitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_float_Sg____delitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___delitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_FloatVector___delitem____SWIG_1(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_FloatVector___delitem____SWIG_0(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'FloatVector___delitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< float >::__delitem__(std::vector< float >::difference_type)\n" - " std::vector< float >::__delitem__(PySliceObject *)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___getitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< float >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:FloatVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" 
"FloatVector___getitem__" "', argument " "1"" of type '" "std::vector< float > const *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector___getitem__" "', argument " "2"" of type '" "std::vector< float >::difference_type""'"); - } - arg2 = static_cast< std::vector< float >::difference_type >(val2); - try { - result = (std::vector< float >::value_type *) &std_vector_Sl_float_Sg____getitem____SWIG_1((std::vector< float > const *)arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_From_float(static_cast< float >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___getitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_FloatVector___getitem____SWIG_0(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_FloatVector___getitem____SWIG_1(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'FloatVector___getitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< float >::__getitem__(PySliceObject *)\n" - " std::vector< float >::__getitem__(std::vector< float >::difference_type) const\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___setitem____SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::difference_type arg2 ; - std::vector< float >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - std::vector< float >::value_type temp3 ; - float val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:FloatVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector___setitem__" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector___setitem__" "', argument " "2"" of type '" "std::vector< float >::difference_type""'"); - } - arg2 = static_cast< std::vector< float >::difference_type >(val2); - ecode3 = SWIG_AsVal_float(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "FloatVector___setitem__" "', argument " "3"" of type '" "std::vector< 
float >::value_type""'"); - } - temp3 = static_cast< std::vector< float >::value_type >(val3); - arg3 = &temp3; - try { - std_vector_Sl_float_Sg____setitem____SWIG_2(arg1,arg2,(float const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector___setitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_FloatVector___setitem____SWIG_1(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_FloatVector___setitem____SWIG_0(self, args); - } - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_float(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_FloatVector___setitem____SWIG_2(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'FloatVector___setitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< float >::__setitem__(PySliceObject *,std::vector< float,std::allocator< float > > const &)\n" - " std::vector< float >::__setitem__(PySliceObject *)\n" - " std::vector< float >::__setitem__(std::vector< float >::difference_type,std::vector< float >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_pop(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< float >::value_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_pop",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_pop" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - try { - result = (std::vector< float >::value_type)std_vector_Sl_float_Sg__pop(arg1); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_From_float(static_cast< float >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_append(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int 
res1 = 0 ; - std::vector< float >::value_type temp2 ; - float val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:FloatVector_append",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_append" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_float(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector_append" "', argument " "2"" of type '" "std::vector< float >::value_type""'"); - } - temp2 = static_cast< std::vector< float >::value_type >(val2); - arg2 = &temp2; - std_vector_Sl_float_Sg__append(arg1,(float const &)*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_FloatVector__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)":new_FloatVector")) SWIG_fail; - result = (std::vector< float > *)new std::vector< float >(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_FloatVector__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = 0 ; - int res1 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::vector< float > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_FloatVector",&obj0)) SWIG_fail; - { - std::vector< float,std::allocator< float > > *ptr = (std::vector< float,std::allocator< float > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_FloatVector" "', argument " "1"" of type '" "std::vector< float > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_FloatVector" "', argument " "1"" of type '" "std::vector< float > const &""'"); - } - arg1 = ptr; - } - result = (std::vector< float > *)new std::vector< float >((std::vector< float > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_empty(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_empty",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_empty" "', argument " "1"" of type '" "std::vector< float > const *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = (bool)((std::vector< float > const *)arg1)->empty(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - 
-SWIGINTERN PyObject *_wrap_FloatVector_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< float >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_size" "', argument " "1"" of type '" "std::vector< float > const *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = ((std::vector< float > const *)arg1)->size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_swap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:FloatVector_swap",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_swap" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "FloatVector_swap" "', argument " "2"" of type '" "std::vector< float > &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "FloatVector_swap" "', argument " "2"" of type '" "std::vector< float > &""'"); - } - arg2 = reinterpret_cast< std::vector< float > * >(argp2); - (arg1)->swap(*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_begin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< float >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_begin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_begin" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = (arg1)->begin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< float >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< float >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_end",&obj0)) SWIG_fail; - 
res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_end" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = (arg1)->end(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< float >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_rbegin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< float >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_rbegin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_rbegin" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = (arg1)->rbegin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< float >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_rend(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< float >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_rend",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_rend" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = (arg1)->rend(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< float >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_clear(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_clear",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_clear" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - (arg1)->clear(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_get_allocator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< 
std::allocator< float > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_get_allocator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_get_allocator" "', argument " "1"" of type '" "std::vector< float > const *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = ((std::vector< float > const *)arg1)->get_allocator(); - resultobj = SWIG_NewPointerObj((new std::vector< float >::allocator_type(static_cast< const std::vector< float >::allocator_type& >(result))), SWIGTYPE_p_std__allocatorT_float_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_FloatVector__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float >::size_type arg1 ; - size_t val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< float > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_FloatVector",&obj0)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_FloatVector" "', argument " "1"" of type '" "std::vector< float >::size_type""'"); - } - arg1 = static_cast< std::vector< float >::size_type >(val1); - result = (std::vector< float > *)new std::vector< float >(arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_pop_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_pop_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_pop_back" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - (arg1)->pop_back(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_resize__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:FloatVector_resize",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_resize" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector_resize" "', argument " "2"" of type '" "std::vector< float >::size_type""'"); - } - arg2 = static_cast< std::vector< float >::size_type >(val2); - (arg1)->resize(arg2); - resultobj = SWIG_Py_Void(); 
- return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_erase__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::iterator arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< float >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OO:FloatVector_erase",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_erase" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector_erase" "', argument " "2"" of type '" "std::vector< float >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector_erase" "', argument " "2"" of type '" "std::vector< float >::iterator""'"); - } - } - result = std_vector_Sl_float_Sg__erase__SWIG_0(arg1,arg2); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< float >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_erase__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::iterator arg2 ; - std::vector< float >::iterator arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - swig::SwigPyIterator *iter3 = 0 ; - int res3 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< float >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:FloatVector_erase",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_erase" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector_erase" "', argument " "2"" of type '" "std::vector< float >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector_erase" "', argument " "2"" of type '" "std::vector< float >::iterator""'"); - } - } - res3 = SWIG_ConvertPtr(obj2, SWIG_as_voidptrptr(&iter3), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res3) || !iter3) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector_erase" "', 
argument " "3"" of type '" "std::vector< float >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter3); - if (iter_t) { - arg3 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector_erase" "', argument " "3"" of type '" "std::vector< float >::iterator""'"); - } - } - result = std_vector_Sl_float_Sg__erase__SWIG_1(arg1,arg2,arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< float >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_erase(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_FloatVector_erase__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[2], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_FloatVector_erase__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'FloatVector_erase'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< float >::erase(std::vector< float >::iterator)\n" - " std::vector< float >::erase(std::vector< float >::iterator,std::vector< float >::iterator)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_new_FloatVector__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float >::size_type arg1 ; - std::vector< float >::value_type *arg2 = 0 ; - size_t val1 ; - int ecode1 = 0 ; - std::vector< float >::value_type temp2 ; - float val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< float > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:new_FloatVector",&obj0,&obj1)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_FloatVector" "', argument " "1"" of type '" "std::vector< float >::size_type""'"); - } - arg1 = static_cast< std::vector< float >::size_type >(val1); - ecode2 = SWIG_AsVal_float(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "new_FloatVector" "', argument " "2"" of type '" "std::vector< float >::value_type""'"); - } - temp2 = static_cast< std::vector< float >::value_type >(val2); 
- arg2 = &temp2; - result = (std::vector< float > *)new std::vector< float >(arg1,(std::vector< float >::value_type const &)*arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_FloatVector(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 0) { - return _wrap_new_FloatVector__SWIG_0(self, args); - } - if (argc == 1) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_FloatVector__SWIG_2(self, args); - } - } - if (argc == 1) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_FloatVector__SWIG_1(self, args); - } - } - if (argc == 2) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_float(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_FloatVector__SWIG_3(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'new_FloatVector'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< float >::vector()\n" - " std::vector< float >::vector(std::vector< float > const &)\n" - " std::vector< float >::vector(std::vector< float >::size_type)\n" - " std::vector< float >::vector(std::vector< float >::size_type,std::vector< float >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_push_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - std::vector< float >::value_type temp2 ; - float val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:FloatVector_push_back",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_push_back" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_float(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector_push_back" "', argument " "2"" of type '" "std::vector< float >::value_type""'"); - } - temp2 = static_cast< std::vector< float >::value_type >(val2); - arg2 = &temp2; - (arg1)->push_back((std::vector< float >::value_type const &)*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_front(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< float >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_front",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_front" "', argument " "1"" of type '" "std::vector< float > const *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = (std::vector< float >::value_type *) &((std::vector< float > const *)arg1)->front(); - resultobj = SWIG_From_float(static_cast< float >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< float >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_back" "', argument " "1"" of type '" "std::vector< float > const *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = (std::vector< float >::value_type *) &((std::vector< float > const *)arg1)->back(); - resultobj = SWIG_From_float(static_cast< float >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_assign(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::size_type arg2 ; - std::vector< float >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - std::vector< float >::value_type temp3 ; - float val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:FloatVector_assign",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_assign" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector_assign" "', argument " "2"" of type '" "std::vector< float >::size_type""'"); - } - arg2 = static_cast< std::vector< float >::size_type >(val2); - ecode3 = SWIG_AsVal_float(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "FloatVector_assign" "', argument " "3"" of type '" "std::vector< float >::value_type""'"); - } - temp3 = static_cast< std::vector< float >::value_type >(val3); - arg3 = &temp3; - (arg1)->assign(arg2,(std::vector< float >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_resize__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::size_type arg2 ; - std::vector< float >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - std::vector< float >::value_type temp3 ; - float val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - 
PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:FloatVector_resize",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_resize" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector_resize" "', argument " "2"" of type '" "std::vector< float >::size_type""'"); - } - arg2 = static_cast< std::vector< float >::size_type >(val2); - ecode3 = SWIG_AsVal_float(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "FloatVector_resize" "', argument " "3"" of type '" "std::vector< float >::value_type""'"); - } - temp3 = static_cast< std::vector< float >::value_type >(val3); - arg3 = &temp3; - (arg1)->resize(arg2,(std::vector< float >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_resize(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_FloatVector_resize__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_float(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_FloatVector_resize__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'FloatVector_resize'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< float >::resize(std::vector< float >::size_type)\n" - " std::vector< float >::resize(std::vector< float >::size_type,std::vector< float >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_insert__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::iterator arg2 ; - std::vector< float >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - std::vector< float >::value_type temp3 ; - float val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< float >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:FloatVector_insert",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_insert" "', argument " "1"" of type '" "std::vector< 
float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector_insert" "', argument " "2"" of type '" "std::vector< float >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector_insert" "', argument " "2"" of type '" "std::vector< float >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_float(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "FloatVector_insert" "', argument " "3"" of type '" "std::vector< float >::value_type""'"); - } - temp3 = static_cast< std::vector< float >::value_type >(val3); - arg3 = &temp3; - result = std_vector_Sl_float_Sg__insert__SWIG_0(arg1,arg2,(float const &)*arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< float >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_insert__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::iterator arg2 ; - std::vector< float >::size_type arg3 ; - std::vector< float >::value_type *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - size_t val3 ; - int ecode3 = 0 ; - std::vector< float >::value_type temp4 ; - float val4 ; - int ecode4 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:FloatVector_insert",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_insert" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector_insert" "', argument " "2"" of type '" "std::vector< float >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "FloatVector_insert" "', argument " "2"" of type '" "std::vector< float >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "FloatVector_insert" "', argument " "3"" of type '" "std::vector< float >::size_type""'"); - } - arg3 = static_cast< std::vector< float >::size_type >(val3); - ecode4 = SWIG_AsVal_float(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "FloatVector_insert" "', argument " "4"" of type '" "std::vector< float >::value_type""'"); - } - temp4 = static_cast< std::vector< float >::value_type >(val4); - arg4 = 
&temp4; - std_vector_Sl_float_Sg__insert__SWIG_1(arg1,arg2,arg3,(float const &)*arg4); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_insert(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - { - int res = SWIG_AsVal_float(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_FloatVector_insert__SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< float,std::allocator< float > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_float(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_FloatVector_insert__SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'FloatVector_insert'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< float >::insert(std::vector< float >::iterator,std::vector< float >::value_type const &)\n" - " std::vector< float >::insert(std::vector< float >::iterator,std::vector< float >::size_type,std::vector< float >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_reserve(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - std::vector< float >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:FloatVector_reserve",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_reserve" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "FloatVector_reserve" "', argument " "2"" of type '" "std::vector< float >::size_type""'"); - } - arg2 = static_cast< std::vector< float >::size_type >(val2); - (arg1)->reserve(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_FloatVector_capacity(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< float 
>::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:FloatVector_capacity",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "FloatVector_capacity" "', argument " "1"" of type '" "std::vector< float > const *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - result = ((std::vector< float > const *)arg1)->capacity(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_delete_FloatVector(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< float > *arg1 = (std::vector< float > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:delete_FloatVector",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_FloatVector" "', argument " "1"" of type '" "std::vector< float > *""'"); - } - arg1 = reinterpret_cast< std::vector< float > * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *FloatVector_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *obj; - if (!PyArg_ParseTuple(args,(char *)"O:swigregister", &obj)) return NULL; - SWIG_TypeNewClientData(SWIGTYPE_p_std__vectorT_float_std__allocatorT_float_t_t, SWIG_NewClientData(obj)); - return SWIG_Py_Void(); -} - -SWIGINTERN PyObject *_wrap_new_Pair__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::pair< float,std::vector< int > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)":new_Pair")) SWIG_fail; - result = (std::pair< float,std::vector< int > > *)new std::pair< float,std::vector< int > >(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_Pair__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - float arg1 ; - std::vector< int,std::allocator< int > > arg2 ; - float val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::pair< float,std::vector< int > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:new_Pair",&obj0,&obj1)) SWIG_fail; - ecode1 = SWIG_AsVal_float(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_Pair" "', argument " "1"" of type '" "float""'"); - } - arg1 = static_cast< float >(val1); - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - int res = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res) || !ptr) { - SWIG_exception_fail(SWIG_ArgError((ptr ? 
res : SWIG_TypeError)), "in method '" "new_Pair" "', argument " "2"" of type '" "std::vector< int,std::allocator< int > >""'"); - } - arg2 = *ptr; - if (SWIG_IsNewObj(res)) delete ptr; - } - result = (std::pair< float,std::vector< int > > *)new std::pair< float,std::vector< int > >(arg1,arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_Pair__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::pair< float,std::vector< int,std::allocator< int > > > *arg1 = 0 ; - int res1 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::pair< float,std::vector< int > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_Pair",&obj0)) SWIG_fail; - { - std::pair< float,std::vector< int,std::allocator< int > > > *ptr = (std::pair< float,std::vector< int,std::allocator< int > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_Pair" "', argument " "1"" of type '" "std::pair< float,std::vector< int,std::allocator< int > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_Pair" "', argument " "1"" of type '" "std::pair< float,std::vector< int,std::allocator< int > > > const &""'"); - } - arg1 = ptr; - } - result = (std::pair< float,std::vector< int > > *)new std::pair< float,std::vector< int > >((std::pair< float,std::vector< int,std::allocator< int > > > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_Pair(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 0) { - return _wrap_new_Pair__SWIG_0(self, args); - } - if (argc == 1) { - int _v; - int res = swig::asptr(argv[0], (std::pair< float,std::vector< int,std::allocator< int > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_Pair__SWIG_2(self, args); - } - } - if (argc == 2) { - int _v; - { - int res = SWIG_AsVal_float(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[1], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_Pair__SWIG_1(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'new_Pair'.\n" - " Possible C/C++ prototypes are:\n" - " std::pair< float,std::vector< int > >::pair()\n" - " std::pair< float,std::vector< int > >::pair(float,std::vector< int,std::allocator< int > >)\n" - " std::pair< float,std::vector< int > >::pair(std::pair< float,std::vector< int,std::allocator< int > > > const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_Pair_first_set(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::pair< float,std::vector< int > > *arg1 = (std::pair< float,std::vector< int > > *) 0 ; - float arg2 ; - void *argp1 = 0 ; - int res1 
= 0 ; - float val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:Pair_first_set",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Pair_first_set" "', argument " "1"" of type '" "std::pair< float,std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::pair< float,std::vector< int > > * >(argp1); - ecode2 = SWIG_AsVal_float(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "Pair_first_set" "', argument " "2"" of type '" "float""'"); - } - arg2 = static_cast< float >(val2); - if (arg1) (arg1)->first = arg2; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Pair_first_get(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::pair< float,std::vector< int > > *arg1 = (std::pair< float,std::vector< int > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - float result; - - if (!PyArg_ParseTuple(args,(char *)"O:Pair_first_get",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Pair_first_get" "', argument " "1"" of type '" "std::pair< float,std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::pair< float,std::vector< int > > * >(argp1); - result = (float) ((arg1)->first); - resultobj = SWIG_From_float(static_cast< float >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Pair_second_set(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::pair< float,std::vector< int > > *arg1 = (std::pair< float,std::vector< int > > *) 0 ; - std::vector< int,std::allocator< int > > *arg2 = (std::vector< int,std::allocator< int > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:Pair_second_set",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Pair_second_set" "', argument " "1"" of type '" "std::pair< float,std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::pair< float,std::vector< int > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2,SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Pair_second_set" "', argument " "2"" of type '" "std::vector< int,std::allocator< int > > *""'"); - } - arg2 = reinterpret_cast< std::vector< int,std::allocator< int > > * >(argp2); - if (arg1) (arg1)->second = *arg2; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Pair_second_get(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::pair< float,std::vector< int > > *arg1 = (std::pair< float,std::vector< int > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< int,std::allocator< int > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char 
*)"O:Pair_second_get",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Pair_second_get" "', argument " "1"" of type '" "std::pair< float,std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::pair< float,std::vector< int > > * >(argp1); - result = (std::vector< int,std::allocator< int > > *)& ((arg1)->second); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_int_std__allocatorT_int_t_t, 0 | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_delete_Pair(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::pair< float,std::vector< int > > *arg1 = (std::pair< float,std::vector< int > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:delete_Pair",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_Pair" "', argument " "1"" of type '" "std::pair< float,std::vector< int > > *""'"); - } - arg1 = reinterpret_cast< std::pair< float,std::vector< int > > * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *Pair_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *obj; - if (!PyArg_ParseTuple(args,(char *)"O:swigregister", &obj)) return NULL; - SWIG_TypeNewClientData(SWIGTYPE_p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, SWIG_NewClientData(obj)); - return SWIG_Py_Void(); -} - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_iterator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - PyObject **arg2 = (PyObject **) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - swig::SwigPyIterator *result = 0 ; - - arg2 = &obj0; - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector_iterator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_iterator" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - result = (swig::SwigPyIterator *)std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg__iterator(arg1,arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_swig__SwigPyIterator, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___nonzero__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char 
*)"O:PairFloatVectorVector___nonzero__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___nonzero__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - result = (bool)std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____nonzero__((std::vector< std::pair< float,std::vector< int > > > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___bool__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector___bool__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___bool__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - result = (bool)std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____bool__((std::vector< std::pair< float,std::vector< int > > > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___len__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< float,std::vector< int > > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector___len__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___len__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - result = std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____len__((std::vector< std::pair< float,std::vector< int > > > const *)arg1); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___getslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = 
(std::vector< std::pair< float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int > > >::difference_type arg2 ; - std::vector< std::pair< float,std::vector< int > > >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:PairFloatVectorVector___getslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___getslice__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairFloatVectorVector___getslice__" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::pair< float,std::vector< int > > >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "PairFloatVectorVector___getslice__" "', argument " "3"" of type '" "std::vector< std::pair< float,std::vector< int > > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::pair< float,std::vector< int > > >::difference_type >(val3); - try { - result = (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *)std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____getslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___setslice____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int > > >::difference_type arg2 ; - std::vector< std::pair< float,std::vector< int > > >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:PairFloatVectorVector___setslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___setslice__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairFloatVectorVector___setslice__" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::pair< float,std::vector< int > > >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "PairFloatVectorVector___setslice__" "', argument " "3"" of type '" "std::vector< std::pair< float,std::vector< int > > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::pair< float,std::vector< int > > >::difference_type >(val3); - try { - std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____setslice____SWIG_0(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___setslice____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int > > >::difference_type arg2 ; - std::vector< std::pair< float,std::vector< int > > >::difference_type arg3 ; - std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:PairFloatVectorVector___setslice__",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___setslice__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairFloatVectorVector___setslice__" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::pair< float,std::vector< int > > >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if 
(!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "PairFloatVectorVector___setslice__" "', argument " "3"" of type '" "std::vector< std::pair< float,std::vector< int > > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::pair< float,std::vector< int > > >::difference_type >(val3); - { - std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *ptr = (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "PairFloatVectorVector___setslice__" "', argument " "4"" of type '" "std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairFloatVectorVector___setslice__" "', argument " "4"" of type '" "std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > const &""'"); - } - arg4 = ptr; - } - try { - std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____setslice____SWIG_1(arg1,arg2,arg3,(std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > const &)*arg4); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___setslice__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_PairFloatVectorVector___setslice____SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[3], (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = 
SWIG_CheckState(res); - if (_v) { - return _wrap_PairFloatVectorVector___setslice____SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'PairFloatVectorVector___setslice__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::pair< float,std::vector< int > > >::__setslice__(std::vector< std::pair< float,std::vector< int > > >::difference_type,std::vector< std::pair< float,std::vector< int > > >::difference_type)\n" - " std::vector< std::pair< float,std::vector< int > > >::__setslice__(std::vector< std::pair< float,std::vector< int > > >::difference_type,std::vector< std::pair< float,std::vector< int > > >::difference_type,std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___delslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int > > >::difference_type arg2 ; - std::vector< std::pair< float,std::vector< int > > >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:PairFloatVectorVector___delslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___delslice__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairFloatVectorVector___delslice__" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::pair< float,std::vector< int > > >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "PairFloatVectorVector___delslice__" "', argument " "3"" of type '" "std::vector< std::pair< float,std::vector< int > > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::pair< float,std::vector< int > > >::difference_type >(val3); - try { - std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____delslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___delitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< 
float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int > > >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairFloatVectorVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___delitem__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairFloatVectorVector___delitem__" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::pair< float,std::vector< int > > >::difference_type >(val2); - try { - std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____delitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___getitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairFloatVectorVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___getitem__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairFloatVectorVector___getitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - result = (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *)std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____getitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = 
SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___setitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:PairFloatVectorVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___setitem__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairFloatVectorVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - { - std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *ptr = (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "PairFloatVectorVector___setitem__" "', argument " "3"" of type '" "std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairFloatVectorVector___setitem__" "', argument " "3"" of type '" "std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____setitem____SWIG_0(arg1,arg2,(std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___setitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject 
*resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairFloatVectorVector___setitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___setitem__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairFloatVectorVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____setitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___delitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairFloatVectorVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___delitem__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairFloatVectorVector___delitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____delitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___delitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< 
std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_PairFloatVectorVector___delitem____SWIG_1(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_PairFloatVectorVector___delitem____SWIG_0(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'PairFloatVectorVector___delitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::pair< float,std::vector< int > > >::__delitem__(std::vector< std::pair< float,std::vector< int > > >::difference_type)\n" - " std::vector< std::pair< float,std::vector< int > > >::__delitem__(PySliceObject *)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___getitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int > > >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::pair< float,std::vector< int > > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairFloatVectorVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___getitem__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairFloatVectorVector___getitem__" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::pair< float,std::vector< int > > >::difference_type >(val2); - try { - result = (std::vector< std::pair< float,std::vector< int > > >::value_type *) &std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____getitem____SWIG_1((std::vector< std::pair< float,std::vector< int > > > const *)arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = swig::from(static_cast< std::pair< float,std::vector< int,std::allocator< int > > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___getitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < 
argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_PairFloatVectorVector___getitem____SWIG_0(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_PairFloatVectorVector___getitem____SWIG_1(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'PairFloatVectorVector___getitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::pair< float,std::vector< int > > >::__getitem__(PySliceObject *)\n" - " std::vector< std::pair< float,std::vector< int > > >::__getitem__(std::vector< std::pair< float,std::vector< int > > >::difference_type) const\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___setitem____SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int > > >::difference_type arg2 ; - std::vector< std::pair< float,std::vector< int > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:PairFloatVectorVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector___setitem__" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairFloatVectorVector___setitem__" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::pair< float,std::vector< int > > >::difference_type >(val2); - { - std::pair< float,std::vector< int,std::allocator< int > > > *ptr = (std::pair< float,std::vector< int,std::allocator< int > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "PairFloatVectorVector___setitem__" "', argument " "3"" of type '" "std::vector< std::pair< float,std::vector< int > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairFloatVectorVector___setitem__" "', argument " "3"" of type '" "std::vector< 
std::pair< float,std::vector< int > > >::value_type const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg____setitem____SWIG_2(arg1,arg2,(std::pair< float,std::vector< int,std::allocator< int > > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector___setitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_PairFloatVectorVector___setitem____SWIG_1(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_PairFloatVectorVector___setitem____SWIG_0(self, args); - } - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[2], (std::pair< float,std::vector< int,std::allocator< int > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_PairFloatVectorVector___setitem____SWIG_2(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'PairFloatVectorVector___setitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::pair< float,std::vector< int > > >::__setitem__(PySliceObject *,std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > const &)\n" - " std::vector< std::pair< float,std::vector< int > > >::__setitem__(PySliceObject *)\n" - " std::vector< std::pair< float,std::vector< int > > >::__setitem__(std::vector< std::pair< float,std::vector< int > > >::difference_type,std::vector< std::pair< float,std::vector< int > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_pop(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - 
PyObject * obj0 = 0 ; - std::vector< std::pair< float,std::vector< int > > >::value_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector_pop",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_pop" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - try { - result = std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg__pop(arg1); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = swig::from(static_cast< std::pair< float,std::vector< int,std::allocator< int > > > >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_append(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int > > >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairFloatVectorVector_append",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_append" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - { - std::pair< float,std::vector< int,std::allocator< int > > > *ptr = (std::pair< float,std::vector< int,std::allocator< int > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "PairFloatVectorVector_append" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairFloatVectorVector_append" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::value_type const &""'"); - } - arg2 = ptr; - } - std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg__append(arg1,(std::pair< float,std::vector< int,std::allocator< int > > > const &)*arg2); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_PairFloatVectorVector__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)":new_PairFloatVectorVector")) SWIG_fail; - result = (std::vector< std::pair< float,std::vector< int > > > *)new std::vector< std::pair< float,std::vector< int > > >(); - resultobj = 
SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_PairFloatVectorVector__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int,std::allocator< int > > > > *arg1 = 0 ; - int res1 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::vector< std::pair< float,std::vector< int > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_PairFloatVectorVector",&obj0)) SWIG_fail; - { - std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *ptr = (std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_PairFloatVectorVector" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int,std::allocator< int > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_PairFloatVectorVector" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int,std::allocator< int > > > > const &""'"); - } - arg1 = ptr; - } - result = (std::vector< std::pair< float,std::vector< int > > > *)new std::vector< std::pair< float,std::vector< int > > >((std::vector< std::pair< float,std::vector< int,std::allocator< int > > > > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_empty(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector_empty",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_empty" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - result = (bool)((std::vector< std::pair< float,std::vector< int > > > const *)arg1)->empty(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< 
int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< float,std::vector< int > > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_size" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - result = ((std::vector< std::pair< float,std::vector< int > > > const *)arg1)->size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_swap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int,std::allocator< int > > > > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairFloatVectorVector_swap",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_swap" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "PairFloatVectorVector_swap" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int,std::allocator< int > > > > &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairFloatVectorVector_swap" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int,std::allocator< int > > > > &""'"); - } - arg2 = reinterpret_cast< std::vector< std::pair< float,std::vector< int,std::allocator< int > > > > * >(argp2); - (arg1)->swap(*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_begin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< float,std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector_begin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_begin" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - result = (arg1)->begin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::pair< float,std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< float,std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector_end",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_end" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - result = (arg1)->end(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::pair< float,std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_rbegin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< float,std::vector< int > > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector_rbegin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_rbegin" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - result = (arg1)->rbegin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::pair< float,std::vector< int > > >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_rend(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = 
(std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< float,std::vector< int > > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector_rend",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_rend" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - result = (arg1)->rend(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::pair< float,std::vector< int > > >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_clear(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector_clear",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_clear" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - (arg1)->clear(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_get_allocator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector_get_allocator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_get_allocator" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - result = ((std::vector< std::pair< float,std::vector< int > > > const *)arg1)->get_allocator(); - resultobj = SWIG_NewPointerObj((new std::vector< std::pair< float,std::vector< int > > >::allocator_type(static_cast< const std::vector< std::pair< float,std::vector< int > > >::allocator_type& >(result))), 
SWIGTYPE_p_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_PairFloatVectorVector__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > >::size_type arg1 ; - size_t val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< float,std::vector< int > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_PairFloatVectorVector",&obj0)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_PairFloatVectorVector" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > >::size_type""'"); - } - arg1 = static_cast< std::vector< std::pair< float,std::vector< int > > >::size_type >(val1); - result = (std::vector< std::pair< float,std::vector< int > > > *)new std::vector< std::pair< float,std::vector< int > > >(arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_pop_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:PairFloatVectorVector_pop_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_pop_back" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - (arg1)->pop_back(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_resize__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int > > >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairFloatVectorVector_resize",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_resize" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - 
SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairFloatVectorVector_resize" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::pair< float,std::vector< int > > >::size_type >(val2); - (arg1)->resize(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_erase__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int > > >::iterator arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::pair< float,std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairFloatVectorVector_erase",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairFloatVectorVector_erase" "', argument " "1"" of type '" "std::vector< std::pair< float,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< float,std::vector< int > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairFloatVectorVector_erase" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairFloatVectorVector_erase" "', argument " "2"" of type '" "std::vector< std::pair< float,std::vector< int > > >::iterator""'"); - } - } - result = std_vector_Sl_std_pair_Sl_float_Sc_std_vector_Sl_int_Sg__Sg__Sg__erase__SWIG_0(arg1,arg2); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::pair< float,std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairFloatVectorVector_erase__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< float,std::vector< int > > > *arg1 = (std::vector< std::pair< float,std::vector< int > > > *) 0 ; - std::vector< std::pair< float,std::vector< int > > >::iterator arg2 ; - std::vector< std::pair< float,std::vector< int > > >::iterator arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - swig::SwigPyIterator *iter3 = 0 ; - int res3 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::pair< float,std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:PairFloatVectorVector_erase",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
[... auto-generated code omitted: this portion of the diff deletes SWIG-generated CPython wrapper functions for the STL container bindings PairFloatVectorVector and PairDoubleVectorVector (std::vector<std::pair<float,std::vector<int>>> and std::vector<std::pair<double,std::vector<int>>>) — erase, insert, push_back, front, back, assign, resize, reserve, capacity, slicing and item accessors, plus their overload dispatchers. The wrapper is machine-generated by SWIG and contains no hand-written logic, so the deleted lines are not reproduced here. ...]
SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "PairDoubleVectorVector___setitem__" "', argument " "3"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairDoubleVectorVector___setitem__" "', argument " "3"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - arg3 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > >::value_type * >(argp3); - try { - std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg____setitem____SWIG_2(arg1,arg2,(std::pair< double,std::vector< int,std::allocator< int > > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector___setitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_PairDoubleVectorVector___setitem____SWIG_1(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_PairDoubleVectorVector___setitem____SWIG_0(self, args); - } - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = SWIG_ConvertPtr(argv[2], 0, SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_PairDoubleVectorVector___setitem____SWIG_2(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'PairDoubleVectorVector___setitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::pair< double,std::vector< int > > >::__setitem__(PySliceObject *,std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > const &)\n" - " std::vector< std::pair< double,std::vector< int > > >::__setitem__(PySliceObject *)\n" - 
" std::vector< std::pair< double,std::vector< int > > >::__setitem__(std::vector< std::pair< double,std::vector< int > > >::difference_type,std::vector< std::pair< double,std::vector< int > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_pop(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< std::pair< double,std::vector< int,std::allocator< int > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_pop",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_pop" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - try { - result = std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__pop(arg1); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj((new std::vector< std::pair< double,std::vector< int > > >::value_type(static_cast< const std::vector< std::pair< double,std::vector< int > > >::value_type& >(result))), SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_append(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - std::vector< std::pair< double,std::vector< int > > >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairDoubleVectorVector_append",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_append" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "PairDoubleVectorVector_append" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairDoubleVectorVector_append" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - arg2 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > 
>::value_type * >(argp2); - std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__append(arg1,(std::pair< double,std::vector< int,std::allocator< int > > > const &)*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_PairDoubleVectorVector__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)":new_PairDoubleVectorVector")) SWIG_fail; - result = (std::vector< std::pair< double,std::vector< int > > > *)new std::vector< std::pair< double,std::vector< int > > >(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_PairDoubleVectorVector__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int,std::allocator< int > > > > *arg1 = 0 ; - int res1 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::vector< std::pair< double,std::vector< int > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_PairDoubleVectorVector",&obj0)) SWIG_fail; - { - std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > *ptr = (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_PairDoubleVectorVector" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int,std::allocator< int > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_PairDoubleVectorVector" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int,std::allocator< int > > > > const &""'"); - } - arg1 = ptr; - } - result = (std::vector< std::pair< double,std::vector< int > > > *)new std::vector< std::pair< double,std::vector< int > > >((std::vector< std::pair< double,std::vector< int,std::allocator< int > > > > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_empty(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_empty",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - 
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_empty" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - result = (bool)((std::vector< std::pair< double,std::vector< int > > > const *)arg1)->empty(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< double,std::vector< int > > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_size" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - result = ((std::vector< std::pair< double,std::vector< int > > > const *)arg1)->size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_swap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - std::vector< std::pair< double,std::vector< int,std::allocator< int > > > > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairDoubleVectorVector_swap",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_swap" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "PairDoubleVectorVector_swap" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int,std::allocator< int > > > > &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairDoubleVectorVector_swap" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int,std::allocator< int > > > > &""'"); - } - arg2 = reinterpret_cast< std::vector< std::pair< double,std::vector< 
int,std::allocator< int > > > > * >(argp2); - (arg1)->swap(*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_begin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< double,std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_begin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_begin" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - result = (arg1)->begin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::pair< double,std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< double,std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_end",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_end" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - result = (arg1)->end(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::pair< double,std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_rbegin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< double,std::vector< int > > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_rbegin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_rbegin" 
"', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - result = (arg1)->rbegin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::pair< double,std::vector< int > > >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_rend(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< double,std::vector< int > > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_rend",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_rend" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - result = (arg1)->rend(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::pair< double,std::vector< int > > >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_clear(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_clear",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_clear" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - (arg1)->clear(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_get_allocator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_get_allocator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_get_allocator" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - result = ((std::vector< std::pair< double,std::vector< int > > > const *)arg1)->get_allocator(); - resultobj = SWIG_NewPointerObj((new std::vector< std::pair< double,std::vector< int > > >::allocator_type(static_cast< const std::vector< std::pair< double,std::vector< int > > >::allocator_type& >(result))), SWIGTYPE_p_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_PairDoubleVectorVector__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > >::size_type arg1 ; - size_t val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< double,std::vector< int > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_PairDoubleVectorVector",&obj0)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_PairDoubleVectorVector" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > >::size_type""'"); - } - arg1 = static_cast< std::vector< std::pair< double,std::vector< int > > >::size_type >(val1); - result = (std::vector< std::pair< double,std::vector< int > > > *)new std::vector< std::pair< double,std::vector< int > > >(arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_pop_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_pop_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_pop_back" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - (arg1)->pop_back(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_resize__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - std::vector< std::pair< 
double,std::vector< int > > >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairDoubleVectorVector_resize",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_resize" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairDoubleVectorVector_resize" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::pair< double,std::vector< int > > >::size_type >(val2); - (arg1)->resize(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_erase__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - std::vector< std::pair< double,std::vector< int > > >::iterator arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::pair< double,std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairDoubleVectorVector_erase",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_erase" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector_erase" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector_erase" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::iterator""'"); - } - } - result = std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__erase__SWIG_0(arg1,arg2); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::pair< double,std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject 
*_wrap_PairDoubleVectorVector_erase__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - std::vector< std::pair< double,std::vector< int > > >::iterator arg2 ; - std::vector< std::pair< double,std::vector< int > > >::iterator arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - swig::SwigPyIterator *iter3 = 0 ; - int res3 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::pair< double,std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:PairDoubleVectorVector_erase",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_erase" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector_erase" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector_erase" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::iterator""'"); - } - } - res3 = SWIG_ConvertPtr(obj2, SWIG_as_voidptrptr(&iter3), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res3) || !iter3) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector_erase" "', argument " "3"" of type '" "std::vector< std::pair< double,std::vector< int > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter3); - if (iter_t) { - arg3 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector_erase" "', argument " "3"" of type '" "std::vector< std::pair< double,std::vector< int > > >::iterator""'"); - } - } - result = std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__erase__SWIG_1(arg1,arg2,arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::pair< double,std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_erase(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< 
std::pair< double,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast > >::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_PairDoubleVectorVector_erase__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast > >::iterator > *>(iter) != 0)); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[2], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast > >::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_PairDoubleVectorVector_erase__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'PairDoubleVectorVector_erase'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::pair< double,std::vector< int > > >::erase(std::vector< std::pair< double,std::vector< int > > >::iterator)\n" - " std::vector< std::pair< double,std::vector< int > > >::erase(std::vector< std::pair< double,std::vector< int > > >::iterator,std::vector< std::pair< double,std::vector< int > > >::iterator)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_new_PairDoubleVectorVector__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > >::size_type arg1 ; - std::vector< std::pair< double,std::vector< int > > >::value_type *arg2 = 0 ; - size_t val1 ; - int ecode1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::pair< double,std::vector< int > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:new_PairDoubleVectorVector",&obj0,&obj1)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_PairDoubleVectorVector" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > >::size_type""'"); - } - arg1 = static_cast< std::vector< std::pair< double,std::vector< int > > >::size_type >(val1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "new_PairDoubleVectorVector" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_PairDoubleVectorVector" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - arg2 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > >::value_type * >(argp2); - result = (std::vector< std::pair< double,std::vector< int > > > *)new std::vector< std::pair< double,std::vector< int > > >(arg1,(std::vector< 
std::pair< double,std::vector< int > > >::value_type const &)*arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_PairDoubleVectorVector(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 0) { - return _wrap_new_PairDoubleVectorVector__SWIG_0(self, args); - } - if (argc == 1) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_PairDoubleVectorVector__SWIG_2(self, args); - } - } - if (argc == 1) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_PairDoubleVectorVector__SWIG_1(self, args); - } - } - if (argc == 2) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = SWIG_ConvertPtr(argv[1], 0, SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_PairDoubleVectorVector__SWIG_3(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'new_PairDoubleVectorVector'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::pair< double,std::vector< int > > >::vector()\n" - " std::vector< std::pair< double,std::vector< int > > >::vector(std::vector< std::pair< double,std::vector< int,std::allocator< int > > > > const &)\n" - " std::vector< std::pair< double,std::vector< int > > >::vector(std::vector< std::pair< double,std::vector< int > > >::size_type)\n" - " std::vector< std::pair< double,std::vector< int > > >::vector(std::vector< std::pair< double,std::vector< int > > >::size_type,std::vector< std::pair< double,std::vector< int > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_push_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - std::vector< std::pair< double,std::vector< int > > >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairDoubleVectorVector_push_back",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_push_back" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - res2 = 
SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "PairDoubleVectorVector_push_back" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairDoubleVectorVector_push_back" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - arg2 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > >::value_type * >(argp2); - (arg1)->push_back((std::vector< std::pair< double,std::vector< int > > >::value_type const &)*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_front(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< double,std::vector< int > > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_front",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_front" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - result = (std::vector< std::pair< double,std::vector< int > > >::value_type *) &((std::vector< std::pair< double,std::vector< int > > > const *)arg1)->front(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::pair< double,std::vector< int > > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_back" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - result = (std::vector< std::pair< double,std::vector< int > > >::value_type *) &((std::vector< std::pair< double,std::vector< int > > > const *)arg1)->back(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), 
SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_assign(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - std::vector< std::pair< double,std::vector< int > > >::size_type arg2 ; - std::vector< std::pair< double,std::vector< int > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - void *argp3 = 0 ; - int res3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:PairDoubleVectorVector_assign",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_assign" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairDoubleVectorVector_assign" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::pair< double,std::vector< int > > >::size_type >(val2); - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "PairDoubleVectorVector_assign" "', argument " "3"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairDoubleVectorVector_assign" "', argument " "3"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - arg3 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > >::value_type * >(argp3); - (arg1)->assign(arg2,(std::vector< std::pair< double,std::vector< int > > >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_resize__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::pair< double,std::vector< int > > > *arg1 = (std::vector< std::pair< double,std::vector< int > > > *) 0 ; - std::vector< std::pair< double,std::vector< int > > >::size_type arg2 ; - std::vector< std::pair< double,std::vector< int > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - void *argp3 = 0 ; - int res3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:PairDoubleVectorVector_resize",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - 
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector_resize" "', argument " "1"" of type '" "std::vector< std::pair< double,std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairDoubleVectorVector_resize" "', argument " "2"" of type '" "std::vector< std::pair< double,std::vector< int > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::pair< double,std::vector< int > > >::size_type >(val2); - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0 | 0); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "PairDoubleVectorVector_resize" "', argument " "3"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairDoubleVectorVector_resize" "', argument " "3"" of type '" "std::vector< std::pair< double,std::vector< int > > >::value_type const &""'"); - } - arg3 = reinterpret_cast< std::vector< std::pair< double,std::vector< int > > >::value_type * >(argp3); - (arg1)->resize(arg2,(std::vector< std::pair< double,std::vector< int > > >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_resize(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_PairDoubleVectorVector_resize__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = SWIG_ConvertPtr(argv[2], 0, SWIGTYPE_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_PairDoubleVectorVector_resize__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'PairDoubleVectorVector_resize'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::pair< double,std::vector< int > > >::resize(std::vector< std::pair< double,std::vector< int > > >::size_type)\n" - " std::vector< std::pair< double,std::vector< int > > >::resize(std::vector< std::pair< double,std::vector< int > > >::size_type,std::vector< std::pair< double,std::vector< int > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector_insert__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject 
[... remainder of the deleted SWIG-generated wrapper source: the rest of the `PairDoubleVectorVector` (std::vector< std::pair< double, std::vector< int > > >) bindings — the insert overloads and their dispatch, reserve, capacity, the destructor, and swigregister — followed by the `PairDoubleVectorVector2` (std::vector of the above) bindings: iterator, __nonzero__, __bool__, __len__, __getslice__, __setslice__, __delslice__, __delitem__, __getitem__, __setitem__, pop, append, and the constructors. All of this is auto-generated container boilerplate removed along with the file. ...]
SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_PairDoubleVectorVector2__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > *arg1 = 0 ; - int res1 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_PairDoubleVectorVector2",&obj0)) SWIG_fail; - { - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > *ptr = (std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_PairDoubleVectorVector2" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_PairDoubleVectorVector2" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > const &""'"); - } - arg1 = ptr; - } - result = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *)new std::vector< std::vector< std::pair< double,std::vector< int > > > >((std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_empty(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< 
std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector2_empty",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_empty" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - result = (bool)((std::vector< std::vector< std::pair< double,std::vector< int > > > > const *)arg1)->empty(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector2_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_size" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - result = ((std::vector< std::vector< std::pair< double,std::vector< int > > > > const *)arg1)->size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_swap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairDoubleVectorVector2_swap",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_swap" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "PairDoubleVectorVector2_swap" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairDoubleVectorVector2_swap" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > &""'"); - } - arg2 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > * >(argp2); - (arg1)->swap(*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_begin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector2_begin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_begin" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - result = (arg1)->begin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator & >(result)), 
- swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector2_end",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_end" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - result = (arg1)->end(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_rbegin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector2_rbegin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_rbegin" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - result = (arg1)->rbegin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::pair< double,std::vector< int > > > >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_rend(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 
0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector2_rend",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_rend" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - result = (arg1)->rend(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::pair< double,std::vector< int > > > >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_clear(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector2_clear",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_clear" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - (arg1)->clear(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_get_allocator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector2_get_allocator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_get_allocator" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - result = ((std::vector< std::vector< std::pair< double,std::vector< int > > > > const *)arg1)->get_allocator(); - resultobj = SWIG_NewPointerObj((new std::vector< std::vector< std::pair< double,std::vector< int > > > >::allocator_type(static_cast< const std::vector< std::vector< std::pair< double,std::vector< int > > > >::allocator_type& >(result))), SWIGTYPE_p_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_PairDoubleVectorVector2__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type arg1 ; - size_t val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_PairDoubleVectorVector2",&obj0)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_PairDoubleVectorVector2" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type""'"); - } - arg1 = static_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type >(val1); - result = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *)new std::vector< std::vector< std::pair< double,std::vector< int > > > >(arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_pop_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector2_pop_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_pop_back" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - (arg1)->pop_back(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_resize__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairDoubleVectorVector2_resize",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_resize" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairDoubleVectorVector2_resize" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type >(val2); - (arg1)->resize(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_erase__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairDoubleVectorVector2_erase",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 );
- if (!SWIG_IsOK(res1)) {
- SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_erase" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'");
- }
- arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1);
- res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0);
- if (!SWIG_IsOK(res2) || !iter2) {
- SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector2_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator""'");
- } else {
- swig::SwigPyIterator_T<std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator > *iter_t = dynamic_cast<swig::SwigPyIterator_T<std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator > *>(iter2);
- if (iter_t) {
- arg2 = iter_t->get_current();
- } else {
- SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector2_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator""'");
- }
- }
- result = std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg__erase__SWIG_0(arg1,arg2);
- resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator & >(result)),
- swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN);
- return resultobj;
-fail:
- return NULL;
-}
-
-
-SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_erase__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
- PyObject *resultobj = 0;
- std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ;
- std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator arg2 ;
- std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator arg3 ;
- void *argp1 = 0 ;
- int res1 = 0 ;
- swig::SwigPyIterator *iter2 = 0 ;
- int res2 ;
- swig::SwigPyIterator *iter3 = 0 ;
- int res3 ;
- PyObject * obj0 = 0 ;
- PyObject * obj1 = 0 ;
- PyObject * obj2 = 0 ;
- std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator result;
-
- if (!PyArg_ParseTuple(args,(char *)"OOO:PairDoubleVectorVector2_erase",&obj0,&obj1,&obj2)) SWIG_fail;
- res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 );
- if (!SWIG_IsOK(res1)) {
- SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_erase" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'");
- }
- arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1);
- res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 
0);
- if (!SWIG_IsOK(res2) || !iter2) {
- SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector2_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator""'");
- } else {
- swig::SwigPyIterator_T<std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator > *iter_t = dynamic_cast<swig::SwigPyIterator_T<std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator > *>(iter2);
- if (iter_t) {
- arg2 = iter_t->get_current();
- } else {
- SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector2_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator""'");
- }
- }
- res3 = SWIG_ConvertPtr(obj2, SWIG_as_voidptrptr(&iter3), swig::SwigPyIterator::descriptor(), 0);
- if (!SWIG_IsOK(res3) || !iter3) {
- SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector2_erase" "', argument " "3"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator""'");
- } else {
- swig::SwigPyIterator_T<std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator > *iter_t = dynamic_cast<swig::SwigPyIterator_T<std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator > *>(iter3);
- if (iter_t) {
- arg3 = iter_t->get_current();
- } else {
- SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "PairDoubleVectorVector2_erase" "', argument " "3"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator""'");
- }
- }
- result = std_vector_Sl_std_vector_Sl_std_pair_Sl_double_Sc_std_vector_Sl_int_Sg__Sg__Sg__Sg__erase__SWIG_1(arg1,arg2,arg3);
- resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator & >(result)),
- swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN);
- return resultobj;
-fail:
- return NULL;
-}
-
-
-SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_erase(PyObject *self, PyObject *args) {
- Py_ssize_t argc;
- PyObject *argv[4] = {
- 0
- };
- Py_ssize_t ii;
-
- if (!PyTuple_Check(args)) SWIG_fail;
- argc = PyObject_Length(args);
- for (ii = 0; (ii < 3) && (ii < argc); ii++) {
- argv[ii] = PyTuple_GET_ITEM(args,ii);
- }
- if (argc == 2) {
- int _v;
- int res = swig::asptr(argv[0], (std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > >**)(0));
- _v = SWIG_CheckState(res);
- if (_v) {
- swig::SwigPyIterator *iter = 0;
- int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0);
- _v = (SWIG_IsOK(res) && iter && (dynamic_cast<swig::SwigPyIterator_T<std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator > *>(iter) != 0));
- if (_v) {
- return _wrap_PairDoubleVectorVector2_erase__SWIG_0(self, args);
- }
- }
- }
- if (argc == 3) {
- int _v;
- int res = swig::asptr(argv[0], (std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > >**)(0));
- _v = SWIG_CheckState(res);
- if (_v) {
- swig::SwigPyIterator *iter = 0;
- int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0);
- _v = (SWIG_IsOK(res) && iter && (dynamic_cast<swig::SwigPyIterator_T<std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator > *>(iter) != 
0));
- if (_v) {
- swig::SwigPyIterator *iter = 0;
- int res = SWIG_ConvertPtr(argv[2], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0);
- _v = (SWIG_IsOK(res) && iter && (dynamic_cast<swig::SwigPyIterator_T<std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator > *>(iter) != 0));
- if (_v) {
- return _wrap_PairDoubleVectorVector2_erase__SWIG_1(self, args);
- }
- }
- }
- }
-
-fail:
- SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'PairDoubleVectorVector2_erase'.\n"
- " Possible C/C++ prototypes are:\n"
- " std::vector< std::vector< std::pair< double,std::vector< int > > > >::erase(std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator)\n"
- " std::vector< std::vector< std::pair< double,std::vector< int > > > >::erase(std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator,std::vector< std::vector< std::pair< double,std::vector< int > > > >::iterator)\n");
- return 0;
-}
-
-
-SWIGINTERN PyObject *_wrap_new_PairDoubleVectorVector2__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
- PyObject *resultobj = 0;
- std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type arg1 ;
- std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type *arg2 = 0 ;
- size_t val1 ;
- int ecode1 = 0 ;
- int res2 = SWIG_OLDOBJ ;
- PyObject * obj0 = 0 ;
- PyObject * obj1 = 0 ;
- std::vector< std::vector< std::pair< double,std::vector< int > > > > *result = 0 ;
-
- if (!PyArg_ParseTuple(args,(char *)"OO:new_PairDoubleVectorVector2",&obj0,&obj1)) SWIG_fail;
- ecode1 = SWIG_AsVal_size_t(obj0, &val1);
- if (!SWIG_IsOK(ecode1)) {
- SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_PairDoubleVectorVector2" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type""'");
- }
- arg1 = static_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type >(val1);
- {
- std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > *ptr = (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > *)0;
- res2 = swig::asptr(obj1, &ptr);
- if (!SWIG_IsOK(res2)) {
- SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "new_PairDoubleVectorVector2" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &""'");
- }
- if (!ptr) {
- SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_PairDoubleVectorVector2" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &""'");
- }
- arg2 = ptr;
- }
- result = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *)new std::vector< std::vector< std::pair< double,std::vector< int > > > >(arg1,(std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &)*arg2);
- resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, SWIG_POINTER_NEW | 0 );
- if 
(SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_PairDoubleVectorVector2(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 0) { - return _wrap_new_PairDoubleVectorVector2__SWIG_0(self, args); - } - if (argc == 1) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_PairDoubleVectorVector2__SWIG_2(self, args); - } - } - if (argc == 1) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_PairDoubleVectorVector2__SWIG_1(self, args); - } - } - if (argc == 2) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_PairDoubleVectorVector2__SWIG_3(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'new_PairDoubleVectorVector2'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::pair< double,std::vector< int > > > >::vector()\n" - " std::vector< std::vector< std::pair< double,std::vector< int > > > >::vector(std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > const &)\n" - " std::vector< std::vector< std::pair< double,std::vector< int > > > >::vector(std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type)\n" - " std::vector< std::vector< std::pair< double,std::vector< int > > > >::vector(std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type,std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_push_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:PairDoubleVectorVector2_push_back",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_push_back" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - { - std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > *ptr = (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "PairDoubleVectorVector2_push_back" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairDoubleVectorVector2_push_back" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &""'"); - } - arg2 = ptr; - } - (arg1)->push_back((std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &)*arg2); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_front(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector2_front",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_front" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - result = (std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type *) &((std::vector< std::vector< std::pair< double,std::vector< int > > > > const *)arg1)->front(); - resultobj = swig::from(static_cast< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > 
>(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:PairDoubleVectorVector2_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_back" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - result = (std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type *) &((std::vector< std::vector< std::pair< double,std::vector< int > > > > const *)arg1)->back(); - resultobj = swig::from(static_cast< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_assign(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type arg2 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:PairDoubleVectorVector2_assign",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_assign" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairDoubleVectorVector2_assign" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > 
>::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type >(val2); - { - std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > *ptr = (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "PairDoubleVectorVector2_assign" "', argument " "3"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "PairDoubleVectorVector2_assign" "', argument " "3"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &""'"); - } - arg3 = ptr; - } - (arg1)->assign(arg2,(std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_PairDoubleVectorVector2_resize__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::pair< double,std::vector< int > > > > *arg1 = (std::vector< std::vector< std::pair< double,std::vector< int > > > > *) 0 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type arg2 ; - std::vector< std::vector< std::pair< double,std::vector< int > > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:PairDoubleVectorVector2_resize",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "PairDoubleVectorVector2_resize" "', argument " "1"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "PairDoubleVectorVector2_resize" "', argument " "2"" of type '" "std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::pair< double,std::vector< int > > > >::size_type >(val2); - { - std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > *ptr = (std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > *)0; - res3 = swig::asptr(obj2, 
[Remaining hunks (flattened in extraction, reproduced here only in summary): deletion of the SWIG auto-generated C++ Python-binding source. The removed wrapper functions cover the generated STL container bindings — the `PairDoubleVectorVector2` type (`std::vector<std::vector<std::pair<double, std::vector<int>>>>`: `resize`, `insert`, `reserve`, `capacity`, destructor, `swigregister`) and the `DoubleVector3` type (`std::vector<std::vector<std::vector<double>>>`: `iterator`, `__nonzero__`, `__bool__`, `__len__`, `__getslice__`/`__setslice__`/`__delslice__`, `__getitem__`/`__setitem__`/`__delitem__`, `pop`, `append`, constructors). All of this is boilerplate emitted by SWIG for the decoder's container typemaps and is removed wholesale in this change.]
SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< double > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_DoubleVector3",&obj0)) SWIG_fail; - { - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *ptr = (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_DoubleVector3" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_DoubleVector3" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > const &""'"); - } - arg1 = ptr; - } - result = (std::vector< std::vector< std::vector< double > > > *)new std::vector< std::vector< std::vector< double > > >((std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_empty(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_empty",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_empty" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - result = (bool)((std::vector< std::vector< std::vector< double > > > const *)arg1)->empty(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_size(PyObject *SWIGUNUSEDPARM(self), 
PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< double > > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_size" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - result = ((std::vector< std::vector< std::vector< double > > > const *)arg1)->size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_swap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector3_swap",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_swap" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "DoubleVector3_swap" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "DoubleVector3_swap" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > &""'"); - } - arg2 = reinterpret_cast< std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< 
double,std::allocator< double > > > > > * >(argp2); - (arg1)->swap(*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_begin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< double > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_begin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_begin" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - result = (arg1)->begin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< double > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< double > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_end",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_end" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - result = (arg1)->end(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< double > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_rbegin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< double > > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_rbegin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_rbegin" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - result = (arg1)->rbegin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< double > > >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_rend(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< double > > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_rend",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_rend" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - result = (arg1)->rend(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< double > > >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_clear(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_clear",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_clear" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - (arg1)->clear(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_get_allocator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject 
*resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_get_allocator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_get_allocator" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - result = ((std::vector< std::vector< std::vector< double > > > const *)arg1)->get_allocator(); - resultobj = SWIG_NewPointerObj((new std::vector< std::vector< std::vector< double > > >::allocator_type(static_cast< const std::vector< std::vector< std::vector< double > > >::allocator_type& >(result))), SWIGTYPE_p_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_DoubleVector3__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > >::size_type arg1 ; - size_t val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< double > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_DoubleVector3",&obj0)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_DoubleVector3" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > >::size_type""'"); - } - arg1 = static_cast< std::vector< std::vector< std::vector< double > > >::size_type >(val1); - result = (std::vector< std::vector< std::vector< double > > > *)new std::vector< std::vector< std::vector< double > > >(arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_pop_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_pop_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_pop_back" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - (arg1)->pop_back(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_resize__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - std::vector< std::vector< std::vector< double > > >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector3_resize",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_resize" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector3_resize" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< double > > >::size_type >(val2); - (arg1)->resize(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_erase__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - std::vector< std::vector< std::vector< double > > >::iterator arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::vector< std::vector< double > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector3_erase",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_erase" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< 
std::vector< std::vector< double > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector3_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector3_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::iterator""'"); - } - } - result = std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg__erase__SWIG_0(arg1,arg2); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< double > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_erase__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - std::vector< std::vector< std::vector< double > > >::iterator arg2 ; - std::vector< std::vector< std::vector< double > > >::iterator arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - swig::SwigPyIterator *iter3 = 0 ; - int res3 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::vector< std::vector< double > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:DoubleVector3_erase",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_erase" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector3_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector3_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::iterator""'"); - } - } - res3 = SWIG_ConvertPtr(obj2, SWIG_as_voidptrptr(&iter3), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res3) || !iter3) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector3_erase" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< double > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > 
>::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter3); - if (iter_t) { - arg3 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector3_erase" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< double > > >::iterator""'"); - } - } - result = std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg__erase__SWIG_1(arg1,arg2,arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< double > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_erase(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast > >::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_DoubleVector3_erase__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast > >::iterator > *>(iter) != 0)); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[2], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast > >::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_DoubleVector3_erase__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'DoubleVector3_erase'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< double > > >::erase(std::vector< std::vector< std::vector< double > > >::iterator)\n" - " std::vector< std::vector< std::vector< double > > >::erase(std::vector< std::vector< std::vector< double > > >::iterator,std::vector< std::vector< std::vector< double > > >::iterator)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_new_DoubleVector3__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > >::size_type arg1 ; - std::vector< std::vector< std::vector< double > > >::value_type *arg2 = 0 ; - size_t val1 ; - int ecode1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< 
std::vector< std::vector< double > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:new_DoubleVector3",&obj0,&obj1)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_DoubleVector3" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > >::size_type""'"); - } - arg1 = static_cast< std::vector< std::vector< std::vector< double > > >::size_type >(val1); - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "new_DoubleVector3" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_DoubleVector3" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - arg2 = ptr; - } - result = (std::vector< std::vector< std::vector< double > > > *)new std::vector< std::vector< std::vector< double > > >(arg1,(std::vector< std::vector< std::vector< double > > >::value_type const &)*arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_DoubleVector3(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 0) { - return _wrap_new_DoubleVector3__SWIG_0(self, args); - } - if (argc == 1) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_DoubleVector3__SWIG_2(self, args); - } - } - if (argc == 1) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_DoubleVector3__SWIG_1(self, args); - } - } - if (argc == 2) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_DoubleVector3__SWIG_3(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'new_DoubleVector3'.\n" - " 
Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< double > > >::vector()\n" - " std::vector< std::vector< std::vector< double > > >::vector(std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > const &)\n" - " std::vector< std::vector< std::vector< double > > >::vector(std::vector< std::vector< std::vector< double > > >::size_type)\n" - " std::vector< std::vector< std::vector< double > > >::vector(std::vector< std::vector< std::vector< double > > >::size_type,std::vector< std::vector< std::vector< double > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_push_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - std::vector< std::vector< std::vector< double > > >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector3_push_back",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_push_back" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "DoubleVector3_push_back" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "DoubleVector3_push_back" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - arg2 = ptr; - } - (arg1)->push_back((std::vector< std::vector< std::vector< double > > >::value_type const &)*arg2); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_front(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< double > > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_front",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_front" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - result = (std::vector< std::vector< std::vector< double > > >::value_type *) &((std::vector< std::vector< std::vector< double > > > const *)arg1)->front(); - resultobj = swig::from(static_cast< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< double > > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_back" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - result = (std::vector< std::vector< std::vector< double > > >::value_type *) &((std::vector< std::vector< std::vector< double > > > const *)arg1)->back(); - resultobj = swig::from(static_cast< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_assign(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - std::vector< std::vector< std::vector< double > > >::size_type arg2 ; - std::vector< std::vector< std::vector< double > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:DoubleVector3_assign",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - 
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_assign" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector3_assign" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< double > > >::size_type >(val2); - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "DoubleVector3_assign" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "DoubleVector3_assign" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - arg3 = ptr; - } - (arg1)->assign(arg2,(std::vector< std::vector< std::vector< double > > >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_resize__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - std::vector< std::vector< std::vector< double > > >::size_type arg2 ; - std::vector< std::vector< std::vector< double > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:DoubleVector3_resize",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_resize" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector3_resize" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< double > > >::size_type >(val2); - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< 
double,std::allocator< double > > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "DoubleVector3_resize" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "DoubleVector3_resize" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - arg3 = ptr; - } - (arg1)->resize(arg2,(std::vector< std::vector< std::vector< double > > >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_resize(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_DoubleVector3_resize__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_DoubleVector3_resize__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'DoubleVector3_resize'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< double > > >::resize(std::vector< std::vector< std::vector< double > > >::size_type)\n" - " std::vector< std::vector< std::vector< double > > >::resize(std::vector< std::vector< std::vector< double > > >::size_type,std::vector< std::vector< std::vector< double > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_insert__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - std::vector< std::vector< std::vector< double > > >::iterator arg2 ; - std::vector< std::vector< std::vector< double > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * 
obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::vector< std::vector< double > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:DoubleVector3_insert",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_insert" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector3_insert" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector3_insert" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::iterator""'"); - } - } - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "DoubleVector3_insert" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "DoubleVector3_insert" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - arg3 = ptr; - } - result = std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg__insert__SWIG_0(arg1,arg2,(std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &)*arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< double > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_insert__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - std::vector< std::vector< std::vector< double > > >::iterator arg2 ; - std::vector< std::vector< std::vector< double > > >::size_type arg3 ; - std::vector< std::vector< std::vector< double > > >::value_type *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - size_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 
; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:DoubleVector3_insert",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_insert" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector3_insert" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "DoubleVector3_insert" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "DoubleVector3_insert" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< double > > >::size_type""'"); - } - arg3 = static_cast< std::vector< std::vector< std::vector< double > > >::size_type >(val3); - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "DoubleVector3_insert" "', argument " "4"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "DoubleVector3_insert" "', argument " "4"" of type '" "std::vector< std::vector< std::vector< double > > >::value_type const &""'"); - } - arg4 = ptr; - } - std_vector_Sl_std_vector_Sl_std_vector_Sl_double_Sg__Sg__Sg__insert__SWIG_1(arg1,arg2,arg3,(std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &)*arg4); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_insert(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< 
std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast<swig::SwigPyIterator_T<std::vector< std::vector< std::vector< double > > >::iterator > *>(iter) != 0)); - if (_v) { - int res = swig::asptr(argv[2], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_DoubleVector3_insert__SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast<swig::SwigPyIterator_T<std::vector< std::vector< std::vector< double > > >::iterator > *>(iter) != 0)); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[3], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_DoubleVector3_insert__SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'DoubleVector3_insert'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< double > > >::insert(std::vector< std::vector< std::vector< double > > >::iterator,std::vector< std::vector< std::vector< double > > >::value_type const &)\n" - " std::vector< std::vector< std::vector< double > > >::insert(std::vector< std::vector< std::vector< double > > >::iterator,std::vector< std::vector< std::vector< double > > >::size_type,std::vector< std::vector< std::vector< double > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_reserve(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - std::vector< std::vector< std::vector< double > > >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleVector3_reserve",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_reserve" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - ecode2 = 
SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "DoubleVector3_reserve" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< double > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< double > > >::size_type >(val2); - (arg1)->reserve(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleVector3_capacity(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< double > > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:DoubleVector3_capacity",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleVector3_capacity" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - result = ((std::vector< std::vector< std::vector< double > > > const *)arg1)->capacity(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_delete_DoubleVector3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double > > > *arg1 = (std::vector< std::vector< std::vector< double > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:delete_DoubleVector3",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_DoubleVector3" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< double > > > * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *DoubleVector3_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *obj; - if (!PyArg_ParseTuple(args,(char *)"O:swigregister", &obj)) return NULL; - SWIG_TypeNewClientData(SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, SWIG_NewClientData(obj)); - return SWIG_Py_Void(); -} - -SWIGINTERN PyObject *_wrap_IntVector3_iterator(PyObject 
*SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - PyObject **arg2 = (PyObject **) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - swig::SwigPyIterator *result = 0 ; - - arg2 = &obj0; - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_iterator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_iterator" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = (swig::SwigPyIterator *)std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__iterator(arg1,arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_swig__SwigPyIterator, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___nonzero__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3___nonzero__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___nonzero__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = (bool)std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____nonzero__((std::vector< std::vector< std::vector< int > > > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___bool__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3___bool__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___bool__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< 
std::vector< std::vector< int > > > * >(argp1); - result = (bool)std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____bool__((std::vector< std::vector< std::vector< int > > > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___len__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3___len__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___len__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____len__((std::vector< std::vector< std::vector< int > > > const *)arg1); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___getslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::difference_type arg2 ; - std::vector< std::vector< std::vector< int > > >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector3___getslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___getslice__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector3___getslice__" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< int > > >::difference_type 
>(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector3___getslice__" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::vector< std::vector< int > > >::difference_type >(val3); - try { - result = (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *)std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____getslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___setslice____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::difference_type arg2 ; - std::vector< std::vector< std::vector< int > > >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector3___setslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___setslice__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector3___setslice__" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< int > > >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector3___setslice__" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::vector< std::vector< int > > >::difference_type >(val3); - try { - std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____setslice____SWIG_0(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - 
SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___setslice____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::difference_type arg2 ; - std::vector< std::vector< std::vector< int > > >::difference_type arg3 ; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:IntVector3___setslice__",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___setslice__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector3___setslice__" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< int > > >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector3___setslice__" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::vector< std::vector< int > > >::difference_type >(val3); - { - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *ptr = (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "IntVector3___setslice__" "', argument " "4"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< 
int,std::allocator< int > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector3___setslice__" "', argument " "4"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - arg4 = ptr; - } - try { - std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____setslice____SWIG_1(arg1,arg2,arg3,(std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &)*arg4); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___setslice__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector3___setslice____SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[3], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_IntVector3___setslice____SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector3___setslice__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< int > > >::__setslice__(std::vector< std::vector< std::vector< int > > >::difference_type,std::vector< std::vector< 
std::vector< int > > >::difference_type)\n" - " std::vector< std::vector< std::vector< int > > >::__setslice__(std::vector< std::vector< std::vector< int > > >::difference_type,std::vector< std::vector< std::vector< int > > >::difference_type,std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___delslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::difference_type arg2 ; - std::vector< std::vector< std::vector< int > > >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector3___delslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___delslice__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector3___delslice__" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< int > > >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector3___delslice__" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::difference_type""'"); - } - arg3 = static_cast< std::vector< std::vector< std::vector< int > > >::difference_type >(val3); - try { - std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____delslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___delitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector3___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___delitem__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector3___delitem__" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< int > > >::difference_type >(val2); - try { - std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____delitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___getitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector3___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___getitem__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3___getitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - result = (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *)std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____getitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), 
SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___setitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector3___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___setitem__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - { - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *ptr = (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "IntVector3___setitem__" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector3___setitem__" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____setitem____SWIG_0(arg1,arg2,(std::vector< 
std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___setitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector3___setitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___setitem__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____setitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___delitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector3___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___delitem__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3___delitem__" "', argument " "2"" of type '" 
"PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____delitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___delitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_IntVector3___delitem____SWIG_1(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector3___delitem____SWIG_0(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector3___delitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< int > > >::__delitem__(std::vector< std::vector< std::vector< int > > >::difference_type)\n" - " std::vector< std::vector< std::vector< int > > >::__delitem__(PySliceObject *)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___getitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::vector< std::vector< int > > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector3___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___getitem__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - 
SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector3___getitem__" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< int > > >::difference_type >(val2); - try { - result = (std::vector< std::vector< std::vector< int > > >::value_type *) &std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____getitem____SWIG_1((std::vector< std::vector< std::vector< int > > > const *)arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = swig::from(static_cast< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___getitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_IntVector3___getitem____SWIG_0(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector3___getitem____SWIG_1(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector3___getitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< int > > >::__getitem__(PySliceObject *)\n" - " std::vector< std::vector< std::vector< int > > >::__getitem__(std::vector< std::vector< std::vector< int > > >::difference_type) const\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___setitem____SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::difference_type arg2 ; - std::vector< std::vector< std::vector< int > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector3___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3___setitem__" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector3___setitem__" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::difference_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< int > > >::difference_type >(val2); - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "IntVector3___setitem__" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector3___setitem__" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg____setitem____SWIG_2(arg1,arg2,(std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3___setitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_IntVector3___setitem____SWIG_1(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< 
int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_IntVector3___setitem____SWIG_0(self, args); - } - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_IntVector3___setitem____SWIG_2(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector3___setitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< int > > >::__setitem__(PySliceObject *,std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &)\n" - " std::vector< std::vector< std::vector< int > > >::__setitem__(PySliceObject *)\n" - " std::vector< std::vector< std::vector< int > > >::__setitem__(std::vector< std::vector< std::vector< int > > >::difference_type,std::vector< std::vector< std::vector< int > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_pop(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > >::value_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_pop",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_pop" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - try { - result = std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__pop(arg1); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = swig::from(static_cast< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_append(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< 
std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector3_append",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_append" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "IntVector3_append" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector3_append" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - arg2 = ptr; - } - std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__append(arg1,(std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg2); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_IntVector3__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)":new_IntVector3")) SWIG_fail; - result = (std::vector< std::vector< std::vector< int > > > *)new std::vector< std::vector< std::vector< int > > >(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_IntVector3__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > *arg1 = 0 ; - int res1 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_IntVector3",&obj0)) SWIG_fail; - { - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *ptr = (std::vector< 
std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_IntVector3" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_IntVector3" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > const &""'"); - } - arg1 = ptr; - } - result = (std::vector< std::vector< std::vector< int > > > *)new std::vector< std::vector< std::vector< int > > >((std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_empty(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_empty",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_empty" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = (bool)((std::vector< std::vector< std::vector< int > > > const *)arg1)->empty(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_size" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = ((std::vector< std::vector< std::vector< int > > > const *)arg1)->size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_swap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector3_swap",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_swap" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "IntVector3_swap" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector3_swap" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > &""'"); - } - arg2 = reinterpret_cast< std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > * >(argp2); - (arg1)->swap(*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_begin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char 
*)"O:IntVector3_begin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_begin" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = (arg1)->begin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_end",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_end" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = (arg1)->end(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_rbegin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_rbegin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_rbegin" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = (arg1)->rbegin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< int > > >::reverse_iterator & >(result)), - 
swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_rend(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_rend",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_rend" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = (arg1)->rend(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< int > > >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_clear(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_clear",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_clear" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - (arg1)->clear(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_get_allocator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_get_allocator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_get_allocator" "', argument " "1"" of type '" 
"std::vector< std::vector< std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = ((std::vector< std::vector< std::vector< int > > > const *)arg1)->get_allocator(); - resultobj = SWIG_NewPointerObj((new std::vector< std::vector< std::vector< int > > >::allocator_type(static_cast< const std::vector< std::vector< std::vector< int > > >::allocator_type& >(result))), SWIGTYPE_p_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_IntVector3__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > >::size_type arg1 ; - size_t val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_IntVector3",&obj0)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_IntVector3" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > >::size_type""'"); - } - arg1 = static_cast< std::vector< std::vector< std::vector< int > > >::size_type >(val1); - result = (std::vector< std::vector< std::vector< int > > > *)new std::vector< std::vector< std::vector< int > > >(arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_pop_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_pop_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_pop_back" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - (arg1)->pop_back(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_resize__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector3_resize",&obj0,&obj1)) SWIG_fail; - 
res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_resize" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector3_resize" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< int > > >::size_type >(val2); - (arg1)->resize(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_erase__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::iterator arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::vector< std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector3_erase",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_erase" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::iterator""'"); - } - } - result = std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__erase__SWIG_0(arg1,arg2); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_erase__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< 
std::vector< int > > >::iterator arg2 ; - std::vector< std::vector< std::vector< int > > >::iterator arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - swig::SwigPyIterator *iter3 = 0 ; - int res3 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::vector< std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector3_erase",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_erase" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3_erase" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::iterator""'"); - } - } - res3 = SWIG_ConvertPtr(obj2, SWIG_as_voidptrptr(&iter3), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res3) || !iter3) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3_erase" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter3); - if (iter_t) { - arg3 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3_erase" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::iterator""'"); - } - } - result = std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__erase__SWIG_1(arg1,arg2,arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_erase(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], 
SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast > >::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_IntVector3_erase__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast > >::iterator > *>(iter) != 0)); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[2], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast > >::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_IntVector3_erase__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector3_erase'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< int > > >::erase(std::vector< std::vector< std::vector< int > > >::iterator)\n" - " std::vector< std::vector< std::vector< int > > >::erase(std::vector< std::vector< std::vector< int > > >::iterator,std::vector< std::vector< std::vector< int > > >::iterator)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_new_IntVector3__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > >::size_type arg1 ; - std::vector< std::vector< std::vector< int > > >::value_type *arg2 = 0 ; - size_t val1 ; - int ecode1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::vector< std::vector< int > > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:new_IntVector3",&obj0,&obj1)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_IntVector3" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > >::size_type""'"); - } - arg1 = static_cast< std::vector< std::vector< std::vector< int > > >::size_type >(val1); - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "new_IntVector3" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_IntVector3" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - arg2 = ptr; - } - result = (std::vector< std::vector< std::vector< int > > > *)new std::vector< std::vector< std::vector< int > > >(arg1,(std::vector< std::vector< std::vector< int > > >::value_type const &)*arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), 
SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_IntVector3(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 0) { - return _wrap_new_IntVector3__SWIG_0(self, args); - } - if (argc == 1) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_IntVector3__SWIG_2(self, args); - } - } - if (argc == 1) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_IntVector3__SWIG_1(self, args); - } - } - if (argc == 2) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_IntVector3__SWIG_3(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'new_IntVector3'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< int > > >::vector()\n" - " std::vector< std::vector< std::vector< int > > >::vector(std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > const &)\n" - " std::vector< std::vector< std::vector< int > > >::vector(std::vector< std::vector< std::vector< int > > >::size_type)\n" - " std::vector< std::vector< std::vector< int > > >::vector(std::vector< std::vector< std::vector< int > > >::size_type,std::vector< std::vector< std::vector< int > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_push_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::value_type *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector3_push_back",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_push_back" "', 
argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "IntVector3_push_back" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector3_push_back" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - arg2 = ptr; - } - (arg1)->push_back((std::vector< std::vector< std::vector< int > > >::value_type const &)*arg2); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_front(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_front",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_front" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = (std::vector< std::vector< std::vector< int > > >::value_type *) &((std::vector< std::vector< std::vector< int > > > const *)arg1)->front(); - resultobj = swig::from(static_cast< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > >::value_type *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_back" "', argument " "1"" of type '" "std::vector< std::vector< 
std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = (std::vector< std::vector< std::vector< int > > >::value_type *) &((std::vector< std::vector< std::vector< int > > > const *)arg1)->back(); - resultobj = swig::from(static_cast< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > >(*result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_assign(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::size_type arg2 ; - std::vector< std::vector< std::vector< int > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector3_assign",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_assign" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector3_assign" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< int > > >::size_type >(val2); - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "IntVector3_assign" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector3_assign" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - arg3 = ptr; - } - (arg1)->assign(arg2,(std::vector< std::vector< std::vector< int > > >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_resize__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::size_type arg2 ; - std::vector< std::vector< std::vector< int > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - 
int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector3_resize",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_resize" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector3_resize" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< int > > >::size_type >(val2); - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "IntVector3_resize" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector3_resize" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - arg3 = ptr; - } - (arg1)->resize(arg2,(std::vector< std::vector< std::vector< int > > >::value_type const &)*arg3); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_resize(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_IntVector3_resize__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< std::vector< int,std::allocator< int > 
>,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_IntVector3_resize__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector3_resize'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< int > > >::resize(std::vector< std::vector< std::vector< int > > >::size_type)\n" - " std::vector< std::vector< std::vector< int > > >::resize(std::vector< std::vector< std::vector< int > > >::size_type,std::vector< std::vector< std::vector< int > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_insert__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::iterator arg2 ; - std::vector< std::vector< std::vector< int > > >::value_type *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::vector< std::vector< int > > >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:IntVector3_insert",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_insert" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3_insert" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3_insert" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::iterator""'"); - } - } - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "IntVector3_insert" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntVector3_insert" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - arg3 = ptr; - } - result = 
std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__insert__SWIG_0(arg1,arg2,(std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< std::vector< std::vector< int > > >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_insert__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::iterator arg2 ; - std::vector< std::vector< std::vector< int > > >::size_type arg3 ; - std::vector< std::vector< std::vector< int > > >::value_type *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - size_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:IntVector3_insert",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_insert" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3_insert" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::iterator""'"); - } else { - swig::SwigPyIterator_T > >::iterator > *iter_t = dynamic_cast > >::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "IntVector3_insert" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "IntVector3_insert" "', argument " "3"" of type '" "std::vector< std::vector< std::vector< int > > >::size_type""'"); - } - arg3 = static_cast< std::vector< std::vector< std::vector< int > > >::size_type >(val3); - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "IntVector3_insert" "', argument " "4"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " 
"in method '" "IntVector3_insert" "', argument " "4"" of type '" "std::vector< std::vector< std::vector< int > > >::value_type const &""'"); - } - arg4 = ptr; - } - std_vector_Sl_std_vector_Sl_std_vector_Sl_int_Sg__Sg__Sg__insert__SWIG_1(arg1,arg2,arg3,(std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg4); - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_insert(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast > >::iterator > *>(iter) != 0)); - if (_v) { - int res = swig::asptr(argv[2], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_IntVector3_insert__SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast > >::iterator > *>(iter) != 0)); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[3], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_IntVector3_insert__SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'IntVector3_insert'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< std::vector< std::vector< int > > >::insert(std::vector< std::vector< std::vector< int > > >::iterator,std::vector< std::vector< std::vector< int > > >::value_type const &)\n" - " std::vector< std::vector< std::vector< int > > >::insert(std::vector< std::vector< std::vector< int > > >::iterator,std::vector< std::vector< std::vector< int > > >::size_type,std::vector< std::vector< std::vector< int > > >::value_type const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_reserve(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< 
std::vector< std::vector< int > > > *) 0 ; - std::vector< std::vector< std::vector< int > > >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntVector3_reserve",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_reserve" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "IntVector3_reserve" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int > > >::size_type""'"); - } - arg2 = static_cast< std::vector< std::vector< std::vector< int > > >::size_type >(val2); - (arg1)->reserve(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_IntVector3_capacity(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::vector< std::vector< int > > >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:IntVector3_capacity",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntVector3_capacity" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > const *""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - result = ((std::vector< std::vector< std::vector< int > > > const *)arg1)->capacity(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_delete_IntVector3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< int > > > *arg1 = (std::vector< std::vector< std::vector< int > > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:delete_IntVector3",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_IntVector3" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< int > > > 
*""'"); - } - arg1 = reinterpret_cast< std::vector< std::vector< std::vector< int > > > * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *IntVector3_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *obj; - if (!PyArg_ParseTuple(args,(char *)"O:swigregister", &obj)) return NULL; - SWIG_TypeNewClientData(SWIGTYPE_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, SWIG_NewClientData(obj)); - return SWIG_Py_Void(); -} - -SWIGINTERN PyObject *_wrap_TrieVector_iterator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - PyObject **arg2 = (PyObject **) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - swig::SwigPyIterator *result = 0 ; - - arg2 = &obj0; - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_iterator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_iterator" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = (swig::SwigPyIterator *)std_vector_Sl_PathTrie_Sm__Sg__iterator(arg1,arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_swig__SwigPyIterator, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___nonzero__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector___nonzero__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___nonzero__" "', argument " "1"" of type '" "std::vector< PathTrie * > const *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = (bool)std_vector_Sl_PathTrie_Sm__Sg____nonzero__((std::vector< PathTrie * > const *)arg1); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___bool__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector___bool__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___bool__" "', argument " "1"" of type '" "std::vector< PathTrie * > const *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = (bool)std_vector_Sl_PathTrie_Sm__Sg____bool__((std::vector< PathTrie * > const *)arg1); - resultobj = 
SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___len__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< PathTrie * >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector___len__",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___len__" "', argument " "1"" of type '" "std::vector< PathTrie * > const *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = std_vector_Sl_PathTrie_Sm__Sg____len__((std::vector< PathTrie * > const *)arg1); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___getslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::difference_type arg2 ; - std::vector< PathTrie * >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< PathTrie *,std::allocator< PathTrie * > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:TrieVector___getslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___getslice__" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "TrieVector___getslice__" "', argument " "2"" of type '" "std::vector< PathTrie * >::difference_type""'"); - } - arg2 = static_cast< std::vector< PathTrie * >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "TrieVector___getslice__" "', argument " "3"" of type '" "std::vector< PathTrie * >::difference_type""'"); - } - arg3 = static_cast< std::vector< PathTrie * >::difference_type >(val3); - try { - result = (std::vector< PathTrie *,std::allocator< PathTrie * > > *)std_vector_Sl_PathTrie_Sm__Sg____getslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___setslice____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::difference_type arg2 ; - std::vector< PathTrie * >::difference_type arg3 ; - void *argp1 = 0 ; 
- int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:TrieVector___setslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___setslice__" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "TrieVector___setslice__" "', argument " "2"" of type '" "std::vector< PathTrie * >::difference_type""'"); - } - arg2 = static_cast< std::vector< PathTrie * >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "TrieVector___setslice__" "', argument " "3"" of type '" "std::vector< PathTrie * >::difference_type""'"); - } - arg3 = static_cast< std::vector< PathTrie * >::difference_type >(val3); - try { - std_vector_Sl_PathTrie_Sm__Sg____setslice____SWIG_0(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___setslice____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::difference_type arg2 ; - std::vector< PathTrie * >::difference_type arg3 ; - std::vector< PathTrie *,std::allocator< PathTrie * > > *arg4 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:TrieVector___setslice__",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___setslice__" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "TrieVector___setslice__" "', argument " "2"" of type '" "std::vector< PathTrie * >::difference_type""'"); - } - arg2 = static_cast< std::vector< PathTrie * >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "TrieVector___setslice__" "', argument " "3"" of type '" "std::vector< PathTrie * >::difference_type""'"); - } - arg3 = static_cast< std::vector< PathTrie * >::difference_type >(val3); - { - std::vector< PathTrie*,std::allocator< PathTrie * > > *ptr = (std::vector< PathTrie*,std::allocator< PathTrie * > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - 
SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "TrieVector___setslice__" "', argument " "4"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "TrieVector___setslice__" "', argument " "4"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > const &""'"); - } - arg4 = ptr; - } - try { - std_vector_Sl_PathTrie_Sm__Sg____setslice____SWIG_1(arg1,arg2,arg3,(std::vector< PathTrie *,std::allocator< PathTrie * > > const &)*arg4); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___setslice__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_TrieVector___setslice____SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - int res = swig::asptr(argv[3], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_TrieVector___setslice____SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'TrieVector___setslice__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< PathTrie * >::__setslice__(std::vector< PathTrie * >::difference_type,std::vector< PathTrie * >::difference_type)\n" - " std::vector< PathTrie * >::__setslice__(std::vector< PathTrie * >::difference_type,std::vector< PathTrie * >::difference_type,std::vector< PathTrie *,std::allocator< PathTrie * > > const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___delslice__(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::difference_type arg2 ; - std::vector< PathTrie * >::difference_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - ptrdiff_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:TrieVector___delslice__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - 
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___delslice__" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "TrieVector___delslice__" "', argument " "2"" of type '" "std::vector< PathTrie * >::difference_type""'"); - } - arg2 = static_cast< std::vector< PathTrie * >::difference_type >(val2); - ecode3 = SWIG_AsVal_ptrdiff_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "TrieVector___delslice__" "', argument " "3"" of type '" "std::vector< PathTrie * >::difference_type""'"); - } - arg3 = static_cast< std::vector< PathTrie * >::difference_type >(val3); - try { - std_vector_Sl_PathTrie_Sm__Sg____delslice__(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___delitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:TrieVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___delitem__" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "TrieVector___delitem__" "', argument " "2"" of type '" "std::vector< PathTrie * >::difference_type""'"); - } - arg2 = static_cast< std::vector< PathTrie * >::difference_type >(val2); - try { - std_vector_Sl_PathTrie_Sm__Sg____delitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___getitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< PathTrie *,std::allocator< PathTrie * > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:TrieVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___getitem__" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - { - if 
(!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "TrieVector___getitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - result = (std::vector< PathTrie *,std::allocator< PathTrie * > > *)std_vector_Sl_PathTrie_Sm__Sg____getitem____SWIG_0(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___setitem____SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - std::vector< PathTrie *,std::allocator< PathTrie * > > *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res3 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:TrieVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___setitem__" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "TrieVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - { - std::vector< PathTrie*,std::allocator< PathTrie * > > *ptr = (std::vector< PathTrie*,std::allocator< PathTrie * > > *)0; - res3 = swig::asptr(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "TrieVector___setitem__" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "TrieVector___setitem__" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > const &""'"); - } - arg3 = ptr; - } - try { - std_vector_Sl_PathTrie_Sm__Sg____setitem____SWIG_0(arg1,arg2,(std::vector< PathTrie *,std::allocator< PathTrie * > > const &)*arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - if (SWIG_IsNewObj(res3)) delete arg3; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___setitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:TrieVector___setitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if 
(!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___setitem__" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "TrieVector___setitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_PathTrie_Sm__Sg____setitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___delitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - PySliceObject *arg2 = (PySliceObject *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:TrieVector___delitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___delitem__" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - { - if (!PySlice_Check(obj1)) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "TrieVector___delitem__" "', argument " "2"" of type '" "PySliceObject *""'"); - } - arg2 = (PySliceObject *) obj1; - } - try { - std_vector_Sl_PathTrie_Sm__Sg____delitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - catch(std::invalid_argument &_e) { - SWIG_exception_fail(SWIG_ValueError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___delitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_TrieVector___delitem____SWIG_1(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_TrieVector___delitem____SWIG_0(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'TrieVector___delitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< PathTrie * >::__delitem__(std::vector< PathTrie * >::difference_type)\n" - " std::vector< PathTrie * >::__delitem__(PySliceObject *)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___getitem____SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject 
*args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::difference_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< PathTrie * >::value_type result; - - if (!PyArg_ParseTuple(args,(char *)"OO:TrieVector___getitem__",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___getitem__" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "TrieVector___getitem__" "', argument " "2"" of type '" "std::vector< PathTrie * >::difference_type""'"); - } - arg2 = static_cast< std::vector< PathTrie * >::difference_type >(val2); - try { - result = (std::vector< PathTrie * >::value_type)std_vector_Sl_PathTrie_Sm__Sg____getitem____SWIG_1(arg1,arg2); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_PathTrie, 0 | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___getitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_TrieVector___getitem____SWIG_0(self, args); - } - } - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_TrieVector___getitem____SWIG_1(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'TrieVector___getitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< PathTrie * >::__getitem__(PySliceObject *)\n" - " std::vector< PathTrie * >::__getitem__(std::vector< PathTrie * >::difference_type)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___setitem____SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::difference_type arg2 ; - std::vector< PathTrie * >::value_type arg3 = (std::vector< PathTrie * >::value_type) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - ptrdiff_t val2 ; - int ecode2 = 0 ; - void *argp3 = 0 ; - int res3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:TrieVector___setitem__",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - 
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector___setitem__" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - ecode2 = SWIG_AsVal_ptrdiff_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "TrieVector___setitem__" "', argument " "2"" of type '" "std::vector< PathTrie * >::difference_type""'"); - } - arg2 = static_cast< std::vector< PathTrie * >::difference_type >(val2); - res3 = SWIG_ConvertPtr(obj2, &argp3,SWIGTYPE_p_PathTrie, 0 | 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "TrieVector___setitem__" "', argument " "3"" of type '" "std::vector< PathTrie * >::value_type""'"); - } - arg3 = reinterpret_cast< std::vector< PathTrie * >::value_type >(argp3); - try { - std_vector_Sl_PathTrie_Sm__Sg____setitem____SWIG_2(arg1,arg2,arg3); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector___setitem__(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - return _wrap_TrieVector___setitem____SWIG_1(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - _v = PySlice_Check(argv[1]); - } - if (_v) { - int res = swig::asptr(argv[2], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_TrieVector___setitem____SWIG_0(self, args); - } - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_ptrdiff_t(argv[1], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[2], &vptr, SWIGTYPE_p_PathTrie, 0); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_TrieVector___setitem____SWIG_2(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'TrieVector___setitem__'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< PathTrie * >::__setitem__(PySliceObject *,std::vector< PathTrie *,std::allocator< PathTrie * > > const &)\n" - " std::vector< PathTrie * >::__setitem__(PySliceObject *)\n" - " std::vector< PathTrie * >::__setitem__(std::vector< PathTrie * >::difference_type,std::vector< PathTrie * >::value_type)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_pop(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< PathTrie * >::value_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_pop",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_pop" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - try { - result = (std::vector< PathTrie * >::value_type)std_vector_Sl_PathTrie_Sm__Sg__pop(arg1); - } - catch(std::out_of_range &_e) { - SWIG_exception_fail(SWIG_IndexError, (&_e)->what()); - } - - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_PathTrie, 0 | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_append(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::value_type arg2 = (std::vector< PathTrie * >::value_type) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:TrieVector_append",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_append" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2,SWIGTYPE_p_PathTrie, 0 | 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "TrieVector_append" "', argument " "2"" of type '" "std::vector< PathTrie * >::value_type""'"); - } - arg2 = reinterpret_cast< std::vector< PathTrie * >::value_type >(argp2); - std_vector_Sl_PathTrie_Sm__Sg__append(arg1,arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_TrieVector__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)":new_TrieVector")) SWIG_fail; - result = (std::vector< PathTrie * > *)new std::vector< PathTrie * >(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_TrieVector__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = 0 ; - int res1 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - std::vector< PathTrie * > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_TrieVector",&obj0)) SWIG_fail; - { - std::vector< PathTrie*,std::allocator< PathTrie * > > *ptr = (std::vector< PathTrie*,std::allocator< PathTrie * > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_TrieVector" "', argument " "1"" of type '" "std::vector< PathTrie * > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_TrieVector" "', argument " "1"" of type '" "std::vector< PathTrie * > const &""'"); - } - arg1 = ptr; - } - result = (std::vector< PathTrie * > *)new std::vector< PathTrie * >((std::vector< PathTrie * > const &)*arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), 
SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res1)) delete arg1; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_empty(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_empty",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_empty" "', argument " "1"" of type '" "std::vector< PathTrie * > const *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = (bool)((std::vector< PathTrie * > const *)arg1)->empty(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< PathTrie * >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_size" "', argument " "1"" of type '" "std::vector< PathTrie * > const *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = ((std::vector< PathTrie * > const *)arg1)->size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_swap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:TrieVector_swap",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_swap" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "TrieVector_swap" "', argument " "2"" of type '" "std::vector< PathTrie * > &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "TrieVector_swap" "', argument " "2"" of type '" "std::vector< PathTrie * > &""'"); - } - arg2 = reinterpret_cast< std::vector< PathTrie * > * >(argp2); - (arg1)->swap(*arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_begin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - 
std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< PathTrie * >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_begin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_begin" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = (arg1)->begin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< PathTrie * >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< PathTrie * >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_end",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_end" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = (arg1)->end(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< PathTrie * >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_rbegin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< PathTrie * >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_rbegin",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_rbegin" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = (arg1)->rbegin(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< PathTrie * >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_rend(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< PathTrie * >::reverse_iterator result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_rend",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_rend" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = 
reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = (arg1)->rend(); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< PathTrie * >::reverse_iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_clear(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_clear",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_clear" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - (arg1)->clear(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_get_allocator(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - SwigValueWrapper< std::allocator< PathTrie * > > result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_get_allocator",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_get_allocator" "', argument " "1"" of type '" "std::vector< PathTrie * > const *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = ((std::vector< PathTrie * > const *)arg1)->get_allocator(); - resultobj = SWIG_NewPointerObj((new std::vector< PathTrie * >::allocator_type(static_cast< const std::vector< PathTrie * >::allocator_type& >(result))), SWIGTYPE_p_std__allocatorT_PathTrie_p_t, SWIG_POINTER_OWN | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_TrieVector__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * >::size_type arg1 ; - size_t val1 ; - int ecode1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< PathTrie * > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:new_TrieVector",&obj0)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_TrieVector" "', argument " "1"" of type '" "std::vector< PathTrie * >::size_type""'"); - } - arg1 = static_cast< std::vector< PathTrie * >::size_type >(val1); - result = (std::vector< PathTrie * > *)new std::vector< PathTrie * >(arg1); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_pop_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_pop_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, 
&argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_pop_back" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - (arg1)->pop_back(); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_resize__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:TrieVector_resize",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_resize" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "TrieVector_resize" "', argument " "2"" of type '" "std::vector< PathTrie * >::size_type""'"); - } - arg2 = static_cast< std::vector< PathTrie * >::size_type >(val2); - (arg1)->resize(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_erase__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::iterator arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< PathTrie * >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OO:TrieVector_erase",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_erase" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "TrieVector_erase" "', argument " "2"" of type '" "std::vector< PathTrie * >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "TrieVector_erase" "', argument " "2"" of type '" "std::vector< PathTrie * >::iterator""'"); - } - } - result = std_vector_Sl_PathTrie_Sm__Sg__erase__SWIG_0(arg1,arg2); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< PathTrie * >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_erase__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< 
PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::iterator arg2 ; - std::vector< PathTrie * >::iterator arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - swig::SwigPyIterator *iter3 = 0 ; - int res3 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< PathTrie * >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:TrieVector_erase",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_erase" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "TrieVector_erase" "', argument " "2"" of type '" "std::vector< PathTrie * >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "TrieVector_erase" "', argument " "2"" of type '" "std::vector< PathTrie * >::iterator""'"); - } - } - res3 = SWIG_ConvertPtr(obj2, SWIG_as_voidptrptr(&iter3), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res3) || !iter3) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "TrieVector_erase" "', argument " "3"" of type '" "std::vector< PathTrie * >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter3); - if (iter_t) { - arg3 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "TrieVector_erase" "', argument " "3"" of type '" "std::vector< PathTrie * >::iterator""'"); - } - } - result = std_vector_Sl_PathTrie_Sm__Sg__erase__SWIG_1(arg1,arg2,arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< PathTrie * >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_erase(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_TrieVector_erase__SWIG_0(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - 
swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[2], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - return _wrap_TrieVector_erase__SWIG_1(self, args); - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'TrieVector_erase'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< PathTrie * >::erase(std::vector< PathTrie * >::iterator)\n" - " std::vector< PathTrie * >::erase(std::vector< PathTrie * >::iterator,std::vector< PathTrie * >::iterator)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_new_TrieVector__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * >::size_type arg1 ; - std::vector< PathTrie * >::value_type arg2 = (std::vector< PathTrie * >::value_type) 0 ; - size_t val1 ; - int ecode1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< PathTrie * > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:new_TrieVector",&obj0,&obj1)) SWIG_fail; - ecode1 = SWIG_AsVal_size_t(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_TrieVector" "', argument " "1"" of type '" "std::vector< PathTrie * >::size_type""'"); - } - arg1 = static_cast< std::vector< PathTrie * >::size_type >(val1); - res2 = SWIG_ConvertPtr(obj1, &argp2,SWIGTYPE_p_PathTrie, 0 | 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "new_TrieVector" "', argument " "2"" of type '" "std::vector< PathTrie * >::value_type""'"); - } - arg2 = reinterpret_cast< std::vector< PathTrie * >::value_type >(argp2); - result = (std::vector< PathTrie * > *)new std::vector< PathTrie * >(arg1,arg2); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_new_TrieVector(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[3] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 2) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 0) { - return _wrap_new_TrieVector__SWIG_0(self, args); - } - if (argc == 1) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_new_TrieVector__SWIG_2(self, args); - } - } - if (argc == 1) { - int _v; - int res = swig::asptr(argv[0], (std::vector< PathTrie*,std::allocator< PathTrie * > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_TrieVector__SWIG_1(self, args); - } - } - if (argc == 2) { - int _v; - { - int res = SWIG_AsVal_size_t(argv[0], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[1], &vptr, SWIGTYPE_p_PathTrie, 0); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_new_TrieVector__SWIG_3(self, args); - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'new_TrieVector'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< PathTrie * >::vector()\n" - " std::vector< PathTrie * >::vector(std::vector< PathTrie * > const &)\n" - " std::vector< PathTrie * >::vector(std::vector< PathTrie * 
>::size_type)\n" - " std::vector< PathTrie * >::vector(std::vector< PathTrie * >::size_type,std::vector< PathTrie * >::value_type)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_push_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::value_type arg2 = (std::vector< PathTrie * >::value_type) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:TrieVector_push_back",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_push_back" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2,SWIGTYPE_p_PathTrie, 0 | 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "TrieVector_push_back" "', argument " "2"" of type '" "std::vector< PathTrie * >::value_type""'"); - } - arg2 = reinterpret_cast< std::vector< PathTrie * >::value_type >(argp2); - (arg1)->push_back(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_front(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< PathTrie * >::value_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_front",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_front" "', argument " "1"" of type '" "std::vector< PathTrie * > const *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = (std::vector< PathTrie * >::value_type)((std::vector< PathTrie * > const *)arg1)->front(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_PathTrie, 0 | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_back(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< PathTrie * >::value_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:TrieVector_back",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_back" "', argument " "1"" of type '" "std::vector< PathTrie * > const *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - result = (std::vector< PathTrie * >::value_type)((std::vector< PathTrie * > const *)arg1)->back(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_PathTrie, 0 | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_assign(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = 
(std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::size_type arg2 ; - std::vector< PathTrie * >::value_type arg3 = (std::vector< PathTrie * >::value_type) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - void *argp3 = 0 ; - int res3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:TrieVector_assign",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_assign" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "TrieVector_assign" "', argument " "2"" of type '" "std::vector< PathTrie * >::size_type""'"); - } - arg2 = static_cast< std::vector< PathTrie * >::size_type >(val2); - res3 = SWIG_ConvertPtr(obj2, &argp3,SWIGTYPE_p_PathTrie, 0 | 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "TrieVector_assign" "', argument " "3"" of type '" "std::vector< PathTrie * >::value_type""'"); - } - arg3 = reinterpret_cast< std::vector< PathTrie * >::value_type >(argp3); - (arg1)->assign(arg2,arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_resize__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< PathTrie * > *arg1 = (std::vector< PathTrie * > *) 0 ; - std::vector< PathTrie * >::size_type arg2 ; - std::vector< PathTrie * >::value_type arg3 = (std::vector< PathTrie * >::value_type) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - void *argp3 = 0 ; - int res3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:TrieVector_resize",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "TrieVector_resize" "', argument " "1"" of type '" "std::vector< PathTrie * > *""'"); - } - arg1 = reinterpret_cast< std::vector< PathTrie * > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "TrieVector_resize" "', argument " "2"" of type '" "std::vector< PathTrie * >::size_type""'"); - } - arg2 = static_cast< std::vector< PathTrie * >::size_type >(val2); - res3 = SWIG_ConvertPtr(obj2, &argp3,SWIGTYPE_p_PathTrie, 0 | 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "TrieVector_resize" "', argument " "3"" of type '" "std::vector< PathTrie * >::value_type""'"); - } - arg3 = reinterpret_cast< std::vector< PathTrie * >::value_type >(argp3); - (arg1)->resize(arg2,arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_TrieVector_resize(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[4] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 3) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 
[Auto-generated SWIG wrapper code omitted: this part of the diff deletes the generated ctc_decoder/swig Python bindings for the TrieVector (std::vector<PathTrie*>) and BoolVector (std::vector<bool>) containers, including the overload dispatchers and wrappers for resize, insert, reserve, capacity, iterators, slice and item access, pop, append, erase, push_back, front, back, assign, the constructors, and swigregister. The extracted text of this span is collapsed and missing its template arguments, so the full listing is not reproduced here.]
Possible C/C++ prototypes are:\n" - " std::vector< bool >::resize(std::vector< bool >::size_type)\n" - " std::vector< bool >::resize(std::vector< bool >::size_type,std::vector< bool >::value_type)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_BoolVector_insert__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< bool > *arg1 = (std::vector< bool > *) 0 ; - std::vector< bool >::iterator arg2 ; - std::vector< bool >::value_type arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - bool val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< bool >::iterator result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:BoolVector_insert",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_bool_std__allocatorT_bool_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "BoolVector_insert" "', argument " "1"" of type '" "std::vector< bool > *""'"); - } - arg1 = reinterpret_cast< std::vector< bool > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "BoolVector_insert" "', argument " "2"" of type '" "std::vector< bool >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "BoolVector_insert" "', argument " "2"" of type '" "std::vector< bool >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_bool(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "BoolVector_insert" "', argument " "3"" of type '" "std::vector< bool >::value_type""'"); - } - arg3 = static_cast< std::vector< bool >::value_type >(val3); - result = std_vector_Sl_bool_Sg__insert__SWIG_0(arg1,arg2,arg3); - resultobj = SWIG_NewPointerObj(swig::make_output_iterator(static_cast< const std::vector< bool >::iterator & >(result)), - swig::SwigPyIterator::descriptor(),SWIG_POINTER_OWN); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_BoolVector_insert__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< bool > *arg1 = (std::vector< bool > *) 0 ; - std::vector< bool >::iterator arg2 ; - std::vector< bool >::size_type arg3 ; - std::vector< bool >::value_type arg4 ; - void *argp1 = 0 ; - int res1 = 0 ; - swig::SwigPyIterator *iter2 = 0 ; - int res2 ; - size_t val3 ; - int ecode3 = 0 ; - bool val4 ; - int ecode4 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:BoolVector_insert",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_bool_std__allocatorT_bool_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "BoolVector_insert" "', argument " "1"" of type '" "std::vector< bool > *""'"); - } - arg1 = reinterpret_cast< std::vector< bool > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, SWIG_as_voidptrptr(&iter2), swig::SwigPyIterator::descriptor(), 0); - if (!SWIG_IsOK(res2) || !iter2) { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "BoolVector_insert" "', argument " 
"2"" of type '" "std::vector< bool >::iterator""'"); - } else { - swig::SwigPyIterator_T::iterator > *iter_t = dynamic_cast::iterator > *>(iter2); - if (iter_t) { - arg2 = iter_t->get_current(); - } else { - SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "BoolVector_insert" "', argument " "2"" of type '" "std::vector< bool >::iterator""'"); - } - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "BoolVector_insert" "', argument " "3"" of type '" "std::vector< bool >::size_type""'"); - } - arg3 = static_cast< std::vector< bool >::size_type >(val3); - ecode4 = SWIG_AsVal_bool(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "BoolVector_insert" "', argument " "4"" of type '" "std::vector< bool >::value_type""'"); - } - arg4 = static_cast< std::vector< bool >::value_type >(val4); - std_vector_Sl_bool_Sg__insert__SWIG_1(arg1,arg2,arg3,arg4); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_BoolVector_insert(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - { - int res = SWIG_AsVal_bool(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_BoolVector_insert__SWIG_0(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - swig::SwigPyIterator *iter = 0; - int res = SWIG_ConvertPtr(argv[1], SWIG_as_voidptrptr(&iter), swig::SwigPyIterator::descriptor(), 0); - _v = (SWIG_IsOK(res) && iter && (dynamic_cast::iterator > *>(iter) != 0)); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_bool(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_BoolVector_insert__SWIG_1(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'BoolVector_insert'.\n" - " Possible C/C++ prototypes are:\n" - " std::vector< bool >::insert(std::vector< bool >::iterator,std::vector< bool >::value_type)\n" - " std::vector< bool >::insert(std::vector< bool >::iterator,std::vector< bool >::size_type,std::vector< bool >::value_type)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_BoolVector_reserve(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< bool > *arg1 = (std::vector< bool > *) 0 ; - std::vector< bool >::size_type arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - size_t val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:BoolVector_reserve",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_bool_std__allocatorT_bool_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in 
method '" "BoolVector_reserve" "', argument " "1"" of type '" "std::vector< bool > *""'"); - } - arg1 = reinterpret_cast< std::vector< bool > * >(argp1); - ecode2 = SWIG_AsVal_size_t(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "BoolVector_reserve" "', argument " "2"" of type '" "std::vector< bool >::size_type""'"); - } - arg2 = static_cast< std::vector< bool >::size_type >(val2); - (arg1)->reserve(arg2); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_BoolVector_capacity(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< bool > *arg1 = (std::vector< bool > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< bool >::size_type result; - - if (!PyArg_ParseTuple(args,(char *)"O:BoolVector_capacity",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_bool_std__allocatorT_bool_t_t, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "BoolVector_capacity" "', argument " "1"" of type '" "std::vector< bool > const *""'"); - } - arg1 = reinterpret_cast< std::vector< bool > * >(argp1); - result = ((std::vector< bool > const *)arg1)->capacity(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_delete_BoolVector(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< bool > *arg1 = (std::vector< bool > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:delete_BoolVector",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_std__vectorT_bool_std__allocatorT_bool_t_t, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_BoolVector" "', argument " "1"" of type '" "std::vector< bool > *""'"); - } - arg1 = reinterpret_cast< std::vector< bool > * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *BoolVector_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *obj; - if (!PyArg_ParseTuple(args,(char *)"O:swigregister", &obj)) return NULL; - SWIG_TypeNewClientData(SWIGTYPE_p_std__vectorT_bool_std__allocatorT_bool_t_t, SWIG_NewClientData(obj)); - return SWIG_Py_Void(); -} - -SWIGINTERN PyObject *_wrap_IntDoublePairCompSecondRev(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::pair< int,double > *arg1 = 0 ; - std::pair< int,double > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"OO:IntDoublePairCompSecondRev",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1, SWIGTYPE_p_std__pairT_int_double_t, 0 | 0); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "IntDoublePairCompSecondRev" "', argument " "1"" of type '" "std::pair< int,double > const &""'"); - } - if (!argp1) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntDoublePairCompSecondRev" "', argument " "1"" of type '" "std::pair< int,double > const &""'"); - } - arg1 = reinterpret_cast< std::pair< int,double > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__pairT_int_double_t, 0 | 
0); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "IntDoublePairCompSecondRev" "', argument " "2"" of type '" "std::pair< int,double > const &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "IntDoublePairCompSecondRev" "', argument " "2"" of type '" "std::pair< int,double > const &""'"); - } - arg2 = reinterpret_cast< std::pair< int,double > * >(argp2); - result = (bool)pair_comp_second_rev< int,double >((std::pair< int,double > const &)*arg1,(std::pair< int,double > const &)*arg2); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_StringDoublePairCompSecondRev(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::pair< std::string,double > *arg1 = 0 ; - std::pair< std::string,double > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"OO:StringDoublePairCompSecondRev",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1, SWIGTYPE_p_std__pairT_std__string_double_t, 0 | 0); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "StringDoublePairCompSecondRev" "', argument " "1"" of type '" "std::pair< std::string,double > const &""'"); - } - if (!argp1) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringDoublePairCompSecondRev" "', argument " "1"" of type '" "std::pair< std::string,double > const &""'"); - } - arg1 = reinterpret_cast< std::pair< std::string,double > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__pairT_std__string_double_t, 0 | 0); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "StringDoublePairCompSecondRev" "', argument " "2"" of type '" "std::pair< std::string,double > const &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "StringDoublePairCompSecondRev" "', argument " "2"" of type '" "std::pair< std::string,double > const &""'"); - } - arg2 = reinterpret_cast< std::pair< std::string,double > * >(argp2); - result = (bool)pair_comp_second_rev< std::string,double >((std::pair< std::string,double > const &)*arg1,(std::pair< std::string,double > const &)*arg2); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_DoubleStringPairCompFirstRev(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::pair< double,std::string > *arg1 = 0 ; - std::pair< double,std::string > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"OO:DoubleStringPairCompFirstRev",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1, SWIGTYPE_p_std__pairT_double_std__string_t, 0 | 0); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "DoubleStringPairCompFirstRev" "', argument " "1"" of type '" "std::pair< double,std::string > const &""'"); - } - if (!argp1) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "DoubleStringPairCompFirstRev" "', argument " "1"" of type '" "std::pair< double,std::string > const &""'"); - } - arg1 = reinterpret_cast< std::pair< 
double,std::string > * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_std__pairT_double_std__string_t, 0 | 0); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "DoubleStringPairCompFirstRev" "', argument " "2"" of type '" "std::pair< double,std::string > const &""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "DoubleStringPairCompFirstRev" "', argument " "2"" of type '" "std::pair< double,std::string > const &""'"); - } - arg2 = reinterpret_cast< std::pair< double,std::string > * >(argp2); - result = (bool)pair_comp_first_rev< double,std::string >((std::pair< double,std::string > const &)*arg1,(std::pair< double,std::string > const &)*arg2); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN int Swig_var_OOV_SCORE_set(PyObject *) { - SWIG_Error(SWIG_AttributeError,"Variable OOV_SCORE is read-only."); - return 1; -} - - -SWIGINTERN PyObject *Swig_var_OOV_SCORE_get(void) { - PyObject *pyobj = 0; - - pyobj = SWIG_From_double(static_cast< double >(OOV_SCORE)); - return pyobj; -} - - -SWIGINTERN int Swig_var_START_TOKEN_set(PyObject *) { - SWIG_Error(SWIG_AttributeError,"Variable START_TOKEN is read-only."); - return 1; -} - - -SWIGINTERN PyObject *Swig_var_START_TOKEN_get(void) { - PyObject *pyobj = 0; - - pyobj = SWIG_From_std_string(static_cast< std::string >(START_TOKEN)); - return pyobj; -} - - -SWIGINTERN int Swig_var_UNK_TOKEN_set(PyObject *) { - SWIG_Error(SWIG_AttributeError,"Variable UNK_TOKEN is read-only."); - return 1; -} - - -SWIGINTERN PyObject *Swig_var_UNK_TOKEN_get(void) { - PyObject *pyobj = 0; - - pyobj = SWIG_From_std_string(static_cast< std::string >(UNK_TOKEN)); - return pyobj; -} - - -SWIGINTERN int Swig_var_END_TOKEN_set(PyObject *) { - SWIG_Error(SWIG_AttributeError,"Variable END_TOKEN is read-only."); - return 1; -} - - -SWIGINTERN PyObject *Swig_var_END_TOKEN_get(void) { - PyObject *pyobj = 0; - - pyobj = SWIG_From_std_string(static_cast< std::string >(END_TOKEN)); - return pyobj; -} - - -SWIGINTERN PyObject *_wrap_new_RetriveStrEnumerateVocab(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - RetriveStrEnumerateVocab *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)":new_RetriveStrEnumerateVocab")) SWIG_fail; - result = (RetriveStrEnumerateVocab *)new RetriveStrEnumerateVocab(); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_RetriveStrEnumerateVocab, SWIG_POINTER_NEW | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_RetriveStrEnumerateVocab_Add(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - RetriveStrEnumerateVocab *arg1 = (RetriveStrEnumerateVocab *) 0 ; - lm::WordIndex arg2 ; - StringPiece *arg3 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 ; - int res2 = 0 ; - void *argp3 = 0 ; - int res3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:RetriveStrEnumerateVocab_Add",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_RetriveStrEnumerateVocab, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "RetriveStrEnumerateVocab_Add" "', argument " "1"" of type '" "RetriveStrEnumerateVocab *""'"); - } - arg1 = reinterpret_cast< RetriveStrEnumerateVocab * >(argp1); - { - res2 = SWIG_ConvertPtr(obj1, &argp2, SWIGTYPE_p_lm__WordIndex, 0 | 
0); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "RetriveStrEnumerateVocab_Add" "', argument " "2"" of type '" "lm::WordIndex""'"); - } - if (!argp2) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "RetriveStrEnumerateVocab_Add" "', argument " "2"" of type '" "lm::WordIndex""'"); - } else { - lm::WordIndex * temp = reinterpret_cast< lm::WordIndex * >(argp2); - arg2 = *temp; - if (SWIG_IsNewObj(res2)) delete temp; - } - } - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_StringPiece, 0 | 0); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "RetriveStrEnumerateVocab_Add" "', argument " "3"" of type '" "StringPiece const &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "RetriveStrEnumerateVocab_Add" "', argument " "3"" of type '" "StringPiece const &""'"); - } - arg3 = reinterpret_cast< StringPiece * >(argp3); - (arg1)->Add(arg2,(StringPiece const &)*arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_RetriveStrEnumerateVocab_vocabulary_set(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - RetriveStrEnumerateVocab *arg1 = (RetriveStrEnumerateVocab *) 0 ; - std::vector< std::string,std::allocator< std::string > > *arg2 = (std::vector< std::string,std::allocator< std::string > > *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:RetriveStrEnumerateVocab_vocabulary_set",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_RetriveStrEnumerateVocab, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "RetriveStrEnumerateVocab_vocabulary_set" "', argument " "1"" of type '" "RetriveStrEnumerateVocab *""'"); - } - arg1 = reinterpret_cast< RetriveStrEnumerateVocab * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2,SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "RetriveStrEnumerateVocab_vocabulary_set" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > *""'"); - } - arg2 = reinterpret_cast< std::vector< std::string,std::allocator< std::string > > * >(argp2); - if (arg1) (arg1)->vocabulary = *arg2; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_RetriveStrEnumerateVocab_vocabulary_get(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - RetriveStrEnumerateVocab *arg1 = (RetriveStrEnumerateVocab *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - std::vector< std::string,std::allocator< std::string > > *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:RetriveStrEnumerateVocab_vocabulary_get",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_RetriveStrEnumerateVocab, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "RetriveStrEnumerateVocab_vocabulary_get" "', argument " "1"" of type '" "RetriveStrEnumerateVocab *""'"); - } - arg1 = reinterpret_cast< RetriveStrEnumerateVocab * >(argp1); - result = (std::vector< std::string,std::allocator< std::string > > *)& ((arg1)->vocabulary); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), 
SWIGTYPE_p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0 | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_delete_RetriveStrEnumerateVocab(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - RetriveStrEnumerateVocab *arg1 = (RetriveStrEnumerateVocab *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:delete_RetriveStrEnumerateVocab",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_RetriveStrEnumerateVocab, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_RetriveStrEnumerateVocab" "', argument " "1"" of type '" "RetriveStrEnumerateVocab *""'"); - } - arg1 = reinterpret_cast< RetriveStrEnumerateVocab * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *RetriveStrEnumerateVocab_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *obj; - if (!PyArg_ParseTuple(args,(char *)"O:swigregister", &obj)) return NULL; - SWIG_TypeNewClientData(SWIGTYPE_p_RetriveStrEnumerateVocab, SWIG_NewClientData(obj)); - return SWIG_Py_Void(); -} - -SWIGINTERN PyObject *_wrap_new_Scorer(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - double arg1 ; - double arg2 ; - std::string *arg3 = 0 ; - std::vector< std::string,std::allocator< std::string > > *arg4 = 0 ; - double val1 ; - int ecode1 = 0 ; - double val2 ; - int ecode2 = 0 ; - int res3 = SWIG_OLDOBJ ; - int res4 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - Scorer *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:new_Scorer",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - ecode1 = SWIG_AsVal_double(obj0, &val1); - if (!SWIG_IsOK(ecode1)) { - SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "new_Scorer" "', argument " "1"" of type '" "double""'"); - } - arg1 = static_cast< double >(val1); - ecode2 = SWIG_AsVal_double(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "new_Scorer" "', argument " "2"" of type '" "double""'"); - } - arg2 = static_cast< double >(val2); - { - std::string *ptr = (std::string *)0; - res3 = SWIG_AsPtr_std_string(obj2, &ptr); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "new_Scorer" "', argument " "3"" of type '" "std::string const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_Scorer" "', argument " "3"" of type '" "std::string const &""'"); - } - arg3 = ptr; - } - { - std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "new_Scorer" "', argument " "4"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_Scorer" "', argument " "4"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - arg4 = ptr; - } - result = (Scorer *)new Scorer(arg1,arg2,(std::string const &)*arg3,(std::vector< std::string,std::allocator< std::string > > const &)*arg4); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_Scorer, 
SWIG_POINTER_NEW | 0 ); - if (SWIG_IsNewObj(res3)) delete arg3; - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res3)) delete arg3; - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_delete_Scorer(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:delete_Scorer",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, SWIG_POINTER_DISOWN | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_Scorer" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - delete arg1; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_get_log_cond_prob(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - std::vector< std::string,std::allocator< std::string > > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - double result; - - if (!PyArg_ParseTuple(args,(char *)"OO:Scorer_get_log_cond_prob",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_get_log_cond_prob" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - { - std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Scorer_get_log_cond_prob" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Scorer_get_log_cond_prob" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - arg2 = ptr; - } - result = (double)(arg1)->get_log_cond_prob((std::vector< std::string,std::allocator< std::string > > const &)*arg2); - resultobj = SWIG_From_double(static_cast< double >(result)); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_get_sent_log_prob(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - std::vector< std::string,std::allocator< std::string > > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - double result; - - if (!PyArg_ParseTuple(args,(char *)"OO:Scorer_get_sent_log_prob",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_get_sent_log_prob" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - { - std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" 
"Scorer_get_sent_log_prob" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Scorer_get_sent_log_prob" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - arg2 = ptr; - } - result = (double)(arg1)->get_sent_log_prob((std::vector< std::string,std::allocator< std::string > > const &)*arg2); - resultobj = SWIG_From_double(static_cast< double >(result)); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_get_max_order(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - size_t result; - - if (!PyArg_ParseTuple(args,(char *)"O:Scorer_get_max_order",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_get_max_order" "', argument " "1"" of type '" "Scorer const *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - result = ((Scorer const *)arg1)->get_max_order(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_get_dict_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - size_t result; - - if (!PyArg_ParseTuple(args,(char *)"O:Scorer_get_dict_size",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_get_dict_size" "', argument " "1"" of type '" "Scorer const *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - result = ((Scorer const *)arg1)->get_dict_size(); - resultobj = SWIG_From_size_t(static_cast< size_t >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_is_character_based(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - bool result; - - if (!PyArg_ParseTuple(args,(char *)"O:Scorer_is_character_based",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_is_character_based" "', argument " "1"" of type '" "Scorer const *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - result = (bool)((Scorer const *)arg1)->is_character_based(); - resultobj = SWIG_From_bool(static_cast< bool >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_reset_params(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - float arg2 ; - float arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - float val2 ; - int ecode2 = 0 ; - float val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OOO:Scorer_reset_params",&obj0,&obj1,&obj2)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - 
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_reset_params" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - ecode2 = SWIG_AsVal_float(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "Scorer_reset_params" "', argument " "2"" of type '" "float""'"); - } - arg2 = static_cast< float >(val2); - ecode3 = SWIG_AsVal_float(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "Scorer_reset_params" "', argument " "3"" of type '" "float""'"); - } - arg3 = static_cast< float >(val3); - (arg1)->reset_params(arg2,arg3); - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_make_ngram(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - PathTrie *arg2 = (PathTrie *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - void *argp2 = 0 ; - int res2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::string,std::allocator< std::string > > result; - - if (!PyArg_ParseTuple(args,(char *)"OO:Scorer_make_ngram",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_make_ngram" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - res2 = SWIG_ConvertPtr(obj1, &argp2,SWIGTYPE_p_PathTrie, 0 | 0 ); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Scorer_make_ngram" "', argument " "2"" of type '" "PathTrie *""'"); - } - arg2 = reinterpret_cast< PathTrie * >(argp2); - result = (arg1)->make_ngram(arg2); - resultobj = swig::from(static_cast< std::vector< std::string,std::allocator< std::string > > >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_split_labels(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - std::vector< int,std::allocator< int > > *arg2 = 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::vector< std::string,std::allocator< std::string > > result; - - if (!PyArg_ParseTuple(args,(char *)"OO:Scorer_split_labels",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_split_labels" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Scorer_split_labels" "', argument " "2"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Scorer_split_labels" "', argument " "2"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - arg2 = ptr; - } - result = (arg1)->split_labels((std::vector< int,std::allocator< int > > const &)*arg2); - resultobj = swig::from(static_cast< std::vector< std::string,std::allocator< std::string > > >(result)); - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res2)) 
delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_alpha_set(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - double arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - double val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:Scorer_alpha_set",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_alpha_set" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - ecode2 = SWIG_AsVal_double(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "Scorer_alpha_set" "', argument " "2"" of type '" "double""'"); - } - arg2 = static_cast< double >(val2); - if (arg1) (arg1)->alpha = arg2; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_alpha_get(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - double result; - - if (!PyArg_ParseTuple(args,(char *)"O:Scorer_alpha_get",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_alpha_get" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - result = (double) ((arg1)->alpha); - resultobj = SWIG_From_double(static_cast< double >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_beta_set(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - double arg2 ; - void *argp1 = 0 ; - int res1 = 0 ; - double val2 ; - int ecode2 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:Scorer_beta_set",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_beta_set" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - ecode2 = SWIG_AsVal_double(obj1, &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "Scorer_beta_set" "', argument " "2"" of type '" "double""'"); - } - arg2 = static_cast< double >(val2); - if (arg1) (arg1)->beta = arg2; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_beta_get(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - double result; - - if (!PyArg_ParseTuple(args,(char *)"O:Scorer_beta_get",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_beta_get" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - result = (double) ((arg1)->beta); - resultobj = SWIG_From_double(static_cast< double >(result)); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_dictionary_set(PyObject *SWIGUNUSEDPARM(self), PyObject *args) 
{ - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - void *arg2 = (void *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - int res2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"OO:Scorer_dictionary_set",&obj0,&obj1)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_dictionary_set" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - res2 = SWIG_ConvertPtr(obj1,SWIG_as_voidptrptr(&arg2), 0, SWIG_POINTER_DISOWN); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Scorer_dictionary_set" "', argument " "2"" of type '" "void *""'"); - } - if (arg1) (arg1)->dictionary = arg2; - resultobj = SWIG_Py_Void(); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *_wrap_Scorer_dictionary_get(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - Scorer *arg1 = (Scorer *) 0 ; - void *argp1 = 0 ; - int res1 = 0 ; - PyObject * obj0 = 0 ; - void *result = 0 ; - - if (!PyArg_ParseTuple(args,(char *)"O:Scorer_dictionary_get",&obj0)) SWIG_fail; - res1 = SWIG_ConvertPtr(obj0, &argp1,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Scorer_dictionary_get" "', argument " "1"" of type '" "Scorer *""'"); - } - arg1 = reinterpret_cast< Scorer * >(argp1); - result = (void *) ((arg1)->dictionary); - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_void, 0 | 0 ); - return resultobj; -fail: - return NULL; -} - - -SWIGINTERN PyObject *Scorer_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *obj; - if (!PyArg_ParseTuple(args,(char *)"O:swigregister", &obj)) return NULL; - SWIG_TypeNewClientData(SWIGTYPE_p_Scorer, SWIG_NewClientData(obj)); - return SWIG_Py_Void(); -} - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *arg1 = 0 ; - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *arg2 = 0 ; - PathTrie *arg3 = 0 ; - bool arg4 ; - size_t arg5 ; - int arg6 ; - int arg7 ; - double arg8 ; - Scorer *arg9 = (Scorer *) 0 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - void *argp3 = 0 ; - int res3 = 0 ; - bool val4 ; - int ecode4 = 0 ; - size_t val5 ; - int ecode5 = 0 ; - int val6 ; - int ecode6 = 0 ; - int val7 ; - int ecode7 = 0 ; - double val8 ; - int ecode8 = 0 ; - void *argp9 = 0 ; - int res9 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - PyObject * obj4 = 0 ; - PyObject * obj5 = 0 ; - PyObject * obj6 = 0 ; - PyObject * obj7 = 0 ; - PyObject * obj8 = 0 ; - std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOOOOOOO:ctc_beam_search_decoder",&obj0,&obj1,&obj2,&obj3,&obj4,&obj5,&obj6,&obj7,&obj8)) SWIG_fail; - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< 
double,std::allocator< double > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ctc_beam_search_decoder" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "ctc_beam_search_decoder" "', argument " "2"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "2"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - arg2 = ptr; - } - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_PathTrie, 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "ctc_beam_search_decoder" "', argument " "3"" of type '" "PathTrie &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "3"" of type '" "PathTrie &""'"); - } - arg3 = reinterpret_cast< PathTrie * >(argp3); - ecode4 = SWIG_AsVal_bool(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "ctc_beam_search_decoder" "', argument " "4"" of type '" "bool""'"); - } - arg4 = static_cast< bool >(val4); - ecode5 = SWIG_AsVal_size_t(obj4, &val5); - if (!SWIG_IsOK(ecode5)) { - SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "ctc_beam_search_decoder" "', argument " "5"" of type '" "size_t""'"); - } - arg5 = static_cast< size_t >(val5); - ecode6 = SWIG_AsVal_int(obj5, &val6); - if (!SWIG_IsOK(ecode6)) { - SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "ctc_beam_search_decoder" "', argument " "6"" of type '" "int""'"); - } - arg6 = static_cast< int >(val6); - ecode7 = SWIG_AsVal_int(obj6, &val7); - if (!SWIG_IsOK(ecode7)) { - SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "ctc_beam_search_decoder" "', argument " "7"" of type '" "int""'"); - } - arg7 = static_cast< int >(val7); - ecode8 = SWIG_AsVal_double(obj7, &val8); - if (!SWIG_IsOK(ecode8)) { - SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "ctc_beam_search_decoder" "', argument " "8"" of type '" "double""'"); - } - arg8 = static_cast< double >(val8); - res9 = SWIG_ConvertPtr(obj8, &argp9,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res9)) { - SWIG_exception_fail(SWIG_ArgError(res9), "in method '" "ctc_beam_search_decoder" "', argument " "9"" of type '" "Scorer *""'"); - } - arg9 = reinterpret_cast< Scorer * >(argp9); - result = ctc_beam_search_decoder((std::vector< std::vector< double,std::allocator< double > 
>,std::allocator< std::vector< double,std::allocator< double > > > > const &)*arg1,(std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg2,*arg3,arg4,arg5,arg6,arg7,arg8,arg9); - resultobj = swig::from(static_cast< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *arg1 = 0 ; - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *arg2 = 0 ; - PathTrie *arg3 = 0 ; - bool arg4 ; - size_t arg5 ; - int arg6 ; - int arg7 ; - double arg8 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - void *argp3 = 0 ; - int res3 = 0 ; - bool val4 ; - int ecode4 = 0 ; - size_t val5 ; - int ecode5 = 0 ; - int val6 ; - int ecode6 = 0 ; - int val7 ; - int ecode7 = 0 ; - double val8 ; - int ecode8 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - PyObject * obj4 = 0 ; - PyObject * obj5 = 0 ; - PyObject * obj6 = 0 ; - PyObject * obj7 = 0 ; - std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOOOOOO:ctc_beam_search_decoder",&obj0,&obj1,&obj2,&obj3,&obj4,&obj5,&obj6,&obj7)) SWIG_fail; - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ctc_beam_search_decoder" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "ctc_beam_search_decoder" "', argument " "2"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "2"" of type '" "std::vector< 
std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - arg2 = ptr; - } - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_PathTrie, 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "ctc_beam_search_decoder" "', argument " "3"" of type '" "PathTrie &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "3"" of type '" "PathTrie &""'"); - } - arg3 = reinterpret_cast< PathTrie * >(argp3); - ecode4 = SWIG_AsVal_bool(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "ctc_beam_search_decoder" "', argument " "4"" of type '" "bool""'"); - } - arg4 = static_cast< bool >(val4); - ecode5 = SWIG_AsVal_size_t(obj4, &val5); - if (!SWIG_IsOK(ecode5)) { - SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "ctc_beam_search_decoder" "', argument " "5"" of type '" "size_t""'"); - } - arg5 = static_cast< size_t >(val5); - ecode6 = SWIG_AsVal_int(obj5, &val6); - if (!SWIG_IsOK(ecode6)) { - SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "ctc_beam_search_decoder" "', argument " "6"" of type '" "int""'"); - } - arg6 = static_cast< int >(val6); - ecode7 = SWIG_AsVal_int(obj6, &val7); - if (!SWIG_IsOK(ecode7)) { - SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "ctc_beam_search_decoder" "', argument " "7"" of type '" "int""'"); - } - arg7 = static_cast< int >(val7); - ecode8 = SWIG_AsVal_double(obj7, &val8); - if (!SWIG_IsOK(ecode8)) { - SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "ctc_beam_search_decoder" "', argument " "8"" of type '" "double""'"); - } - arg8 = static_cast< double >(val8); - result = ctc_beam_search_decoder((std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &)*arg1,(std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg2,*arg3,arg4,arg5,arg6,arg7,arg8); - resultobj = swig::from(static_cast< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *arg1 = 0 ; - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *arg2 = 0 ; - PathTrie *arg3 = 0 ; - bool arg4 ; - size_t arg5 ; - int arg6 ; - int arg7 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - void *argp3 = 0 ; - int res3 = 0 ; - bool val4 ; - int ecode4 = 0 ; - size_t val5 ; - int ecode5 = 0 ; - int val6 ; - int ecode6 = 0 ; - int val7 ; - int ecode7 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - PyObject * obj4 = 0 ; - PyObject * obj5 = 0 ; - PyObject * obj6 = 0 ; - std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< 
int > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOOOOO:ctc_beam_search_decoder",&obj0,&obj1,&obj2,&obj3,&obj4,&obj5,&obj6)) SWIG_fail; - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ctc_beam_search_decoder" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "ctc_beam_search_decoder" "', argument " "2"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "2"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - arg2 = ptr; - } - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_PathTrie, 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "ctc_beam_search_decoder" "', argument " "3"" of type '" "PathTrie &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "3"" of type '" "PathTrie &""'"); - } - arg3 = reinterpret_cast< PathTrie * >(argp3); - ecode4 = SWIG_AsVal_bool(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "ctc_beam_search_decoder" "', argument " "4"" of type '" "bool""'"); - } - arg4 = static_cast< bool >(val4); - ecode5 = SWIG_AsVal_size_t(obj4, &val5); - if (!SWIG_IsOK(ecode5)) { - SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "ctc_beam_search_decoder" "', argument " "5"" of type '" "size_t""'"); - } - arg5 = static_cast< size_t >(val5); - ecode6 = SWIG_AsVal_int(obj5, &val6); - if (!SWIG_IsOK(ecode6)) { - SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "ctc_beam_search_decoder" "', argument " "6"" of type '" "int""'"); - } - arg6 = static_cast< int >(val6); - ecode7 = SWIG_AsVal_int(obj6, &val7); - if (!SWIG_IsOK(ecode7)) { - SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "ctc_beam_search_decoder" "', argument " "7"" of type '" "int""'"); - } - arg7 = static_cast< int >(val7); - result = ctc_beam_search_decoder((std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &)*arg1,(std::vector< std::vector< int,std::allocator< int > 
>,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg2,*arg3,arg4,arg5,arg6,arg7); - resultobj = swig::from(static_cast< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *arg1 = 0 ; - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *arg2 = 0 ; - PathTrie *arg3 = 0 ; - bool arg4 ; - size_t arg5 ; - int arg6 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - void *argp3 = 0 ; - int res3 = 0 ; - bool val4 ; - int ecode4 = 0 ; - size_t val5 ; - int ecode5 = 0 ; - int val6 ; - int ecode6 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - PyObject * obj4 = 0 ; - PyObject * obj5 = 0 ; - std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOOOO:ctc_beam_search_decoder",&obj0,&obj1,&obj2,&obj3,&obj4,&obj5)) SWIG_fail; - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ctc_beam_search_decoder" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "ctc_beam_search_decoder" "', argument " "2"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "2"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - arg2 = ptr; - } - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_PathTrie, 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "ctc_beam_search_decoder" 
"', argument " "3"" of type '" "PathTrie &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "3"" of type '" "PathTrie &""'"); - } - arg3 = reinterpret_cast< PathTrie * >(argp3); - ecode4 = SWIG_AsVal_bool(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "ctc_beam_search_decoder" "', argument " "4"" of type '" "bool""'"); - } - arg4 = static_cast< bool >(val4); - ecode5 = SWIG_AsVal_size_t(obj4, &val5); - if (!SWIG_IsOK(ecode5)) { - SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "ctc_beam_search_decoder" "', argument " "5"" of type '" "size_t""'"); - } - arg5 = static_cast< size_t >(val5); - ecode6 = SWIG_AsVal_int(obj5, &val6); - if (!SWIG_IsOK(ecode6)) { - SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "ctc_beam_search_decoder" "', argument " "6"" of type '" "int""'"); - } - arg6 = static_cast< int >(val6); - result = ctc_beam_search_decoder((std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &)*arg1,(std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg2,*arg3,arg4,arg5,arg6); - resultobj = swig::from(static_cast< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder__SWIG_4(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *arg1 = 0 ; - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *arg2 = 0 ; - PathTrie *arg3 = 0 ; - bool arg4 ; - size_t arg5 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - void *argp3 = 0 ; - int res3 = 0 ; - bool val4 ; - int ecode4 = 0 ; - size_t val5 ; - int ecode5 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - PyObject * obj4 = 0 ; - std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOOO:ctc_beam_search_decoder",&obj0,&obj1,&obj2,&obj3,&obj4)) SWIG_fail; - { - std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *ptr = (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ctc_beam_search_decoder" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "1"" of type '" "std::vector< std::vector< double,std::allocator< double > 
>,std::allocator< std::vector< double,std::allocator< double > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "ctc_beam_search_decoder" "', argument " "2"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "2"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - arg2 = ptr; - } - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_PathTrie, 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "ctc_beam_search_decoder" "', argument " "3"" of type '" "PathTrie &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder" "', argument " "3"" of type '" "PathTrie &""'"); - } - arg3 = reinterpret_cast< PathTrie * >(argp3); - ecode4 = SWIG_AsVal_bool(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "ctc_beam_search_decoder" "', argument " "4"" of type '" "bool""'"); - } - arg4 = static_cast< bool >(val4); - ecode5 = SWIG_AsVal_size_t(obj4, &val5); - if (!SWIG_IsOK(ecode5)) { - SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "ctc_beam_search_decoder" "', argument " "5"" of type '" "size_t""'"); - } - arg5 = static_cast< size_t >(val5); - result = ctc_beam_search_decoder((std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &)*arg1,(std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg2,*arg3,arg4,arg5); - resultobj = swig::from(static_cast< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[10] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 9) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 5) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[2], &vptr, SWIGTYPE_p_PathTrie, 0); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = 
SWIG_AsVal_bool(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[4], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_ctc_beam_search_decoder__SWIG_4(self, args); - } - } - } - } - } - } - if (argc == 6) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[2], &vptr, SWIGTYPE_p_PathTrie, 0); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_bool(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[4], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[5], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_ctc_beam_search_decoder__SWIG_3(self, args); - } - } - } - } - } - } - } - if (argc == 7) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[2], &vptr, SWIGTYPE_p_PathTrie, 0); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_bool(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[4], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[5], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[6], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_ctc_beam_search_decoder__SWIG_2(self, args); - } - } - } - } - } - } - } - } - if (argc == 8) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[2], &vptr, SWIGTYPE_p_PathTrie, 0); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_bool(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[4], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[5], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[6], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_double(argv[7], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_ctc_beam_search_decoder__SWIG_1(self, args); - } - } - } - } - } - } - } - } - } - if (argc == 9) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >**)(0)); - _v = 
SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[2], &vptr, SWIGTYPE_p_PathTrie, 0); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_bool(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[4], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[5], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[6], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_double(argv[7], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[8], &vptr, SWIGTYPE_p_Scorer, 0); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_ctc_beam_search_decoder__SWIG_0(self, args); - } - } - } - } - } - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'ctc_beam_search_decoder'.\n" - " Possible C/C++ prototypes are:\n" - " ctc_beam_search_decoder(std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &,std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &,PathTrie &,bool const,size_t,int,int,double,Scorer *)\n" - " ctc_beam_search_decoder(std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &,std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &,PathTrie &,bool const,size_t,int,int,double)\n" - " ctc_beam_search_decoder(std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &,std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &,PathTrie &,bool const,size_t,int,int)\n" - " ctc_beam_search_decoder(std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &,std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &,PathTrie &,bool const,size_t,int)\n" - " ctc_beam_search_decoder(std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > const &,std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &,PathTrie &,bool const,size_t)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder_batch__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *arg1 = 0 ; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > 
>,std::allocator< std::vector< int,std::allocator< int > > > > > > *arg2 = 0 ; - std::vector< PathTrie *,std::allocator< PathTrie * > > *arg3 = 0 ; - std::vector< bool,std::allocator< bool > > *arg4 = 0 ; - size_t arg5 ; - size_t arg6 ; - int arg7 ; - int arg8 ; - double arg9 ; - Scorer *arg10 = (Scorer *) 0 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - void *argp3 = 0 ; - int res3 = 0 ; - int res4 = SWIG_OLDOBJ ; - size_t val5 ; - int ecode5 = 0 ; - size_t val6 ; - int ecode6 = 0 ; - int val7 ; - int ecode7 = 0 ; - int val8 ; - int ecode8 = 0 ; - double val9 ; - int ecode9 = 0 ; - void *argp10 = 0 ; - int res10 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - PyObject * obj4 = 0 ; - PyObject * obj5 = 0 ; - PyObject * obj6 = 0 ; - PyObject * obj7 = 0 ; - PyObject * obj8 = 0 ; - PyObject * obj9 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOOOOOOOO:ctc_beam_search_decoder_batch",&obj0,&obj1,&obj2,&obj3,&obj4,&obj5,&obj6,&obj7,&obj8,&obj9)) SWIG_fail; - { - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *ptr = (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ctc_beam_search_decoder_batch" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *ptr = (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - 
SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "ctc_beam_search_decoder_batch" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - arg2 = ptr; - } - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "ctc_beam_search_decoder_batch" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > &""'"); - } - arg3 = reinterpret_cast< std::vector< PathTrie *,std::allocator< PathTrie * > > * >(argp3); - { - std::vector > *ptr = (std::vector > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "ctc_beam_search_decoder_batch" "', argument " "4"" of type '" "std::vector< bool,std::allocator< bool > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "4"" of type '" "std::vector< bool,std::allocator< bool > > const &""'"); - } - arg4 = ptr; - } - ecode5 = SWIG_AsVal_size_t(obj4, &val5); - if (!SWIG_IsOK(ecode5)) { - SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "ctc_beam_search_decoder_batch" "', argument " "5"" of type '" "size_t""'"); - } - arg5 = static_cast< size_t >(val5); - ecode6 = SWIG_AsVal_size_t(obj5, &val6); - if (!SWIG_IsOK(ecode6)) { - SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "ctc_beam_search_decoder_batch" "', argument " "6"" of type '" "size_t""'"); - } - arg6 = static_cast< size_t >(val6); - ecode7 = SWIG_AsVal_int(obj6, &val7); - if (!SWIG_IsOK(ecode7)) { - SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "ctc_beam_search_decoder_batch" "', argument " "7"" of type '" "int""'"); - } - arg7 = static_cast< int >(val7); - ecode8 = SWIG_AsVal_int(obj7, &val8); - if (!SWIG_IsOK(ecode8)) { - SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "ctc_beam_search_decoder_batch" "', argument " "8"" of type '" "int""'"); - } - arg8 = static_cast< int >(val8); - ecode9 = SWIG_AsVal_double(obj8, &val9); - if (!SWIG_IsOK(ecode9)) { - SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "ctc_beam_search_decoder_batch" "', argument " "9"" of type '" "double""'"); - } - arg9 = static_cast< double >(val9); - res10 = SWIG_ConvertPtr(obj9, &argp10,SWIGTYPE_p_Scorer, 0 | 0 ); - if (!SWIG_IsOK(res10)) { - SWIG_exception_fail(SWIG_ArgError(res10), "in method '" "ctc_beam_search_decoder_batch" "', argument " "10"" of type '" "Scorer *""'"); - } - arg10 = reinterpret_cast< Scorer * >(argp10); - result = 
ctc_beam_search_decoder_batch((std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &)*arg1,(std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &)*arg2,*arg3,(std::vector< bool,std::allocator< bool > > const &)*arg4,arg5,arg6,arg7,arg8,arg9,arg10); - resultobj = swig::from(static_cast< std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder_batch__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *arg1 = 0 ; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *arg2 = 0 ; - std::vector< PathTrie *,std::allocator< PathTrie * > > *arg3 = 0 ; - std::vector< bool,std::allocator< bool > > *arg4 = 0 ; - size_t arg5 ; - size_t arg6 ; - int arg7 ; - int arg8 ; - double arg9 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - void *argp3 = 0 ; - int res3 = 0 ; - int res4 = SWIG_OLDOBJ ; - size_t val5 ; - int ecode5 = 0 ; - size_t val6 ; - int ecode6 = 0 ; - int val7 ; - int ecode7 = 0 ; - int val8 ; - int ecode8 = 0 ; - double val9 ; - int ecode9 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - PyObject * obj4 = 0 ; - PyObject * obj5 = 0 ; - PyObject * obj6 = 0 ; - PyObject * obj7 = 0 ; - PyObject * obj8 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOOOOOOO:ctc_beam_search_decoder_batch",&obj0,&obj1,&obj2,&obj3,&obj4,&obj5,&obj6,&obj7,&obj8)) SWIG_fail; - { - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< 
std::vector< double,std::allocator< double > > > > > > *ptr = (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ctc_beam_search_decoder_batch" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *ptr = (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "ctc_beam_search_decoder_batch" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - arg2 = ptr; - } - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "ctc_beam_search_decoder_batch" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > &""'"); - } - arg3 = reinterpret_cast< std::vector< PathTrie *,std::allocator< PathTrie * > > * >(argp3); - { - std::vector > *ptr = (std::vector > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - 
SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "ctc_beam_search_decoder_batch" "', argument " "4"" of type '" "std::vector< bool,std::allocator< bool > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "4"" of type '" "std::vector< bool,std::allocator< bool > > const &""'"); - } - arg4 = ptr; - } - ecode5 = SWIG_AsVal_size_t(obj4, &val5); - if (!SWIG_IsOK(ecode5)) { - SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "ctc_beam_search_decoder_batch" "', argument " "5"" of type '" "size_t""'"); - } - arg5 = static_cast< size_t >(val5); - ecode6 = SWIG_AsVal_size_t(obj5, &val6); - if (!SWIG_IsOK(ecode6)) { - SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "ctc_beam_search_decoder_batch" "', argument " "6"" of type '" "size_t""'"); - } - arg6 = static_cast< size_t >(val6); - ecode7 = SWIG_AsVal_int(obj6, &val7); - if (!SWIG_IsOK(ecode7)) { - SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "ctc_beam_search_decoder_batch" "', argument " "7"" of type '" "int""'"); - } - arg7 = static_cast< int >(val7); - ecode8 = SWIG_AsVal_int(obj7, &val8); - if (!SWIG_IsOK(ecode8)) { - SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "ctc_beam_search_decoder_batch" "', argument " "8"" of type '" "int""'"); - } - arg8 = static_cast< int >(val8); - ecode9 = SWIG_AsVal_double(obj8, &val9); - if (!SWIG_IsOK(ecode9)) { - SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "ctc_beam_search_decoder_batch" "', argument " "9"" of type '" "double""'"); - } - arg9 = static_cast< double >(val9); - result = ctc_beam_search_decoder_batch((std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &)*arg1,(std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &)*arg2,*arg3,(std::vector< bool,std::allocator< bool > > const &)*arg4,arg5,arg6,arg7,arg8,arg9); - resultobj = swig::from(static_cast< std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder_batch__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *arg1 = 0 ; - std::vector< std::vector< std::vector< int,std::allocator< int > 
>,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *arg2 = 0 ; - std::vector< PathTrie *,std::allocator< PathTrie * > > *arg3 = 0 ; - std::vector< bool,std::allocator< bool > > *arg4 = 0 ; - size_t arg5 ; - size_t arg6 ; - int arg7 ; - int arg8 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - void *argp3 = 0 ; - int res3 = 0 ; - int res4 = SWIG_OLDOBJ ; - size_t val5 ; - int ecode5 = 0 ; - size_t val6 ; - int ecode6 = 0 ; - int val7 ; - int ecode7 = 0 ; - int val8 ; - int ecode8 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - PyObject * obj4 = 0 ; - PyObject * obj5 = 0 ; - PyObject * obj6 = 0 ; - PyObject * obj7 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOOOOOO:ctc_beam_search_decoder_batch",&obj0,&obj1,&obj2,&obj3,&obj4,&obj5,&obj6,&obj7)) SWIG_fail; - { - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *ptr = (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ctc_beam_search_decoder_batch" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *ptr = (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" 
"ctc_beam_search_decoder_batch" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - arg2 = ptr; - } - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "ctc_beam_search_decoder_batch" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > &""'"); - } - arg3 = reinterpret_cast< std::vector< PathTrie *,std::allocator< PathTrie * > > * >(argp3); - { - std::vector > *ptr = (std::vector > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "ctc_beam_search_decoder_batch" "', argument " "4"" of type '" "std::vector< bool,std::allocator< bool > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "4"" of type '" "std::vector< bool,std::allocator< bool > > const &""'"); - } - arg4 = ptr; - } - ecode5 = SWIG_AsVal_size_t(obj4, &val5); - if (!SWIG_IsOK(ecode5)) { - SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "ctc_beam_search_decoder_batch" "', argument " "5"" of type '" "size_t""'"); - } - arg5 = static_cast< size_t >(val5); - ecode6 = SWIG_AsVal_size_t(obj5, &val6); - if (!SWIG_IsOK(ecode6)) { - SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "ctc_beam_search_decoder_batch" "', argument " "6"" of type '" "size_t""'"); - } - arg6 = static_cast< size_t >(val6); - ecode7 = SWIG_AsVal_int(obj6, &val7); - if (!SWIG_IsOK(ecode7)) { - SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "ctc_beam_search_decoder_batch" "', argument " "7"" of type '" "int""'"); - } - arg7 = static_cast< int >(val7); - ecode8 = SWIG_AsVal_int(obj7, &val8); - if (!SWIG_IsOK(ecode8)) { - SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "ctc_beam_search_decoder_batch" "', argument " "8"" of type '" "int""'"); - } - arg8 = static_cast< int >(val8); - result = ctc_beam_search_decoder_batch((std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &)*arg1,(std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > 
> const &)*arg2,*arg3,(std::vector< bool,std::allocator< bool > > const &)*arg4,arg5,arg6,arg7,arg8); - resultobj = swig::from(static_cast< std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder_batch__SWIG_3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *arg1 = 0 ; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *arg2 = 0 ; - std::vector< PathTrie *,std::allocator< PathTrie * > > *arg3 = 0 ; - std::vector< bool,std::allocator< bool > > *arg4 = 0 ; - size_t arg5 ; - size_t arg6 ; - int arg7 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - void *argp3 = 0 ; - int res3 = 0 ; - int res4 = SWIG_OLDOBJ ; - size_t val5 ; - int ecode5 = 0 ; - size_t val6 ; - int ecode6 = 0 ; - int val7 ; - int ecode7 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - PyObject * obj4 = 0 ; - PyObject * obj5 = 0 ; - PyObject * obj6 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOOOOO:ctc_beam_search_decoder_batch",&obj0,&obj1,&obj2,&obj3,&obj4,&obj5,&obj6)) SWIG_fail; - { - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *ptr = (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ctc_beam_search_decoder_batch" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > 
>,std::allocator< std::vector< double,std::allocator< double > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *ptr = (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "ctc_beam_search_decoder_batch" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - arg2 = ptr; - } - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "ctc_beam_search_decoder_batch" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > &""'"); - } - arg3 = reinterpret_cast< std::vector< PathTrie *,std::allocator< PathTrie * > > * >(argp3); - { - std::vector > *ptr = (std::vector > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "ctc_beam_search_decoder_batch" "', argument " "4"" of type '" "std::vector< bool,std::allocator< bool > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "4"" of type '" "std::vector< bool,std::allocator< bool > > const &""'"); - } - arg4 = ptr; - } - ecode5 = SWIG_AsVal_size_t(obj4, &val5); - if (!SWIG_IsOK(ecode5)) { - SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "ctc_beam_search_decoder_batch" "', argument " "5"" of type '" "size_t""'"); - } - arg5 = static_cast< size_t >(val5); - ecode6 = SWIG_AsVal_size_t(obj5, &val6); - if (!SWIG_IsOK(ecode6)) { - 
SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "ctc_beam_search_decoder_batch" "', argument " "6"" of type '" "size_t""'"); - } - arg6 = static_cast< size_t >(val6); - ecode7 = SWIG_AsVal_int(obj6, &val7); - if (!SWIG_IsOK(ecode7)) { - SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "ctc_beam_search_decoder_batch" "', argument " "7"" of type '" "int""'"); - } - arg7 = static_cast< int >(val7); - result = ctc_beam_search_decoder_batch((std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &)*arg1,(std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &)*arg2,*arg3,(std::vector< bool,std::allocator< bool > > const &)*arg4,arg5,arg6,arg7); - resultobj = swig::from(static_cast< std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder_batch__SWIG_4(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *arg1 = 0 ; - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *arg2 = 0 ; - std::vector< PathTrie *,std::allocator< PathTrie * > > *arg3 = 0 ; - std::vector< bool,std::allocator< bool > > *arg4 = 0 ; - size_t arg5 ; - size_t arg6 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - void *argp3 = 0 ; - int res3 = 0 ; - int res4 = SWIG_OLDOBJ ; - size_t val5 ; - int ecode5 = 0 ; - size_t val6 ; - int ecode6 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - PyObject * obj4 = 0 ; - PyObject * obj5 = 0 ; - std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOOOO:ctc_beam_search_decoder_batch",&obj0,&obj1,&obj2,&obj3,&obj4,&obj5)) SWIG_fail; - { - std::vector< std::vector< std::vector< 
double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *ptr = (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ctc_beam_search_decoder_batch" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "1"" of type '" "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *ptr = (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "ctc_beam_search_decoder_batch" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "2"" of type '" "std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &""'"); - } - arg2 = ptr; - } - res3 = SWIG_ConvertPtr(obj2, &argp3, SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0 ); - if (!SWIG_IsOK(res3)) { - SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "ctc_beam_search_decoder_batch" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > &""'"); - } - if (!argp3) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "3"" of type '" "std::vector< PathTrie *,std::allocator< PathTrie * > > &""'"); - } - arg3 = reinterpret_cast< 
std::vector< PathTrie *,std::allocator< PathTrie * > > * >(argp3); - { - std::vector<bool,std::allocator< bool > > *ptr = (std::vector<bool,std::allocator< bool > > *)0; - res4 = swig::asptr(obj3, &ptr); - if (!SWIG_IsOK(res4)) { - SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "ctc_beam_search_decoder_batch" "', argument " "4"" of type '" "std::vector< bool,std::allocator< bool > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ctc_beam_search_decoder_batch" "', argument " "4"" of type '" "std::vector< bool,std::allocator< bool > > const &""'"); - } - arg4 = ptr; - } - ecode5 = SWIG_AsVal_size_t(obj4, &val5); - if (!SWIG_IsOK(ecode5)) { - SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "ctc_beam_search_decoder_batch" "', argument " "5"" of type '" "size_t""'"); - } - arg5 = static_cast< size_t >(val5); - ecode6 = SWIG_AsVal_size_t(obj5, &val6); - if (!SWIG_IsOK(ecode6)) { - SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "ctc_beam_search_decoder_batch" "', argument " "6"" of type '" "size_t""'"); - } - arg6 = static_cast< size_t >(val6); - result = ctc_beam_search_decoder_batch((std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &)*arg1,(std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &)*arg2,*arg3,(std::vector< bool,std::allocator< bool > > const &)*arg4,arg5,arg6); - resultobj = swig::from(static_cast< std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - if (SWIG_IsNewObj(res4)) delete arg4; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - if (SWIG_IsNewObj(res4)) delete arg4; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_ctc_beam_search_decoder_batch(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[11] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 10) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 6) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - void *vptr
= 0; - int res = SWIG_ConvertPtr(argv[2], &vptr, SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[3], (std::vector<bool,std::allocator< bool > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[4], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[5], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_ctc_beam_search_decoder_batch__SWIG_4(self, args); - } - } - } - } - } - } - } - if (argc == 7) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[2], &vptr, SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[3], (std::vector<bool,std::allocator< bool > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[4], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[5], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[6], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_ctc_beam_search_decoder_batch__SWIG_3(self, args); - } - } - } - } - } - } - } - } - if (argc == 8) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[2], &vptr, SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[3], (std::vector<bool,std::allocator< bool > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[4], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[5], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[6], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[7], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_ctc_beam_search_decoder_batch__SWIG_2(self, args); - } - } - } - } - } - } - } - } - } - if (argc == 9) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator<
double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[2], &vptr, SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[3], (std::vector<bool,std::allocator< bool > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[4], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[5], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[6], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[7], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_double(argv[8], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_ctc_beam_search_decoder_batch__SWIG_1(self, args); - } - } - } - } - } - } - } - } - } - } - if (argc == 10) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[2], &vptr, SWIGTYPE_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[3], (std::vector<bool,std::allocator< bool > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[4], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[5], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[6], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[7], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_double(argv[8], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - void *vptr = 0; - int res = SWIG_ConvertPtr(argv[9], &vptr, SWIGTYPE_p_Scorer, 0); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_ctc_beam_search_decoder_batch__SWIG_0(self, args); - } - } - } - } - } - } - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'ctc_beam_search_decoder_batch'.\n" - " Possible C/C++ prototypes are:\n" - " ctc_beam_search_decoder_batch(std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > >
>,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &,std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &,std::vector< PathTrie *,std::allocator< PathTrie * > > &,std::vector< bool,std::allocator< bool > > const &,size_t,size_t,int,int,double,Scorer *)\n" - " ctc_beam_search_decoder_batch(std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &,std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &,std::vector< PathTrie *,std::allocator< PathTrie * > > &,std::vector< bool,std::allocator< bool > > const &,size_t,size_t,int,int,double)\n" - " ctc_beam_search_decoder_batch(std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &,std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &,std::vector< PathTrie *,std::allocator< PathTrie * > > &,std::vector< bool,std::allocator< bool > > const &,size_t,size_t,int,int)\n" - " ctc_beam_search_decoder_batch(std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &,std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &,std::vector< PathTrie *,std::allocator< PathTrie * > > &,std::vector< bool,std::allocator< bool > > const &,size_t,size_t,int)\n" - " ctc_beam_search_decoder_batch(std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > const &,std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > const &,std::vector< PathTrie *,std::allocator< PathTrie * > > &,std::vector< bool,std::allocator< bool > > const &,size_t,size_t)\n"); - return 0; -} - - -SWIGINTERN PyObject 
*_wrap_map_sent__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int,std::allocator< int > > *arg1 = 0 ; - std::vector< std::string,std::allocator< std::string > > *arg2 = 0 ; - bool arg3 ; - int arg4 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - bool val3 ; - int ecode3 = 0 ; - int val4 ; - int ecode4 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - std::string result; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:map_sent",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "map_sent" "', argument " "1"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_sent" "', argument " "1"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "map_sent" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_sent" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - arg2 = ptr; - } - ecode3 = SWIG_AsVal_bool(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "map_sent" "', argument " "3"" of type '" "bool""'"); - } - arg3 = static_cast< bool >(val3); - ecode4 = SWIG_AsVal_int(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "map_sent" "', argument " "4"" of type '" "int""'"); - } - arg4 = static_cast< int >(val4); - result = map_sent((std::vector< int,std::allocator< int > > const &)*arg1,(std::vector< std::string,std::allocator< std::string > > const &)*arg2,arg3,arg4); - resultobj = SWIG_From_std_string(static_cast< std::string >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_map_sent__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int,std::allocator< int > > *arg1 = 0 ; - std::vector< std::string,std::allocator< std::string > > *arg2 = 0 ; - bool arg3 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - bool val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::string result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:map_sent",&obj0,&obj1,&obj2)) SWIG_fail; - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "map_sent" "', argument " "1"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_sent" "', 
argument " "1"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "map_sent" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_sent" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - arg2 = ptr; - } - ecode3 = SWIG_AsVal_bool(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "map_sent" "', argument " "3"" of type '" "bool""'"); - } - arg3 = static_cast< bool >(val3); - result = map_sent((std::vector< int,std::allocator< int > > const &)*arg1,(std::vector< std::string,std::allocator< std::string > > const &)*arg2,arg3); - resultobj = SWIG_From_std_string(static_cast< std::string >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_map_sent__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< int,std::allocator< int > > *arg1 = 0 ; - std::vector< std::string,std::allocator< std::string > > *arg2 = 0 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - std::string result; - - if (!PyArg_ParseTuple(args,(char *)"OO:map_sent",&obj0,&obj1)) SWIG_fail; - { - std::vector< int,std::allocator< int > > *ptr = (std::vector< int,std::allocator< int > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "map_sent" "', argument " "1"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_sent" "', argument " "1"" of type '" "std::vector< int,std::allocator< int > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "map_sent" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_sent" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - arg2 = ptr; - } - result = map_sent((std::vector< int,std::allocator< int > > const &)*arg1,(std::vector< std::string,std::allocator< std::string > > const &)*arg2); - resultobj = SWIG_From_std_string(static_cast< std::string >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_map_sent(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[5] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - 
argc = PyObject_Length(args); - for (ii = 0; (ii < 4) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 2) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - return _wrap_map_sent__SWIG_2(self, args); - } - } - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_bool(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_map_sent__SWIG_1(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< int,std::allocator< int > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_bool(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_map_sent__SWIG_0(self, args); - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'map_sent'.\n" - " Possible C/C++ prototypes are:\n" - " map_sent(std::vector< int,std::allocator< int > > const &,std::vector< std::string,std::allocator< std::string > > const &,bool,int)\n" - " map_sent(std::vector< int,std::allocator< int > > const &,std::vector< std::string,std::allocator< std::string > > const &,bool)\n" - " map_sent(std::vector< int,std::allocator< int > > const &,std::vector< std::string,std::allocator< std::string > > const &)\n"); - return 0; -} - - -SWIGINTERN PyObject *_wrap_map_batch__SWIG_0(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *arg1 = 0 ; - std::vector< std::string,std::allocator< std::string > > *arg2 = 0 ; - size_t arg3 ; - bool arg4 ; - int arg5 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - size_t val3 ; - int ecode3 = 0 ; - bool val4 ; - int ecode4 = 0 ; - int val5 ; - int ecode5 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - PyObject * obj4 = 0 ; - std::vector< std::string,std::allocator< std::string > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOOO:map_batch",&obj0,&obj1,&obj2,&obj3,&obj4)) SWIG_fail; - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "map_batch" "', argument " "1"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_batch" "', argument " "1"" of type '" 
"std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "map_batch" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_batch" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - arg2 = ptr; - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "map_batch" "', argument " "3"" of type '" "size_t""'"); - } - arg3 = static_cast< size_t >(val3); - ecode4 = SWIG_AsVal_bool(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "map_batch" "', argument " "4"" of type '" "bool""'"); - } - arg4 = static_cast< bool >(val4); - ecode5 = SWIG_AsVal_int(obj4, &val5); - if (!SWIG_IsOK(ecode5)) { - SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "map_batch" "', argument " "5"" of type '" "int""'"); - } - arg5 = static_cast< int >(val5); - result = map_batch((std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg1,(std::vector< std::string,std::allocator< std::string > > const &)*arg2,arg3,arg4,arg5); - resultobj = swig::from(static_cast< std::vector< std::string,std::allocator< std::string > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_map_batch__SWIG_1(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *arg1 = 0 ; - std::vector< std::string,std::allocator< std::string > > *arg2 = 0 ; - size_t arg3 ; - bool arg4 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - size_t val3 ; - int ecode3 = 0 ; - bool val4 ; - int ecode4 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - PyObject * obj3 = 0 ; - std::vector< std::string,std::allocator< std::string > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOOO:map_batch",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "map_batch" "', argument " "1"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_batch" "', argument " "1"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - arg1 = ptr; - } - { - 
std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "map_batch" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_batch" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - arg2 = ptr; - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "map_batch" "', argument " "3"" of type '" "size_t""'"); - } - arg3 = static_cast< size_t >(val3); - ecode4 = SWIG_AsVal_bool(obj3, &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "map_batch" "', argument " "4"" of type '" "bool""'"); - } - arg4 = static_cast< bool >(val4); - result = map_batch((std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg1,(std::vector< std::string,std::allocator< std::string > > const &)*arg2,arg3,arg4); - resultobj = swig::from(static_cast< std::vector< std::string,std::allocator< std::string > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_map_batch__SWIG_2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { - PyObject *resultobj = 0; - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *arg1 = 0 ; - std::vector< std::string,std::allocator< std::string > > *arg2 = 0 ; - size_t arg3 ; - int res1 = SWIG_OLDOBJ ; - int res2 = SWIG_OLDOBJ ; - size_t val3 ; - int ecode3 = 0 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - PyObject * obj2 = 0 ; - std::vector< std::string,std::allocator< std::string > > result; - - if (!PyArg_ParseTuple(args,(char *)"OOO:map_batch",&obj0,&obj1,&obj2)) SWIG_fail; - { - std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *ptr = (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *)0; - res1 = swig::asptr(obj0, &ptr); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "map_batch" "', argument " "1"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_batch" "', argument " "1"" of type '" "std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &""'"); - } - arg1 = ptr; - } - { - std::vector< std::string,std::allocator< std::string > > *ptr = (std::vector< std::string,std::allocator< std::string > > *)0; - res2 = swig::asptr(obj1, &ptr); - if (!SWIG_IsOK(res2)) { - SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "map_batch" "', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - if (!ptr) { - SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "map_batch" 
"', argument " "2"" of type '" "std::vector< std::string,std::allocator< std::string > > const &""'"); - } - arg2 = ptr; - } - ecode3 = SWIG_AsVal_size_t(obj2, &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "map_batch" "', argument " "3"" of type '" "size_t""'"); - } - arg3 = static_cast< size_t >(val3); - result = map_batch((std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &)*arg1,(std::vector< std::string,std::allocator< std::string > > const &)*arg2,arg3); - resultobj = swig::from(static_cast< std::vector< std::string,std::allocator< std::string > > >(result)); - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return resultobj; -fail: - if (SWIG_IsNewObj(res1)) delete arg1; - if (SWIG_IsNewObj(res2)) delete arg2; - return NULL; -} - - -SWIGINTERN PyObject *_wrap_map_batch(PyObject *self, PyObject *args) { - Py_ssize_t argc; - PyObject *argv[6] = { - 0 - }; - Py_ssize_t ii; - - if (!PyTuple_Check(args)) SWIG_fail; - argc = PyObject_Length(args); - for (ii = 0; (ii < 5) && (ii < argc); ii++) { - argv[ii] = PyTuple_GET_ITEM(args,ii); - } - if (argc == 3) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_map_batch__SWIG_2(self, args); - } - } - } - } - if (argc == 4) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_bool(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_map_batch__SWIG_1(self, args); - } - } - } - } - } - if (argc == 5) { - int _v; - int res = swig::asptr(argv[0], (std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - int res = swig::asptr(argv[1], (std::vector< std::string,std::allocator< std::string > >**)(0)); - _v = SWIG_CheckState(res); - if (_v) { - { - int res = SWIG_AsVal_size_t(argv[2], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_bool(argv[3], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - { - int res = SWIG_AsVal_int(argv[4], NULL); - _v = SWIG_CheckState(res); - } - if (_v) { - return _wrap_map_batch__SWIG_0(self, args); - } - } - } - } - } - } - -fail: - SWIG_SetErrorMsg(PyExc_NotImplementedError,"Wrong number or type of arguments for overloaded function 'map_batch'.\n" - " Possible C/C++ prototypes are:\n" - " map_batch(std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &,std::vector< std::string,std::allocator< std::string > > const &,size_t,bool,int)\n" - " map_batch(std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< 
int,std::allocator< int > > > > const &,std::vector< std::string,std::allocator< std::string > > const &,size_t,bool)\n" - " map_batch(std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > const &,std::vector< std::string,std::allocator< std::string > > const &,size_t)\n"); - return 0; -} - - -static PyMethodDef SwigMethods[] = { - { (char *)"SWIG_PyInstanceMethod_New", (PyCFunction)SWIG_PyInstanceMethod_New, METH_O, NULL}, - { (char *)"delete_SwigPyIterator", _wrap_delete_SwigPyIterator, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator_value", _wrap_SwigPyIterator_value, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator_incr", _wrap_SwigPyIterator_incr, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator_decr", _wrap_SwigPyIterator_decr, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator_distance", _wrap_SwigPyIterator_distance, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator_equal", _wrap_SwigPyIterator_equal, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator_copy", _wrap_SwigPyIterator_copy, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator_next", _wrap_SwigPyIterator_next, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator___next__", _wrap_SwigPyIterator___next__, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator_previous", _wrap_SwigPyIterator_previous, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator_advance", _wrap_SwigPyIterator_advance, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator___eq__", _wrap_SwigPyIterator___eq__, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator___ne__", _wrap_SwigPyIterator___ne__, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator___iadd__", _wrap_SwigPyIterator___iadd__, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator___isub__", _wrap_SwigPyIterator___isub__, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator___add__", _wrap_SwigPyIterator___add__, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator___sub__", _wrap_SwigPyIterator___sub__, METH_VARARGS, NULL}, - { (char *)"SwigPyIterator_swigregister", SwigPyIterator_swigregister, METH_VARARGS, NULL}, - { (char *)"new_PathTrie", _wrap_new_PathTrie, METH_VARARGS, NULL}, - { (char *)"delete_PathTrie", _wrap_delete_PathTrie, METH_VARARGS, NULL}, - { (char *)"PathTrie_get_path_trie", _wrap_PathTrie_get_path_trie, METH_VARARGS, NULL}, - { (char *)"PathTrie_get_path_vec", _wrap_PathTrie_get_path_vec, METH_VARARGS, NULL}, - { (char *)"PathTrie_iterate_to_vec", _wrap_PathTrie_iterate_to_vec, METH_VARARGS, NULL}, - { (char *)"PathTrie_iterate_to_vec_only", _wrap_PathTrie_iterate_to_vec_only, METH_VARARGS, NULL}, - { (char *)"PathTrie_set_dictionary", _wrap_PathTrie_set_dictionary, METH_VARARGS, NULL}, - { (char *)"PathTrie_set_matcher", _wrap_PathTrie_set_matcher, METH_VARARGS, NULL}, - { (char *)"PathTrie_is_empty", _wrap_PathTrie_is_empty, METH_VARARGS, NULL}, - { (char *)"PathTrie_remove", _wrap_PathTrie_remove, METH_VARARGS, NULL}, - { (char *)"PathTrie_log_prob_b_prev_set", _wrap_PathTrie_log_prob_b_prev_set, METH_VARARGS, NULL}, - { (char *)"PathTrie_log_prob_b_prev_get", _wrap_PathTrie_log_prob_b_prev_get, METH_VARARGS, NULL}, - { (char *)"PathTrie_log_prob_nb_prev_set", _wrap_PathTrie_log_prob_nb_prev_set, METH_VARARGS, NULL}, - { (char *)"PathTrie_log_prob_nb_prev_get", _wrap_PathTrie_log_prob_nb_prev_get, METH_VARARGS, NULL}, - { (char *)"PathTrie_log_prob_b_cur_set", _wrap_PathTrie_log_prob_b_cur_set, METH_VARARGS, NULL}, - { (char *)"PathTrie_log_prob_b_cur_get", _wrap_PathTrie_log_prob_b_cur_get, METH_VARARGS, NULL}, - { (char 
*)"PathTrie_log_prob_nb_cur_set", _wrap_PathTrie_log_prob_nb_cur_set, METH_VARARGS, NULL}, - { (char *)"PathTrie_log_prob_nb_cur_get", _wrap_PathTrie_log_prob_nb_cur_get, METH_VARARGS, NULL}, - { (char *)"PathTrie_score_set", _wrap_PathTrie_score_set, METH_VARARGS, NULL}, - { (char *)"PathTrie_score_get", _wrap_PathTrie_score_get, METH_VARARGS, NULL}, - { (char *)"PathTrie_approx_ctc_set", _wrap_PathTrie_approx_ctc_set, METH_VARARGS, NULL}, - { (char *)"PathTrie_approx_ctc_get", _wrap_PathTrie_approx_ctc_get, METH_VARARGS, NULL}, - { (char *)"PathTrie_character_set", _wrap_PathTrie_character_set, METH_VARARGS, NULL}, - { (char *)"PathTrie_character_get", _wrap_PathTrie_character_get, METH_VARARGS, NULL}, - { (char *)"PathTrie_parent_set", _wrap_PathTrie_parent_set, METH_VARARGS, NULL}, - { (char *)"PathTrie_parent_get", _wrap_PathTrie_parent_get, METH_VARARGS, NULL}, - { (char *)"PathTrie_swigregister", PathTrie_swigregister, METH_VARARGS, NULL}, - { (char *)"DoubleVector_iterator", _wrap_DoubleVector_iterator, METH_VARARGS, NULL}, - { (char *)"DoubleVector___nonzero__", _wrap_DoubleVector___nonzero__, METH_VARARGS, NULL}, - { (char *)"DoubleVector___bool__", _wrap_DoubleVector___bool__, METH_VARARGS, NULL}, - { (char *)"DoubleVector___len__", _wrap_DoubleVector___len__, METH_VARARGS, NULL}, - { (char *)"DoubleVector___getslice__", _wrap_DoubleVector___getslice__, METH_VARARGS, NULL}, - { (char *)"DoubleVector___setslice__", _wrap_DoubleVector___setslice__, METH_VARARGS, NULL}, - { (char *)"DoubleVector___delslice__", _wrap_DoubleVector___delslice__, METH_VARARGS, NULL}, - { (char *)"DoubleVector___delitem__", _wrap_DoubleVector___delitem__, METH_VARARGS, NULL}, - { (char *)"DoubleVector___getitem__", _wrap_DoubleVector___getitem__, METH_VARARGS, NULL}, - { (char *)"DoubleVector___setitem__", _wrap_DoubleVector___setitem__, METH_VARARGS, NULL}, - { (char *)"DoubleVector_pop", _wrap_DoubleVector_pop, METH_VARARGS, NULL}, - { (char *)"DoubleVector_append", _wrap_DoubleVector_append, METH_VARARGS, NULL}, - { (char *)"DoubleVector_empty", _wrap_DoubleVector_empty, METH_VARARGS, NULL}, - { (char *)"DoubleVector_size", _wrap_DoubleVector_size, METH_VARARGS, NULL}, - { (char *)"DoubleVector_swap", _wrap_DoubleVector_swap, METH_VARARGS, NULL}, - { (char *)"DoubleVector_begin", _wrap_DoubleVector_begin, METH_VARARGS, NULL}, - { (char *)"DoubleVector_end", _wrap_DoubleVector_end, METH_VARARGS, NULL}, - { (char *)"DoubleVector_rbegin", _wrap_DoubleVector_rbegin, METH_VARARGS, NULL}, - { (char *)"DoubleVector_rend", _wrap_DoubleVector_rend, METH_VARARGS, NULL}, - { (char *)"DoubleVector_clear", _wrap_DoubleVector_clear, METH_VARARGS, NULL}, - { (char *)"DoubleVector_get_allocator", _wrap_DoubleVector_get_allocator, METH_VARARGS, NULL}, - { (char *)"DoubleVector_pop_back", _wrap_DoubleVector_pop_back, METH_VARARGS, NULL}, - { (char *)"DoubleVector_erase", _wrap_DoubleVector_erase, METH_VARARGS, NULL}, - { (char *)"new_DoubleVector", _wrap_new_DoubleVector, METH_VARARGS, NULL}, - { (char *)"DoubleVector_push_back", _wrap_DoubleVector_push_back, METH_VARARGS, NULL}, - { (char *)"DoubleVector_front", _wrap_DoubleVector_front, METH_VARARGS, NULL}, - { (char *)"DoubleVector_back", _wrap_DoubleVector_back, METH_VARARGS, NULL}, - { (char *)"DoubleVector_assign", _wrap_DoubleVector_assign, METH_VARARGS, NULL}, - { (char *)"DoubleVector_resize", _wrap_DoubleVector_resize, METH_VARARGS, NULL}, - { (char *)"DoubleVector_insert", _wrap_DoubleVector_insert, METH_VARARGS, NULL}, - { (char *)"DoubleVector_reserve", 
_wrap_DoubleVector_reserve, METH_VARARGS, NULL}, - { (char *)"DoubleVector_capacity", _wrap_DoubleVector_capacity, METH_VARARGS, NULL}, - { (char *)"delete_DoubleVector", _wrap_delete_DoubleVector, METH_VARARGS, NULL}, - { (char *)"DoubleVector_swigregister", DoubleVector_swigregister, METH_VARARGS, NULL}, - { (char *)"IntVector_iterator", _wrap_IntVector_iterator, METH_VARARGS, NULL}, - { (char *)"IntVector___nonzero__", _wrap_IntVector___nonzero__, METH_VARARGS, NULL}, - { (char *)"IntVector___bool__", _wrap_IntVector___bool__, METH_VARARGS, NULL}, - { (char *)"IntVector___len__", _wrap_IntVector___len__, METH_VARARGS, NULL}, - { (char *)"IntVector___getslice__", _wrap_IntVector___getslice__, METH_VARARGS, NULL}, - { (char *)"IntVector___setslice__", _wrap_IntVector___setslice__, METH_VARARGS, NULL}, - { (char *)"IntVector___delslice__", _wrap_IntVector___delslice__, METH_VARARGS, NULL}, - { (char *)"IntVector___delitem__", _wrap_IntVector___delitem__, METH_VARARGS, NULL}, - { (char *)"IntVector___getitem__", _wrap_IntVector___getitem__, METH_VARARGS, NULL}, - { (char *)"IntVector___setitem__", _wrap_IntVector___setitem__, METH_VARARGS, NULL}, - { (char *)"IntVector_pop", _wrap_IntVector_pop, METH_VARARGS, NULL}, - { (char *)"IntVector_append", _wrap_IntVector_append, METH_VARARGS, NULL}, - { (char *)"IntVector_empty", _wrap_IntVector_empty, METH_VARARGS, NULL}, - { (char *)"IntVector_size", _wrap_IntVector_size, METH_VARARGS, NULL}, - { (char *)"IntVector_swap", _wrap_IntVector_swap, METH_VARARGS, NULL}, - { (char *)"IntVector_begin", _wrap_IntVector_begin, METH_VARARGS, NULL}, - { (char *)"IntVector_end", _wrap_IntVector_end, METH_VARARGS, NULL}, - { (char *)"IntVector_rbegin", _wrap_IntVector_rbegin, METH_VARARGS, NULL}, - { (char *)"IntVector_rend", _wrap_IntVector_rend, METH_VARARGS, NULL}, - { (char *)"IntVector_clear", _wrap_IntVector_clear, METH_VARARGS, NULL}, - { (char *)"IntVector_get_allocator", _wrap_IntVector_get_allocator, METH_VARARGS, NULL}, - { (char *)"IntVector_pop_back", _wrap_IntVector_pop_back, METH_VARARGS, NULL}, - { (char *)"IntVector_erase", _wrap_IntVector_erase, METH_VARARGS, NULL}, - { (char *)"new_IntVector", _wrap_new_IntVector, METH_VARARGS, NULL}, - { (char *)"IntVector_push_back", _wrap_IntVector_push_back, METH_VARARGS, NULL}, - { (char *)"IntVector_front", _wrap_IntVector_front, METH_VARARGS, NULL}, - { (char *)"IntVector_back", _wrap_IntVector_back, METH_VARARGS, NULL}, - { (char *)"IntVector_assign", _wrap_IntVector_assign, METH_VARARGS, NULL}, - { (char *)"IntVector_resize", _wrap_IntVector_resize, METH_VARARGS, NULL}, - { (char *)"IntVector_insert", _wrap_IntVector_insert, METH_VARARGS, NULL}, - { (char *)"IntVector_reserve", _wrap_IntVector_reserve, METH_VARARGS, NULL}, - { (char *)"IntVector_capacity", _wrap_IntVector_capacity, METH_VARARGS, NULL}, - { (char *)"delete_IntVector", _wrap_delete_IntVector, METH_VARARGS, NULL}, - { (char *)"IntVector_swigregister", IntVector_swigregister, METH_VARARGS, NULL}, - { (char *)"StringVector_iterator", _wrap_StringVector_iterator, METH_VARARGS, NULL}, - { (char *)"StringVector___nonzero__", _wrap_StringVector___nonzero__, METH_VARARGS, NULL}, - { (char *)"StringVector___bool__", _wrap_StringVector___bool__, METH_VARARGS, NULL}, - { (char *)"StringVector___len__", _wrap_StringVector___len__, METH_VARARGS, NULL}, - { (char *)"StringVector___getslice__", _wrap_StringVector___getslice__, METH_VARARGS, NULL}, - { (char *)"StringVector___setslice__", _wrap_StringVector___setslice__, METH_VARARGS, NULL}, - { 
(char *)"StringVector___delslice__", _wrap_StringVector___delslice__, METH_VARARGS, NULL}, - { (char *)"StringVector___delitem__", _wrap_StringVector___delitem__, METH_VARARGS, NULL}, - { (char *)"StringVector___getitem__", _wrap_StringVector___getitem__, METH_VARARGS, NULL}, - { (char *)"StringVector___setitem__", _wrap_StringVector___setitem__, METH_VARARGS, NULL}, - { (char *)"StringVector_pop", _wrap_StringVector_pop, METH_VARARGS, NULL}, - { (char *)"StringVector_append", _wrap_StringVector_append, METH_VARARGS, NULL}, - { (char *)"StringVector_empty", _wrap_StringVector_empty, METH_VARARGS, NULL}, - { (char *)"StringVector_size", _wrap_StringVector_size, METH_VARARGS, NULL}, - { (char *)"StringVector_swap", _wrap_StringVector_swap, METH_VARARGS, NULL}, - { (char *)"StringVector_begin", _wrap_StringVector_begin, METH_VARARGS, NULL}, - { (char *)"StringVector_end", _wrap_StringVector_end, METH_VARARGS, NULL}, - { (char *)"StringVector_rbegin", _wrap_StringVector_rbegin, METH_VARARGS, NULL}, - { (char *)"StringVector_rend", _wrap_StringVector_rend, METH_VARARGS, NULL}, - { (char *)"StringVector_clear", _wrap_StringVector_clear, METH_VARARGS, NULL}, - { (char *)"StringVector_get_allocator", _wrap_StringVector_get_allocator, METH_VARARGS, NULL}, - { (char *)"StringVector_pop_back", _wrap_StringVector_pop_back, METH_VARARGS, NULL}, - { (char *)"StringVector_erase", _wrap_StringVector_erase, METH_VARARGS, NULL}, - { (char *)"new_StringVector", _wrap_new_StringVector, METH_VARARGS, NULL}, - { (char *)"StringVector_push_back", _wrap_StringVector_push_back, METH_VARARGS, NULL}, - { (char *)"StringVector_front", _wrap_StringVector_front, METH_VARARGS, NULL}, - { (char *)"StringVector_back", _wrap_StringVector_back, METH_VARARGS, NULL}, - { (char *)"StringVector_assign", _wrap_StringVector_assign, METH_VARARGS, NULL}, - { (char *)"StringVector_resize", _wrap_StringVector_resize, METH_VARARGS, NULL}, - { (char *)"StringVector_insert", _wrap_StringVector_insert, METH_VARARGS, NULL}, - { (char *)"StringVector_reserve", _wrap_StringVector_reserve, METH_VARARGS, NULL}, - { (char *)"StringVector_capacity", _wrap_StringVector_capacity, METH_VARARGS, NULL}, - { (char *)"delete_StringVector", _wrap_delete_StringVector, METH_VARARGS, NULL}, - { (char *)"StringVector_swigregister", StringVector_swigregister, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_iterator", _wrap_VectorOfStructVectorDouble_iterator, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble___nonzero__", _wrap_VectorOfStructVectorDouble___nonzero__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble___bool__", _wrap_VectorOfStructVectorDouble___bool__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble___len__", _wrap_VectorOfStructVectorDouble___len__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble___getslice__", _wrap_VectorOfStructVectorDouble___getslice__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble___setslice__", _wrap_VectorOfStructVectorDouble___setslice__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble___delslice__", _wrap_VectorOfStructVectorDouble___delslice__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble___delitem__", _wrap_VectorOfStructVectorDouble___delitem__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble___getitem__", _wrap_VectorOfStructVectorDouble___getitem__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble___setitem__", _wrap_VectorOfStructVectorDouble___setitem__, METH_VARARGS, NULL}, - { 
(char *)"VectorOfStructVectorDouble_pop", _wrap_VectorOfStructVectorDouble_pop, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_append", _wrap_VectorOfStructVectorDouble_append, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_empty", _wrap_VectorOfStructVectorDouble_empty, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_size", _wrap_VectorOfStructVectorDouble_size, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_swap", _wrap_VectorOfStructVectorDouble_swap, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_begin", _wrap_VectorOfStructVectorDouble_begin, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_end", _wrap_VectorOfStructVectorDouble_end, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_rbegin", _wrap_VectorOfStructVectorDouble_rbegin, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_rend", _wrap_VectorOfStructVectorDouble_rend, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_clear", _wrap_VectorOfStructVectorDouble_clear, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_get_allocator", _wrap_VectorOfStructVectorDouble_get_allocator, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_pop_back", _wrap_VectorOfStructVectorDouble_pop_back, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_erase", _wrap_VectorOfStructVectorDouble_erase, METH_VARARGS, NULL}, - { (char *)"new_VectorOfStructVectorDouble", _wrap_new_VectorOfStructVectorDouble, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_push_back", _wrap_VectorOfStructVectorDouble_push_back, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_front", _wrap_VectorOfStructVectorDouble_front, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_back", _wrap_VectorOfStructVectorDouble_back, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_assign", _wrap_VectorOfStructVectorDouble_assign, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_resize", _wrap_VectorOfStructVectorDouble_resize, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_insert", _wrap_VectorOfStructVectorDouble_insert, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_reserve", _wrap_VectorOfStructVectorDouble_reserve, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_capacity", _wrap_VectorOfStructVectorDouble_capacity, METH_VARARGS, NULL}, - { (char *)"delete_VectorOfStructVectorDouble", _wrap_delete_VectorOfStructVectorDouble, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorDouble_swigregister", VectorOfStructVectorDouble_swigregister, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_iterator", _wrap_VectorOfStructVectorInt_iterator, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt___nonzero__", _wrap_VectorOfStructVectorInt___nonzero__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt___bool__", _wrap_VectorOfStructVectorInt___bool__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt___len__", _wrap_VectorOfStructVectorInt___len__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt___getslice__", _wrap_VectorOfStructVectorInt___getslice__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt___setslice__", _wrap_VectorOfStructVectorInt___setslice__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt___delslice__", _wrap_VectorOfStructVectorInt___delslice__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt___delitem__", _wrap_VectorOfStructVectorInt___delitem__, METH_VARARGS, 
NULL}, - { (char *)"VectorOfStructVectorInt___getitem__", _wrap_VectorOfStructVectorInt___getitem__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt___setitem__", _wrap_VectorOfStructVectorInt___setitem__, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_pop", _wrap_VectorOfStructVectorInt_pop, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_append", _wrap_VectorOfStructVectorInt_append, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_empty", _wrap_VectorOfStructVectorInt_empty, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_size", _wrap_VectorOfStructVectorInt_size, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_swap", _wrap_VectorOfStructVectorInt_swap, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_begin", _wrap_VectorOfStructVectorInt_begin, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_end", _wrap_VectorOfStructVectorInt_end, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_rbegin", _wrap_VectorOfStructVectorInt_rbegin, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_rend", _wrap_VectorOfStructVectorInt_rend, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_clear", _wrap_VectorOfStructVectorInt_clear, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_get_allocator", _wrap_VectorOfStructVectorInt_get_allocator, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_pop_back", _wrap_VectorOfStructVectorInt_pop_back, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_erase", _wrap_VectorOfStructVectorInt_erase, METH_VARARGS, NULL}, - { (char *)"new_VectorOfStructVectorInt", _wrap_new_VectorOfStructVectorInt, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_push_back", _wrap_VectorOfStructVectorInt_push_back, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_front", _wrap_VectorOfStructVectorInt_front, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_back", _wrap_VectorOfStructVectorInt_back, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_assign", _wrap_VectorOfStructVectorInt_assign, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_resize", _wrap_VectorOfStructVectorInt_resize, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_insert", _wrap_VectorOfStructVectorInt_insert, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_reserve", _wrap_VectorOfStructVectorInt_reserve, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_capacity", _wrap_VectorOfStructVectorInt_capacity, METH_VARARGS, NULL}, - { (char *)"delete_VectorOfStructVectorInt", _wrap_delete_VectorOfStructVectorInt, METH_VARARGS, NULL}, - { (char *)"VectorOfStructVectorInt_swigregister", VectorOfStructVectorInt_swigregister, METH_VARARGS, NULL}, - { (char *)"FloatVector_iterator", _wrap_FloatVector_iterator, METH_VARARGS, NULL}, - { (char *)"FloatVector___nonzero__", _wrap_FloatVector___nonzero__, METH_VARARGS, NULL}, - { (char *)"FloatVector___bool__", _wrap_FloatVector___bool__, METH_VARARGS, NULL}, - { (char *)"FloatVector___len__", _wrap_FloatVector___len__, METH_VARARGS, NULL}, - { (char *)"FloatVector___getslice__", _wrap_FloatVector___getslice__, METH_VARARGS, NULL}, - { (char *)"FloatVector___setslice__", _wrap_FloatVector___setslice__, METH_VARARGS, NULL}, - { (char *)"FloatVector___delslice__", _wrap_FloatVector___delslice__, METH_VARARGS, NULL}, - { (char *)"FloatVector___delitem__", _wrap_FloatVector___delitem__, METH_VARARGS, NULL}, - { (char *)"FloatVector___getitem__", _wrap_FloatVector___getitem__, METH_VARARGS, NULL}, 
- { (char *)"FloatVector___setitem__", _wrap_FloatVector___setitem__, METH_VARARGS, NULL}, - { (char *)"FloatVector_pop", _wrap_FloatVector_pop, METH_VARARGS, NULL}, - { (char *)"FloatVector_append", _wrap_FloatVector_append, METH_VARARGS, NULL}, - { (char *)"FloatVector_empty", _wrap_FloatVector_empty, METH_VARARGS, NULL}, - { (char *)"FloatVector_size", _wrap_FloatVector_size, METH_VARARGS, NULL}, - { (char *)"FloatVector_swap", _wrap_FloatVector_swap, METH_VARARGS, NULL}, - { (char *)"FloatVector_begin", _wrap_FloatVector_begin, METH_VARARGS, NULL}, - { (char *)"FloatVector_end", _wrap_FloatVector_end, METH_VARARGS, NULL}, - { (char *)"FloatVector_rbegin", _wrap_FloatVector_rbegin, METH_VARARGS, NULL}, - { (char *)"FloatVector_rend", _wrap_FloatVector_rend, METH_VARARGS, NULL}, - { (char *)"FloatVector_clear", _wrap_FloatVector_clear, METH_VARARGS, NULL}, - { (char *)"FloatVector_get_allocator", _wrap_FloatVector_get_allocator, METH_VARARGS, NULL}, - { (char *)"FloatVector_pop_back", _wrap_FloatVector_pop_back, METH_VARARGS, NULL}, - { (char *)"FloatVector_erase", _wrap_FloatVector_erase, METH_VARARGS, NULL}, - { (char *)"new_FloatVector", _wrap_new_FloatVector, METH_VARARGS, NULL}, - { (char *)"FloatVector_push_back", _wrap_FloatVector_push_back, METH_VARARGS, NULL}, - { (char *)"FloatVector_front", _wrap_FloatVector_front, METH_VARARGS, NULL}, - { (char *)"FloatVector_back", _wrap_FloatVector_back, METH_VARARGS, NULL}, - { (char *)"FloatVector_assign", _wrap_FloatVector_assign, METH_VARARGS, NULL}, - { (char *)"FloatVector_resize", _wrap_FloatVector_resize, METH_VARARGS, NULL}, - { (char *)"FloatVector_insert", _wrap_FloatVector_insert, METH_VARARGS, NULL}, - { (char *)"FloatVector_reserve", _wrap_FloatVector_reserve, METH_VARARGS, NULL}, - { (char *)"FloatVector_capacity", _wrap_FloatVector_capacity, METH_VARARGS, NULL}, - { (char *)"delete_FloatVector", _wrap_delete_FloatVector, METH_VARARGS, NULL}, - { (char *)"FloatVector_swigregister", FloatVector_swigregister, METH_VARARGS, NULL}, - { (char *)"new_Pair", _wrap_new_Pair, METH_VARARGS, NULL}, - { (char *)"Pair_first_set", _wrap_Pair_first_set, METH_VARARGS, NULL}, - { (char *)"Pair_first_get", _wrap_Pair_first_get, METH_VARARGS, NULL}, - { (char *)"Pair_second_set", _wrap_Pair_second_set, METH_VARARGS, NULL}, - { (char *)"Pair_second_get", _wrap_Pair_second_get, METH_VARARGS, NULL}, - { (char *)"delete_Pair", _wrap_delete_Pair, METH_VARARGS, NULL}, - { (char *)"Pair_swigregister", Pair_swigregister, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_iterator", _wrap_PairFloatVectorVector_iterator, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector___nonzero__", _wrap_PairFloatVectorVector___nonzero__, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector___bool__", _wrap_PairFloatVectorVector___bool__, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector___len__", _wrap_PairFloatVectorVector___len__, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector___getslice__", _wrap_PairFloatVectorVector___getslice__, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector___setslice__", _wrap_PairFloatVectorVector___setslice__, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector___delslice__", _wrap_PairFloatVectorVector___delslice__, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector___delitem__", _wrap_PairFloatVectorVector___delitem__, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector___getitem__", _wrap_PairFloatVectorVector___getitem__, METH_VARARGS, NULL}, - { (char 
*)"PairFloatVectorVector___setitem__", _wrap_PairFloatVectorVector___setitem__, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_pop", _wrap_PairFloatVectorVector_pop, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_append", _wrap_PairFloatVectorVector_append, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_empty", _wrap_PairFloatVectorVector_empty, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_size", _wrap_PairFloatVectorVector_size, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_swap", _wrap_PairFloatVectorVector_swap, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_begin", _wrap_PairFloatVectorVector_begin, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_end", _wrap_PairFloatVectorVector_end, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_rbegin", _wrap_PairFloatVectorVector_rbegin, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_rend", _wrap_PairFloatVectorVector_rend, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_clear", _wrap_PairFloatVectorVector_clear, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_get_allocator", _wrap_PairFloatVectorVector_get_allocator, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_pop_back", _wrap_PairFloatVectorVector_pop_back, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_erase", _wrap_PairFloatVectorVector_erase, METH_VARARGS, NULL}, - { (char *)"new_PairFloatVectorVector", _wrap_new_PairFloatVectorVector, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_push_back", _wrap_PairFloatVectorVector_push_back, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_front", _wrap_PairFloatVectorVector_front, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_back", _wrap_PairFloatVectorVector_back, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_assign", _wrap_PairFloatVectorVector_assign, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_resize", _wrap_PairFloatVectorVector_resize, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_insert", _wrap_PairFloatVectorVector_insert, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_reserve", _wrap_PairFloatVectorVector_reserve, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_capacity", _wrap_PairFloatVectorVector_capacity, METH_VARARGS, NULL}, - { (char *)"delete_PairFloatVectorVector", _wrap_delete_PairFloatVectorVector, METH_VARARGS, NULL}, - { (char *)"PairFloatVectorVector_swigregister", PairFloatVectorVector_swigregister, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_iterator", _wrap_PairDoubleVectorVector_iterator, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector___nonzero__", _wrap_PairDoubleVectorVector___nonzero__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector___bool__", _wrap_PairDoubleVectorVector___bool__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector___len__", _wrap_PairDoubleVectorVector___len__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector___getslice__", _wrap_PairDoubleVectorVector___getslice__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector___setslice__", _wrap_PairDoubleVectorVector___setslice__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector___delslice__", _wrap_PairDoubleVectorVector___delslice__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector___delitem__", _wrap_PairDoubleVectorVector___delitem__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector___getitem__", _wrap_PairDoubleVectorVector___getitem__, METH_VARARGS, NULL}, - { (char 
*)"PairDoubleVectorVector___setitem__", _wrap_PairDoubleVectorVector___setitem__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_pop", _wrap_PairDoubleVectorVector_pop, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_append", _wrap_PairDoubleVectorVector_append, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_empty", _wrap_PairDoubleVectorVector_empty, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_size", _wrap_PairDoubleVectorVector_size, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_swap", _wrap_PairDoubleVectorVector_swap, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_begin", _wrap_PairDoubleVectorVector_begin, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_end", _wrap_PairDoubleVectorVector_end, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_rbegin", _wrap_PairDoubleVectorVector_rbegin, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_rend", _wrap_PairDoubleVectorVector_rend, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_clear", _wrap_PairDoubleVectorVector_clear, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_get_allocator", _wrap_PairDoubleVectorVector_get_allocator, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_pop_back", _wrap_PairDoubleVectorVector_pop_back, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_erase", _wrap_PairDoubleVectorVector_erase, METH_VARARGS, NULL}, - { (char *)"new_PairDoubleVectorVector", _wrap_new_PairDoubleVectorVector, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_push_back", _wrap_PairDoubleVectorVector_push_back, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_front", _wrap_PairDoubleVectorVector_front, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_back", _wrap_PairDoubleVectorVector_back, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_assign", _wrap_PairDoubleVectorVector_assign, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_resize", _wrap_PairDoubleVectorVector_resize, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_insert", _wrap_PairDoubleVectorVector_insert, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_reserve", _wrap_PairDoubleVectorVector_reserve, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_capacity", _wrap_PairDoubleVectorVector_capacity, METH_VARARGS, NULL}, - { (char *)"delete_PairDoubleVectorVector", _wrap_delete_PairDoubleVectorVector, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector_swigregister", PairDoubleVectorVector_swigregister, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_iterator", _wrap_PairDoubleVectorVector2_iterator, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2___nonzero__", _wrap_PairDoubleVectorVector2___nonzero__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2___bool__", _wrap_PairDoubleVectorVector2___bool__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2___len__", _wrap_PairDoubleVectorVector2___len__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2___getslice__", _wrap_PairDoubleVectorVector2___getslice__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2___setslice__", _wrap_PairDoubleVectorVector2___setslice__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2___delslice__", _wrap_PairDoubleVectorVector2___delslice__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2___delitem__", _wrap_PairDoubleVectorVector2___delitem__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2___getitem__", 
_wrap_PairDoubleVectorVector2___getitem__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2___setitem__", _wrap_PairDoubleVectorVector2___setitem__, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_pop", _wrap_PairDoubleVectorVector2_pop, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_append", _wrap_PairDoubleVectorVector2_append, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_empty", _wrap_PairDoubleVectorVector2_empty, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_size", _wrap_PairDoubleVectorVector2_size, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_swap", _wrap_PairDoubleVectorVector2_swap, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_begin", _wrap_PairDoubleVectorVector2_begin, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_end", _wrap_PairDoubleVectorVector2_end, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_rbegin", _wrap_PairDoubleVectorVector2_rbegin, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_rend", _wrap_PairDoubleVectorVector2_rend, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_clear", _wrap_PairDoubleVectorVector2_clear, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_get_allocator", _wrap_PairDoubleVectorVector2_get_allocator, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_pop_back", _wrap_PairDoubleVectorVector2_pop_back, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_erase", _wrap_PairDoubleVectorVector2_erase, METH_VARARGS, NULL}, - { (char *)"new_PairDoubleVectorVector2", _wrap_new_PairDoubleVectorVector2, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_push_back", _wrap_PairDoubleVectorVector2_push_back, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_front", _wrap_PairDoubleVectorVector2_front, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_back", _wrap_PairDoubleVectorVector2_back, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_assign", _wrap_PairDoubleVectorVector2_assign, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_resize", _wrap_PairDoubleVectorVector2_resize, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_insert", _wrap_PairDoubleVectorVector2_insert, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_reserve", _wrap_PairDoubleVectorVector2_reserve, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_capacity", _wrap_PairDoubleVectorVector2_capacity, METH_VARARGS, NULL}, - { (char *)"delete_PairDoubleVectorVector2", _wrap_delete_PairDoubleVectorVector2, METH_VARARGS, NULL}, - { (char *)"PairDoubleVectorVector2_swigregister", PairDoubleVectorVector2_swigregister, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_iterator", _wrap_DoubleVector3_iterator, METH_VARARGS, NULL}, - { (char *)"DoubleVector3___nonzero__", _wrap_DoubleVector3___nonzero__, METH_VARARGS, NULL}, - { (char *)"DoubleVector3___bool__", _wrap_DoubleVector3___bool__, METH_VARARGS, NULL}, - { (char *)"DoubleVector3___len__", _wrap_DoubleVector3___len__, METH_VARARGS, NULL}, - { (char *)"DoubleVector3___getslice__", _wrap_DoubleVector3___getslice__, METH_VARARGS, NULL}, - { (char *)"DoubleVector3___setslice__", _wrap_DoubleVector3___setslice__, METH_VARARGS, NULL}, - { (char *)"DoubleVector3___delslice__", _wrap_DoubleVector3___delslice__, METH_VARARGS, NULL}, - { (char *)"DoubleVector3___delitem__", _wrap_DoubleVector3___delitem__, METH_VARARGS, NULL}, - { (char *)"DoubleVector3___getitem__", _wrap_DoubleVector3___getitem__, METH_VARARGS, NULL}, - { (char 
*)"DoubleVector3___setitem__", _wrap_DoubleVector3___setitem__, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_pop", _wrap_DoubleVector3_pop, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_append", _wrap_DoubleVector3_append, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_empty", _wrap_DoubleVector3_empty, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_size", _wrap_DoubleVector3_size, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_swap", _wrap_DoubleVector3_swap, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_begin", _wrap_DoubleVector3_begin, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_end", _wrap_DoubleVector3_end, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_rbegin", _wrap_DoubleVector3_rbegin, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_rend", _wrap_DoubleVector3_rend, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_clear", _wrap_DoubleVector3_clear, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_get_allocator", _wrap_DoubleVector3_get_allocator, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_pop_back", _wrap_DoubleVector3_pop_back, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_erase", _wrap_DoubleVector3_erase, METH_VARARGS, NULL}, - { (char *)"new_DoubleVector3", _wrap_new_DoubleVector3, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_push_back", _wrap_DoubleVector3_push_back, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_front", _wrap_DoubleVector3_front, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_back", _wrap_DoubleVector3_back, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_assign", _wrap_DoubleVector3_assign, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_resize", _wrap_DoubleVector3_resize, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_insert", _wrap_DoubleVector3_insert, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_reserve", _wrap_DoubleVector3_reserve, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_capacity", _wrap_DoubleVector3_capacity, METH_VARARGS, NULL}, - { (char *)"delete_DoubleVector3", _wrap_delete_DoubleVector3, METH_VARARGS, NULL}, - { (char *)"DoubleVector3_swigregister", DoubleVector3_swigregister, METH_VARARGS, NULL}, - { (char *)"IntVector3_iterator", _wrap_IntVector3_iterator, METH_VARARGS, NULL}, - { (char *)"IntVector3___nonzero__", _wrap_IntVector3___nonzero__, METH_VARARGS, NULL}, - { (char *)"IntVector3___bool__", _wrap_IntVector3___bool__, METH_VARARGS, NULL}, - { (char *)"IntVector3___len__", _wrap_IntVector3___len__, METH_VARARGS, NULL}, - { (char *)"IntVector3___getslice__", _wrap_IntVector3___getslice__, METH_VARARGS, NULL}, - { (char *)"IntVector3___setslice__", _wrap_IntVector3___setslice__, METH_VARARGS, NULL}, - { (char *)"IntVector3___delslice__", _wrap_IntVector3___delslice__, METH_VARARGS, NULL}, - { (char *)"IntVector3___delitem__", _wrap_IntVector3___delitem__, METH_VARARGS, NULL}, - { (char *)"IntVector3___getitem__", _wrap_IntVector3___getitem__, METH_VARARGS, NULL}, - { (char *)"IntVector3___setitem__", _wrap_IntVector3___setitem__, METH_VARARGS, NULL}, - { (char *)"IntVector3_pop", _wrap_IntVector3_pop, METH_VARARGS, NULL}, - { (char *)"IntVector3_append", _wrap_IntVector3_append, METH_VARARGS, NULL}, - { (char *)"IntVector3_empty", _wrap_IntVector3_empty, METH_VARARGS, NULL}, - { (char *)"IntVector3_size", _wrap_IntVector3_size, METH_VARARGS, NULL}, - { (char *)"IntVector3_swap", _wrap_IntVector3_swap, METH_VARARGS, NULL}, - { (char *)"IntVector3_begin", _wrap_IntVector3_begin, METH_VARARGS, NULL}, - { (char *)"IntVector3_end", _wrap_IntVector3_end, METH_VARARGS, NULL}, - { (char *)"IntVector3_rbegin", 
_wrap_IntVector3_rbegin, METH_VARARGS, NULL}, - { (char *)"IntVector3_rend", _wrap_IntVector3_rend, METH_VARARGS, NULL}, - { (char *)"IntVector3_clear", _wrap_IntVector3_clear, METH_VARARGS, NULL}, - { (char *)"IntVector3_get_allocator", _wrap_IntVector3_get_allocator, METH_VARARGS, NULL}, - { (char *)"IntVector3_pop_back", _wrap_IntVector3_pop_back, METH_VARARGS, NULL}, - { (char *)"IntVector3_erase", _wrap_IntVector3_erase, METH_VARARGS, NULL}, - { (char *)"new_IntVector3", _wrap_new_IntVector3, METH_VARARGS, NULL}, - { (char *)"IntVector3_push_back", _wrap_IntVector3_push_back, METH_VARARGS, NULL}, - { (char *)"IntVector3_front", _wrap_IntVector3_front, METH_VARARGS, NULL}, - { (char *)"IntVector3_back", _wrap_IntVector3_back, METH_VARARGS, NULL}, - { (char *)"IntVector3_assign", _wrap_IntVector3_assign, METH_VARARGS, NULL}, - { (char *)"IntVector3_resize", _wrap_IntVector3_resize, METH_VARARGS, NULL}, - { (char *)"IntVector3_insert", _wrap_IntVector3_insert, METH_VARARGS, NULL}, - { (char *)"IntVector3_reserve", _wrap_IntVector3_reserve, METH_VARARGS, NULL}, - { (char *)"IntVector3_capacity", _wrap_IntVector3_capacity, METH_VARARGS, NULL}, - { (char *)"delete_IntVector3", _wrap_delete_IntVector3, METH_VARARGS, NULL}, - { (char *)"IntVector3_swigregister", IntVector3_swigregister, METH_VARARGS, NULL}, - { (char *)"TrieVector_iterator", _wrap_TrieVector_iterator, METH_VARARGS, NULL}, - { (char *)"TrieVector___nonzero__", _wrap_TrieVector___nonzero__, METH_VARARGS, NULL}, - { (char *)"TrieVector___bool__", _wrap_TrieVector___bool__, METH_VARARGS, NULL}, - { (char *)"TrieVector___len__", _wrap_TrieVector___len__, METH_VARARGS, NULL}, - { (char *)"TrieVector___getslice__", _wrap_TrieVector___getslice__, METH_VARARGS, NULL}, - { (char *)"TrieVector___setslice__", _wrap_TrieVector___setslice__, METH_VARARGS, NULL}, - { (char *)"TrieVector___delslice__", _wrap_TrieVector___delslice__, METH_VARARGS, NULL}, - { (char *)"TrieVector___delitem__", _wrap_TrieVector___delitem__, METH_VARARGS, NULL}, - { (char *)"TrieVector___getitem__", _wrap_TrieVector___getitem__, METH_VARARGS, NULL}, - { (char *)"TrieVector___setitem__", _wrap_TrieVector___setitem__, METH_VARARGS, NULL}, - { (char *)"TrieVector_pop", _wrap_TrieVector_pop, METH_VARARGS, NULL}, - { (char *)"TrieVector_append", _wrap_TrieVector_append, METH_VARARGS, NULL}, - { (char *)"TrieVector_empty", _wrap_TrieVector_empty, METH_VARARGS, NULL}, - { (char *)"TrieVector_size", _wrap_TrieVector_size, METH_VARARGS, NULL}, - { (char *)"TrieVector_swap", _wrap_TrieVector_swap, METH_VARARGS, NULL}, - { (char *)"TrieVector_begin", _wrap_TrieVector_begin, METH_VARARGS, NULL}, - { (char *)"TrieVector_end", _wrap_TrieVector_end, METH_VARARGS, NULL}, - { (char *)"TrieVector_rbegin", _wrap_TrieVector_rbegin, METH_VARARGS, NULL}, - { (char *)"TrieVector_rend", _wrap_TrieVector_rend, METH_VARARGS, NULL}, - { (char *)"TrieVector_clear", _wrap_TrieVector_clear, METH_VARARGS, NULL}, - { (char *)"TrieVector_get_allocator", _wrap_TrieVector_get_allocator, METH_VARARGS, NULL}, - { (char *)"TrieVector_pop_back", _wrap_TrieVector_pop_back, METH_VARARGS, NULL}, - { (char *)"TrieVector_erase", _wrap_TrieVector_erase, METH_VARARGS, NULL}, - { (char *)"new_TrieVector", _wrap_new_TrieVector, METH_VARARGS, NULL}, - { (char *)"TrieVector_push_back", _wrap_TrieVector_push_back, METH_VARARGS, NULL}, - { (char *)"TrieVector_front", _wrap_TrieVector_front, METH_VARARGS, NULL}, - { (char *)"TrieVector_back", _wrap_TrieVector_back, METH_VARARGS, NULL}, - { (char 
*)"TrieVector_assign", _wrap_TrieVector_assign, METH_VARARGS, NULL}, - { (char *)"TrieVector_resize", _wrap_TrieVector_resize, METH_VARARGS, NULL}, - { (char *)"TrieVector_insert", _wrap_TrieVector_insert, METH_VARARGS, NULL}, - { (char *)"TrieVector_reserve", _wrap_TrieVector_reserve, METH_VARARGS, NULL}, - { (char *)"TrieVector_capacity", _wrap_TrieVector_capacity, METH_VARARGS, NULL}, - { (char *)"delete_TrieVector", _wrap_delete_TrieVector, METH_VARARGS, NULL}, - { (char *)"TrieVector_swigregister", TrieVector_swigregister, METH_VARARGS, NULL}, - { (char *)"BoolVector_iterator", _wrap_BoolVector_iterator, METH_VARARGS, NULL}, - { (char *)"BoolVector___nonzero__", _wrap_BoolVector___nonzero__, METH_VARARGS, NULL}, - { (char *)"BoolVector___bool__", _wrap_BoolVector___bool__, METH_VARARGS, NULL}, - { (char *)"BoolVector___len__", _wrap_BoolVector___len__, METH_VARARGS, NULL}, - { (char *)"BoolVector___getslice__", _wrap_BoolVector___getslice__, METH_VARARGS, NULL}, - { (char *)"BoolVector___setslice__", _wrap_BoolVector___setslice__, METH_VARARGS, NULL}, - { (char *)"BoolVector___delslice__", _wrap_BoolVector___delslice__, METH_VARARGS, NULL}, - { (char *)"BoolVector___delitem__", _wrap_BoolVector___delitem__, METH_VARARGS, NULL}, - { (char *)"BoolVector___getitem__", _wrap_BoolVector___getitem__, METH_VARARGS, NULL}, - { (char *)"BoolVector___setitem__", _wrap_BoolVector___setitem__, METH_VARARGS, NULL}, - { (char *)"BoolVector_pop", _wrap_BoolVector_pop, METH_VARARGS, NULL}, - { (char *)"BoolVector_append", _wrap_BoolVector_append, METH_VARARGS, NULL}, - { (char *)"BoolVector_empty", _wrap_BoolVector_empty, METH_VARARGS, NULL}, - { (char *)"BoolVector_size", _wrap_BoolVector_size, METH_VARARGS, NULL}, - { (char *)"BoolVector_swap", _wrap_BoolVector_swap, METH_VARARGS, NULL}, - { (char *)"BoolVector_begin", _wrap_BoolVector_begin, METH_VARARGS, NULL}, - { (char *)"BoolVector_end", _wrap_BoolVector_end, METH_VARARGS, NULL}, - { (char *)"BoolVector_rbegin", _wrap_BoolVector_rbegin, METH_VARARGS, NULL}, - { (char *)"BoolVector_rend", _wrap_BoolVector_rend, METH_VARARGS, NULL}, - { (char *)"BoolVector_clear", _wrap_BoolVector_clear, METH_VARARGS, NULL}, - { (char *)"BoolVector_get_allocator", _wrap_BoolVector_get_allocator, METH_VARARGS, NULL}, - { (char *)"BoolVector_pop_back", _wrap_BoolVector_pop_back, METH_VARARGS, NULL}, - { (char *)"BoolVector_erase", _wrap_BoolVector_erase, METH_VARARGS, NULL}, - { (char *)"new_BoolVector", _wrap_new_BoolVector, METH_VARARGS, NULL}, - { (char *)"BoolVector_push_back", _wrap_BoolVector_push_back, METH_VARARGS, NULL}, - { (char *)"BoolVector_front", _wrap_BoolVector_front, METH_VARARGS, NULL}, - { (char *)"BoolVector_back", _wrap_BoolVector_back, METH_VARARGS, NULL}, - { (char *)"BoolVector_assign", _wrap_BoolVector_assign, METH_VARARGS, NULL}, - { (char *)"BoolVector_resize", _wrap_BoolVector_resize, METH_VARARGS, NULL}, - { (char *)"BoolVector_insert", _wrap_BoolVector_insert, METH_VARARGS, NULL}, - { (char *)"BoolVector_reserve", _wrap_BoolVector_reserve, METH_VARARGS, NULL}, - { (char *)"BoolVector_capacity", _wrap_BoolVector_capacity, METH_VARARGS, NULL}, - { (char *)"delete_BoolVector", _wrap_delete_BoolVector, METH_VARARGS, NULL}, - { (char *)"BoolVector_swigregister", BoolVector_swigregister, METH_VARARGS, NULL}, - { (char *)"IntDoublePairCompSecondRev", _wrap_IntDoublePairCompSecondRev, METH_VARARGS, NULL}, - { (char *)"StringDoublePairCompSecondRev", _wrap_StringDoublePairCompSecondRev, METH_VARARGS, NULL}, - { (char 
*)"DoubleStringPairCompFirstRev", _wrap_DoubleStringPairCompFirstRev, METH_VARARGS, NULL}, - { (char *)"new_RetriveStrEnumerateVocab", _wrap_new_RetriveStrEnumerateVocab, METH_VARARGS, NULL}, - { (char *)"RetriveStrEnumerateVocab_Add", _wrap_RetriveStrEnumerateVocab_Add, METH_VARARGS, NULL}, - { (char *)"RetriveStrEnumerateVocab_vocabulary_set", _wrap_RetriveStrEnumerateVocab_vocabulary_set, METH_VARARGS, NULL}, - { (char *)"RetriveStrEnumerateVocab_vocabulary_get", _wrap_RetriveStrEnumerateVocab_vocabulary_get, METH_VARARGS, NULL}, - { (char *)"delete_RetriveStrEnumerateVocab", _wrap_delete_RetriveStrEnumerateVocab, METH_VARARGS, NULL}, - { (char *)"RetriveStrEnumerateVocab_swigregister", RetriveStrEnumerateVocab_swigregister, METH_VARARGS, NULL}, - { (char *)"new_Scorer", _wrap_new_Scorer, METH_VARARGS, NULL}, - { (char *)"delete_Scorer", _wrap_delete_Scorer, METH_VARARGS, NULL}, - { (char *)"Scorer_get_log_cond_prob", _wrap_Scorer_get_log_cond_prob, METH_VARARGS, NULL}, - { (char *)"Scorer_get_sent_log_prob", _wrap_Scorer_get_sent_log_prob, METH_VARARGS, NULL}, - { (char *)"Scorer_get_max_order", _wrap_Scorer_get_max_order, METH_VARARGS, NULL}, - { (char *)"Scorer_get_dict_size", _wrap_Scorer_get_dict_size, METH_VARARGS, NULL}, - { (char *)"Scorer_is_character_based", _wrap_Scorer_is_character_based, METH_VARARGS, NULL}, - { (char *)"Scorer_reset_params", _wrap_Scorer_reset_params, METH_VARARGS, NULL}, - { (char *)"Scorer_make_ngram", _wrap_Scorer_make_ngram, METH_VARARGS, NULL}, - { (char *)"Scorer_split_labels", _wrap_Scorer_split_labels, METH_VARARGS, NULL}, - { (char *)"Scorer_alpha_set", _wrap_Scorer_alpha_set, METH_VARARGS, NULL}, - { (char *)"Scorer_alpha_get", _wrap_Scorer_alpha_get, METH_VARARGS, NULL}, - { (char *)"Scorer_beta_set", _wrap_Scorer_beta_set, METH_VARARGS, NULL}, - { (char *)"Scorer_beta_get", _wrap_Scorer_beta_get, METH_VARARGS, NULL}, - { (char *)"Scorer_dictionary_set", _wrap_Scorer_dictionary_set, METH_VARARGS, NULL}, - { (char *)"Scorer_dictionary_get", _wrap_Scorer_dictionary_get, METH_VARARGS, NULL}, - { (char *)"Scorer_swigregister", Scorer_swigregister, METH_VARARGS, NULL}, - { (char *)"ctc_beam_search_decoder", _wrap_ctc_beam_search_decoder, METH_VARARGS, NULL}, - { (char *)"ctc_beam_search_decoder_batch", _wrap_ctc_beam_search_decoder_batch, METH_VARARGS, NULL}, - { (char *)"map_sent", _wrap_map_sent, METH_VARARGS, NULL}, - { (char *)"map_batch", _wrap_map_batch, METH_VARARGS, NULL}, - { NULL, NULL, 0, NULL } -}; - - -/* -------- TYPE CONVERSION AND EQUIVALENCE RULES (BEGIN) -------- */ - -static swig_type_info _swigt__p_PathTrie = {"_p_PathTrie", "PathTrie *|std::vector< PathTrie * >::value_type", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_RetriveStrEnumerateVocab = {"_p_RetriveStrEnumerateVocab", "RetriveStrEnumerateVocab *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_Scorer = {"_p_Scorer", "Scorer *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_StringPiece = {"_p_StringPiece", "StringPiece *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_allocator_type = {"_p_allocator_type", "allocator_type *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_char = {"_p_char", "char *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_const_reference = {"_p_const_reference", "const_reference *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_difference_type = {"_p_difference_type", "difference_type *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_first_type = {"_p_first_type", "first_type *", 0, 0, 
(void*)0, 0}; -static swig_type_info _swigt__p_fst__StdVectorFst = {"_p_fst__StdVectorFst", "fst::StdVectorFst *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_lm__WordIndex = {"_p_lm__WordIndex", "lm::WordIndex *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_p_PyObject = {"_p_p_PyObject", "PyObject **", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_reference = {"_p_reference", "reference *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_second_type = {"_p_second_type", "second_type *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_size_type = {"_p_size_type", "size_type *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_PathTrie_p_t = {"_p_std__allocatorT_PathTrie_p_t", "std::vector< PathTrie * >::allocator_type *|std::allocator< PathTrie * > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_bool_t = {"_p_std__allocatorT_bool_t", "std::vector< bool >::allocator_type *|std::allocator< bool > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_double_t = {"_p_std__allocatorT_double_t", "std::vector< double >::allocator_type *|std::allocator< double > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_float_t = {"_p_std__allocatorT_float_t", "std::vector< float >::allocator_type *|std::allocator< float > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_int_t = {"_p_std__allocatorT_int_t", "std::vector< int >::allocator_type *|std::allocator< int > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t = {"_p_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t", "std::vector< std::pair< double,std::vector< int > > >::allocator_type *|std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t = {"_p_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t", "std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > *|std::vector< std::pair< float,std::vector< int > > >::allocator_type *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_std__string_t = {"_p_std__allocatorT_std__string_t", "std::vector< std::string >::allocator_type *|std::allocator< std::string > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t = {"_p_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t", "std::vector< std::vector< double > >::allocator_type *|std::allocator< std::vector< double,std::allocator< double > > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t = {"_p_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t", "std::vector< std::vector< int > >::allocator_type *|std::allocator< std::vector< int,std::allocator< int > > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t = {"_p_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t", "std::allocator< std::vector< std::pair< double,std::vector< 
int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > *|std::vector< std::vector< std::pair< double,std::vector< int > > > >::allocator_type *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t = {"_p_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t", "std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > *|std::vector< std::vector< std::vector< double > > >::allocator_type *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t = {"_p_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t", "std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > *|std::vector< std::vector< std::vector< int > > >::allocator_type *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__invalid_argument = {"_p_std__invalid_argument", "std::invalid_argument *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__pairT_double_std__string_t = {"_p_std__pairT_double_std__string_t", "std::pair< double,std::string > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t = {"_p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t", "std::pair< double,std::vector< int,std::allocator< int > > > *|std::vector< std::pair< double,std::vector< int > > >::value_type *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t = {"_p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t", "std::pair< float,std::vector< int > > *|std::pair< float,std::vector< int,std::allocator< int > > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__pairT_int_double_t = {"_p_std__pairT_int_double_t", "std::pair< int,double > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__pairT_std__string_double_t = {"_p_std__pairT_std__string_double_t", "std::pair< std::string,double > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__shared_ptrT_fst__SortedMatcherT_fst__StdVectorFst_t_t = {"_p_std__shared_ptrT_fst__SortedMatcherT_fst__StdVectorFst_t_t", "std::shared_ptr< fst::SortedMatcher< fst::StdVectorFst > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t = {"_p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t", "std::vector< PathTrie *,std::allocator< PathTrie * > > *|std::vector< PathTrie * > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_bool_std__allocatorT_bool_t_t = {"_p_std__vectorT_bool_std__allocatorT_bool_t_t", "std::vector< bool,std::allocator< bool > > *|std::vector< bool > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_double_std__allocatorT_double_t_t = {"_p_std__vectorT_double_std__allocatorT_double_t_t", "std::vector< double,std::allocator< double > > *|std::vector< double > *", 0, 0, (void*)0, 0}; -static swig_type_info 
_swigt__p_std__vectorT_float_std__allocatorT_float_t_t = {"_p_std__vectorT_float_std__allocatorT_float_t_t", "std::vector< float > *|std::vector< float,std::allocator< float > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_int_std__allocatorT_int_t_t = {"_p_std__vectorT_int_std__allocatorT_int_t_t", "std::vector< int,std::allocator< int > > *|std::vector< int > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t = {"_p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t", "std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > *|std::vector< std::pair< double,std::vector< int,std::allocator< int > > > > *|std::vector< std::pair< double,std::vector< int > > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t = {"_p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t", "std::vector< std::pair< float,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< float,std::vector< int,std::allocator< int > > > > > *|std::vector< std::pair< float,std::vector< int,std::allocator< int > > > > *|std::vector< std::pair< float,std::vector< int > > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_std__string_std__allocatorT_std__string_t_t = {"_p_std__vectorT_std__string_std__allocatorT_std__string_t_t", "std::vector< std::string,std::allocator< std::string > > *|std::vector< std::string > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t = {"_p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t", "std::vector< std::vector< double > > *|std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > *|std::vector< std::vector< double,std::allocator< double > > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t = {"_p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t", "std::vector< std::vector< int > > *|std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > *|std::vector< std::vector< int,std::allocator< int > > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t = 
{"_p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t", "std::vector< std::vector< std::pair< double,std::vector< int > > > > *|std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > >,std::allocator< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > > *|std::vector< std::vector< std::pair< double,std::vector< int,std::allocator< int > > >,std::allocator< std::pair< double,std::vector< int,std::allocator< int > > > > > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t = {"_p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t", "std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > >,std::allocator< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > > *|std::vector< std::vector< std::vector< double,std::allocator< double > >,std::allocator< std::vector< double,std::allocator< double > > > > > *|std::vector< std::vector< std::vector< double > > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t = {"_p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t", "std::vector< std::vector< std::vector< int > > > *|std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > >,std::allocator< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > > *|std::vector< std::vector< std::vector< int,std::allocator< int > >,std::allocator< std::vector< int,std::allocator< int > > > > > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_swig__SwigPyIterator = {"_p_swig__SwigPyIterator", "swig::SwigPyIterator *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_value_type = {"_p_value_type", "value_type *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_void = {"_p_void", "void *", 0, 0, (void*)0, 0}; - -static swig_type_info *swig_type_initial[] = { - &_swigt__p_PathTrie, - &_swigt__p_RetriveStrEnumerateVocab, - 
&_swigt__p_Scorer, - &_swigt__p_StringPiece, - &_swigt__p_allocator_type, - &_swigt__p_char, - &_swigt__p_const_reference, - &_swigt__p_difference_type, - &_swigt__p_first_type, - &_swigt__p_fst__StdVectorFst, - &_swigt__p_lm__WordIndex, - &_swigt__p_p_PyObject, - &_swigt__p_reference, - &_swigt__p_second_type, - &_swigt__p_size_type, - &_swigt__p_std__allocatorT_PathTrie_p_t, - &_swigt__p_std__allocatorT_bool_t, - &_swigt__p_std__allocatorT_double_t, - &_swigt__p_std__allocatorT_float_t, - &_swigt__p_std__allocatorT_int_t, - &_swigt__p_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t, - &_swigt__p_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t, - &_swigt__p_std__allocatorT_std__string_t, - &_swigt__p_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t, - &_swigt__p_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t, - &_swigt__p_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, - &_swigt__p_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t, - &_swigt__p_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, - &_swigt__p_std__invalid_argument, - &_swigt__p_std__pairT_double_std__string_t, - &_swigt__p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, - &_swigt__p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, - &_swigt__p_std__pairT_int_double_t, - &_swigt__p_std__pairT_std__string_double_t, - &_swigt__p_std__shared_ptrT_fst__SortedMatcherT_fst__StdVectorFst_t_t, - &_swigt__p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, - &_swigt__p_std__vectorT_bool_std__allocatorT_bool_t_t, - &_swigt__p_std__vectorT_double_std__allocatorT_double_t_t, - &_swigt__p_std__vectorT_float_std__allocatorT_float_t_t, - &_swigt__p_std__vectorT_int_std__allocatorT_int_t_t, - &_swigt__p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, - &_swigt__p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, - &_swigt__p_std__vectorT_std__string_std__allocatorT_std__string_t_t, - &_swigt__p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, - &_swigt__p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, - &_swigt__p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, - &_swigt__p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, - 
&_swigt__p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, - &_swigt__p_swig__SwigPyIterator, - &_swigt__p_value_type, - &_swigt__p_void, -}; - -static swig_cast_info _swigc__p_PathTrie[] = { {&_swigt__p_PathTrie, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_RetriveStrEnumerateVocab[] = { {&_swigt__p_RetriveStrEnumerateVocab, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_Scorer[] = { {&_swigt__p_Scorer, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_StringPiece[] = { {&_swigt__p_StringPiece, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_allocator_type[] = { {&_swigt__p_allocator_type, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_char[] = { {&_swigt__p_char, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_const_reference[] = { {&_swigt__p_const_reference, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_difference_type[] = { {&_swigt__p_difference_type, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_first_type[] = { {&_swigt__p_first_type, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_fst__StdVectorFst[] = { {&_swigt__p_fst__StdVectorFst, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_lm__WordIndex[] = { {&_swigt__p_lm__WordIndex, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_p_PyObject[] = { {&_swigt__p_p_PyObject, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_reference[] = { {&_swigt__p_reference, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_second_type[] = { {&_swigt__p_second_type, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_size_type[] = { {&_swigt__p_size_type, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_PathTrie_p_t[] = { {&_swigt__p_std__allocatorT_PathTrie_p_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_bool_t[] = { {&_swigt__p_std__allocatorT_bool_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_double_t[] = { {&_swigt__p_std__allocatorT_double_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_float_t[] = { {&_swigt__p_std__allocatorT_float_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_int_t[] = { {&_swigt__p_std__allocatorT_int_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t[] = { {&_swigt__p_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t[] = { {&_swigt__p_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_std__string_t[] = { {&_swigt__p_std__allocatorT_std__string_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t[] = { {&_swigt__p_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t[] = { {&_swigt__p_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info 
_swigc__p_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t[] = { {&_swigt__p_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t[] = { {&_swigt__p_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t[] = { {&_swigt__p_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__invalid_argument[] = { {&_swigt__p_std__invalid_argument, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__pairT_double_std__string_t[] = { {&_swigt__p_std__pairT_double_std__string_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t[] = { {&_swigt__p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t[] = { {&_swigt__p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__pairT_int_double_t[] = { {&_swigt__p_std__pairT_int_double_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__pairT_std__string_double_t[] = { {&_swigt__p_std__pairT_std__string_double_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__shared_ptrT_fst__SortedMatcherT_fst__StdVectorFst_t_t[] = { {&_swigt__p_std__shared_ptrT_fst__SortedMatcherT_fst__StdVectorFst_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t[] = { {&_swigt__p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_bool_std__allocatorT_bool_t_t[] = { {&_swigt__p_std__vectorT_bool_std__allocatorT_bool_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_double_std__allocatorT_double_t_t[] = { {&_swigt__p_std__vectorT_double_std__allocatorT_double_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_float_std__allocatorT_float_t_t[] = { {&_swigt__p_std__vectorT_float_std__allocatorT_float_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_int_std__allocatorT_int_t_t[] = { {&_swigt__p_std__vectorT_int_std__allocatorT_int_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t[] = { {&_swigt__p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info 
_swigc__p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t[] = { {&_swigt__p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_std__string_std__allocatorT_std__string_t_t[] = { {&_swigt__p_std__vectorT_std__string_std__allocatorT_std__string_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t[] = { {&_swigt__p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t[] = { {&_swigt__p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t[] = { {&_swigt__p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t[] = { {&_swigt__p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t[] = { {&_swigt__p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_swig__SwigPyIterator[] = { {&_swigt__p_swig__SwigPyIterator, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_value_type[] = { {&_swigt__p_value_type, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_void[] = { {&_swigt__p_void, 0, 0, 0},{0, 0, 0, 0}}; - -static swig_cast_info *swig_cast_initial[] = { - 
_swigc__p_PathTrie, - _swigc__p_RetriveStrEnumerateVocab, - _swigc__p_Scorer, - _swigc__p_StringPiece, - _swigc__p_allocator_type, - _swigc__p_char, - _swigc__p_const_reference, - _swigc__p_difference_type, - _swigc__p_first_type, - _swigc__p_fst__StdVectorFst, - _swigc__p_lm__WordIndex, - _swigc__p_p_PyObject, - _swigc__p_reference, - _swigc__p_second_type, - _swigc__p_size_type, - _swigc__p_std__allocatorT_PathTrie_p_t, - _swigc__p_std__allocatorT_bool_t, - _swigc__p_std__allocatorT_double_t, - _swigc__p_std__allocatorT_float_t, - _swigc__p_std__allocatorT_int_t, - _swigc__p_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t, - _swigc__p_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t, - _swigc__p_std__allocatorT_std__string_t, - _swigc__p_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t, - _swigc__p_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t, - _swigc__p_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, - _swigc__p_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t, - _swigc__p_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, - _swigc__p_std__invalid_argument, - _swigc__p_std__pairT_double_std__string_t, - _swigc__p_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t, - _swigc__p_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t, - _swigc__p_std__pairT_int_double_t, - _swigc__p_std__pairT_std__string_double_t, - _swigc__p_std__shared_ptrT_fst__SortedMatcherT_fst__StdVectorFst_t_t, - _swigc__p_std__vectorT_PathTrie_p_std__allocatorT_PathTrie_p_t_t, - _swigc__p_std__vectorT_bool_std__allocatorT_bool_t_t, - _swigc__p_std__vectorT_double_std__allocatorT_double_t_t, - _swigc__p_std__vectorT_float_std__allocatorT_float_t_t, - _swigc__p_std__vectorT_int_std__allocatorT_int_t_t, - _swigc__p_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, - _swigc__p_std__vectorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_float_std__vectorT_int_std__allocatorT_int_t_t_t_t_t, - _swigc__p_std__vectorT_std__string_std__allocatorT_std__string_t_t, - _swigc__p_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t, - _swigc__p_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t, - _swigc__p_std__vectorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_std__allocatorT_std__vectorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_std__allocatorT_std__pairT_double_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t_t, - _swigc__p_std__vectorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_double_std__allocatorT_double_t_t_std__allocatorT_std__vectorT_double_std__allocatorT_double_t_t_t_t_t_t, - 
_swigc__p_std__vectorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_std__allocatorT_std__vectorT_std__vectorT_int_std__allocatorT_int_t_t_std__allocatorT_std__vectorT_int_std__allocatorT_int_t_t_t_t_t_t, - _swigc__p_swig__SwigPyIterator, - _swigc__p_value_type, - _swigc__p_void, -}; - - -/* -------- TYPE CONVERSION AND EQUIVALENCE RULES (END) -------- */ - -static swig_const_info swig_const_table[] = { -{0, 0, 0, 0.0, 0, 0}}; - -#ifdef __cplusplus -} -#endif -/* ----------------------------------------------------------------------------- - * Type initialization: - * This problem is tough by the requirement that no dynamic - * memory is used. Also, since swig_type_info structures store pointers to - * swig_cast_info structures and swig_cast_info structures store pointers back - * to swig_type_info structures, we need some lookup code at initialization. - * The idea is that swig generates all the structures that are needed. - * The runtime then collects these partially filled structures. - * The SWIG_InitializeModule function takes these initial arrays out of - * swig_module, and does all the lookup, filling in the swig_module.types - * array with the correct data and linking the correct swig_cast_info - * structures together. - * - * The generated swig_type_info structures are assigned statically to an initial - * array. We just loop through that array, and handle each type individually. - * First we lookup if this type has been already loaded, and if so, use the - * loaded structure instead of the generated one. Then we have to fill in the - * cast linked list. The cast data is initially stored in something like a - * two-dimensional array. Each row corresponds to a type (there are the same - * number of rows as there are in the swig_type_initial array). Each entry in - * a column is one of the swig_cast_info structures for that type. - * The cast_initial array is actually an array of arrays, because each row has - * a variable number of columns. So to actually build the cast linked list, - * we find the array of casts associated with the type, and loop through it - * adding the casts to the list. The one last trick we need to do is making - * sure the type pointer in the swig_cast_info struct is correct. - * - * First off, we lookup the cast->type name to see if it is already loaded. - * There are three cases to handle: - * 1) If the cast->type has already been loaded AND the type we are adding - * casting info to has not been loaded (it is in this module), THEN we - * replace the cast->type pointer with the type pointer that has already - * been loaded. - * 2) If BOTH types (the one we are adding casting info to, and the - * cast->type) are loaded, THEN the cast info has already been loaded by - * the previous module so we just ignore it. - * 3) Finally, if cast->type has not already been loaded, then we add that - * swig_cast_info to the linked list (because the cast->type) pointer will - * be correct. 
- * ----------------------------------------------------------------------------- */ - -#ifdef __cplusplus -extern "C" { -#if 0 -} /* c-mode */ -#endif -#endif - -#if 0 -#define SWIGRUNTIME_DEBUG -#endif - - -SWIGRUNTIME void -SWIG_InitializeModule(void *clientdata) { - size_t i; - swig_module_info *module_head, *iter; - int init; - - /* check to see if the circular list has been setup, if not, set it up */ - if (swig_module.next==0) { - /* Initialize the swig_module */ - swig_module.type_initial = swig_type_initial; - swig_module.cast_initial = swig_cast_initial; - swig_module.next = &swig_module; - init = 1; - } else { - init = 0; - } - - /* Try and load any already created modules */ - module_head = SWIG_GetModule(clientdata); - if (!module_head) { - /* This is the first module loaded for this interpreter */ - /* so set the swig module into the interpreter */ - SWIG_SetModule(clientdata, &swig_module); - } else { - /* the interpreter has loaded a SWIG module, but has it loaded this one? */ - iter=module_head; - do { - if (iter==&swig_module) { - /* Our module is already in the list, so there's nothing more to do. */ - return; - } - iter=iter->next; - } while (iter!= module_head); - - /* otherwise we must add our module into the list */ - swig_module.next = module_head->next; - module_head->next = &swig_module; - } - - /* When multiple interpreters are used, a module could have already been initialized in - a different interpreter, but not yet have a pointer in this interpreter. - In this case, we do not want to continue adding types... everything should be - set up already */ - if (init == 0) return; - - /* Now work on filling in swig_module.types */ -#ifdef SWIGRUNTIME_DEBUG - printf("SWIG_InitializeModule: size %d\n", swig_module.size); -#endif - for (i = 0; i < swig_module.size; ++i) { - swig_type_info *type = 0; - swig_type_info *ret; - swig_cast_info *cast; - -#ifdef SWIGRUNTIME_DEBUG - printf("SWIG_InitializeModule: type %d %s\n", i, swig_module.type_initial[i]->name); -#endif - - /* if there is another module already loaded */ - if (swig_module.next != &swig_module) { - type = SWIG_MangledTypeQueryModule(swig_module.next, &swig_module, swig_module.type_initial[i]->name); - } - if (type) { - /* Overwrite clientdata field */ -#ifdef SWIGRUNTIME_DEBUG - printf("SWIG_InitializeModule: found type %s\n", type->name); -#endif - if (swig_module.type_initial[i]->clientdata) { - type->clientdata = swig_module.type_initial[i]->clientdata; -#ifdef SWIGRUNTIME_DEBUG - printf("SWIG_InitializeModule: found and overwrite type %s \n", type->name); -#endif - } - } else { - type = swig_module.type_initial[i]; - } - - /* Insert casting types */ - cast = swig_module.cast_initial[i]; - while (cast->type) { - /* Don't need to add information already in the list */ - ret = 0; -#ifdef SWIGRUNTIME_DEBUG - printf("SWIG_InitializeModule: look cast %s\n", cast->type->name); -#endif - if (swig_module.next != &swig_module) { - ret = SWIG_MangledTypeQueryModule(swig_module.next, &swig_module, cast->type->name); -#ifdef SWIGRUNTIME_DEBUG - if (ret) printf("SWIG_InitializeModule: found cast %s\n", ret->name); -#endif - } - if (ret) { - if (type == swig_module.type_initial[i]) { -#ifdef SWIGRUNTIME_DEBUG - printf("SWIG_InitializeModule: skip old type %s\n", ret->name); -#endif - cast->type = ret; - ret = 0; - } else { - /* Check for casting already in the list */ - swig_cast_info *ocast = SWIG_TypeCheck(ret->name, type); -#ifdef SWIGRUNTIME_DEBUG - if (ocast) printf("SWIG_InitializeModule: skip old cast %s\n", 
ret->name); -#endif - if (!ocast) ret = 0; - } - } - - if (!ret) { -#ifdef SWIGRUNTIME_DEBUG - printf("SWIG_InitializeModule: adding cast %s\n", cast->type->name); -#endif - if (type->cast) { - type->cast->prev = cast; - cast->next = type->cast; - } - type->cast = cast; - } - cast++; - } - /* Set entry in modules->types array equal to the type */ - swig_module.types[i] = type; - } - swig_module.types[i] = 0; - -#ifdef SWIGRUNTIME_DEBUG - printf("**** SWIG_InitializeModule: Cast List ******\n"); - for (i = 0; i < swig_module.size; ++i) { - int j = 0; - swig_cast_info *cast = swig_module.cast_initial[i]; - printf("SWIG_InitializeModule: type %d %s\n", i, swig_module.type_initial[i]->name); - while (cast->type) { - printf("SWIG_InitializeModule: cast type %s\n", cast->type->name); - cast++; - ++j; - } - printf("---- Total casts: %d\n",j); - } - printf("**** SWIG_InitializeModule: Cast List ******\n"); -#endif -} - -/* This function will propagate the clientdata field of type to -* any new swig_type_info structures that have been added into the list -* of equivalent types. It is like calling -* SWIG_TypeClientData(type, clientdata) a second time. -*/ -SWIGRUNTIME void -SWIG_PropagateClientData(void) { - size_t i; - swig_cast_info *equiv; - static int init_run = 0; - - if (init_run) return; - init_run = 1; - - for (i = 0; i < swig_module.size; i++) { - if (swig_module.types[i]->clientdata) { - equiv = swig_module.types[i]->cast; - while (equiv) { - if (!equiv->converter) { - if (equiv->type && !equiv->type->clientdata) - SWIG_TypeClientData(equiv->type, swig_module.types[i]->clientdata); - } - equiv = equiv->next; - } - } - } -} - -#ifdef __cplusplus -#if 0 -{ - /* c-mode */ -#endif -} -#endif - - - -#ifdef __cplusplus -extern "C" { -#endif - - /* Python-specific SWIG API */ -#define SWIG_newvarlink() SWIG_Python_newvarlink() -#define SWIG_addvarlink(p, name, get_attr, set_attr) SWIG_Python_addvarlink(p, name, get_attr, set_attr) -#define SWIG_InstallConstants(d, constants) SWIG_Python_InstallConstants(d, constants) - - /* ----------------------------------------------------------------------------- - * global variable support code. 
- * ----------------------------------------------------------------------------- */ - - typedef struct swig_globalvar { - char *name; /* Name of global variable */ - PyObject *(*get_attr)(void); /* Return the current value */ - int (*set_attr)(PyObject *); /* Set the value */ - struct swig_globalvar *next; - } swig_globalvar; - - typedef struct swig_varlinkobject { - PyObject_HEAD - swig_globalvar *vars; - } swig_varlinkobject; - - SWIGINTERN PyObject * - swig_varlink_repr(swig_varlinkobject *SWIGUNUSEDPARM(v)) { -#if PY_VERSION_HEX >= 0x03000000 - return PyUnicode_InternFromString(""); -#else - return PyString_FromString(""); -#endif - } - - SWIGINTERN PyObject * - swig_varlink_str(swig_varlinkobject *v) { -#if PY_VERSION_HEX >= 0x03000000 - PyObject *str = PyUnicode_InternFromString("("); - PyObject *tail; - PyObject *joined; - swig_globalvar *var; - for (var = v->vars; var; var=var->next) { - tail = PyUnicode_FromString(var->name); - joined = PyUnicode_Concat(str, tail); - Py_DecRef(str); - Py_DecRef(tail); - str = joined; - if (var->next) { - tail = PyUnicode_InternFromString(", "); - joined = PyUnicode_Concat(str, tail); - Py_DecRef(str); - Py_DecRef(tail); - str = joined; - } - } - tail = PyUnicode_InternFromString(")"); - joined = PyUnicode_Concat(str, tail); - Py_DecRef(str); - Py_DecRef(tail); - str = joined; -#else - PyObject *str = PyString_FromString("("); - swig_globalvar *var; - for (var = v->vars; var; var=var->next) { - PyString_ConcatAndDel(&str,PyString_FromString(var->name)); - if (var->next) PyString_ConcatAndDel(&str,PyString_FromString(", ")); - } - PyString_ConcatAndDel(&str,PyString_FromString(")")); -#endif - return str; - } - - SWIGINTERN int - swig_varlink_print(swig_varlinkobject *v, FILE *fp, int SWIGUNUSEDPARM(flags)) { - char *tmp; - PyObject *str = swig_varlink_str(v); - fprintf(fp,"Swig global variables "); - fprintf(fp,"%s\n", tmp = SWIG_Python_str_AsChar(str)); - SWIG_Python_str_DelForPy3(tmp); - Py_DECREF(str); - return 0; - } - - SWIGINTERN void - swig_varlink_dealloc(swig_varlinkobject *v) { - swig_globalvar *var = v->vars; - while (var) { - swig_globalvar *n = var->next; - free(var->name); - free(var); - var = n; - } - } - - SWIGINTERN PyObject * - swig_varlink_getattr(swig_varlinkobject *v, char *n) { - PyObject *res = NULL; - swig_globalvar *var = v->vars; - while (var) { - if (strcmp(var->name,n) == 0) { - res = (*var->get_attr)(); - break; - } - var = var->next; - } - if (res == NULL && !PyErr_Occurred()) { - PyErr_Format(PyExc_AttributeError, "Unknown C global variable '%s'", n); - } - return res; - } - - SWIGINTERN int - swig_varlink_setattr(swig_varlinkobject *v, char *n, PyObject *p) { - int res = 1; - swig_globalvar *var = v->vars; - while (var) { - if (strcmp(var->name,n) == 0) { - res = (*var->set_attr)(p); - break; - } - var = var->next; - } - if (res == 1 && !PyErr_Occurred()) { - PyErr_Format(PyExc_AttributeError, "Unknown C global variable '%s'", n); - } - return res; - } - - SWIGINTERN PyTypeObject* - swig_varlink_type(void) { - static char varlink__doc__[] = "Swig var link object"; - static PyTypeObject varlink_type; - static int type_init = 0; - if (!type_init) { - const PyTypeObject tmp = { -#if PY_VERSION_HEX >= 0x03000000 - PyVarObject_HEAD_INIT(NULL, 0) -#else - PyObject_HEAD_INIT(NULL) - 0, /* ob_size */ -#endif - (char *)"swigvarlink", /* tp_name */ - sizeof(swig_varlinkobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor) swig_varlink_dealloc, /* tp_dealloc */ - (printfunc) swig_varlink_print, /* tp_print */ - 
(getattrfunc) swig_varlink_getattr, /* tp_getattr */ - (setattrfunc) swig_varlink_setattr, /* tp_setattr */ - 0, /* tp_compare */ - (reprfunc) swig_varlink_repr, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - (reprfunc) swig_varlink_str, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - 0, /* tp_flags */ - varlink__doc__, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ -#if PY_VERSION_HEX >= 0x02020000 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* tp_iter -> tp_weaklist */ -#endif -#if PY_VERSION_HEX >= 0x02030000 - 0, /* tp_del */ -#endif -#if PY_VERSION_HEX >= 0x02060000 - 0, /* tp_version_tag */ -#endif -#if PY_VERSION_HEX >= 0x03040000 - 0, /* tp_finalize */ -#endif -#ifdef COUNT_ALLOCS - 0, /* tp_allocs */ - 0, /* tp_frees */ - 0, /* tp_maxalloc */ -#if PY_VERSION_HEX >= 0x02050000 - 0, /* tp_prev */ -#endif - 0 /* tp_next */ -#endif - }; - varlink_type = tmp; - type_init = 1; -#if PY_VERSION_HEX < 0x02020000 - varlink_type.ob_type = &PyType_Type; -#else - if (PyType_Ready(&varlink_type) < 0) - return NULL; -#endif - } - return &varlink_type; - } - - /* Create a variable linking object for use later */ - SWIGINTERN PyObject * - SWIG_Python_newvarlink(void) { - swig_varlinkobject *result = PyObject_NEW(swig_varlinkobject, swig_varlink_type()); - if (result) { - result->vars = 0; - } - return ((PyObject*) result); - } - - SWIGINTERN void - SWIG_Python_addvarlink(PyObject *p, char *name, PyObject *(*get_attr)(void), int (*set_attr)(PyObject *p)) { - swig_varlinkobject *v = (swig_varlinkobject *) p; - swig_globalvar *gv = (swig_globalvar *) malloc(sizeof(swig_globalvar)); - if (gv) { - size_t size = strlen(name)+1; - gv->name = (char *)malloc(size); - if (gv->name) { - strncpy(gv->name,name,size); - gv->get_attr = get_attr; - gv->set_attr = set_attr; - gv->next = v->vars; - } - } - v->vars = gv; - } - - SWIGINTERN PyObject * - SWIG_globals(void) { - static PyObject *_SWIG_globals = 0; - if (!_SWIG_globals) _SWIG_globals = SWIG_newvarlink(); - return _SWIG_globals; - } - - /* ----------------------------------------------------------------------------- - * constants/methods manipulation - * ----------------------------------------------------------------------------- */ - - /* Install Constants */ - SWIGINTERN void - SWIG_Python_InstallConstants(PyObject *d, swig_const_info constants[]) { - PyObject *obj = 0; - size_t i; - for (i = 0; constants[i].type; ++i) { - switch(constants[i].type) { - case SWIG_PY_POINTER: - obj = SWIG_InternalNewPointerObj(constants[i].pvalue, *(constants[i]).ptype,0); - break; - case SWIG_PY_BINARY: - obj = SWIG_NewPackedObj(constants[i].pvalue, constants[i].lvalue, *(constants[i].ptype)); - break; - default: - obj = 0; - break; - } - if (obj) { - PyDict_SetItemString(d, constants[i].name, obj); - Py_DECREF(obj); - } - } - } - - /* -----------------------------------------------------------------------------*/ - /* Fix SwigMethods to carry the callback ptrs when needed */ - /* -----------------------------------------------------------------------------*/ - - SWIGINTERN void - SWIG_Python_FixMethods(PyMethodDef *methods, - swig_const_info *const_table, - swig_type_info **types, - swig_type_info **types_initial) { - size_t i; - for (i = 0; methods[i].ml_name; ++i) { - const char *c = methods[i].ml_doc; - if (!c) continue; - c = strstr(c, "swig_ptr: "); - if (c) { - int j; - 
swig_const_info *ci = 0; - const char *name = c + 10; - for (j = 0; const_table[j].type; ++j) { - if (strncmp(const_table[j].name, name, - strlen(const_table[j].name)) == 0) { - ci = &(const_table[j]); - break; - } - } - if (ci) { - void *ptr = (ci->type == SWIG_PY_POINTER) ? ci->pvalue : 0; - if (ptr) { - size_t shift = (ci->ptype) - types; - swig_type_info *ty = types_initial[shift]; - size_t ldoc = (c - methods[i].ml_doc); - size_t lptr = strlen(ty->name)+2*sizeof(void*)+2; - char *ndoc = (char*)malloc(ldoc + lptr + 10); - if (ndoc) { - char *buff = ndoc; - memcpy(buff, methods[i].ml_doc, ldoc); - buff += ldoc; - memcpy(buff, "swig_ptr: ", 10); - buff += 10; - SWIG_PackVoidPtr(buff, ptr, ty->name, lptr); - methods[i].ml_doc = ndoc; - } - } - } - } - } - } - -#ifdef __cplusplus -} -#endif - -/* -----------------------------------------------------------------------------* - * Partial Init method - * -----------------------------------------------------------------------------*/ - -#ifdef __cplusplus -extern "C" -#endif - -SWIGEXPORT -#if PY_VERSION_HEX >= 0x03000000 -PyObject* -#else -void -#endif -SWIG_init(void) { - PyObject *m, *d, *md; -#if PY_VERSION_HEX >= 0x03000000 - static struct PyModuleDef SWIG_module = { -# if PY_VERSION_HEX >= 0x03020000 - PyModuleDef_HEAD_INIT, -# else - { - PyObject_HEAD_INIT(NULL) - NULL, /* m_init */ - 0, /* m_index */ - NULL, /* m_copy */ - }, -# endif - (char *) SWIG_name, - NULL, - -1, - SwigMethods, - NULL, - NULL, - NULL, - NULL - }; -#endif - -#if defined(SWIGPYTHON_BUILTIN) - static SwigPyClientData SwigPyObject_clientdata = { - 0, 0, 0, 0, 0, 0, 0 - }; - static PyGetSetDef this_getset_def = { - (char *)"this", &SwigPyBuiltin_ThisClosure, NULL, NULL, NULL - }; - static SwigPyGetSet thisown_getset_closure = { - (PyCFunction) SwigPyObject_own, - (PyCFunction) SwigPyObject_own - }; - static PyGetSetDef thisown_getset_def = { - (char *)"thisown", SwigPyBuiltin_GetterClosure, SwigPyBuiltin_SetterClosure, NULL, &thisown_getset_closure - }; - PyTypeObject *builtin_pytype; - int builtin_base_count; - swig_type_info *builtin_basetype; - PyObject *tuple; - PyGetSetDescrObject *static_getset; - PyTypeObject *metatype; - PyTypeObject *swigpyobject; - SwigPyClientData *cd; - PyObject *public_interface, *public_symbol; - PyObject *this_descr; - PyObject *thisown_descr; - PyObject *self = 0; - int i; - - (void)builtin_pytype; - (void)builtin_base_count; - (void)builtin_basetype; - (void)tuple; - (void)static_getset; - (void)self; - - /* Metaclass is used to implement static member variables */ - metatype = SwigPyObjectType(); - assert(metatype); -#endif - - /* Fix SwigMethods to carry the callback ptrs when needed */ - SWIG_Python_FixMethods(SwigMethods, swig_const_table, swig_types, swig_type_initial); - -#if PY_VERSION_HEX >= 0x03000000 - m = PyModule_Create(&SWIG_module); -#else - m = Py_InitModule((char *) SWIG_name, SwigMethods); -#endif - - md = d = PyModule_GetDict(m); - (void)md; - - SWIG_InitializeModule(0); - -#ifdef SWIGPYTHON_BUILTIN - swigpyobject = SwigPyObject_TypeOnce(); - - SwigPyObject_stype = SWIG_MangledTypeQuery("_p_SwigPyObject"); - assert(SwigPyObject_stype); - cd = (SwigPyClientData*) SwigPyObject_stype->clientdata; - if (!cd) { - SwigPyObject_stype->clientdata = &SwigPyObject_clientdata; - SwigPyObject_clientdata.pytype = swigpyobject; - } else if (swigpyobject->tp_basicsize != cd->pytype->tp_basicsize) { - PyErr_SetString(PyExc_RuntimeError, "Import error: attempted to load two incompatible swig-generated modules."); -# if 
PY_VERSION_HEX >= 0x03000000 - return NULL; -# else - return; -# endif - } - - /* All objects have a 'this' attribute */ - this_descr = PyDescr_NewGetSet(SwigPyObject_type(), &this_getset_def); - (void)this_descr; - - /* All objects have a 'thisown' attribute */ - thisown_descr = PyDescr_NewGetSet(SwigPyObject_type(), &thisown_getset_def); - (void)thisown_descr; - - public_interface = PyList_New(0); - public_symbol = 0; - (void)public_symbol; - - PyDict_SetItemString(md, "__all__", public_interface); - Py_DECREF(public_interface); - for (i = 0; SwigMethods[i].ml_name != NULL; ++i) - SwigPyBuiltin_AddPublicSymbol(public_interface, SwigMethods[i].ml_name); - for (i = 0; swig_const_table[i].name != 0; ++i) - SwigPyBuiltin_AddPublicSymbol(public_interface, swig_const_table[i].name); -#endif - - SWIG_InstallConstants(d,swig_const_table); - - PyDict_SetItemString(md,(char *)"cvar", SWIG_globals()); - SWIG_addvarlink(SWIG_globals(),(char *)"OOV_SCORE",Swig_var_OOV_SCORE_get, Swig_var_OOV_SCORE_set); - SWIG_addvarlink(SWIG_globals(),(char *)"START_TOKEN",Swig_var_START_TOKEN_get, Swig_var_START_TOKEN_set); - SWIG_addvarlink(SWIG_globals(),(char *)"UNK_TOKEN",Swig_var_UNK_TOKEN_get, Swig_var_UNK_TOKEN_set); - SWIG_addvarlink(SWIG_globals(),(char *)"END_TOKEN",Swig_var_END_TOKEN_get, Swig_var_END_TOKEN_set); -#if PY_VERSION_HEX >= 0x03000000 - return m; -#else - return; -#endif -} - diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/path_trie.cpp b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/path_trie.cpp deleted file mode 100644 index e68affacaed5614261ac64b19a6ab3c2b4089319..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/path_trie.cpp +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "path_trie.h" - -#include -#include -#include -#include -#include - -#include "decoder_utils.h" - -PathTrie::PathTrie() { - log_prob_b_prev = -NUM_FLT_INF; - log_prob_nb_prev = -NUM_FLT_INF; - log_prob_b_cur = -NUM_FLT_INF; - log_prob_nb_cur = -NUM_FLT_INF; - score = -NUM_FLT_INF; - - ROOT_ = -1; - character = ROOT_; - exists_ = true; - parent = nullptr; - - dictionary_ = nullptr; - dictionary_state_ = 0; - has_dictionary_ = false; - - matcher_ = nullptr; -} - -PathTrie::~PathTrie() { - for (auto child : children_) { - delete child.second; - } -} - -PathTrie* PathTrie::get_path_trie(int new_char, bool reset) { - auto child = children_.begin(); - for (child = children_.begin(); child != children_.end(); ++child) { - if (child->first == new_char) { - break; - } - } - if (child != children_.end()) { - if (!child->second->exists_) { - child->second->exists_ = true; - child->second->log_prob_b_prev = -NUM_FLT_INF; - child->second->log_prob_nb_prev = -NUM_FLT_INF; - child->second->log_prob_b_cur = -NUM_FLT_INF; - child->second->log_prob_nb_cur = -NUM_FLT_INF; - } - return (child->second); - } else { - if (has_dictionary_) { - matcher_->SetState(dictionary_state_); - bool found = matcher_->Find(new_char + 1); - if (!found) { - // Adding this character causes word outside dictionary - auto FSTZERO = fst::TropicalWeight::Zero(); - auto final_weight = dictionary_->Final(dictionary_state_); - bool is_final = (final_weight != FSTZERO); - if (is_final && reset) { - dictionary_state_ = dictionary_->Start(); - } - return nullptr; - } else { - PathTrie* new_path = new PathTrie; - new_path->character = new_char; - new_path->parent = this; - new_path->dictionary_ = dictionary_; - new_path->dictionary_state_ = matcher_->Value().nextstate; - new_path->has_dictionary_ = true; - new_path->matcher_ = matcher_; - children_.push_back(std::make_pair(new_char, new_path)); - return new_path; - } - } else { - PathTrie* new_path = new PathTrie; - new_path->character = new_char; - new_path->parent = this; - children_.push_back(std::make_pair(new_char, new_path)); - return new_path; - } - } -} - -PathTrie* PathTrie::get_path_vec(std::vector& output) { - return get_path_vec(output, ROOT_); -} - -PathTrie* PathTrie::get_path_vec(std::vector& output, int stop, - size_t max_steps) { - if (character == stop || character == ROOT_ || output.size() == max_steps) { - std::reverse(output.begin(), output.end()); - return this; - } else { - output.push_back(character); - return parent->get_path_vec(output, stop, max_steps); - } -} - -void PathTrie::iterate_to_vec_only(std::vector& output) { - if (exists_) { - output.push_back(this); - } - for (auto child : children_) { - child.second->iterate_to_vec_only(output); - } -} - -void PathTrie::iterate_to_vec(std::vector& output) { - if (exists_) { - log_prob_b_prev = log_prob_b_cur; - log_prob_nb_prev = log_prob_nb_cur; - - log_prob_b_cur = -NUM_FLT_INF; - log_prob_nb_cur = -NUM_FLT_INF; - - score = log_sum_exp(log_prob_b_prev, log_prob_nb_prev); - output.push_back(this); - } - for (auto child : children_) { - child.second->iterate_to_vec(output); - } -} - -void PathTrie::remove() { - exists_ = false; - - if (children_.size() == 0) { - auto child = parent->children_.begin(); - for (child = parent->children_.begin(); child != parent->children_.end(); - ++child) { - if (child->first == character) { - parent->children_.erase(child); - break; - } - } - - if (parent->children_.size() == 0 && !parent->exists_) { - parent->remove(); - } - - delete this; - } -} - -void 
PathTrie::set_dictionary(fst::StdVectorFst* dictionary) { - dictionary_ = dictionary; - dictionary_state_ = dictionary->Start(); - has_dictionary_ = true; -} - -using FSTMATCH = fst::SortedMatcher; -void PathTrie::set_matcher(std::shared_ptr matcher) { - matcher_ = matcher; -} diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/path_trie.h b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/path_trie.h deleted file mode 100644 index b551ed8b8af77d3479d9bc85f51c6e8c2749c306..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/path_trie.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef PATH_TRIE_H -#define PATH_TRIE_H - -#include -#include -#include -#include -#include - -#include "fst/fstlib.h" - -/* Trie tree for prefix storing and manipulating, with a dictionary in - * finite-state transducer for spelling correction. - */ -class PathTrie { - public: - PathTrie(); - ~PathTrie(); - - // get new prefix after appending new char - PathTrie* get_path_trie(int new_char, bool reset = true); - - // get the prefix in index from root to current node - PathTrie* get_path_vec(std::vector& output); - - // get the prefix in index from some stop node to current nodel - PathTrie* get_path_vec(std::vector& output, int stop, - size_t max_steps = std::numeric_limits::max()); - - // update log probs - void iterate_to_vec(std::vector& output); - - void iterate_to_vec_only(std::vector& output); - - // set dictionary for FST - void set_dictionary(fst::StdVectorFst* dictionary); - - void set_matcher(std::shared_ptr>); - - bool is_empty() { return ROOT_ == character; } - - // remove current path from root - void remove(); - - float log_prob_b_prev; - float log_prob_nb_prev; - float log_prob_b_cur; - float log_prob_nb_cur; - float score; - float approx_ctc; - int character; - PathTrie* parent; - - private: - int ROOT_; - bool exists_; - bool has_dictionary_; - - std::vector> children_; - - // pointer to dictionary of FST - fst::StdVectorFst* dictionary_; - fst::StdVectorFst::StateId dictionary_state_; - // true if finding ars in FST - std::shared_ptr> matcher_; -}; - -#endif // PATH_TRIE_H diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/scorer.cpp b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/scorer.cpp deleted file mode 100644 index 953761a9aec5e0a60a85233f024a2dbcafc3bbce..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/scorer.cpp +++ /dev/null @@ -1,242 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "scorer.h" - -#include -#include - -#include "lm/config.hh" -#include "lm/model.hh" -#include "lm/state.hh" -#include "util/string_piece.hh" -#include "util/tokenize_piece.hh" - -#include "decoder_utils.h" - -using namespace lm::ngram; - -Scorer::Scorer(double alpha, double beta, const std::string& lm_path, - const std::vector& vocab_list) { - this->alpha = alpha; - this->beta = beta; - - dictionary = nullptr; - is_character_based_ = true; - language_model_ = nullptr; - - max_order_ = 0; - dict_size_ = 0; - SPACE_ID_ = -1; - - setup(lm_path, vocab_list); -} - -Scorer::~Scorer() { - if (language_model_ != nullptr) { - delete static_cast(language_model_); - } - if (dictionary != nullptr) { - delete static_cast(dictionary); - } -} - -void Scorer::setup(const std::string& lm_path, - const std::vector& vocab_list) { - // load language model - load_lm(lm_path); - // set char map for scorer - set_char_map(vocab_list); - // fill the dictionary for FST - if (!is_character_based()) { - fill_dictionary(true); - } -} - -void Scorer::load_lm(const std::string& lm_path) { - const char* filename = lm_path.c_str(); - VALID_CHECK_EQ(access(filename, F_OK), 0, "Invalid language model path"); - - RetriveStrEnumerateVocab enumerate; - lm::ngram::Config config; - config.enumerate_vocab = &enumerate; - language_model_ = lm::ngram::LoadVirtual(filename, config); - max_order_ = static_cast(language_model_)->Order(); - vocabulary_ = enumerate.vocabulary; - for (size_t i = 0; i < vocabulary_.size(); ++i) { - if (is_character_based_ && vocabulary_[i] != UNK_TOKEN && - vocabulary_[i] != START_TOKEN && vocabulary_[i] != END_TOKEN && - get_utf8_str_len(enumerate.vocabulary[i]) > 1) { - is_character_based_ = false; - } - } -} - -double Scorer::get_log_cond_prob(const std::vector& words) { - lm::base::Model* model = static_cast(language_model_); - double cond_prob; - lm::ngram::State state, tmp_state, out_state; - // avoid to inserting in begin - model->NullContextWrite(&state); - for (size_t i = 0; i < words.size(); ++i) { - lm::WordIndex word_index = model->BaseVocabulary().Index(words[i]); - // encounter OOV - if (word_index == 0) { - return OOV_SCORE; - } - cond_prob = model->BaseScore(&state, word_index, &out_state); - tmp_state = state; - state = out_state; - out_state = tmp_state; - } - // return log10 prob - return cond_prob; -} - -double Scorer::get_sent_log_prob(const std::vector& words) { - std::vector sentence; - if (words.size() == 0) { - for (size_t i = 0; i < max_order_; ++i) { - sentence.push_back(START_TOKEN); - } - } else { - for (size_t i = 0; i < max_order_ - 1; ++i) { - sentence.push_back(START_TOKEN); - } - sentence.insert(sentence.end(), words.begin(), words.end()); - } - sentence.push_back(END_TOKEN); - return get_log_prob(sentence); -} - -double Scorer::get_log_prob(const std::vector& words) { - assert(words.size() > max_order_); - double score = 0.0; - for (size_t i = 0; i < words.size() - max_order_ + 1; ++i) { - std::vector ngram(words.begin() + i, - words.begin() + i + max_order_); - score += get_log_cond_prob(ngram); - } - return score; -} - -void 
Scorer::reset_params(float alpha, float beta) { - this->alpha = alpha; - this->beta = beta; -} - -std::string Scorer::vec2str(const std::vector& input) { - std::string word; - for (auto ind : input) { - word += char_list_[ind]; - } - return word; -} - -std::vector Scorer::split_labels(const std::vector& labels) { - if (labels.empty()) return {}; - - std::string s = vec2str(labels); - std::vector words; - if (is_character_based_) { - words = split_utf8_str(s); - } else { - words = split_str(s, " "); - } - return words; -} - -void Scorer::set_char_map(const std::vector& char_list) { - char_list_ = char_list; - char_map_.clear(); - - // Set the char map for the FST for spelling correction - for (size_t i = 0; i < char_list_.size(); i++) { - if (char_list_[i] == " ") { - SPACE_ID_ = i; - } - // The initial state of FST is state 0, hence the index of chars in - // the FST should start from 1 to avoid the conflict with the initial - // state, otherwise wrong decoding results would be given. - char_map_[char_list_[i]] = i + 1; - } -} - -std::vector Scorer::make_ngram(PathTrie* prefix) { - std::vector ngram; - PathTrie* current_node = prefix; - PathTrie* new_node = nullptr; - - for (int order = 0; order < max_order_; order++) { - std::vector prefix_vec; - - if (is_character_based_) { - new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_, 1); - current_node = new_node; - } else { - new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_); - current_node = new_node->parent; // Skipping spaces - } - - // reconstruct word - std::string word = vec2str(prefix_vec); - ngram.push_back(word); - - if (new_node->character == -1) { - // No more spaces, but still need order - for (int i = 0; i < max_order_ - order - 1; i++) { - ngram.push_back(START_TOKEN); - } - break; - } - } - std::reverse(ngram.begin(), ngram.end()); - return ngram; -} - -void Scorer::fill_dictionary(bool add_space) { - fst::StdVectorFst dictionary; - // For each unigram convert to ints and put in trie - int dict_size = 0; - for (const auto& word : vocabulary_) { - bool added = add_word_to_dictionary(word, char_map_, add_space, - SPACE_ID_ + 1, &dictionary); - dict_size += added ? 1 : 0; - } - - dict_size_ = dict_size; - - /* Simplify FST - - * This gets rid of "epsilon" transitions in the FST. - * These are transitions that don't require a string input to be taken. - * Getting rid of them is necessary to make the FST determinisitc, but - * can greatly increase the size of the FST - */ - fst::RmEpsilon(&dictionary); - fst::StdVectorFst* new_dict = new fst::StdVectorFst; - - /* This makes the FST deterministic, meaning for any string input there's - * only one possible state the FST could be in. It is assumed our - * dictionary is deterministic when using it. - * (lest we'd have to check for multiple transitions at each state) - */ - fst::Determinize(dictionary, new_dict); - - /* Finds the simplest equivalent fst. This is unnecessary but decreases - * memory usage of the dictionary - */ - fst::Minimize(new_dict); - this->dictionary = new_dict; -} diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/scorer.h b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/scorer.h deleted file mode 100644 index e31e05b0a1dad4aad1093a7ef607f45d5505f889..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/scorer.h +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef SCORER_H_ -#define SCORER_H_ - -#include -#include -#include -#include - -#include "lm/enumerate_vocab.hh" -#include "lm/virtual_interface.hh" -#include "lm/word_index.hh" -#include "util/string_piece.hh" - -#include "path_trie.h" - -const double OOV_SCORE = -1000.0; -const std::string START_TOKEN = ""; -const std::string UNK_TOKEN = ""; -const std::string END_TOKEN = ""; - -// Implement a callback to retrive the dictionary of language model. -class RetriveStrEnumerateVocab : public lm::EnumerateVocab { - public: - RetriveStrEnumerateVocab() {} - - void Add(lm::WordIndex index, const StringPiece &str) { - vocabulary.push_back(std::string(str.data(), str.length())); - } - - std::vector vocabulary; -}; - -/* External scorer to query score for n-gram or sentence, including language - * model scoring and word insertion. - * - * Example: - * Scorer scorer(alpha, beta, "path_of_language_model"); - * scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" }); - * scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" }); - */ -class Scorer { - public: - Scorer(double alpha, double beta, const std::string &lm_path, - const std::vector &vocabulary); - ~Scorer(); - - double get_log_cond_prob(const std::vector &words); - - double get_sent_log_prob(const std::vector &words); - - // return the max order - size_t get_max_order() const { return max_order_; } - - // return the dictionary size of language model - size_t get_dict_size() const { return dict_size_; } - - // retrun true if the language model is character based - bool is_character_based() const { return is_character_based_; } - - // reset params alpha & beta - void reset_params(float alpha, float beta); - - // make ngram for a given prefix - std::vector make_ngram(PathTrie *prefix); - - // trransform the labels in index to the vector of words (word based lm) or - // the vector of characters (character based lm) - std::vector split_labels(const std::vector &labels); - - // language model weight - double alpha; - // word insertion weight - double beta; - - // pointer to the dictionary of FST - void *dictionary; - - protected: - // necessary setup: load language model, set char map, fill FST's dictionary - void setup(const std::string &lm_path, - const std::vector &vocab_list); - - // load language model from given path - void load_lm(const std::string &lm_path); - - // fill dictionary for FST - void fill_dictionary(bool add_space); - - // set char map - void set_char_map(const std::vector &char_list); - - double get_log_prob(const std::vector &words); - - // translate the vector in index to string - std::string vec2str(const std::vector &input); - - private: - void *language_model_; - bool is_character_based_; - size_t max_order_; - size_t dict_size_; - - int SPACE_ID_; - std::vector char_list_; - std::unordered_map char_map_; - - std::vector vocabulary_; -}; - -#endif // SCORER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/setup.py 
b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/setup.py deleted file mode 100644 index 77d85eed7de728ccdd85b40fe70801aa2e3eb2fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/setup.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Script to build and install decoder package.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from setuptools import setup, Extension, distutils -from distutils import ccompiler -import glob -import platform -import os, sys -import multiprocessing.pool -import argparse - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - "--num_processes", - default=1, - type=int, - help="Number of cpu processes to build package. (default: %(default)d)") -args = parser.parse_known_args() - -# reconstruct sys.argv to pass to setup below -sys.argv = [sys.argv[0]] + args[1] - - -# monkey-patch for parallel compilation -# See: https://stackoverflow.com/a/13176803 -def parallelCCompile(self, - sources, - output_dir=None, - macros=None, - include_dirs=None, - debug=0, - extra_preargs=None, - extra_postargs=None, - depends=None): - # those lines are copied from distutils.ccompiler.CCompiler directly - macros, objects, extra_postargs, pp_opts, build = self._setup_compile( - output_dir, macros, include_dirs, sources, depends, extra_postargs) - cc_args = self._get_cc_args(pp_opts, debug, extra_preargs) - - # parallel code - def _single_compile(obj): - try: - src, ext = build[obj] - except KeyError: - return - self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts) - - # convert to list, imap is evaluated on-demand - thread_pool = multiprocessing.pool.ThreadPool(args[0].num_processes) - list(thread_pool.imap(_single_compile, objects)) - return objects - - -def compile_test(header, library): - dummy_path = os.path.join(os.path.dirname(__file__), "dummy") - command = "bash -c \"g++ -include " + header \ - + " -l" + library + " -x c++ - <<<'int main() {}' -o " \ - + dummy_path + " >/dev/null 2>/dev/null && rm " \ - + dummy_path + " 2>/dev/null\"" - return os.system(command) == 0 - - -# hack compile to support parallel compiling -distutils.ccompiler.CCompiler.compile = parallelCCompile - -FILES = glob.glob('kenlm/util/*.cc') \ - + glob.glob('kenlm/lm/*.cc') \ - + glob.glob('kenlm/util/double-conversion/*.cc') - -FILES += glob.glob('openfst-1.6.3/src/lib/*.cc') - -FILES = [ - fn for fn in FILES - if not (fn.endswith('main.cc') or fn.endswith('test.cc') or fn.endswith( - 'unittest.cc')) -] - -LIBS = ['stdc++'] -if platform.system() != 'Darwin': - LIBS.append('rt') - -ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11'] - -if compile_test('zlib.h', 'z'): - ARGS.append('-DHAVE_ZLIB') - LIBS.append('z') - -if compile_test('bzlib.h', 'bz2'): - ARGS.append('-DHAVE_BZLIB') - LIBS.append('bz2') - -if 
compile_test('lzma.h', 'lzma'): - ARGS.append('-DHAVE_XZLIB') - LIBS.append('lzma') - -os.system('swig -python -c++ ./decoders.i') - -decoders_module = [ - Extension( - name='_swig_decoders', - sources=FILES + glob.glob('*.cxx') + glob.glob('*.cpp'), - language='c++', - include_dirs=[ - '.', - 'kenlm', - 'openfst-1.6.3/src/include', - 'ThreadPool', - ], - libraries=LIBS, - extra_compile_args=ARGS) -] - -setup( - name='swig_decoders', - version='1.1', - description="""CTC decoders""", - ext_modules=decoders_module, - py_modules=['swig_decoders'], ) diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/setup.sh b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/setup.sh deleted file mode 100644 index 15774c6cef018c15977dfd4f95cf655d6e2b32ed..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/setup.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash - -if [ ! -d kenlm ]; then - git clone https://github.com/kpu/kenlm.git - echo -e "\n" -fi - -if [ ! -d openfst-1.6.3 ]; then - echo "Download and extract openfst ..." - wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz - tar -xzvf openfst-1.6.3.tar.gz - echo -e "\n" -fi - -if [ ! -d ThreadPool ]; then - git clone https://github.com/progschj/ThreadPool.git - echo -e "\n" -fi - -echo "Install decoders ..." -python3 setup.py install --num_processes 10 diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/swig_decoders.py b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/swig_decoders.py deleted file mode 100644 index e6a0eb4fcc9ee4744ed6028a2c9b0461f2782de3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/swig_decoders.py +++ /dev/null @@ -1,1859 +0,0 @@ -# This file was automatically generated by SWIG (http://www.swig.org). -# Version 3.0.12 -# -# Do not make changes to this file unless you know what you are doing--modify -# the SWIG interface file instead. - -from sys import version_info as _swig_python_version_info -if _swig_python_version_info >= (2, 7, 0): - def swig_import_helper(): - import importlib - pkg = __name__.rpartition('.')[0] - mname = '.'.join((pkg, '_swig_decoders')).lstrip('.') - try: - return importlib.import_module(mname) - except ImportError: - return importlib.import_module('_swig_decoders') - _swig_decoders = swig_import_helper() - del swig_import_helper -elif _swig_python_version_info >= (2, 6, 0): - def swig_import_helper(): - from os.path import dirname - import imp - fp = None - try: - fp, pathname, description = imp.find_module('_swig_decoders', [dirname(__file__)]) - except ImportError: - import _swig_decoders - return _swig_decoders - try: - _mod = imp.load_module('_swig_decoders', fp, pathname, description) - finally: - if fp is not None: - fp.close() - return _mod - _swig_decoders = swig_import_helper() - del swig_import_helper -else: - import _swig_decoders -del _swig_python_version_info - -try: - _swig_property = property -except NameError: - pass # Python < 2.2 doesn't have 'property'. 
- -try: - import builtins as __builtin__ -except ImportError: - import __builtin__ - -def _swig_setattr_nondynamic(self, class_type, name, value, static=1): - if (name == "thisown"): - return self.this.own(value) - if (name == "this"): - if type(value).__name__ == 'SwigPyObject': - self.__dict__[name] = value - return - method = class_type.__swig_setmethods__.get(name, None) - if method: - return method(self, value) - if (not static): - if _newclass: - object.__setattr__(self, name, value) - else: - self.__dict__[name] = value - else: - raise AttributeError("You cannot add attributes to %s" % self) - - -def _swig_setattr(self, class_type, name, value): - return _swig_setattr_nondynamic(self, class_type, name, value, 0) - - -def _swig_getattr(self, class_type, name): - if (name == "thisown"): - return self.this.own() - method = class_type.__swig_getmethods__.get(name, None) - if method: - return method(self) - raise AttributeError("'%s' object has no attribute '%s'" % (class_type.__name__, name)) - - -def _swig_repr(self): - try: - strthis = "proxy of " + self.this.__repr__() - except __builtin__.Exception: - strthis = "" - return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,) - -try: - _object = object - _newclass = 1 -except __builtin__.Exception: - class _object: - pass - _newclass = 0 - -class SwigPyIterator(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, SwigPyIterator, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, SwigPyIterator, name) - - def __init__(self, *args, **kwargs): - raise AttributeError("No constructor defined - class is abstract") - __repr__ = _swig_repr - __swig_destroy__ = _swig_decoders.delete_SwigPyIterator - __del__ = lambda self: None - - def value(self): - return _swig_decoders.SwigPyIterator_value(self) - - def incr(self, n=1): - return _swig_decoders.SwigPyIterator_incr(self, n) - - def decr(self, n=1): - return _swig_decoders.SwigPyIterator_decr(self, n) - - def distance(self, x): - return _swig_decoders.SwigPyIterator_distance(self, x) - - def equal(self, x): - return _swig_decoders.SwigPyIterator_equal(self, x) - - def copy(self): - return _swig_decoders.SwigPyIterator_copy(self) - - def next(self): - return _swig_decoders.SwigPyIterator_next(self) - - def __next__(self): - return _swig_decoders.SwigPyIterator___next__(self) - - def previous(self): - return _swig_decoders.SwigPyIterator_previous(self) - - def advance(self, n): - return _swig_decoders.SwigPyIterator_advance(self, n) - - def __eq__(self, x): - return _swig_decoders.SwigPyIterator___eq__(self, x) - - def __ne__(self, x): - return _swig_decoders.SwigPyIterator___ne__(self, x) - - def __iadd__(self, n): - return _swig_decoders.SwigPyIterator___iadd__(self, n) - - def __isub__(self, n): - return _swig_decoders.SwigPyIterator___isub__(self, n) - - def __add__(self, n): - return _swig_decoders.SwigPyIterator___add__(self, n) - - def __sub__(self, *args): - return _swig_decoders.SwigPyIterator___sub__(self, *args) - def __iter__(self): - return self -SwigPyIterator_swigregister = _swig_decoders.SwigPyIterator_swigregister -SwigPyIterator_swigregister(SwigPyIterator) - -class PathTrie(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, PathTrie, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, PathTrie, name) - __repr__ = _swig_repr - - def __init__(self): - this = 
_swig_decoders.new_PathTrie() - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - __swig_destroy__ = _swig_decoders.delete_PathTrie - __del__ = lambda self: None - - def get_path_trie(self, new_char, reset=True): - return _swig_decoders.PathTrie_get_path_trie(self, new_char, reset) - - def get_path_vec(self, *args): - return _swig_decoders.PathTrie_get_path_vec(self, *args) - - def iterate_to_vec(self, output): - return _swig_decoders.PathTrie_iterate_to_vec(self, output) - - def iterate_to_vec_only(self, output): - return _swig_decoders.PathTrie_iterate_to_vec_only(self, output) - - def set_dictionary(self, dictionary): - return _swig_decoders.PathTrie_set_dictionary(self, dictionary) - - def set_matcher(self, arg2): - return _swig_decoders.PathTrie_set_matcher(self, arg2) - - def is_empty(self): - return _swig_decoders.PathTrie_is_empty(self) - - def remove(self): - return _swig_decoders.PathTrie_remove(self) - __swig_setmethods__["log_prob_b_prev"] = _swig_decoders.PathTrie_log_prob_b_prev_set - __swig_getmethods__["log_prob_b_prev"] = _swig_decoders.PathTrie_log_prob_b_prev_get - if _newclass: - log_prob_b_prev = _swig_property(_swig_decoders.PathTrie_log_prob_b_prev_get, _swig_decoders.PathTrie_log_prob_b_prev_set) - __swig_setmethods__["log_prob_nb_prev"] = _swig_decoders.PathTrie_log_prob_nb_prev_set - __swig_getmethods__["log_prob_nb_prev"] = _swig_decoders.PathTrie_log_prob_nb_prev_get - if _newclass: - log_prob_nb_prev = _swig_property(_swig_decoders.PathTrie_log_prob_nb_prev_get, _swig_decoders.PathTrie_log_prob_nb_prev_set) - __swig_setmethods__["log_prob_b_cur"] = _swig_decoders.PathTrie_log_prob_b_cur_set - __swig_getmethods__["log_prob_b_cur"] = _swig_decoders.PathTrie_log_prob_b_cur_get - if _newclass: - log_prob_b_cur = _swig_property(_swig_decoders.PathTrie_log_prob_b_cur_get, _swig_decoders.PathTrie_log_prob_b_cur_set) - __swig_setmethods__["log_prob_nb_cur"] = _swig_decoders.PathTrie_log_prob_nb_cur_set - __swig_getmethods__["log_prob_nb_cur"] = _swig_decoders.PathTrie_log_prob_nb_cur_get - if _newclass: - log_prob_nb_cur = _swig_property(_swig_decoders.PathTrie_log_prob_nb_cur_get, _swig_decoders.PathTrie_log_prob_nb_cur_set) - __swig_setmethods__["score"] = _swig_decoders.PathTrie_score_set - __swig_getmethods__["score"] = _swig_decoders.PathTrie_score_get - if _newclass: - score = _swig_property(_swig_decoders.PathTrie_score_get, _swig_decoders.PathTrie_score_set) - __swig_setmethods__["approx_ctc"] = _swig_decoders.PathTrie_approx_ctc_set - __swig_getmethods__["approx_ctc"] = _swig_decoders.PathTrie_approx_ctc_get - if _newclass: - approx_ctc = _swig_property(_swig_decoders.PathTrie_approx_ctc_get, _swig_decoders.PathTrie_approx_ctc_set) - __swig_setmethods__["character"] = _swig_decoders.PathTrie_character_set - __swig_getmethods__["character"] = _swig_decoders.PathTrie_character_get - if _newclass: - character = _swig_property(_swig_decoders.PathTrie_character_get, _swig_decoders.PathTrie_character_set) - __swig_setmethods__["parent"] = _swig_decoders.PathTrie_parent_set - __swig_getmethods__["parent"] = _swig_decoders.PathTrie_parent_get - if _newclass: - parent = _swig_property(_swig_decoders.PathTrie_parent_get, _swig_decoders.PathTrie_parent_set) -PathTrie_swigregister = _swig_decoders.PathTrie_swigregister -PathTrie_swigregister(PathTrie) - -class DoubleVector(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, DoubleVector, name, value) - __swig_getmethods__ = {} - __getattr__ = 
lambda self, name: _swig_getattr(self, DoubleVector, name) - __repr__ = _swig_repr - - def iterator(self): - return _swig_decoders.DoubleVector_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.DoubleVector___nonzero__(self) - - def __bool__(self): - return _swig_decoders.DoubleVector___bool__(self) - - def __len__(self): - return _swig_decoders.DoubleVector___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.DoubleVector___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.DoubleVector___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.DoubleVector___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.DoubleVector___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.DoubleVector___getitem__(self, *args) - - def __setitem__(self, *args): - return _swig_decoders.DoubleVector___setitem__(self, *args) - - def pop(self): - return _swig_decoders.DoubleVector_pop(self) - - def append(self, x): - return _swig_decoders.DoubleVector_append(self, x) - - def empty(self): - return _swig_decoders.DoubleVector_empty(self) - - def size(self): - return _swig_decoders.DoubleVector_size(self) - - def swap(self, v): - return _swig_decoders.DoubleVector_swap(self, v) - - def begin(self): - return _swig_decoders.DoubleVector_begin(self) - - def end(self): - return _swig_decoders.DoubleVector_end(self) - - def rbegin(self): - return _swig_decoders.DoubleVector_rbegin(self) - - def rend(self): - return _swig_decoders.DoubleVector_rend(self) - - def clear(self): - return _swig_decoders.DoubleVector_clear(self) - - def get_allocator(self): - return _swig_decoders.DoubleVector_get_allocator(self) - - def pop_back(self): - return _swig_decoders.DoubleVector_pop_back(self) - - def erase(self, *args): - return _swig_decoders.DoubleVector_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_DoubleVector(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.DoubleVector_push_back(self, x) - - def front(self): - return _swig_decoders.DoubleVector_front(self) - - def back(self): - return _swig_decoders.DoubleVector_back(self) - - def assign(self, n, x): - return _swig_decoders.DoubleVector_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.DoubleVector_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.DoubleVector_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.DoubleVector_reserve(self, n) - - def capacity(self): - return _swig_decoders.DoubleVector_capacity(self) - __swig_destroy__ = _swig_decoders.delete_DoubleVector - __del__ = lambda self: None -DoubleVector_swigregister = _swig_decoders.DoubleVector_swigregister -DoubleVector_swigregister(DoubleVector) - -class IntVector(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, IntVector, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, IntVector, name) - __repr__ = _swig_repr - - def iterator(self): - return _swig_decoders.IntVector_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.IntVector___nonzero__(self) - - def __bool__(self): - return _swig_decoders.IntVector___bool__(self) - - def __len__(self): - return 
_swig_decoders.IntVector___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.IntVector___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.IntVector___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.IntVector___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.IntVector___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.IntVector___getitem__(self, *args) - - def __setitem__(self, *args): - return _swig_decoders.IntVector___setitem__(self, *args) - - def pop(self): - return _swig_decoders.IntVector_pop(self) - - def append(self, x): - return _swig_decoders.IntVector_append(self, x) - - def empty(self): - return _swig_decoders.IntVector_empty(self) - - def size(self): - return _swig_decoders.IntVector_size(self) - - def swap(self, v): - return _swig_decoders.IntVector_swap(self, v) - - def begin(self): - return _swig_decoders.IntVector_begin(self) - - def end(self): - return _swig_decoders.IntVector_end(self) - - def rbegin(self): - return _swig_decoders.IntVector_rbegin(self) - - def rend(self): - return _swig_decoders.IntVector_rend(self) - - def clear(self): - return _swig_decoders.IntVector_clear(self) - - def get_allocator(self): - return _swig_decoders.IntVector_get_allocator(self) - - def pop_back(self): - return _swig_decoders.IntVector_pop_back(self) - - def erase(self, *args): - return _swig_decoders.IntVector_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_IntVector(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.IntVector_push_back(self, x) - - def front(self): - return _swig_decoders.IntVector_front(self) - - def back(self): - return _swig_decoders.IntVector_back(self) - - def assign(self, n, x): - return _swig_decoders.IntVector_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.IntVector_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.IntVector_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.IntVector_reserve(self, n) - - def capacity(self): - return _swig_decoders.IntVector_capacity(self) - __swig_destroy__ = _swig_decoders.delete_IntVector - __del__ = lambda self: None -IntVector_swigregister = _swig_decoders.IntVector_swigregister -IntVector_swigregister(IntVector) - -class StringVector(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, StringVector, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, StringVector, name) - __repr__ = _swig_repr - - def iterator(self): - return _swig_decoders.StringVector_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.StringVector___nonzero__(self) - - def __bool__(self): - return _swig_decoders.StringVector___bool__(self) - - def __len__(self): - return _swig_decoders.StringVector___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.StringVector___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.StringVector___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.StringVector___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.StringVector___delitem__(self, *args) - - def __getitem__(self, *args): - return 
_swig_decoders.StringVector___getitem__(self, *args) - - def __setitem__(self, *args): - return _swig_decoders.StringVector___setitem__(self, *args) - - def pop(self): - return _swig_decoders.StringVector_pop(self) - - def append(self, x): - return _swig_decoders.StringVector_append(self, x) - - def empty(self): - return _swig_decoders.StringVector_empty(self) - - def size(self): - return _swig_decoders.StringVector_size(self) - - def swap(self, v): - return _swig_decoders.StringVector_swap(self, v) - - def begin(self): - return _swig_decoders.StringVector_begin(self) - - def end(self): - return _swig_decoders.StringVector_end(self) - - def rbegin(self): - return _swig_decoders.StringVector_rbegin(self) - - def rend(self): - return _swig_decoders.StringVector_rend(self) - - def clear(self): - return _swig_decoders.StringVector_clear(self) - - def get_allocator(self): - return _swig_decoders.StringVector_get_allocator(self) - - def pop_back(self): - return _swig_decoders.StringVector_pop_back(self) - - def erase(self, *args): - return _swig_decoders.StringVector_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_StringVector(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.StringVector_push_back(self, x) - - def front(self): - return _swig_decoders.StringVector_front(self) - - def back(self): - return _swig_decoders.StringVector_back(self) - - def assign(self, n, x): - return _swig_decoders.StringVector_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.StringVector_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.StringVector_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.StringVector_reserve(self, n) - - def capacity(self): - return _swig_decoders.StringVector_capacity(self) - __swig_destroy__ = _swig_decoders.delete_StringVector - __del__ = lambda self: None -StringVector_swigregister = _swig_decoders.StringVector_swigregister -StringVector_swigregister(StringVector) - -class VectorOfStructVectorDouble(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, VectorOfStructVectorDouble, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, VectorOfStructVectorDouble, name) - __repr__ = _swig_repr - - def iterator(self): - return _swig_decoders.VectorOfStructVectorDouble_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.VectorOfStructVectorDouble___nonzero__(self) - - def __bool__(self): - return _swig_decoders.VectorOfStructVectorDouble___bool__(self) - - def __len__(self): - return _swig_decoders.VectorOfStructVectorDouble___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.VectorOfStructVectorDouble___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.VectorOfStructVectorDouble___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.VectorOfStructVectorDouble___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.VectorOfStructVectorDouble___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.VectorOfStructVectorDouble___getitem__(self, *args) - - def __setitem__(self, *args): - return _swig_decoders.VectorOfStructVectorDouble___setitem__(self, *args) - - def pop(self): - return 
_swig_decoders.VectorOfStructVectorDouble_pop(self) - - def append(self, x): - return _swig_decoders.VectorOfStructVectorDouble_append(self, x) - - def empty(self): - return _swig_decoders.VectorOfStructVectorDouble_empty(self) - - def size(self): - return _swig_decoders.VectorOfStructVectorDouble_size(self) - - def swap(self, v): - return _swig_decoders.VectorOfStructVectorDouble_swap(self, v) - - def begin(self): - return _swig_decoders.VectorOfStructVectorDouble_begin(self) - - def end(self): - return _swig_decoders.VectorOfStructVectorDouble_end(self) - - def rbegin(self): - return _swig_decoders.VectorOfStructVectorDouble_rbegin(self) - - def rend(self): - return _swig_decoders.VectorOfStructVectorDouble_rend(self) - - def clear(self): - return _swig_decoders.VectorOfStructVectorDouble_clear(self) - - def get_allocator(self): - return _swig_decoders.VectorOfStructVectorDouble_get_allocator(self) - - def pop_back(self): - return _swig_decoders.VectorOfStructVectorDouble_pop_back(self) - - def erase(self, *args): - return _swig_decoders.VectorOfStructVectorDouble_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_VectorOfStructVectorDouble(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.VectorOfStructVectorDouble_push_back(self, x) - - def front(self): - return _swig_decoders.VectorOfStructVectorDouble_front(self) - - def back(self): - return _swig_decoders.VectorOfStructVectorDouble_back(self) - - def assign(self, n, x): - return _swig_decoders.VectorOfStructVectorDouble_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.VectorOfStructVectorDouble_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.VectorOfStructVectorDouble_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.VectorOfStructVectorDouble_reserve(self, n) - - def capacity(self): - return _swig_decoders.VectorOfStructVectorDouble_capacity(self) - __swig_destroy__ = _swig_decoders.delete_VectorOfStructVectorDouble - __del__ = lambda self: None -VectorOfStructVectorDouble_swigregister = _swig_decoders.VectorOfStructVectorDouble_swigregister -VectorOfStructVectorDouble_swigregister(VectorOfStructVectorDouble) - -class VectorOfStructVectorInt(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, VectorOfStructVectorInt, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, VectorOfStructVectorInt, name) - __repr__ = _swig_repr - - def iterator(self): - return _swig_decoders.VectorOfStructVectorInt_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.VectorOfStructVectorInt___nonzero__(self) - - def __bool__(self): - return _swig_decoders.VectorOfStructVectorInt___bool__(self) - - def __len__(self): - return _swig_decoders.VectorOfStructVectorInt___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.VectorOfStructVectorInt___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.VectorOfStructVectorInt___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.VectorOfStructVectorInt___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.VectorOfStructVectorInt___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.VectorOfStructVectorInt___getitem__(self, *args) - - def 
__setitem__(self, *args): - return _swig_decoders.VectorOfStructVectorInt___setitem__(self, *args) - - def pop(self): - return _swig_decoders.VectorOfStructVectorInt_pop(self) - - def append(self, x): - return _swig_decoders.VectorOfStructVectorInt_append(self, x) - - def empty(self): - return _swig_decoders.VectorOfStructVectorInt_empty(self) - - def size(self): - return _swig_decoders.VectorOfStructVectorInt_size(self) - - def swap(self, v): - return _swig_decoders.VectorOfStructVectorInt_swap(self, v) - - def begin(self): - return _swig_decoders.VectorOfStructVectorInt_begin(self) - - def end(self): - return _swig_decoders.VectorOfStructVectorInt_end(self) - - def rbegin(self): - return _swig_decoders.VectorOfStructVectorInt_rbegin(self) - - def rend(self): - return _swig_decoders.VectorOfStructVectorInt_rend(self) - - def clear(self): - return _swig_decoders.VectorOfStructVectorInt_clear(self) - - def get_allocator(self): - return _swig_decoders.VectorOfStructVectorInt_get_allocator(self) - - def pop_back(self): - return _swig_decoders.VectorOfStructVectorInt_pop_back(self) - - def erase(self, *args): - return _swig_decoders.VectorOfStructVectorInt_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_VectorOfStructVectorInt(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.VectorOfStructVectorInt_push_back(self, x) - - def front(self): - return _swig_decoders.VectorOfStructVectorInt_front(self) - - def back(self): - return _swig_decoders.VectorOfStructVectorInt_back(self) - - def assign(self, n, x): - return _swig_decoders.VectorOfStructVectorInt_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.VectorOfStructVectorInt_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.VectorOfStructVectorInt_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.VectorOfStructVectorInt_reserve(self, n) - - def capacity(self): - return _swig_decoders.VectorOfStructVectorInt_capacity(self) - __swig_destroy__ = _swig_decoders.delete_VectorOfStructVectorInt - __del__ = lambda self: None -VectorOfStructVectorInt_swigregister = _swig_decoders.VectorOfStructVectorInt_swigregister -VectorOfStructVectorInt_swigregister(VectorOfStructVectorInt) - -class FloatVector(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, FloatVector, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, FloatVector, name) - __repr__ = _swig_repr - - def iterator(self): - return _swig_decoders.FloatVector_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.FloatVector___nonzero__(self) - - def __bool__(self): - return _swig_decoders.FloatVector___bool__(self) - - def __len__(self): - return _swig_decoders.FloatVector___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.FloatVector___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.FloatVector___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.FloatVector___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.FloatVector___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.FloatVector___getitem__(self, *args) - - def __setitem__(self, *args): - return _swig_decoders.FloatVector___setitem__(self, *args) - - def pop(self): - 
return _swig_decoders.FloatVector_pop(self) - - def append(self, x): - return _swig_decoders.FloatVector_append(self, x) - - def empty(self): - return _swig_decoders.FloatVector_empty(self) - - def size(self): - return _swig_decoders.FloatVector_size(self) - - def swap(self, v): - return _swig_decoders.FloatVector_swap(self, v) - - def begin(self): - return _swig_decoders.FloatVector_begin(self) - - def end(self): - return _swig_decoders.FloatVector_end(self) - - def rbegin(self): - return _swig_decoders.FloatVector_rbegin(self) - - def rend(self): - return _swig_decoders.FloatVector_rend(self) - - def clear(self): - return _swig_decoders.FloatVector_clear(self) - - def get_allocator(self): - return _swig_decoders.FloatVector_get_allocator(self) - - def pop_back(self): - return _swig_decoders.FloatVector_pop_back(self) - - def erase(self, *args): - return _swig_decoders.FloatVector_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_FloatVector(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.FloatVector_push_back(self, x) - - def front(self): - return _swig_decoders.FloatVector_front(self) - - def back(self): - return _swig_decoders.FloatVector_back(self) - - def assign(self, n, x): - return _swig_decoders.FloatVector_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.FloatVector_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.FloatVector_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.FloatVector_reserve(self, n) - - def capacity(self): - return _swig_decoders.FloatVector_capacity(self) - __swig_destroy__ = _swig_decoders.delete_FloatVector - __del__ = lambda self: None -FloatVector_swigregister = _swig_decoders.FloatVector_swigregister -FloatVector_swigregister(FloatVector) - -class Pair(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, Pair, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, Pair, name) - __repr__ = _swig_repr - - def __init__(self, *args): - this = _swig_decoders.new_Pair(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - __swig_setmethods__["first"] = _swig_decoders.Pair_first_set - __swig_getmethods__["first"] = _swig_decoders.Pair_first_get - if _newclass: - first = _swig_property(_swig_decoders.Pair_first_get, _swig_decoders.Pair_first_set) - __swig_setmethods__["second"] = _swig_decoders.Pair_second_set - __swig_getmethods__["second"] = _swig_decoders.Pair_second_get - if _newclass: - second = _swig_property(_swig_decoders.Pair_second_get, _swig_decoders.Pair_second_set) - def __len__(self): - return 2 - def __repr__(self): - return str((self.first, self.second)) - def __getitem__(self, index): - if not (index % 2): - return self.first - else: - return self.second - def __setitem__(self, index, val): - if not (index % 2): - self.first = val - else: - self.second = val - __swig_destroy__ = _swig_decoders.delete_Pair - __del__ = lambda self: None -Pair_swigregister = _swig_decoders.Pair_swigregister -Pair_swigregister(Pair) - -class PairFloatVectorVector(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, PairFloatVectorVector, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, PairFloatVectorVector, name) - __repr__ = _swig_repr - - def iterator(self): - 
return _swig_decoders.PairFloatVectorVector_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.PairFloatVectorVector___nonzero__(self) - - def __bool__(self): - return _swig_decoders.PairFloatVectorVector___bool__(self) - - def __len__(self): - return _swig_decoders.PairFloatVectorVector___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.PairFloatVectorVector___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.PairFloatVectorVector___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.PairFloatVectorVector___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.PairFloatVectorVector___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.PairFloatVectorVector___getitem__(self, *args) - - def __setitem__(self, *args): - return _swig_decoders.PairFloatVectorVector___setitem__(self, *args) - - def pop(self): - return _swig_decoders.PairFloatVectorVector_pop(self) - - def append(self, x): - return _swig_decoders.PairFloatVectorVector_append(self, x) - - def empty(self): - return _swig_decoders.PairFloatVectorVector_empty(self) - - def size(self): - return _swig_decoders.PairFloatVectorVector_size(self) - - def swap(self, v): - return _swig_decoders.PairFloatVectorVector_swap(self, v) - - def begin(self): - return _swig_decoders.PairFloatVectorVector_begin(self) - - def end(self): - return _swig_decoders.PairFloatVectorVector_end(self) - - def rbegin(self): - return _swig_decoders.PairFloatVectorVector_rbegin(self) - - def rend(self): - return _swig_decoders.PairFloatVectorVector_rend(self) - - def clear(self): - return _swig_decoders.PairFloatVectorVector_clear(self) - - def get_allocator(self): - return _swig_decoders.PairFloatVectorVector_get_allocator(self) - - def pop_back(self): - return _swig_decoders.PairFloatVectorVector_pop_back(self) - - def erase(self, *args): - return _swig_decoders.PairFloatVectorVector_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_PairFloatVectorVector(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.PairFloatVectorVector_push_back(self, x) - - def front(self): - return _swig_decoders.PairFloatVectorVector_front(self) - - def back(self): - return _swig_decoders.PairFloatVectorVector_back(self) - - def assign(self, n, x): - return _swig_decoders.PairFloatVectorVector_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.PairFloatVectorVector_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.PairFloatVectorVector_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.PairFloatVectorVector_reserve(self, n) - - def capacity(self): - return _swig_decoders.PairFloatVectorVector_capacity(self) - __swig_destroy__ = _swig_decoders.delete_PairFloatVectorVector - __del__ = lambda self: None -PairFloatVectorVector_swigregister = _swig_decoders.PairFloatVectorVector_swigregister -PairFloatVectorVector_swigregister(PairFloatVectorVector) - -class PairDoubleVectorVector(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, PairDoubleVectorVector, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, PairDoubleVectorVector, name) - __repr__ = _swig_repr - - def iterator(self): - return 
_swig_decoders.PairDoubleVectorVector_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.PairDoubleVectorVector___nonzero__(self) - - def __bool__(self): - return _swig_decoders.PairDoubleVectorVector___bool__(self) - - def __len__(self): - return _swig_decoders.PairDoubleVectorVector___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.PairDoubleVectorVector___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.PairDoubleVectorVector___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.PairDoubleVectorVector___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.PairDoubleVectorVector___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.PairDoubleVectorVector___getitem__(self, *args) - - def __setitem__(self, *args): - return _swig_decoders.PairDoubleVectorVector___setitem__(self, *args) - - def pop(self): - return _swig_decoders.PairDoubleVectorVector_pop(self) - - def append(self, x): - return _swig_decoders.PairDoubleVectorVector_append(self, x) - - def empty(self): - return _swig_decoders.PairDoubleVectorVector_empty(self) - - def size(self): - return _swig_decoders.PairDoubleVectorVector_size(self) - - def swap(self, v): - return _swig_decoders.PairDoubleVectorVector_swap(self, v) - - def begin(self): - return _swig_decoders.PairDoubleVectorVector_begin(self) - - def end(self): - return _swig_decoders.PairDoubleVectorVector_end(self) - - def rbegin(self): - return _swig_decoders.PairDoubleVectorVector_rbegin(self) - - def rend(self): - return _swig_decoders.PairDoubleVectorVector_rend(self) - - def clear(self): - return _swig_decoders.PairDoubleVectorVector_clear(self) - - def get_allocator(self): - return _swig_decoders.PairDoubleVectorVector_get_allocator(self) - - def pop_back(self): - return _swig_decoders.PairDoubleVectorVector_pop_back(self) - - def erase(self, *args): - return _swig_decoders.PairDoubleVectorVector_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_PairDoubleVectorVector(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.PairDoubleVectorVector_push_back(self, x) - - def front(self): - return _swig_decoders.PairDoubleVectorVector_front(self) - - def back(self): - return _swig_decoders.PairDoubleVectorVector_back(self) - - def assign(self, n, x): - return _swig_decoders.PairDoubleVectorVector_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.PairDoubleVectorVector_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.PairDoubleVectorVector_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.PairDoubleVectorVector_reserve(self, n) - - def capacity(self): - return _swig_decoders.PairDoubleVectorVector_capacity(self) - __swig_destroy__ = _swig_decoders.delete_PairDoubleVectorVector - __del__ = lambda self: None -PairDoubleVectorVector_swigregister = _swig_decoders.PairDoubleVectorVector_swigregister -PairDoubleVectorVector_swigregister(PairDoubleVectorVector) - -class PairDoubleVectorVector2(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, PairDoubleVectorVector2, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, PairDoubleVectorVector2, name) - __repr__ = _swig_repr - - def iterator(self): - return 
_swig_decoders.PairDoubleVectorVector2_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.PairDoubleVectorVector2___nonzero__(self) - - def __bool__(self): - return _swig_decoders.PairDoubleVectorVector2___bool__(self) - - def __len__(self): - return _swig_decoders.PairDoubleVectorVector2___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.PairDoubleVectorVector2___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.PairDoubleVectorVector2___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.PairDoubleVectorVector2___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.PairDoubleVectorVector2___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.PairDoubleVectorVector2___getitem__(self, *args) - - def __setitem__(self, *args): - return _swig_decoders.PairDoubleVectorVector2___setitem__(self, *args) - - def pop(self): - return _swig_decoders.PairDoubleVectorVector2_pop(self) - - def append(self, x): - return _swig_decoders.PairDoubleVectorVector2_append(self, x) - - def empty(self): - return _swig_decoders.PairDoubleVectorVector2_empty(self) - - def size(self): - return _swig_decoders.PairDoubleVectorVector2_size(self) - - def swap(self, v): - return _swig_decoders.PairDoubleVectorVector2_swap(self, v) - - def begin(self): - return _swig_decoders.PairDoubleVectorVector2_begin(self) - - def end(self): - return _swig_decoders.PairDoubleVectorVector2_end(self) - - def rbegin(self): - return _swig_decoders.PairDoubleVectorVector2_rbegin(self) - - def rend(self): - return _swig_decoders.PairDoubleVectorVector2_rend(self) - - def clear(self): - return _swig_decoders.PairDoubleVectorVector2_clear(self) - - def get_allocator(self): - return _swig_decoders.PairDoubleVectorVector2_get_allocator(self) - - def pop_back(self): - return _swig_decoders.PairDoubleVectorVector2_pop_back(self) - - def erase(self, *args): - return _swig_decoders.PairDoubleVectorVector2_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_PairDoubleVectorVector2(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.PairDoubleVectorVector2_push_back(self, x) - - def front(self): - return _swig_decoders.PairDoubleVectorVector2_front(self) - - def back(self): - return _swig_decoders.PairDoubleVectorVector2_back(self) - - def assign(self, n, x): - return _swig_decoders.PairDoubleVectorVector2_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.PairDoubleVectorVector2_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.PairDoubleVectorVector2_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.PairDoubleVectorVector2_reserve(self, n) - - def capacity(self): - return _swig_decoders.PairDoubleVectorVector2_capacity(self) - __swig_destroy__ = _swig_decoders.delete_PairDoubleVectorVector2 - __del__ = lambda self: None -PairDoubleVectorVector2_swigregister = _swig_decoders.PairDoubleVectorVector2_swigregister -PairDoubleVectorVector2_swigregister(PairDoubleVectorVector2) - -class DoubleVector3(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, DoubleVector3, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, DoubleVector3, name) - __repr__ = _swig_repr - - def iterator(self): - 
return _swig_decoders.DoubleVector3_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.DoubleVector3___nonzero__(self) - - def __bool__(self): - return _swig_decoders.DoubleVector3___bool__(self) - - def __len__(self): - return _swig_decoders.DoubleVector3___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.DoubleVector3___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.DoubleVector3___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.DoubleVector3___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.DoubleVector3___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.DoubleVector3___getitem__(self, *args) - - def __setitem__(self, *args): - return _swig_decoders.DoubleVector3___setitem__(self, *args) - - def pop(self): - return _swig_decoders.DoubleVector3_pop(self) - - def append(self, x): - return _swig_decoders.DoubleVector3_append(self, x) - - def empty(self): - return _swig_decoders.DoubleVector3_empty(self) - - def size(self): - return _swig_decoders.DoubleVector3_size(self) - - def swap(self, v): - return _swig_decoders.DoubleVector3_swap(self, v) - - def begin(self): - return _swig_decoders.DoubleVector3_begin(self) - - def end(self): - return _swig_decoders.DoubleVector3_end(self) - - def rbegin(self): - return _swig_decoders.DoubleVector3_rbegin(self) - - def rend(self): - return _swig_decoders.DoubleVector3_rend(self) - - def clear(self): - return _swig_decoders.DoubleVector3_clear(self) - - def get_allocator(self): - return _swig_decoders.DoubleVector3_get_allocator(self) - - def pop_back(self): - return _swig_decoders.DoubleVector3_pop_back(self) - - def erase(self, *args): - return _swig_decoders.DoubleVector3_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_DoubleVector3(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.DoubleVector3_push_back(self, x) - - def front(self): - return _swig_decoders.DoubleVector3_front(self) - - def back(self): - return _swig_decoders.DoubleVector3_back(self) - - def assign(self, n, x): - return _swig_decoders.DoubleVector3_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.DoubleVector3_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.DoubleVector3_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.DoubleVector3_reserve(self, n) - - def capacity(self): - return _swig_decoders.DoubleVector3_capacity(self) - __swig_destroy__ = _swig_decoders.delete_DoubleVector3 - __del__ = lambda self: None -DoubleVector3_swigregister = _swig_decoders.DoubleVector3_swigregister -DoubleVector3_swigregister(DoubleVector3) - -class IntVector3(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, IntVector3, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, IntVector3, name) - __repr__ = _swig_repr - - def iterator(self): - return _swig_decoders.IntVector3_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.IntVector3___nonzero__(self) - - def __bool__(self): - return _swig_decoders.IntVector3___bool__(self) - - def __len__(self): - return _swig_decoders.IntVector3___len__(self) - - def __getslice__(self, i, j): - return 
_swig_decoders.IntVector3___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.IntVector3___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.IntVector3___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.IntVector3___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.IntVector3___getitem__(self, *args) - - def __setitem__(self, *args): - return _swig_decoders.IntVector3___setitem__(self, *args) - - def pop(self): - return _swig_decoders.IntVector3_pop(self) - - def append(self, x): - return _swig_decoders.IntVector3_append(self, x) - - def empty(self): - return _swig_decoders.IntVector3_empty(self) - - def size(self): - return _swig_decoders.IntVector3_size(self) - - def swap(self, v): - return _swig_decoders.IntVector3_swap(self, v) - - def begin(self): - return _swig_decoders.IntVector3_begin(self) - - def end(self): - return _swig_decoders.IntVector3_end(self) - - def rbegin(self): - return _swig_decoders.IntVector3_rbegin(self) - - def rend(self): - return _swig_decoders.IntVector3_rend(self) - - def clear(self): - return _swig_decoders.IntVector3_clear(self) - - def get_allocator(self): - return _swig_decoders.IntVector3_get_allocator(self) - - def pop_back(self): - return _swig_decoders.IntVector3_pop_back(self) - - def erase(self, *args): - return _swig_decoders.IntVector3_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_IntVector3(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.IntVector3_push_back(self, x) - - def front(self): - return _swig_decoders.IntVector3_front(self) - - def back(self): - return _swig_decoders.IntVector3_back(self) - - def assign(self, n, x): - return _swig_decoders.IntVector3_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.IntVector3_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.IntVector3_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.IntVector3_reserve(self, n) - - def capacity(self): - return _swig_decoders.IntVector3_capacity(self) - __swig_destroy__ = _swig_decoders.delete_IntVector3 - __del__ = lambda self: None -IntVector3_swigregister = _swig_decoders.IntVector3_swigregister -IntVector3_swigregister(IntVector3) - -class TrieVector(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, TrieVector, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, TrieVector, name) - __repr__ = _swig_repr - - def iterator(self): - return _swig_decoders.TrieVector_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.TrieVector___nonzero__(self) - - def __bool__(self): - return _swig_decoders.TrieVector___bool__(self) - - def __len__(self): - return _swig_decoders.TrieVector___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.TrieVector___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.TrieVector___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.TrieVector___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.TrieVector___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.TrieVector___getitem__(self, *args) - - def __setitem__(self, *args): - return 
_swig_decoders.TrieVector___setitem__(self, *args) - - def pop(self): - return _swig_decoders.TrieVector_pop(self) - - def append(self, x): - return _swig_decoders.TrieVector_append(self, x) - - def empty(self): - return _swig_decoders.TrieVector_empty(self) - - def size(self): - return _swig_decoders.TrieVector_size(self) - - def swap(self, v): - return _swig_decoders.TrieVector_swap(self, v) - - def begin(self): - return _swig_decoders.TrieVector_begin(self) - - def end(self): - return _swig_decoders.TrieVector_end(self) - - def rbegin(self): - return _swig_decoders.TrieVector_rbegin(self) - - def rend(self): - return _swig_decoders.TrieVector_rend(self) - - def clear(self): - return _swig_decoders.TrieVector_clear(self) - - def get_allocator(self): - return _swig_decoders.TrieVector_get_allocator(self) - - def pop_back(self): - return _swig_decoders.TrieVector_pop_back(self) - - def erase(self, *args): - return _swig_decoders.TrieVector_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_TrieVector(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.TrieVector_push_back(self, x) - - def front(self): - return _swig_decoders.TrieVector_front(self) - - def back(self): - return _swig_decoders.TrieVector_back(self) - - def assign(self, n, x): - return _swig_decoders.TrieVector_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.TrieVector_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.TrieVector_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.TrieVector_reserve(self, n) - - def capacity(self): - return _swig_decoders.TrieVector_capacity(self) - __swig_destroy__ = _swig_decoders.delete_TrieVector - __del__ = lambda self: None -TrieVector_swigregister = _swig_decoders.TrieVector_swigregister -TrieVector_swigregister(TrieVector) - -class BoolVector(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, BoolVector, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, BoolVector, name) - __repr__ = _swig_repr - - def iterator(self): - return _swig_decoders.BoolVector_iterator(self) - def __iter__(self): - return self.iterator() - - def __nonzero__(self): - return _swig_decoders.BoolVector___nonzero__(self) - - def __bool__(self): - return _swig_decoders.BoolVector___bool__(self) - - def __len__(self): - return _swig_decoders.BoolVector___len__(self) - - def __getslice__(self, i, j): - return _swig_decoders.BoolVector___getslice__(self, i, j) - - def __setslice__(self, *args): - return _swig_decoders.BoolVector___setslice__(self, *args) - - def __delslice__(self, i, j): - return _swig_decoders.BoolVector___delslice__(self, i, j) - - def __delitem__(self, *args): - return _swig_decoders.BoolVector___delitem__(self, *args) - - def __getitem__(self, *args): - return _swig_decoders.BoolVector___getitem__(self, *args) - - def __setitem__(self, *args): - return _swig_decoders.BoolVector___setitem__(self, *args) - - def pop(self): - return _swig_decoders.BoolVector_pop(self) - - def append(self, x): - return _swig_decoders.BoolVector_append(self, x) - - def empty(self): - return _swig_decoders.BoolVector_empty(self) - - def size(self): - return _swig_decoders.BoolVector_size(self) - - def swap(self, v): - return _swig_decoders.BoolVector_swap(self, v) - - def begin(self): - return _swig_decoders.BoolVector_begin(self) - - def end(self): - 
return _swig_decoders.BoolVector_end(self) - - def rbegin(self): - return _swig_decoders.BoolVector_rbegin(self) - - def rend(self): - return _swig_decoders.BoolVector_rend(self) - - def clear(self): - return _swig_decoders.BoolVector_clear(self) - - def get_allocator(self): - return _swig_decoders.BoolVector_get_allocator(self) - - def pop_back(self): - return _swig_decoders.BoolVector_pop_back(self) - - def erase(self, *args): - return _swig_decoders.BoolVector_erase(self, *args) - - def __init__(self, *args): - this = _swig_decoders.new_BoolVector(*args) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def push_back(self, x): - return _swig_decoders.BoolVector_push_back(self, x) - - def front(self): - return _swig_decoders.BoolVector_front(self) - - def back(self): - return _swig_decoders.BoolVector_back(self) - - def assign(self, n, x): - return _swig_decoders.BoolVector_assign(self, n, x) - - def resize(self, *args): - return _swig_decoders.BoolVector_resize(self, *args) - - def insert(self, *args): - return _swig_decoders.BoolVector_insert(self, *args) - - def reserve(self, n): - return _swig_decoders.BoolVector_reserve(self, n) - - def capacity(self): - return _swig_decoders.BoolVector_capacity(self) - __swig_destroy__ = _swig_decoders.delete_BoolVector - __del__ = lambda self: None -BoolVector_swigregister = _swig_decoders.BoolVector_swigregister -BoolVector_swigregister(BoolVector) - - -def IntDoublePairCompSecondRev(a, b): - return _swig_decoders.IntDoublePairCompSecondRev(a, b) -IntDoublePairCompSecondRev = _swig_decoders.IntDoublePairCompSecondRev - -def StringDoublePairCompSecondRev(a, b): - return _swig_decoders.StringDoublePairCompSecondRev(a, b) -StringDoublePairCompSecondRev = _swig_decoders.StringDoublePairCompSecondRev - -def DoubleStringPairCompFirstRev(a, b): - return _swig_decoders.DoubleStringPairCompFirstRev(a, b) -DoubleStringPairCompFirstRev = _swig_decoders.DoubleStringPairCompFirstRev -class RetriveStrEnumerateVocab(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, RetriveStrEnumerateVocab, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, RetriveStrEnumerateVocab, name) - __repr__ = _swig_repr - - def __init__(self): - this = _swig_decoders.new_RetriveStrEnumerateVocab() - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - - def Add(self, index, str): - return _swig_decoders.RetriveStrEnumerateVocab_Add(self, index, str) - __swig_setmethods__["vocabulary"] = _swig_decoders.RetriveStrEnumerateVocab_vocabulary_set - __swig_getmethods__["vocabulary"] = _swig_decoders.RetriveStrEnumerateVocab_vocabulary_get - if _newclass: - vocabulary = _swig_property(_swig_decoders.RetriveStrEnumerateVocab_vocabulary_get, _swig_decoders.RetriveStrEnumerateVocab_vocabulary_set) - __swig_destroy__ = _swig_decoders.delete_RetriveStrEnumerateVocab - __del__ = lambda self: None -RetriveStrEnumerateVocab_swigregister = _swig_decoders.RetriveStrEnumerateVocab_swigregister -RetriveStrEnumerateVocab_swigregister(RetriveStrEnumerateVocab) -cvar = _swig_decoders.cvar -OOV_SCORE = cvar.OOV_SCORE -START_TOKEN = cvar.START_TOKEN -UNK_TOKEN = cvar.UNK_TOKEN -END_TOKEN = cvar.END_TOKEN - -class Scorer(_object): - __swig_setmethods__ = {} - __setattr__ = lambda self, name, value: _swig_setattr(self, Scorer, name, value) - __swig_getmethods__ = {} - __getattr__ = lambda self, name: _swig_getattr(self, Scorer, name) - __repr__ = 
_swig_repr - - def __init__(self, alpha, beta, lm_path, vocabulary): - this = _swig_decoders.new_Scorer(alpha, beta, lm_path, vocabulary) - try: - self.this.append(this) - except __builtin__.Exception: - self.this = this - __swig_destroy__ = _swig_decoders.delete_Scorer - __del__ = lambda self: None - - def get_log_cond_prob(self, words): - return _swig_decoders.Scorer_get_log_cond_prob(self, words) - - def get_sent_log_prob(self, words): - return _swig_decoders.Scorer_get_sent_log_prob(self, words) - - def get_max_order(self): - return _swig_decoders.Scorer_get_max_order(self) - - def get_dict_size(self): - return _swig_decoders.Scorer_get_dict_size(self) - - def is_character_based(self): - return _swig_decoders.Scorer_is_character_based(self) - - def reset_params(self, alpha, beta): - return _swig_decoders.Scorer_reset_params(self, alpha, beta) - - def make_ngram(self, prefix): - return _swig_decoders.Scorer_make_ngram(self, prefix) - - def split_labels(self, labels): - return _swig_decoders.Scorer_split_labels(self, labels) - __swig_setmethods__["alpha"] = _swig_decoders.Scorer_alpha_set - __swig_getmethods__["alpha"] = _swig_decoders.Scorer_alpha_get - if _newclass: - alpha = _swig_property(_swig_decoders.Scorer_alpha_get, _swig_decoders.Scorer_alpha_set) - __swig_setmethods__["beta"] = _swig_decoders.Scorer_beta_set - __swig_getmethods__["beta"] = _swig_decoders.Scorer_beta_get - if _newclass: - beta = _swig_property(_swig_decoders.Scorer_beta_get, _swig_decoders.Scorer_beta_set) - __swig_setmethods__["dictionary"] = _swig_decoders.Scorer_dictionary_set - __swig_getmethods__["dictionary"] = _swig_decoders.Scorer_dictionary_get - if _newclass: - dictionary = _swig_property(_swig_decoders.Scorer_dictionary_get, _swig_decoders.Scorer_dictionary_set) -Scorer_swigregister = _swig_decoders.Scorer_swigregister -Scorer_swigregister(Scorer) - - -def ctc_beam_search_decoder(log_probs_seq, log_probs_idx, root, start, beam_size, blank_id=0, space_id=-1, cutoff_prob=0.999, ext_scorer=None): - return _swig_decoders.ctc_beam_search_decoder(log_probs_seq, log_probs_idx, root, start, beam_size, blank_id, space_id, cutoff_prob, ext_scorer) -ctc_beam_search_decoder = _swig_decoders.ctc_beam_search_decoder - -def ctc_beam_search_decoder_batch(batch_log_probs_seq, batch_log_probs_idx, batch_root_trie, batch_start, beam_size, num_processes, blank_id=0, space_id=-1, cutoff_prob=0.999, ext_scorer=None): - return _swig_decoders.ctc_beam_search_decoder_batch(batch_log_probs_seq, batch_log_probs_idx, batch_root_trie, batch_start, beam_size, num_processes, blank_id, space_id, cutoff_prob, ext_scorer) -ctc_beam_search_decoder_batch = _swig_decoders.ctc_beam_search_decoder_batch - -def map_sent(sent, vocabulary, greedy=False, blank_id=0): - return _swig_decoders.map_sent(sent, vocabulary, greedy, blank_id) -map_sent = _swig_decoders.map_sent - -def map_batch(batch_sents, vocabulary, num_processes, greedy=False, blank_id=0): - return _swig_decoders.map_batch(batch_sents, vocabulary, num_processes, greedy, blank_id) -map_batch = _swig_decoders.map_batch -# This file is compatible with both classic and new-style classes. 
- - diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/test/test_en.py b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/test/test_en.py deleted file mode 100644 index d777360a670753df0c783772de70be12aba16ac4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/test/test_en.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import swig_decoders as decoder -import numpy as np - -probs_seq = [[ - 0.06390443, 0.21124858, 0.27323887, 0.06870235, 0.0361254, - 0.18184413, 0.16493624 - ], [ - 0.03309247, 0.22866108, 0.24390638, 0.09699597, 0.31895462, - 0.0094893, 0.06890021 - ], [ - 0.218104, 0.19992557, 0.18245131, 0.08503348, 0.14903535, - 0.08424043, 0.08120984 - ], [ - 0.12094152, 0.19162472, 0.01473646, 0.28045061, 0.24246305, - 0.05206269, 0.09772094 - ], [ - 0.1333387, 0.00550838, 0.00301669, 0.21745861, 0.20803985, - 0.41317442, 0.01946335 - ], [ - 0.16468227, 0.1980699, 0.1906545, 0.18963251, 0.19860937, - 0.04377724, 0.01457421 - ]] - -vocab_list = ["\'", " ", "a", "b", "c", "d"] - -log_prob_seq = np.log(np.array(probs_seq, dtype=np.float32)) -log_probs_idx = np.argsort(log_prob_seq, axis=-1)[:, ::-1] -log_prob_seq = np.sort(log_prob_seq, axis=-1)[:, ::-1] - -root = decoder.PathTrie() -root.score = root.log_prob_b_prev = 0.0 -beam_size=20 - -chunk_log_prob_seq = [li.tolist() for li in log_prob_seq] -chunk_log_probs_idx = [li.tolist() for li in log_probs_idx] - -alpha = 0.5 -beta = 0.5 -lm_path = '../kenlm/lm/test.arpa' -scorer = decoder.Scorer(alpha, beta, lm_path, vocab_list) - -root2 = decoder.TrieVector() -temp_dict = {} -for i in range(2): - root = decoder.PathTrie() - temp_dict[i] = root - root2.push_back(root) - - -batch_chunk_log_prob_seq = [chunk_log_prob_seq, chunk_log_prob_seq] -batch_chunk_log_probs_idx = [chunk_log_probs_idx, chunk_log_probs_idx] -batch_chunk_length = [6, 6] -batch_start = [True, True] - -result1 = decoder.ctc_beam_search_decoder_batch(batch_chunk_log_prob_seq, - batch_chunk_log_probs_idx, - root2, - batch_start, - beam_size, 1, 6, 1, 0.9999, scorer) -# print single sentence result -print(decoder.map_sent(result1[0][0][1], vocab_list)) -print(result1[0]) - -# Test stateful decoder -# continue decoding -batch_start = [False, False] -result2 = decoder.ctc_beam_search_decoder_batch(batch_chunk_log_prob_seq, - batch_chunk_log_probs_idx, - root2, - batch_start, - beam_size, 1, 6, 1, 0.9999, scorer) - -print(decoder.map_batch([result1[0][0][1], result1[1][0][1]], vocab_list, 1)) diff --git a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/test/test_zh.py b/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/test/test_zh.py deleted file mode 100644 index 34d25de10e8ec36ae8069c187ea6b88863204eca..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/ctc_decoder/swig/test/test_zh.py +++ /dev/null @@ -1,142 
+0,0 @@ - -import numpy as np -import logging -from swig_decoders import TrieVector, ctc_beam_search_decoder_batch, \ - map_sent, map_batch, \ - PathTrie, TrieVector -import multiprocessing - -logging.basicConfig(filename='out.log', level=logging.INFO) - -def test_prefix_beam_search(batch_log_ctc_probs, batch_lens, beam_size, blank_id, space_id, cutoff_prob=0.999): - """ - Prefix beam search - Params: - batch_log_probs: B x T x V, the log probabilities of a sequence - batch_lens: B, the actual length of each sequence - Return: - hyps: a batch of beam candidates for each sequence - [[(score, cand_list1), (score, cand_list2), ....(score, cand_list_beam)], - [(score, cand_list1), (score, candi_list2), ...], - ... - []] - """ - #batch_log_probs_seq, batch_log_probs_idx = torch.topk(batch_log_ctc_probs, beam_size, dim=-1) - batch_log_probs_idx = np.argsort(batch_log_ctc_probs, axis=-1)[:, :, ::-1] - batch_log_probs_seq = np.sort(batch_log_ctc_probs, axis=-1)[:, :, ::-1] - batch_log_probs_seq_list = batch_log_probs_seq.tolist() - batch_log_probs_idx_list = batch_log_probs_idx.tolist() - batch_len_list = batch_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append(batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append(batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - num_processes = min(multiprocessing.cpu_count()-1, len(batch_log_probs_seq)) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - blank_id, - space_id, - cutoff_prob) - return score_hyps - -def test_batch_greedy_search(batch_log_ctc_probs, batch_lens, vocab_list, blank_id): - """ - Greedy search - Params: - batch_log_ctc_probs: B x T x V - batch_lens: B - vocab_list: a list of symbols, of size V - blank_id: id for blank symbol - Return: - batch of decoded string sentences - """ - - sort_ids = np.argsort(batch_log_ctc_probs, axis=-1)[:, :, ::-1] - batch_greedy_ids = sort_ids[:, :, 0].tolist() - batch_len_list = batch_lens.tolist() - batch_ids = [] - for seq_ids, seq_len in zip(batch_greedy_ids, batch_len_list): - batch_ids.append(seq_ids[0: seq_len]) - num_processes = min(multiprocessing.cpu_count()-1, len(batch_ids)) - greedy = True - result = map_batch(batch_ids, vocab_list, num_processes, greedy, blank_id) - return result - - -def test_map_batch(batch_sent_list, vocab_list, blank_id): - """ - Map a batch of sent ids to string - Prams: - batch_sent_list: a list of list of ids - vocab_list: a list of symbols, of size V - blank_id: id for blank symbol - """ - num_processes = min(multiprocessing.cpu_count()-1, len(batch_sent_list)) - greedy = False # this is not used for greedy search so we set it to false - results = map_batch(batch_sent_list, vocab_list, num_processes, greedy, blank_id) - return results - -def test_map_sent(sent_ids, vocab_list, greedy, blank_id): - """ - Map one sentence ids to string - greedy: False, just map. 
True, use ctc greedy search - """ - return map_sent(sent_ids, vocab_list, greedy, blank_id) - - -def load_vocab(vocab_file): - vocab = [] - with open(vocab_file, "r", encoding="utf-8") as f: - for line in f: - line = line.strip().split() - vocab.append(line[0]) - return vocab - -if __name__ == "__main__": - input = "data/test.npz" - word = "data/words.txt" - beam_size = 10 - blank_id = 0 - space_id = 45 - - vocab_list = load_vocab(word) - input = np.load(input) - batch_log_ctc_probs = input['batch_log_ctc_probs'] - batch_len = input["batch_len"] - # ctc prefix beam search - logging.info("Testing ctc prefix beam search") - score_hyps = test_prefix_beam_search(batch_log_ctc_probs, - batch_len, - beam_size, - blank_id, - space_id, - cutoff_prob=0.999) - # map the most probable cand ids to string - batch_ids = [score_hyps[0][0][1], score_hyps[1][0][1]] - map_sents = test_map_batch(batch_ids, vocab_list, blank_id) - logging.info(map_sents) - - logging.info("Testing greedy search") - # greedy search - greedy_sents = test_batch_greedy_search(batch_log_ctc_probs, - batch_len, - vocab_list, - blank_id) - logging.info(greedy_sents) - - logging.info("Test one sentence") - sent_ids = score_hyps[0][0][1] - one_sent = test_map_sent(sent_ids, vocab_list, False, blank_id) - logging.info(one_sent) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/data.list b/models/audio/speech_recognition/conformer/igie/data.list deleted file mode 100644 index d584b0ee8d13cd0a83182da8edd84d5dcc547f56..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/data.list +++ /dev/null @@ -1,7176 +0,0 @@ -{"key": "BAC009S0764W0121", "wav": "./aishell/wav/test/S0764/BAC009S0764W0121.wav", "txt": "甚至出现交易几乎停滞的情况"} -{"key": "BAC009S0764W0122", "wav": "./aishell/wav/test/S0764/BAC009S0764W0122.wav", "txt": "一二线城市虽然也处于调整中"} -{"key": "BAC009S0764W0123", "wav": "./aishell/wav/test/S0764/BAC009S0764W0123.wav", "txt": "但因为聚集了过多公共资源"} -{"key": "BAC009S0764W0124", "wav": "./aishell/wav/test/S0764/BAC009S0764W0124.wav", "txt": "为了规避三四线城市明显过剩的市场风险"} -{"key": "BAC009S0764W0125", "wav": "./aishell/wav/test/S0764/BAC009S0764W0125.wav", "txt": "标杆房企必然调整市场战略"} -{"key": "BAC009S0764W0126", "wav": "./aishell/wav/test/S0764/BAC009S0764W0126.wav", "txt": "因此土地储备至关重要"} -{"key": "BAC009S0764W0127", "wav": "./aishell/wav/test/S0764/BAC009S0764W0127.wav", "txt": "中原地产首席分析师张大伟说"} -{"key": "BAC009S0764W0128", "wav": "./aishell/wav/test/S0764/BAC009S0764W0128.wav", "txt": "一线城市土地供应量减少"} -{"key": "BAC009S0764W0129", "wav": "./aishell/wav/test/S0764/BAC009S0764W0129.wav", "txt": "也助推了土地市场的火爆"} -{"key": "BAC009S0764W0130", "wav": "./aishell/wav/test/S0764/BAC009S0764W0130.wav", "txt": "北京仅新增住宅土地供应十宗"} -{"key": "BAC009S0764W0131", "wav": "./aishell/wav/test/S0764/BAC009S0764W0131.wav", "txt": "开发边界将作为城市发展的刚性约定"} -{"key": "BAC009S0764W0132", "wav": "./aishell/wav/test/S0764/BAC009S0764W0132.wav", "txt": "不得超越界限盲目扩张"} -{"key": "BAC009S0764W0133", "wav": "./aishell/wav/test/S0764/BAC009S0764W0133.wav", "txt": "目前挂牌的只有几宗土地"} -{"key": "BAC009S0764W0134", "wav": "./aishell/wav/test/S0764/BAC009S0764W0134.wav", "txt": "再加上近期一二线楼市升温"} -{"key": "BAC009S0764W0135", "wav": "./aishell/wav/test/S0764/BAC009S0764W0135.wav", "txt": "房企对土地的争抢更加积极"} -{"key": "BAC009S0764W0136", "wav": "./aishell/wav/test/S0764/BAC009S0764W0136.wav", "txt": "土地市场体现了房企对一二线市场的看重"} -{"key": "BAC009S0764W0137", "wav": "./aishell/wav/test/S0764/BAC009S0764W0137.wav", "txt": "面包价格会跟风上涨吗"} -{"key": "BAC009S0764W0138", "wav": 
"./aishell/wav/test/S0764/BAC009S0764W0138.wav", "txt": "成交量环比大幅增加"} -{"key": "BAC009S0764W0139", "wav": "./aishell/wav/test/S0764/BAC009S0764W0139.wav", "txt": "国家统计局的数据显示"} -{"key": "BAC009S0764W0140", "wav": "./aishell/wav/test/S0764/BAC009S0764W0140.wav", "txt": "其中广州深圳甚至出现了多个日光盘"} -{"key": "BAC009S0764W0141", "wav": "./aishell/wav/test/S0764/BAC009S0764W0141.wav", "txt": "零三年到去年"} -{"key": "BAC009S0764W0142", "wav": "./aishell/wav/test/S0764/BAC009S0764W0142.wav", "txt": "市场基数已不可同日而语"} -{"key": "BAC009S0764W0143", "wav": "./aishell/wav/test/S0764/BAC009S0764W0143.wav", "txt": "在市场整体从高速增长进入中高速增长区间的同时"} -{"key": "BAC009S0764W0144", "wav": "./aishell/wav/test/S0764/BAC009S0764W0144.wav", "txt": "一线城市在价格较高的基础上整体回升并领涨全国"} -{"key": "BAC009S0764W0145", "wav": "./aishell/wav/test/S0764/BAC009S0764W0145.wav", "txt": "绝大部分三线城市房价仍然下降"} -{"key": "BAC009S0764W0146", "wav": "./aishell/wav/test/S0764/BAC009S0764W0146.wav", "txt": "一线楼市成交量激增"} -{"key": "BAC009S0764W0147", "wav": "./aishell/wav/test/S0764/BAC009S0764W0147.wav", "txt": "三四线城市依然冷清"} -{"key": "BAC009S0764W0148", "wav": "./aishell/wav/test/S0764/BAC009S0764W0148.wav", "txt": "根据中原地产研究中心最新数据"} -{"key": "BAC009S0764W0149", "wav": "./aishell/wav/test/S0764/BAC009S0764W0149.wav", "txt": "一线城市签约十七万套"} -{"key": "BAC009S0764W0150", "wav": "./aishell/wav/test/S0764/BAC009S0764W0150.wav", "txt": "同比涨幅达到百分之四"} -{"key": "BAC009S0764W0151", "wav": "./aishell/wav/test/S0764/BAC009S0764W0151.wav", "txt": "三线城市签约十六万套"} -{"key": "BAC009S0764W0152", "wav": "./aishell/wav/test/S0764/BAC009S0764W0152.wav", "txt": "四线城市成交量有轻微下调"} -{"key": "BAC009S0764W0153", "wav": "./aishell/wav/test/S0764/BAC009S0764W0153.wav", "txt": "住房城乡建设部政策研究中心主任秦虹表示"} -{"key": "BAC009S0764W0154", "wav": "./aishell/wav/test/S0764/BAC009S0764W0154.wav", "txt": "我国房地产市场过去从体偏紧部分地区过紧"} -{"key": "BAC009S0764W0155", "wav": "./aishell/wav/test/S0764/BAC009S0764W0155.wav", "txt": "总体偏松部分地区过剩"} -{"key": "BAC009S0764W0156", "wav": "./aishell/wav/test/S0764/BAC009S0764W0156.wav", "txt": "当供给远快于需求时"} -{"key": "BAC009S0764W0157", "wav": "./aishell/wav/test/S0764/BAC009S0764W0157.wav", "txt": "很难出现去年那样的楼市暴涨"} -{"key": "BAC009S0764W0158", "wav": "./aishell/wav/test/S0764/BAC009S0764W0158.wav", "txt": "即便是北上广深等供应偏紧的一线城市"} -{"key": "BAC009S0764W0159", "wav": "./aishell/wav/test/S0764/BAC009S0764W0159.wav", "txt": "也有限购政策在控制需求规模"} -{"key": "BAC009S0764W0160", "wav": "./aishell/wav/test/S0764/BAC009S0764W0160.wav", "txt": "从而有利于抑制楼市过快上涨"} -{"key": "BAC009S0764W0161", "wav": "./aishell/wav/test/S0764/BAC009S0764W0161.wav", "txt": "楼市调控供的行政手段宜减不宜加"} -{"key": "BAC009S0764W0162", "wav": "./aishell/wav/test/S0764/BAC009S0764W0162.wav", "txt": "稳增长措施需更全面地考虑化解楼市风险问题"} -{"key": "BAC009S0764W0163", "wav": "./aishell/wav/test/S0764/BAC009S0764W0163.wav", "txt": "楼市调控将去向何方"} -{"key": "BAC009S0764W0164", "wav": "./aishell/wav/test/S0764/BAC009S0764W0164.wav", "txt": "进一步发挥市场在资源配置中的决定性作用"} -{"key": "BAC009S0764W0165", "wav": "./aishell/wav/test/S0764/BAC009S0764W0165.wav", "txt": "楼市调控的行政手段宜减不宜加"} -{"key": "BAC009S0764W0166", "wav": "./aishell/wav/test/S0764/BAC009S0764W0166.wav", "txt": "去行政化"} -{"key": "BAC009S0764W0167", "wav": "./aishell/wav/test/S0764/BAC009S0764W0167.wav", "txt": "随着市场调整的深入"} -{"key": "BAC009S0764W0168", "wav": "./aishell/wav/test/S0764/BAC009S0764W0168.wav", "txt": "一些三线城市取消限购及限贷"} -{"key": "BAC009S0764W0169", "wav": "./aishell/wav/test/S0764/BAC009S0764W0169.wav", "txt": "实施较大幅度的补贴政策"} -{"key": "BAC009S0764W0170", "wav": "./aishell/wav/test/S0764/BAC009S0764W0170.wav", "txt": 
"当地新建商品住宅的房价多在每平方米三四千元"} -{"key": "BAC009S0764W0171", "wav": "./aishell/wav/test/S0764/BAC009S0764W0171.wav", "txt": "政府出台每平方米补贴五百元的托市政策"} -{"key": "BAC009S0764W0172", "wav": "./aishell/wav/test/S0764/BAC009S0764W0172.wav", "txt": "由于不可能从根本上改变供求关系"} -{"key": "BAC009S0764W0173", "wav": "./aishell/wav/test/S0764/BAC009S0764W0173.wav", "txt": "类似的补贴政策常常是短效刺激"} -{"key": "BAC009S0764W0174", "wav": "./aishell/wav/test/S0764/BAC009S0764W0174.wav", "txt": "会对市场造成新一轮的干扰"} -{"key": "BAC009S0764W0175", "wav": "./aishell/wav/test/S0764/BAC009S0764W0175.wav", "txt": "安徽铜陵结束了当地契税补贴政策"} -{"key": "BAC009S0764W0176", "wav": "./aishell/wav/test/S0764/BAC009S0764W0176.wav", "txt": "当月住宅类商品房成交套数骤跌"} -{"key": "BAC009S0764W0177", "wav": "./aishell/wav/test/S0764/BAC009S0764W0177.wav", "txt": "在经济下行压力加大的背景下"} -{"key": "BAC009S0764W0178", "wav": "./aishell/wav/test/S0764/BAC009S0764W0178.wav", "txt": "稳增长措施需更全面地考虑化解楼市风险问题"} -{"key": "BAC009S0764W0179", "wav": "./aishell/wav/test/S0764/BAC009S0764W0179.wav", "txt": "国务院发展研究中心市场经济研究所副所长邓郁松认为"} -{"key": "BAC009S0764W0180", "wav": "./aishell/wav/test/S0764/BAC009S0764W0180.wav", "txt": "可能引发房价泡沫风险"} -{"key": "BAC009S0764W0181", "wav": "./aishell/wav/test/S0764/BAC009S0764W0181.wav", "txt": "在经济增速放缓阶段运用货币政策工具时"} -{"key": "BAC009S0764W0183", "wav": "./aishell/wav/test/S0764/BAC009S0764W0183.wav", "txt": "基本住房需求得到满足后"} -{"key": "BAC009S0764W0184", "wav": "./aishell/wav/test/S0764/BAC009S0764W0184.wav", "txt": "对绿色高效宜居的高品质住房需求快速上升"} -{"key": "BAC009S0764W0185", "wav": "./aishell/wav/test/S0764/BAC009S0764W0185.wav", "txt": "通过改革和政策调整"} -{"key": "BAC009S0764W0186", "wav": "./aishell/wav/test/S0764/BAC009S0764W0186.wav", "txt": "实现我国房地产市场的平稳运行"} -{"key": "BAC009S0764W0187", "wav": "./aishell/wav/test/S0764/BAC009S0764W0187.wav", "txt": "及时发现产业发展中的倾向性苗头性问题"} -{"key": "BAC009S0764W0188", "wav": "./aishell/wav/test/S0764/BAC009S0764W0188.wav", "txt": "促进战略性新兴产业健康发展"} -{"key": "BAC009S0764W0189", "wav": "./aishell/wav/test/S0764/BAC009S0764W0189.wav", "txt": "有关部门和社会各界积极响应"} -{"key": "BAC009S0764W0190", "wav": "./aishell/wav/test/S0764/BAC009S0764W0190.wav", "txt": "采取了一系列的政策措施"} -{"key": "BAC009S0764W0191", "wav": "./aishell/wav/test/S0764/BAC009S0764W0191.wav", "txt": "促使我国战略性新兴产业发展实现了良好开局"} -{"key": "BAC009S0764W0192", "wav": "./aishell/wav/test/S0764/BAC009S0764W0192.wav", "txt": "战略性新兴产业在各地呈现出集聚蓬勃发展的态势"} -{"key": "BAC009S0764W0193", "wav": "./aishell/wav/test/S0764/BAC009S0764W0193.wav", "txt": "先后出台的战略性新兴产业的政策措施主要有六项"} -{"key": "BAC009S0764W0194", "wav": "./aishell/wav/test/S0764/BAC009S0764W0194.wav", "txt": "在加强宏观引导方面"} -{"key": "BAC009S0764W0195", "wav": "./aishell/wav/test/S0764/BAC009S0764W0195.wav", "txt": "形成了系统完整的规划体系"} -{"key": "BAC009S0764W0196", "wav": "./aishell/wav/test/S0764/BAC009S0764W0196.wav", "txt": "明确了发展目标和重点任务"} -{"key": "BAC009S0764W0197", "wav": "./aishell/wav/test/S0764/BAC009S0764W0197.wav", "txt": "在加大要素支持方面"} -{"key": "BAC009S0764W0198", "wav": "./aishell/wav/test/S0764/BAC009S0764W0198.wav", "txt": "新批复了七只创投基金的设立方案"} -{"key": "BAC009S0764W0199", "wav": "./aishell/wav/test/S0764/BAC009S0764W0199.wav", "txt": "吸引社会资本七亿元"} -{"key": "BAC009S0764W0200", "wav": "./aishell/wav/test/S0764/BAC009S0764W0200.wav", "txt": "在加快体制改革方面"} -{"key": "BAC009S0764W0201", "wav": "./aishell/wav/test/S0764/BAC009S0764W0201.wav", "txt": "组织了第一批七个地区城市开展三网融合试点"} -{"key": "BAC009S0764W0202", "wav": "./aishell/wav/test/S0764/BAC009S0764W0202.wav", "txt": "第二批三网融合试点工作业已启动"} -{"key": "BAC009S0764W0203", "wav": "./aishell/wav/test/S0764/BAC009S0764W0203.wav", "txt": 
"制定了可再生能源电价附加补贴和配额交易方案"} -{"key": "BAC009S0764W0204", "wav": "./aishell/wav/test/S0764/BAC009S0764W0204.wav", "txt": "发改委双节期间重点关注电商促销行为"} -{"key": "BAC009S0764W0205", "wav": "./aishell/wav/test/S0764/BAC009S0764W0205.wav", "txt": "本报记者王颖春国家发改委近日发出通知"} -{"key": "BAC009S0764W0206", "wav": "./aishell/wav/test/S0764/BAC009S0764W0206.wav", "txt": "相关公司股票走势农产品"} -{"key": "BAC009S0764W0207", "wav": "./aishell/wav/test/S0764/BAC009S0764W0207.wav", "txt": "积极防范和妥善应对市场价格异常波动"} -{"key": "BAC009S0764W0208", "wav": "./aishell/wav/test/S0764/BAC009S0764W0208.wav", "txt": "维护正常的市场价格秩序"} -{"key": "BAC009S0764W0209", "wav": "./aishell/wav/test/S0764/BAC009S0764W0209.wav", "txt": "严厉打击春运期间违规上调票价价外收费等违法行为"} -{"key": "BAC009S0764W0210", "wav": "./aishell/wav/test/S0764/BAC009S0764W0210.wav", "txt": "切实降低农产品流通成本"} -{"key": "BAC009S0764W0211", "wav": "./aishell/wav/test/S0764/BAC009S0764W0211.wav", "txt": "要加强节日期间旅游市场价格监管"} -{"key": "BAC009S0764W0212", "wav": "./aishell/wav/test/S0764/BAC009S0764W0212.wav", "txt": "以及提供服务中的变相涨价或价格欺诈行为"} -{"key": "BAC009S0764W0213", "wav": "./aishell/wav/test/S0764/BAC009S0764W0213.wav", "txt": "构建良好的旅游市场环境"} -{"key": "BAC009S0764W0214", "wav": "./aishell/wav/test/S0764/BAC009S0764W0214.wav", "txt": "要继续开展商贸零售领域价格秩序整治"} -{"key": "BAC009S0764W0215", "wav": "./aishell/wav/test/S0764/BAC009S0764W0215.wav", "txt": "重点关注大型电子商务经营者的促销行为"} -{"key": "BAC009S0764W0216", "wav": "./aishell/wav/test/S0764/BAC009S0764W0216.wav", "txt": "规范降价打折返券赠送等促销行为"} -{"key": "BAC009S0764W0217", "wav": "./aishell/wav/test/S0764/BAC009S0764W0217.wav", "txt": "营造良好的消费环境"} -{"key": "BAC009S0764W0218", "wav": "./aishell/wav/test/S0764/BAC009S0764W0218.wav", "txt": "发改委多渠道筹集保障房建设资金到"} -{"key": "BAC009S0764W0219", "wav": "./aishell/wav/test/S0764/BAC009S0764W0219.wav", "txt": "要加大保障性安居工程建设资计划落实力度"} -{"key": "BAC009S0764W0220", "wav": "./aishell/wav/test/S0764/BAC009S0764W0220.wav", "txt": "二零一二年中央进一步加大了资金支持力度"} -{"key": "BAC009S0764W0221", "wav": "./aishell/wav/test/S0764/BAC009S0764W0221.wav", "txt": "地方政府也要加大资金筹措力度"} -{"key": "BAC009S0764W0222", "wav": "./aishell/wav/test/S0764/BAC009S0764W0222.wav", "txt": "加强建设资金统筹和组织实施工作"} -{"key": "BAC009S0764W0223", "wav": "./aishell/wav/test/S0764/BAC009S0764W0223.wav", "txt": "确保保障性安居工程年度建设任务的完成"} -{"key": "BAC009S0764W0224", "wav": "./aishell/wav/test/S0764/BAC009S0764W0224.wav", "txt": "充分发挥地方政府融资平台作用"} -{"key": "BAC009S0764W0225", "wav": "./aishell/wav/test/S0764/BAC009S0764W0225.wav", "txt": "鼓励引导社会力量参与建设保障性住房及配套设施"} -{"key": "BAC009S0764W0226", "wav": "./aishell/wav/test/S0764/BAC009S0764W0226.wav", "txt": "尽快将中央补助投资和省级配套资金分解下达到市县"} -{"key": "BAC009S0764W0227", "wav": "./aishell/wav/test/S0764/BAC009S0764W0227.wav", "txt": "二零一二保障房建设"} -{"key": "BAC009S0764W0228", "wav": "./aishell/wav/test/S0764/BAC009S0764W0228.wav", "txt": "七千万套保障房多少钢材"} -{"key": "BAC009S0764W0229", "wav": "./aishell/wav/test/S0764/BAC009S0764W0229.wav", "txt": "如何在五天内筹集到七万元"} -{"key": "BAC009S0764W0230", "wav": "./aishell/wav/test/S0764/BAC009S0764W0230.wav", "txt": "各地保障房建设的套数"} -{"key": "BAC009S0764W0231", "wav": "./aishell/wav/test/S0764/BAC009S0764W0231.wav", "txt": "保障房和水利建设概念股"} -{"key": "BAC009S0764W0232", "wav": "./aishell/wav/test/S0764/BAC009S0764W0232.wav", "txt": "发改委将订制战略避免境外投资恶性竞争到"} -{"key": "BAC009S0764W0233", "wav": "./aishell/wav/test/S0764/BAC009S0764W0233.wav", "txt": "新京报讯记者钟晶晶发改委昨日表示"} -{"key": "BAC009S0764W0234", "wav": "./aishell/wav/test/S0764/BAC009S0764W0234.wav", "txt": "政府将制订境外投资总体战略"} -{"key": "BAC009S0764W0235", "wav": 
"./aishell/wav/test/S0764/BAC009S0764W0235.wav", "txt": "避免中国企业境外恶性竞争"} -{"key": "BAC009S0764W0236", "wav": "./aishell/wav/test/S0764/BAC009S0764W0236.wav", "txt": "并鼓励企业在境外上市"} -{"key": "BAC009S0764W0237", "wav": "./aishell/wav/test/S0764/BAC009S0764W0237.wav", "txt": "加强海外信息监测为企业提供对外投资指导"} -{"key": "BAC009S0764W0238", "wav": "./aishell/wav/test/S0764/BAC009S0764W0238.wav", "txt": "形成一批具有国际竞争力的中国企业"} -{"key": "BAC009S0764W0239", "wav": "./aishell/wav/test/S0764/BAC009S0764W0239.wav", "txt": "十一五期间我国累计境外投资七千亿美元"} -{"key": "BAC009S0764W0240", "wav": "./aishell/wav/test/S0764/BAC009S0764W0240.wav", "txt": "年均增速百分之七"} -{"key": "BAC009S0764W0242", "wav": "./aishell/wav/test/S0764/BAC009S0764W0242.wav", "txt": "单项投资规模日益增大"} -{"key": "BAC009S0764W0243", "wav": "./aishell/wav/test/S0764/BAC009S0764W0243.wav", "txt": "几个亿美元的项目不断出现"} -{"key": "BAC009S0764W0244", "wav": "./aishell/wav/test/S0764/BAC009S0764W0244.wav", "txt": "规划对十二五的投资规模未做预测"} -{"key": "BAC009S0764W0245", "wav": "./aishell/wav/test/S0764/BAC009S0764W0245.wav", "txt": "但在鼓励企业走出去方面释出多个信号"} -{"key": "BAC009S0764W0246", "wav": "./aishell/wav/test/S0764/BAC009S0764W0246.wav", "txt": "鼓励传统纺织家电汽车等一般制造业外移"} -{"key": "BAC009S0764W0247", "wav": "./aishell/wav/test/S0764/BAC009S0764W0247.wav", "txt": "鼓励商业银行去境外开设分支机构"} -{"key": "BAC009S0764W0248", "wav": "./aishell/wav/test/S0764/BAC009S0764W0248.wav", "txt": "政府将完善境外投资统计制度"} -{"key": "BAC009S0764W0249", "wav": "./aishell/wav/test/S0764/BAC009S0764W0249.wav", "txt": "实行全口径统计和动态监测"} -{"key": "BAC009S0764W0250", "wav": "./aishell/wav/test/S0764/BAC009S0764W0250.wav", "txt": "确保境外企业和人员安全"} -{"key": "BAC009S0764W0251", "wav": "./aishell/wav/test/S0764/BAC009S0764W0251.wav", "txt": "但目前还存在服务架构不完善"} -{"key": "BAC009S0764W0252", "wav": "./aishell/wav/test/S0764/BAC009S0764W0252.wav", "txt": "缺乏对外投资长远规划等问题"} -{"key": "BAC009S0764W0253", "wav": "./aishell/wav/test/S0764/BAC009S0764W0253.wav", "txt": "可控是病毒武器最基本的要求"} -{"key": "BAC009S0764W0254", "wav": "./aishell/wav/test/S0764/BAC009S0764W0254.wav", "txt": "它必须尽量做到只针对敌对国家的计算机和网络"} -{"key": "BAC009S0764W0255", "wav": "./aishell/wav/test/S0764/BAC009S0764W0255.wav", "txt": "不能波及和影响其他无关国家甚至本国"} -{"key": "BAC009S0764W0256", "wav": "./aishell/wav/test/S0764/BAC009S0764W0256.wav", "txt": "具有精确的目标定位和识别能力"} -{"key": "BAC009S0764W0257", "wav": "./aishell/wav/test/S0764/BAC009S0764W0257.wav", "txt": "一旦战事结束或出于特殊需要可以实现自毁"} -{"key": "BAC009S0764W0258", "wav": "./aishell/wav/test/S0764/BAC009S0764W0258.wav", "txt": "病毒武器的传染性超强"} -{"key": "BAC009S0764W0259", "wav": "./aishell/wav/test/S0764/BAC009S0764W0259.wav", "txt": "它可以跨硬件平台传染"} -{"key": "BAC009S0764W0260", "wav": "./aishell/wav/test/S0764/BAC009S0764W0260.wav", "txt": "除了普通计算机以外"} -{"key": "BAC009S0764W0261", "wav": "./aishell/wav/test/S0764/BAC009S0764W0261.wav", "txt": "病毒武器的隐蔽性极佳"} -{"key": "BAC009S0764W0262", "wav": "./aishell/wav/test/S0764/BAC009S0764W0262.wav", "txt": "可以实现在敌国网络中的长期潜伏"} -{"key": "BAC009S0764W0263", "wav": "./aishell/wav/test/S0764/BAC009S0764W0263.wav", "txt": "是威力巨大的定时炸弹"} -{"key": "BAC009S0764W0264", "wav": "./aishell/wav/test/S0764/BAC009S0764W0264.wav", "txt": "用电脑进行战争比用核武器还有效"} -{"key": "BAC009S0764W0265", "wav": "./aishell/wav/test/S0764/BAC009S0764W0265.wav", "txt": "核武器并不能征服类似美国这样的国家"} -{"key": "BAC009S0764W0266", "wav": "./aishell/wav/test/S0764/BAC009S0764W0266.wav", "txt": "利用电脑病毒却可以在一秒钟内从银行盗走过亿美元"} -{"key": "BAC009S0764W0267", "wav": "./aishell/wav/test/S0764/BAC009S0764W0267.wav", "txt": "足够使美国失去战争基础因此彻底失败"} -{"key": "BAC009S0764W0268", "wav": 
"./aishell/wav/test/S0764/BAC009S0764W0268.wav", "txt": "但是病毒武器的出现"} -{"key": "BAC009S0764W0269", "wav": "./aishell/wav/test/S0764/BAC009S0764W0269.wav", "txt": "预示着未来战争模样将完全改变"} -{"key": "BAC009S0764W0270", "wav": "./aishell/wav/test/S0764/BAC009S0764W0270.wav", "txt": "病毒武器被认为是目前最具有代表性的网络武器"} -{"key": "BAC009S0764W0271", "wav": "./aishell/wav/test/S0764/BAC009S0764W0271.wav", "txt": "美国芯片行业兴起并购热潮搜狐科技"} -{"key": "BAC009S0764W0272", "wav": "./aishell/wav/test/S0764/BAC009S0764W0272.wav", "txt": "反映了芯片行业出现整合热潮"} -{"key": "BAC009S0764W0273", "wav": "./aishell/wav/test/S0764/BAC009S0764W0273.wav", "txt": "英特尔是世界头号芯片制造商"} -{"key": "BAC009S0764W0274", "wav": "./aishell/wav/test/S0764/BAC009S0764W0274.wav", "txt": "此次以一百六十七亿美元收购拓朗"} -{"key": "BAC009S0764W0275", "wav": "./aishell/wav/test/S0764/BAC009S0764W0275.wav", "txt": "将创下该公司成立四七年来最大收购交易的记录"} -{"key": "BAC009S0764W0276", "wav": "./aishell/wav/test/S0764/BAC009S0764W0276.wav", "txt": "正在寻求扩大移动市场份额"} -{"key": "BAC009S0764W0277", "wav": "./aishell/wav/test/S0764/BAC009S0764W0277.wav", "txt": "拓朗的主打产品是现场可编程门阵列芯片"} -{"key": "BAC009S0764W0278", "wav": "./aishell/wav/test/S0764/BAC009S0764W0278.wav", "txt": "可供客户为特定任务重新编程"} -{"key": "BAC009S0764W0279", "wav": "./aishell/wav/test/S0764/BAC009S0764W0279.wav", "txt": "应用于汽车医疗等行业"} -{"key": "BAC009S0764W0280", "wav": "./aishell/wav/test/S0764/BAC009S0764W0280.wav", "txt": "英特尔首席执行官布赖恩克尔扎尼奇在一份声明中说"} -{"key": "BAC009S0764W0281", "wav": "./aishell/wav/test/S0764/BAC009S0764W0281.wav", "txt": "合并拓朗之后将推出新的产品"} -{"key": "BAC009S0764W0282", "wav": "./aishell/wav/test/S0764/BAC009S0764W0282.wav", "txt": "满足数据中心和物联网细分市场的用户需求"} -{"key": "BAC009S0764W0283", "wav": "./aishell/wav/test/S0764/BAC009S0764W0283.wav", "txt": "形成高度定制化的集成产品"} -{"key": "BAC009S0764W0284", "wav": "./aishell/wav/test/S0764/BAC009S0764W0284.wav", "txt": "微芯片科技公司表示"} -{"key": "BAC009S0764W0285", "wav": "./aishell/wav/test/S0764/BAC009S0764W0285.wav", "txt": "两家公司是联网汽车的主要芯片供应商"} -{"key": "BAC009S0764W0287", "wav": "./aishell/wav/test/S0764/BAC009S0764W0287.wav", "txt": "今年芯片行业并购交易额在八百亿美元以上"} -{"key": "BAC009S0764W0288", "wav": "./aishell/wav/test/S0764/BAC009S0764W0288.wav", "txt": "半导体行业的大公司正在寻求通过并购"} -{"key": "BAC009S0764W0289", "wav": "./aishell/wav/test/S0764/BAC009S0764W0289.wav", "txt": "扩大它们在新的芯片市场的份额"} -{"key": "BAC009S0764W0290", "wav": "./aishell/wav/test/S0764/BAC009S0764W0290.wav", "txt": "随着个人计算机芯片的需求放慢"} -{"key": "BAC009S0764W0291", "wav": "./aishell/wav/test/S0764/BAC009S0764W0291.wav", "txt": "英特尔需要找到新的增长点"} -{"key": "BAC009S0764W0292", "wav": "./aishell/wav/test/S0764/BAC009S0764W0292.wav", "txt": "高德纳咨询公司分析师马克黄说"} -{"key": "BAC009S0764W0293", "wav": "./aishell/wav/test/S0764/BAC009S0764W0293.wav", "txt": "如今则猛增到一两亿美元"} -{"key": "BAC009S0764W0294", "wav": "./aishell/wav/test/S0764/BAC009S0764W0294.wav", "txt": "解决小小芯片上的连线和物理问题需要大量昂贵设备"} -{"key": "BAC009S0764W0295", "wav": "./aishell/wav/test/S0764/BAC009S0764W0295.wav", "txt": "芯片行业的并购风体现了整个科技行业的一种趋势"} -{"key": "BAC009S0764W0296", "wav": "./aishell/wav/test/S0764/BAC009S0764W0296.wav", "txt": "即一些财大气粗的科技公司自己不创新"} -{"key": "BAC009S0764W0297", "wav": "./aishell/wav/test/S0764/BAC009S0764W0297.wav", "txt": "而是寻求收购规模较小更为灵活的公司"} -{"key": "BAC009S0764W0298", "wav": "./aishell/wav/test/S0764/BAC009S0764W0298.wav", "txt": "反映了芯片行业出现整合热"} -{"key": "BAC009S0764W0299", "wav": "./aishell/wav/test/S0764/BAC009S0764W0299.wav", "txt": "因为难以忍受股价长期被低估"} -{"key": "BAC009S0764W0300", "wav": "./aishell/wav/test/S0764/BAC009S0764W0300.wav", "txt": "中国游戏公司纷纷忙着退市"} -{"key": "BAC009S0764W0301", 
"wav": "./aishell/wav/test/S0764/BAC009S0764W0301.wav", "txt": "巨人网络盛大游戏以及完美世界均已选择了私有化"} -{"key": "BAC009S0764W0302", "wav": "./aishell/wav/test/S0764/BAC009S0764W0302.wav", "txt": "是这类公司在美国市场估值长期受低估"} -{"key": "BAC009S0764W0303", "wav": "./aishell/wav/test/S0764/BAC009S0764W0303.wav", "txt": "北京商报讯记者王晔君日前"} -{"key": "BAC009S0764W0304", "wav": "./aishell/wav/test/S0764/BAC009S0764W0304.wav", "txt": "裁员二千人是由于销售模式发生改变"} -{"key": "BAC009S0764W0305", "wav": "./aishell/wav/test/S0764/BAC009S0764W0305.wav", "txt": "公司已将原有的直销模式改为经销模式"} -{"key": "BAC009S0764W0306", "wav": "./aishell/wav/test/S0764/BAC009S0764W0306.wav", "txt": "因此需要的人员大幅下降"} -{"key": "BAC009S0764W0307", "wav": "./aishell/wav/test/S0764/BAC009S0764W0307.wav", "txt": "由于去年底制定的销售战略是直销模式"} -{"key": "BAC009S0764W0308", "wav": "./aishell/wav/test/S0764/BAC009S0764W0308.wav", "txt": "所以今年上半年公司在全国各地的员工人数大幅增加"} -{"key": "BAC009S0764W0309", "wav": "./aishell/wav/test/S0764/BAC009S0764W0309.wav", "txt": "由于近期销售模式的调整"} -{"key": "BAC009S0764W0310", "wav": "./aishell/wav/test/S0764/BAC009S0764W0310.wav", "txt": "即由直销模式转变为经销模式"} -{"key": "BAC009S0764W0311", "wav": "./aishell/wav/test/S0764/BAC009S0764W0311.wav", "txt": "公司将更多地依靠经销商进行销售"} -{"key": "BAC009S0764W0312", "wav": "./aishell/wav/test/S0764/BAC009S0764W0312.wav", "txt": "正是由于销售模式的改变"} -{"key": "BAC009S0764W0313", "wav": "./aishell/wav/test/S0764/BAC009S0764W0313.wav", "txt": "汉能直接销售人员大幅度减少"} -{"key": "BAC009S0764W0314", "wav": "./aishell/wav/test/S0764/BAC009S0764W0314.wav", "txt": "汉能发布中期财报披露"} -{"key": "BAC009S0764W0315", "wav": "./aishell/wav/test/S0764/BAC009S0764W0315.wav", "txt": "上半年营业收入二十一点零八亿港元"} -{"key": "BAC009S0764W0316", "wav": "./aishell/wav/test/S0764/BAC009S0764W0316.wav", "txt": "同比减少百分之三十四毛利十四点六一亿港元"} -{"key": "BAC009S0764W0317", "wav": "./aishell/wav/test/S0764/BAC009S0764W0317.wav", "txt": "同比减少约百分之四十六亏损额为五百九十三二万港元"} -{"key": "BAC009S0764W0318", "wav": "./aishell/wav/test/S0764/BAC009S0764W0318.wav", "txt": "而去年同期盈利十六点七六亿港元"} -{"key": "BAC009S0764W0319", "wav": "./aishell/wav/test/S0764/BAC009S0764W0319.wav", "txt": "是自二零一一年借壳上市以来首次出现亏损"} -{"key": "BAC009S0764W0320", "wav": "./aishell/wav/test/S0764/BAC009S0764W0320.wav", "txt": "同时公布了重组计划"} -{"key": "BAC009S0764W0321", "wav": "./aishell/wav/test/S0764/BAC009S0764W0321.wav", "txt": "撤销旗下高端产业集团和产品开发集团"} -{"key": "BAC009S0764W0322", "wav": "./aishell/wav/test/S0764/BAC009S0764W0322.wav", "txt": "并将从总部事业部及各区域公司共裁员二千人"} -{"key": "BAC009S0764W0323", "wav": "./aishell/wav/test/S0764/BAC009S0764W0323.wav", "txt": "汉能曾计划今年底前将这一数字提高到三百家"} -{"key": "BAC009S0764W0324", "wav": "./aishell/wav/test/S0764/BAC009S0764W0324.wav", "txt": "汉能上半年业绩出现大幅下滑"} -{"key": "BAC009S0764W0325", "wav": "./aishell/wav/test/S0764/BAC009S0764W0325.wav", "txt": "当务之急是扭转业绩"} -{"key": "BAC009S0764W0326", "wav": "./aishell/wav/test/S0764/BAC009S0764W0326.wav", "txt": "而由直销模式改为经销模式"} -{"key": "BAC009S0764W0327", "wav": "./aishell/wav/test/S0764/BAC009S0764W0327.wav", "txt": "可以缩减很多人力成本"} -{"key": "BAC009S0764W0328", "wav": "./aishell/wav/test/S0764/BAC009S0764W0328.wav", "txt": "有利于降低公司运营成本"} -{"key": "BAC009S0764W0329", "wav": "./aishell/wav/test/S0764/BAC009S0764W0329.wav", "txt": "但是由于直销改为经销"} -{"key": "BAC009S0764W0330", "wav": "./aishell/wav/test/S0764/BAC009S0764W0330.wav", "txt": "汉能对自身产品的议价能力推广力都将减弱"} -{"key": "BAC009S0764W0331", "wav": "./aishell/wav/test/S0764/BAC009S0764W0331.wav", "txt": "公司已经暂停或终止部分关联交易项目"} -{"key": "BAC009S0764W0332", "wav": "./aishell/wav/test/S0764/BAC009S0764W0332.wav", "txt": "已经花费了一定的资源和成本"} -{"key": "BAC009S0764W0333", 
"wav": "./aishell/wav/test/S0764/BAC009S0764W0333.wav", "txt": "因此暂停或终止这些项目"} -{"key": "BAC009S0764W0334", "wav": "./aishell/wav/test/S0764/BAC009S0764W0334.wav", "txt": "对本公司的上半年业绩带来了负面影响"} -{"key": "BAC009S0764W0335", "wav": "./aishell/wav/test/S0764/BAC009S0764W0335.wav", "txt": "北京商报讯记者王晔君日前"} -{"key": "BAC009S0764W0336", "wav": "./aishell/wav/test/S0764/BAC009S0764W0336.wav", "txt": "裁员两千人是由"} -{"key": "BAC009S0764W0338", "wav": "./aishell/wav/test/S0764/BAC009S0764W0338.wav", "txt": "他们在训练和比赛过程之中的速度也会逐渐慢下来"} -{"key": "BAC009S0764W0339", "wav": "./aishell/wav/test/S0764/BAC009S0764W0339.wav", "txt": "但是根据国外科学家最新的研究结果"} -{"key": "BAC009S0764W0340", "wav": "./aishell/wav/test/S0764/BAC009S0764W0340.wav", "txt": "通过对脚踝和小腿等部位的强化"} -{"key": "BAC009S0764W0341", "wav": "./aishell/wav/test/S0764/BAC009S0764W0341.wav", "txt": "可以有效的抵消年龄所带来的速度劣势"} -{"key": "BAC009S0764W0342", "wav": "./aishell/wav/test/S0764/BAC009S0764W0342.wav", "txt": "使上年纪的跑者也能保持较快的速度"} -{"key": "BAC009S0764W0343", "wav": "./aishell/wav/test/S0764/BAC009S0764W0343.wav", "txt": "美国东卡罗莱纳大学和维克森林大学的研究者认为"} -{"key": "BAC009S0764W0344", "wav": "./aishell/wav/test/S0764/BAC009S0764W0344.wav", "txt": "脚踝和小腿的能力变弱"} -{"key": "BAC009S0764W0345", "wav": "./aishell/wav/test/S0764/BAC009S0764W0345.wav", "txt": "如果能够加强这方面的锻炼"} -{"key": "BAC009S0764W0346", "wav": "./aishell/wav/test/S0764/BAC009S0764W0346.wav", "txt": "他们会拥有较快的速度"} -{"key": "BAC009S0764W0347", "wav": "./aishell/wav/test/S0764/BAC009S0764W0347.wav", "txt": "研究者们选取了一些年龄大的跑者作为研究对象"} -{"key": "BAC009S0764W0348", "wav": "./aishell/wav/test/S0764/BAC009S0764W0348.wav", "txt": "并让年轻跑者作为参照"} -{"key": "BAC009S0764W0349", "wav": "./aishell/wav/test/S0764/BAC009S0764W0349.wav", "txt": "他们的步频大致相同"} -{"key": "BAC009S0764W0350", "wav": "./aishell/wav/test/S0764/BAC009S0764W0350.wav", "txt": "年龄大跑者的步幅明显短于年轻人"} -{"key": "BAC009S0764W0351", "wav": "./aishell/wav/test/S0764/BAC009S0764W0351.wav", "txt": "使得他们的速度变慢了"} -{"key": "BAC009S0764W0352", "wav": "./aishell/wav/test/S0764/BAC009S0764W0352.wav", "txt": "研究者们选取了十九位跑者"} -{"key": "BAC009S0764W0353", "wav": "./aishell/wav/test/S0764/BAC009S0764W0353.wav", "txt": "年龄从二十三岁到五十九岁"} -{"key": "BAC009S0764W0354", "wav": "./aishell/wav/test/S0764/BAC009S0764W0354.wav", "txt": "身体质量指数平均为二十三点四"} -{"key": "BAC009S0764W0355", "wav": "./aishell/wav/test/S0764/BAC009S0764W0355.wav", "txt": "身材偏瘦而且比较健康"} -{"key": "BAC009S0764W0356", "wav": "./aishell/wav/test/S0764/BAC009S0764W0356.wav", "txt": "跑者从二十多岁到五十九岁"} -{"key": "BAC009S0764W0357", "wav": "./aishell/wav/test/S0764/BAC009S0764W0357.wav", "txt": "步幅长度和跑步速度大约下降了百分之二十"} -{"key": "BAC009S0764W0358", "wav": "./aishell/wav/test/S0764/BAC009S0764W0358.wav", "txt": "脚踝的能力损失了大约百分之四十八"} -{"key": "BAC009S0764W0359", "wav": "./aishell/wav/test/S0764/BAC009S0764W0359.wav", "txt": "按照平时训练的速度进行跑步"} -{"key": "BAC009S0764W0360", "wav": "./aishell/wav/test/S0764/BAC009S0764W0360.wav", "txt": "二十岁的跑者平均每英里耗时八分十八秒"} -{"key": "BAC009S0764W0361", "wav": "./aishell/wav/test/S0764/BAC009S0764W0361.wav", "txt": "而六十岁的跑者每英里耗时十分十八秒"} -{"key": "BAC009S0764W0362", "wav": "./aishell/wav/test/S0764/BAC009S0764W0362.wav", "txt": "已经有过不少关于这方面的研究"} -{"key": "BAC009S0764W0363", "wav": "./aishell/wav/test/S0764/BAC009S0764W0363.wav", "txt": "但是研究对象都是年轻跑者和年老跑者"} -{"key": "BAC009S0764W0364", "wav": "./aishell/wav/test/S0764/BAC009S0764W0364.wav", "txt": "年龄段的复盖范围比较窄"} -{"key": "BAC009S0764W0365", "wav": "./aishell/wav/test/S0764/BAC009S0764W0365.wav", "txt": "最令德维塔感到不可思议的是"} -{"key": "BAC009S0764W0366", "wav": 
"./aishell/wav/test/S0764/BAC009S0764W0366.wav", "txt": "跑者们随着年龄的增长"} -{"key": "BAC009S0764W0367", "wav": "./aishell/wav/test/S0764/BAC009S0764W0367.wav", "txt": "速度呈现出直线下降"} -{"key": "BAC009S0764W0368", "wav": "./aishell/wav/test/S0764/BAC009S0764W0368.wav", "txt": "速度下降的更加明显"} -{"key": "BAC009S0764W0369", "wav": "./aishell/wav/test/S0764/BAC009S0764W0369.wav", "txt": "很多六七十岁的跑者看到这个研究结果时"} -{"key": "BAC009S0764W0370", "wav": "./aishell/wav/test/S0764/BAC009S0764W0370.wav", "txt": "意思是他们比较认同这个结果"} -{"key": "BAC009S0764W0371", "wav": "./aishell/wav/test/S0764/BAC009S0764W0371.wav", "txt": "研究者们希望年龄大的跑者能够注意脚踝的锻炼"} -{"key": "BAC009S0764W0372", "wav": "./aishell/wav/test/S0764/BAC009S0764W0372.wav", "txt": "但德维塔觉得归根到底还是小腿肌肉的问题"} -{"key": "BAC009S0764W0373", "wav": "./aishell/wav/test/S0764/BAC009S0764W0373.wav", "txt": "尤其是比目鱼肌和腓肠肌"} -{"key": "BAC009S0764W0374", "wav": "./aishell/wav/test/S0764/BAC009S0764W0374.wav", "txt": "这才是产生跑步力量的根源"} -{"key": "BAC009S0764W0375", "wav": "./aishell/wav/test/S0764/BAC009S0764W0375.wav", "txt": "这两种方式的结合能够有效锻炼小腿肌肉"} -{"key": "BAC009S0764W0376", "wav": "./aishell/wav/test/S0764/BAC009S0764W0376.wav", "txt": "对于高年龄跑者来说"} -{"key": "BAC009S0764W0377", "wav": "./aishell/wav/test/S0764/BAC009S0764W0377.wav", "txt": "开始一项新的锻炼方式具有一定的风险性"} -{"key": "BAC009S0764W0378", "wav": "./aishell/wav/test/S0764/BAC009S0764W0378.wav", "txt": "想通过训练提升脚踝和小腿的能力"} -{"key": "BAC009S0764W0379", "wav": "./aishell/wav/test/S0764/BAC009S0764W0379.wav", "txt": "这些常年坚持跑步的人身体质量指数偏低"} -{"key": "BAC009S0764W0380", "wav": "./aishell/wav/test/S0764/BAC009S0764W0380.wav", "txt": "长期跑步可能是一种不需要药物来保持身材的有效方式"} -{"key": "BAC009S0764W0381", "wav": "./aishell/wav/test/S0764/BAC009S0764W0381.wav", "txt": "在二零二二年冬季奥运会的竞选当中"} -{"key": "BAC009S0764W0382", "wav": "./aishell/wav/test/S0764/BAC009S0764W0382.wav", "txt": "北京和张家口最终击败了强大的对手阿拉木图"} -{"key": "BAC009S0764W0383", "wav": "./aishell/wav/test/S0764/BAC009S0764W0383.wav", "txt": "顺利获得了冬奥会的主办权"} -{"key": "BAC009S0764W0384", "wav": "./aishell/wav/test/S0764/BAC009S0764W0384.wav", "txt": "这也是这项冰雪顶级盛事首次来到中国"} -{"key": "BAC009S0764W0385", "wav": "./aishell/wav/test/S0764/BAC009S0764W0385.wav", "txt": "在此次申办冬奥会的过程中"} -{"key": "BAC009S0764W0386", "wav": "./aishell/wav/test/S0764/BAC009S0764W0386.wav", "txt": "我们看到了自身强大的综合实力"} -{"key": "BAC009S0764W0387", "wav": "./aishell/wav/test/S0764/BAC009S0764W0387.wav", "txt": "也看到了在冰雪运动综合实力上的欠缺和不足"} -{"key": "BAC009S0764W0388", "wav": "./aishell/wav/test/S0764/BAC009S0764W0388.wav", "txt": "经历过夏奥会的沉淀"} -{"key": "BAC009S0764W0389", "wav": "./aishell/wav/test/S0764/BAC009S0764W0389.wav", "txt": "加上近几年承办诸多国际性赛事的经验积累"} -{"key": "BAC009S0764W0390", "wav": "./aishell/wav/test/S0764/BAC009S0764W0390.wav", "txt": "在这场亚洲国家锁定胜局的申办博弈中"} -{"key": "BAC009S0764W0391", "wav": "./aishell/wav/test/S0764/BAC009S0764W0391.wav", "txt": "北申办此次冬奥会的价值要远远超过承办本身"} -{"key": "BAC009S0764W0392", "wav": "./aishell/wav/test/S0764/BAC009S0764W0392.wav", "txt": "对于北京申办冬奥会的最终结果"} -{"key": "BAC009S0764W0393", "wav": "./aishell/wav/test/S0764/BAC009S0764W0393.wav", "txt": "我们也应该抱着更加长远和开阔的视角来看待"} -{"key": "BAC009S0764W0394", "wav": "./aishell/wav/test/S0764/BAC009S0764W0394.wav", "txt": "北京申办冬奥强大实力成获胜武器"} -{"key": "BAC009S0764W0395", "wav": "./aishell/wav/test/S0764/BAC009S0764W0395.wav", "txt": "此次北京联手张家口申办冬奥会"} -{"key": "BAC009S0764W0396", "wav": "./aishell/wav/test/S0764/BAC009S0764W0396.wav", "txt": "在与阿拉木图的直接博弈中"} -{"key": "BAC009S0764W0397", "wav": "./aishell/wav/test/S0764/BAC009S0764W0397.wav", "txt": "财政能力和硬件设施的优势是我们最终取胜的关键原因"} -{"key": 
"BAC009S0764W0398", "wav": "./aishell/wav/test/S0764/BAC009S0764W0398.wav", "txt": "而二零零八年举办夏季奥运会所留下的宝贵遗产"} -{"key": "BAC009S0764W0399", "wav": "./aishell/wav/test/S0764/BAC009S0764W0399.wav", "txt": "也是最终打动国家奥运委会评审团的法宝"} -{"key": "BAC009S0764W0400", "wav": "./aishell/wav/test/S0764/BAC009S0764W0400.wav", "txt": "从经济实力和基础设施建设上看"} -{"key": "BAC009S0764W0401", "wav": "./aishell/wav/test/S0764/BAC009S0764W0401.wav", "txt": "北京和张家口要占据着相当明显的优势"} -{"key": "BAC009S0764W0402", "wav": "./aishell/wav/test/S0764/BAC009S0764W0402.wav", "txt": "北京和张家口两地的生产总值是二万二千七百三十点八亿元"} -{"key": "BAC009S0764W0403", "wav": "./aishell/wav/test/S0764/BAC009S0764W0403.wav", "txt": "而阿拉木图仅为四百亿美元"} -{"key": "BAC009S0764W0404", "wav": "./aishell/wav/test/S0764/BAC009S0764W0404.wav", "txt": "影片将在二零一五年一月在慕尼黑正式开机"} -{"key": "BAC009S0764W0405", "wav": "./aishell/wav/test/S0764/BAC009S0764W0405.wav", "txt": "好莱坞当红明星之前曾被盛传将扮演斯诺登"} -{"key": "BAC009S0764W0406", "wav": "./aishell/wav/test/S0764/BAC009S0764W0406.wav", "txt": "好莱坞当红明星之前曾被盛传将扮演斯诺登"} -{"key": "BAC009S0764W0407", "wav": "./aishell/wav/test/S0764/BAC009S0764W0407.wav", "txt": "他确实拿下了这个角色"} -{"key": "BAC009S0764W0408", "wav": "./aishell/wav/test/S0764/BAC009S0764W0408.wav", "txt": "对男友有什么条件"} -{"key": "BAC009S0764W0409", "wav": "./aishell/wav/test/S0764/BAC009S0764W0409.wav", "txt": "她表示最重要的就是诚恳"} -{"key": "BAC009S0764W0410", "wav": "./aishell/wav/test/S0764/BAC009S0764W0410.wav", "txt": "对于姊弟恋也不排斥"} -{"key": "BAC009S0764W0411", "wav": "./aishell/wav/test/S0764/BAC009S0764W0411.wav", "txt": "搜狐娱乐讯七月十日消息"} -{"key": "BAC009S0764W0412", "wav": "./aishell/wav/test/S0764/BAC009S0764W0412.wav", "txt": "据台湾媒体报道"} -{"key": "BAC009S0764W0413", "wav": "./aishell/wav/test/S0764/BAC009S0764W0413.wav", "txt": "许玮甯最近到法国工作"} -{"key": "BAC009S0764W0414", "wav": "./aishell/wav/test/S0764/BAC009S0764W0414.wav", "txt": "仍在个人社群网站频繁更新动态"} -{"key": "BAC009S0764W0415", "wav": "./aishell/wav/test/S0764/BAC009S0764W0415.wav", "txt": "甚至被外界揣测是因为和阮经天分手后所刺"} -{"key": "BAC009S0764W0416", "wav": "./aishell/wav/test/S0764/BAC009S0764W0416.wav", "txt": "她近日终于在受访时松口公开正解"} -{"key": "BAC009S0764W0417", "wav": "./aishell/wav/test/S0764/BAC009S0764W0417.wav", "txt": "背后意义竟只是不要忘记自己从哪里来"} -{"key": "BAC009S0764W0418", "wav": "./aishell/wav/test/S0764/BAC009S0764W0418.wav", "txt": "搜狐娱乐讯据台湾媒体报道"} -{"key": "BAC009S0764W0419", "wav": "./aishell/wav/test/S0764/BAC009S0764W0419.wav", "txt": "阮经天和许玮甯交往八年屡传婚讯"} -{"key": "BAC009S0764W0420", "wav": "./aishell/wav/test/S0764/BAC009S0764W0420.wav", "txt": "今年三月底惊爆分手"} -{"key": "BAC009S0764W0421", "wav": "./aishell/wav/test/S0764/BAC009S0764W0421.wav", "txt": "当时女方坦承已分居"} -{"key": "BAC009S0764W0422", "wav": "./aishell/wav/test/S0764/BAC009S0764W0422.wav", "txt": "但小天坚持玮甯依然是我的女人"} -{"key": "BAC009S0764W0423", "wav": "./aishell/wav/test/S0764/BAC009S0764W0423.wav", "txt": "有网友日前目击他俩在大稻埕分食炒饭"} -{"key": "BAC009S0764W0424", "wav": "./aishell/wav/test/S0764/BAC009S0764W0424.wav", "txt": "昨天她出席保养品活动"} -{"key": "BAC009S0764W0425", "wav": "./aishell/wav/test/S0764/BAC009S0764W0425.wav", "txt": "松口仍有联络"} -{"key": "BAC009S0764W0426", "wav": "./aishell/wav/test/S0764/BAC009S0764W0426.wav", "txt": "但称自己单身"} -{"key": "BAC009S0764W0427", "wav": "./aishell/wav/test/S0764/BAC009S0764W0427.wav", "txt": "恰巧昨日记者碰见阮经天出门倒垃圾"} -{"key": "BAC009S0764W0428", "wav": "./aishell/wav/test/S0764/BAC009S0764W0428.wav", "txt": "对许玮甯单身说语气落寞表示我没有什么看法"} -{"key": "BAC009S0764W0429", "wav": "./aishell/wav/test/S0764/BAC009S0764W0429.wav", "txt": "搜狐娱乐讯男方和小三还藕断丝连"} -{"key": "BAC009S0764W0430", "wav": 
"./aishell/wav/test/S0764/BAC009S0764W0430.wav", "txt": "因而痛斩情丝她除了拥有模特儿火辣身材"} -{"key": "BAC009S0764W0431", "wav": "./aishell/wav/test/S0764/BAC009S0764W0431.wav", "txt": "快报讯记者赵丹丹快递实名制时代终于到来了"} -{"key": "BAC009S0764W0432", "wav": "./aishell/wav/test/S0764/BAC009S0764W0432.wav", "txt": "按照国家邮政总局统一部署"} -{"key": "BAC009S0764W0433", "wav": "./aishell/wav/test/S0764/BAC009S0764W0433.wav", "txt": "从下月起全面实施快递实名制登记"} -{"key": "BAC009S0764W0434", "wav": "./aishell/wav/test/S0764/BAC009S0764W0434.wav", "txt": "现代快报记者从省邮政管理局了解到"} -{"key": "BAC009S0764W0435", "wav": "./aishell/wav/test/S0764/BAC009S0764W0435.wav", "txt": "江苏快递实名制登记动真格"} -{"key": "BAC009S0764W0436", "wav": "./aishell/wav/test/S0764/BAC009S0764W0436.wav", "txt": "本周内动员部署全省九零零多家快递企业按要求执行"} -{"key": "BAC009S0764W0437", "wav": "./aishell/wav/test/S0764/BAC009S0764W0437.wav", "txt": "个人寄快递必须登记有效的身份证件"} -{"key": "BAC009S0764W0438", "wav": "./aishell/wav/test/S0764/BAC009S0764W0438.wav", "txt": "本山传媒回应赵本山将有新作品没听说"} -{"key": "BAC009S0764W0439", "wav": "./aishell/wav/test/S0764/BAC009S0764W0439.wav", "txt": "不仅赢得观众好口碑"} -{"key": "BAC009S0764W0440", "wav": "./aishell/wav/test/S0764/BAC009S0764W0440.wav", "txt": "特别是师父赵本山也公开出面为大鹏点赞"} -{"key": "BAC009S0764W0441", "wav": "./aishell/wav/test/S0764/BAC009S0764W0441.wav", "txt": "本月二八日超级月亮和最圆中秋月喜相逢"} -{"key": "BAC009S0764W0442", "wav": "./aishell/wav/test/S0764/BAC009S0764W0442.wav", "txt": "月亮和地球之间的平均距离仅为三五六八九六万公里"} -{"key": "BAC009S0764W0443", "wav": "./aishell/wav/test/S0764/BAC009S0764W0443.wav", "txt": "月亮看起来会比往常大"} -{"key": "BAC009S0764W0444", "wav": "./aishell/wav/test/S0764/BAC009S0764W0444.wav", "txt": "也就是我们常说的超级月亮"} -{"key": "BAC009S0764W0445", "wav": "./aishell/wav/test/S0764/BAC009S0764W0445.wav", "txt": "这一天还将上演月全食"} -{"key": "BAC009S0764W0446", "wav": "./aishell/wav/test/S0764/BAC009S0764W0446.wav", "txt": "超级月亮碰上月全食"} -{"key": "BAC009S0764W0447", "wav": "./aishell/wav/test/S0764/BAC009S0764W0447.wav", "txt": "错过了这次就要到二零三三年了"} -{"key": "BAC009S0764W0448", "wav": "./aishell/wav/test/S0764/BAC009S0764W0448.wav", "txt": "本月下旬天宇将现五星连线奇观"} -{"key": "BAC009S0764W0449", "wav": "./aishell/wav/test/S0764/BAC009S0764W0449.wav", "txt": "中科院紫金山天文台公布了一零月天象"} -{"key": "BAC009S0764W0450", "wav": "./aishell/wav/test/S0764/BAC009S0764W0450.wav", "txt": "现代快报记者注意到"} -{"key": "BAC009S0764W0451", "wav": "./aishell/wav/test/S0764/BAC009S0764W0451.wav", "txt": "天龙座流星雨猎户座流星雨"} -{"key": "BAC009S0764W0452", "wav": "./aishell/wav/test/S0764/BAC009S0764W0452.wav", "txt": "让一零月的天空有点甜蜜蜜的味道"} -{"key": "BAC009S0764W0453", "wav": "./aishell/wav/test/S0764/BAC009S0764W0453.wav", "txt": "水星金星也将迎来观测良机"} -{"key": "BAC009S0764W0454", "wav": "./aishell/wav/test/S0764/BAC009S0764W0454.wav", "txt": "现代快报记者胡玉梅"} -{"key": "BAC009S0764W0455", "wav": "./aishell/wav/test/S0764/BAC009S0764W0455.wav", "txt": "本月中下旬小行星撞地球"} -{"key": "BAC009S0764W0456", "wav": "./aishell/wav/test/S0764/BAC009S0764W0456.wav", "txt": "专家没有科学依据"} -{"key": "BAC009S0764W0457", "wav": "./aishell/wav/test/S0764/BAC009S0764W0457.wav", "txt": "京华时报讯记者任珊记者从北京市教育考试院获悉"} -{"key": "BAC009S0764W0458", "wav": "./aishell/wav/test/S0764/BAC009S0764W0458.wav", "txt": "高招本科二批今天开始进行征集志愿录取"} -{"key": "BAC009S0764W0459", "wav": "./aishell/wav/test/S0764/BAC009S0764W0459.wav", "txt": "一八一所院校将补录一九四九人"} -{"key": "BAC009S0764W0460", "wav": "./aishell/wav/test/S0764/BAC009S0764W0460.wav", "txt": "朱军系阅兵世家曾参与一九八四年阅兵军乐演奏"} -{"key": "BAC009S0764W0461", "wav": "./aishell/wav/test/S0764/BAC009S0764W0461.wav", "txt": "朱圣祎爆王思聪女朋友被诉法官送达起诉书遇阻"} -{"key": "BAC009S0764W0462", "wav": 
"./aishell/wav/test/S0764/BAC009S0764W0462.wav", "txt": "王思聪将朱圣祎诉至北京朝阳法院"} -{"key": "BAC009S0764W0463", "wav": "./aishell/wav/test/S0764/BAC009S0764W0463.wav", "txt": "要求停止侵权公开道歉赔偿精神损失一元"} -{"key": "BAC009S0764W0464", "wav": "./aishell/wav/test/S0764/BAC009S0764W0464.wav", "txt": "法官送达起诉书副本等应诉材料遇阻"} -{"key": "BAC009S0764W0465", "wav": "./aishell/wav/test/S0764/BAC009S0764W0465.wav", "txt": "朱茵说紫霞仙子谁来演不是我可以决定的"} -{"key": "BAC009S0764W0466", "wav": "./aishell/wav/test/S0764/BAC009S0764W0466.wav", "txt": "资料图片在湖南卫视上周开播的偶像来了中"} -{"key": "BAC009S0764W0467", "wav": "./aishell/wav/test/S0764/BAC009S0764W0467.wav", "txt": "永远的紫霞仙子朱茵的亮相引起粉丝的热捧"} -{"key": "BAC009S0764W0468", "wav": "./aishell/wav/test/S0764/BAC009S0764W0468.wav", "txt": "永远的紫霞仙子朱茵的亮相引起粉丝的热捧"} -{"key": "BAC009S0764W0469", "wav": "./aishell/wav/test/S0764/BAC009S0764W0469.wav", "txt": "来自全球四七个国家和地区的二零零零多名选手参赛"} -{"key": "BAC009S0764W0470", "wav": "./aishell/wav/test/S0764/BAC009S0764W0470.wav", "txt": "机器人服务员现身火锅店顾客直呼女神"} -{"key": "BAC009S0764W0471", "wav": "./aishell/wav/test/S0764/BAC009S0764W0471.wav", "txt": "女神机器人在火锅店内工作"} -{"key": "BAC009S0764W0472", "wav": "./aishell/wav/test/S0764/BAC009S0764W0472.wav", "txt": "机场严查匿打火机过安检放在鞋子里算藏匿"} -{"key": "BAC009S0764W0473", "wav": "./aishell/wav/test/S0764/BAC009S0764W0473.wav", "txt": "本报讯记者杨柳昨天"} -{"key": "BAC009S0764W0474", "wav": "./aishell/wav/test/S0764/BAC009S0764W0474.wav", "txt": "记者从首都机场公安分局航站区派出所获悉"} -{"key": "BAC009S0764W0475", "wav": "./aishell/wav/test/S0764/BAC009S0764W0475.wav", "txt": "首都机场公安分局航站区派出所联合驻场安检人员"} -{"key": "BAC009S0764W0476", "wav": "./aishell/wav/test/S0764/BAC009S0764W0476.wav", "txt": "坚持违法零容忍和高限处理的执法态度"} -{"key": "BAC009S0764W0477", "wav": "./aishell/wav/test/S0764/BAC009S0764W0477.wav", "txt": "严格搜集和固定相关证据"} -{"key": "BAC009S0764W0478", "wav": "./aishell/wav/test/S0764/BAC009S0764W0478.wav", "txt": "近日在违法事实认识清楚法律法规适用明确的基础上"} -{"key": "BAC009S0764W0479", "wav": "./aishell/wav/test/S0764/BAC009S0764W0479.wav", "txt": "依法对一名藏匿打火机过检的旅客进行了行政处罚"} -{"key": "BAC009S0764W0480", "wav": "./aishell/wav/test/S0764/BAC009S0764W0480.wav", "txt": "机场公安加航航班未发生性侵事件"} -{"key": "BAC009S0764W0482", "wav": "./aishell/wav/test/S0764/BAC009S0764W0482.wav", "txt": "网传该航班一名男性旅客对空姐试图性侵导致飞机返航"} -{"key": "BAC009S0764W0483", "wav": "./aishell/wav/test/S0764/BAC009S0764W0483.wav", "txt": "新京报记者从首都国际机场公安分局相关人员处获悉"} -{"key": "BAC009S0764W0484", "wav": "./aishell/wav/test/S0764/BAC009S0764W0484.wav", "txt": "冲突因空姐发餐时餐车碰到了一名旅客"} -{"key": "BAC009S0764W0485", "wav": "./aishell/wav/test/S0764/BAC009S0764W0485.wav", "txt": "双方因语言交流不畅导致纠纷"} -{"key": "BAC009S0764W0486", "wav": "./aishell/wav/test/S0764/BAC009S0764W0486.wav", "txt": "该男子因影响航班正常秩序"} -{"key": "BAC009S0764W0487", "wav": "./aishell/wav/test/S0764/BAC009S0764W0487.wav", "txt": "明星刘晓庆又火了一把"} -{"key": "BAC009S0764W0488", "wav": "./aishell/wav/test/S0764/BAC009S0764W0488.wav", "txt": "她几乎刷遍了各大媒体"} -{"key": "BAC009S0764W0489", "wav": "./aishell/wav/test/S0764/BAC009S0764W0489.wav", "txt": "不是她的戏或是她的八卦"} -{"key": "BAC009S0764W0490", "wav": "./aishell/wav/test/S0764/BAC009S0764W0490.wav", "txt": "而是因为她也中了天价的招"} -{"key": "BAC009S0764W0491", "wav": "./aishell/wav/test/S0764/BAC009S0764W0491.wav", "txt": "机组成功处置深航机上纵火事件获奖二五零万"} -{"key": "BAC009S0764W0492", "wav": "./aishell/wav/test/S0764/BAC009S0764W0492.wav", "txt": "成功处置深航机上纵火事件"} -{"key": "BAC009S0764W0493", "wav": "./aishell/wav/test/S0764/BAC009S0764W0493.wav", "txt": "杀中传女生嫌犯就想找个人发泄"} -{"key": "BAC009S0764W0494", "wav": "./aishell/wav/test/S0764/BAC009S0764W0494.wav", "txt": 
"其室友在微博上所发的寻人启事"} -{"key": "BAC009S0764W0495", "wav": "./aishell/wav/test/S0764/BAC009S0764W0495.wav", "txt": "警方证实周云露遇害"} -{"key": "BAC009S0765W0121", "wav": "./aishell/wav/test/S0765/BAC009S0765W0121.wav", "txt": "一线城市出现日光盘"} -{"key": "BAC009S0765W0122", "wav": "./aishell/wav/test/S0765/BAC009S0765W0122.wav", "txt": "楼市地市交相升温房价会不会再度暴涨"} -{"key": "BAC009S0765W0123", "wav": "./aishell/wav/test/S0765/BAC009S0765W0123.wav", "txt": "经济热点导读专家认为"} -{"key": "BAC009S0765W0124", "wav": "./aishell/wav/test/S0765/BAC009S0765W0124.wav", "txt": "我国房地产市场过去总体偏紧部分地区过紧"} -{"key": "BAC009S0765W0125", "wav": "./aishell/wav/test/S0765/BAC009S0765W0125.wav", "txt": "为了将后辈的婚姻分险隔断"} -{"key": "BAC009S0765W0126", "wav": "./aishell/wav/test/S0765/BAC009S0765W0126.wav", "txt": "将受益人定为直系血亲后代非配偶继承人"} -{"key": "BAC009S0765W0127", "wav": "./aishell/wav/test/S0765/BAC009S0765W0127.wav", "txt": "按公司持有房产计征"} -{"key": "BAC009S0765W0128", "wav": "./aishell/wav/test/S0765/BAC009S0765W0128.wav", "txt": "相关公司股票走势"} -{"key": "BAC009S0765W0129", "wav": "./aishell/wav/test/S0765/BAC009S0765W0129.wav", "txt": "房价起飞前购置了十几套房产"} -{"key": "BAC009S0765W0130", "wav": "./aishell/wav/test/S0765/BAC009S0765W0130.wav", "txt": "目前总估值已过亿元"} -{"key": "BAC009S0765W0131", "wav": "./aishell/wav/test/S0765/BAC009S0765W0131.wav", "txt": "这些房产全由宋芳自己打理"} -{"key": "BAC009S0765W0132", "wav": "./aishell/wav/test/S0765/BAC009S0765W0132.wav", "txt": "每月光租金收入便已远大于自己和子女的总开销"} -{"key": "BAC009S0765W0133", "wav": "./aishell/wav/test/S0765/BAC009S0765W0133.wav", "txt": "宋芳最近却有点烦恼"} -{"key": "BAC009S0765W0134", "wav": "./aishell/wav/test/S0765/BAC009S0765W0134.wav", "txt": "我想把房产留给儿女"} -{"key": "BAC009S0765W0135", "wav": "./aishell/wav/test/S0765/BAC009S0765W0135.wav", "txt": "万一以后儿女的婚姻出了问题"} -{"key": "BAC009S0765W0136", "wav": "./aishell/wav/test/S0765/BAC009S0765W0136.wav", "txt": "他们的财产和生活不会受到太大影响"} -{"key": "BAC009S0765W0137", "wav": "./aishell/wav/test/S0765/BAC009S0765W0137.wav", "txt": "宋芳对北京银行私人银行的财富顾问说"} -{"key": "BAC009S0765W0138", "wav": "./aishell/wav/test/S0765/BAC009S0765W0138.wav", "txt": "该信托出资购入宋芳的房产"} -{"key": "BAC009S0765W0139", "wav": "./aishell/wav/test/S0765/BAC009S0765W0139.wav", "txt": "成立资金信托购买自家房产"} -{"key": "BAC009S0765W0140", "wav": "./aishell/wav/test/S0765/BAC009S0765W0140.wav", "txt": "在了解宋芳的资产情况与需求之后"} -{"key": "BAC009S0765W0141", "wav": "./aishell/wav/test/S0765/BAC009S0765W0141.wav", "txt": "设立一个单一资金信托"} -{"key": "BAC009S0765W0142", "wav": "./aishell/wav/test/S0765/BAC009S0765W0142.wav", "txt": "宋芳本人为信托的发起人和委托人"} -{"key": "BAC009S0765W0143", "wav": "./aishell/wav/test/S0765/BAC009S0765W0143.wav", "txt": "北京信托作为受托人"} -{"key": "BAC009S0765W0144", "wav": "./aishell/wav/test/S0765/BAC009S0765W0144.wav", "txt": "之后由该信托对宋芳指定的房产发出购买要约"} -{"key": "BAC009S0765W0145", "wav": "./aishell/wav/test/S0765/BAC009S0765W0145.wav", "txt": "实现该信托对房产的控制"} -{"key": "BAC009S0765W0146", "wav": "./aishell/wav/test/S0765/BAC009S0765W0146.wav", "txt": "虽然房产是在信托的名下"} -{"key": "BAC009S0765W0147", "wav": "./aishell/wav/test/S0765/BAC009S0765W0147.wav", "txt": "但您和您的儿女能自由支配"} -{"key": "BAC009S0765W0148", "wav": "./aishell/wav/test/S0765/BAC009S0765W0148.wav", "txt": "这相当于左兜掏右兜"} -{"key": "BAC009S0765W0149", "wav": "./aishell/wav/test/S0765/BAC009S0765W0149.wav", "txt": "通过信托实现了财产的隔离保护"} -{"key": "BAC009S0765W0150", "wav": "./aishell/wav/test/S0765/BAC009S0765W0150.wav", "txt": "未来子女出现姻缘风险"} -{"key": "BAC009S0765W0151", "wav": "./aishell/wav/test/S0765/BAC009S0765W0151.wav", "txt": "其中资金这一要素指基于信托登记的相关法规局限"} -{"key": "BAC009S0765W0152", "wav": 
"./aishell/wav/test/S0765/BAC009S0765W0152.wav", "txt": "为了购买自己想要传承给子女的房产"} -{"key": "BAC009S0765W0153", "wav": "./aishell/wav/test/S0765/BAC009S0765W0153.wav", "txt": "宋芳必须再掏出完全属于自己的资金"} -{"key": "BAC009S0765W0154", "wav": "./aishell/wav/test/S0765/BAC009S0765W0154.wav", "txt": "委托人以其持有的资金设立一个单一资金信托"} -{"key": "BAC009S0765W0155", "wav": "./aishell/wav/test/S0765/BAC009S0765W0155.wav", "txt": "该资金可以是委托人的自有资金"} -{"key": "BAC009S0765W0156", "wav": "./aishell/wav/test/S0765/BAC009S0765W0156.wav", "txt": "也可以是委托人合法获得的过桥资金"} -{"key": "BAC009S0765W0157", "wav": "./aishell/wav/test/S0765/BAC009S0765W0157.wav", "txt": "确保所设信托的合法性"} -{"key": "BAC009S0765W0158", "wav": "./aishell/wav/test/S0765/BAC009S0765W0158.wav", "txt": "按公司持有房产计税"} -{"key": "BAC009S0765W0159", "wav": "./aishell/wav/test/S0765/BAC009S0765W0159.wav", "txt": "由于家族信托的存续期通常较长"} -{"key": "BAC009S0765W0160", "wav": "./aishell/wav/test/S0765/BAC009S0765W0160.wav", "txt": "在信托收益的处置上"} -{"key": "BAC009S0765W0161", "wav": "./aishell/wav/test/S0765/BAC009S0765W0161.wav", "txt": "不同的客户对收益再投资的需求差别较大"} -{"key": "BAC009S0765W0162", "wav": "./aishell/wav/test/S0765/BAC009S0765W0162.wav", "txt": "对收益率的要求差别却不至于相去甚远"} -{"key": "BAC009S0765W0163", "wav": "./aishell/wav/test/S0765/BAC009S0765W0163.wav", "txt": "从我们遇到的客户来看"} -{"key": "BAC009S0765W0164", "wav": "./aishell/wav/test/S0765/BAC009S0765W0164.wav", "txt": "回报率普遍要求并不高"} -{"key": "BAC009S0765W0165", "wav": "./aishell/wav/test/S0765/BAC009S0765W0165.wav", "txt": "有些客户只要求收益率超过利率即可"} -{"key": "BAC009S0765W0166", "wav": "./aishell/wav/test/S0765/BAC009S0765W0166.wav", "txt": "看中的是其财产保护与传承的功能"} -{"key": "BAC009S0765W0167", "wav": "./aishell/wav/test/S0765/BAC009S0765W0167.wav", "txt": "我现在就是担心自己哪天突然出现个什么情况"} -{"key": "BAC009S0765W0168", "wav": "./aishell/wav/test/S0765/BAC009S0765W0168.wav", "txt": "他们说不定又有离婚风险"} -{"key": "BAC009S0765W0169", "wav": "./aishell/wav/test/S0765/BAC009S0765W0169.wav", "txt": "我又不指望设立信托来赚钱"} -{"key": "BAC009S0765W0170", "wav": "./aishell/wav/test/S0765/BAC009S0765W0170.wav", "txt": "主要目的是把后辈的婚姻风险隔断"} -{"key": "BAC009S0765W0171", "wav": "./aishell/wav/test/S0765/BAC009S0765W0171.wav", "txt": "在宋芳的资金信托购买其房产时"} -{"key": "BAC009S0765W0172", "wav": "./aishell/wav/test/S0765/BAC009S0765W0172.wav", "txt": "需要按北京当地的要求缴纳二手房交易费用"} -{"key": "BAC009S0765W0173", "wav": "./aishell/wav/test/S0765/BAC009S0765W0173.wav", "txt": "而在信托持有这些房产后"} -{"key": "BAC009S0765W0174", "wav": "./aishell/wav/test/S0765/BAC009S0765W0174.wav", "txt": "这是因为宋芳购买其房产"} -{"key": "BAC009S0765W0175", "wav": "./aishell/wav/test/S0765/BAC009S0765W0175.wav", "txt": "按照公司持有房产计征"} -{"key": "BAC009S0765W0176", "wav": "./aishell/wav/test/S0765/BAC009S0765W0176.wav", "txt": "各项费用的加总并不低"} -{"key": "BAC009S0765W0177", "wav": "./aishell/wav/test/S0765/BAC009S0765W0177.wav", "txt": "在目前的法律框架下"} -{"key": "BAC009S0765W0178", "wav": "./aishell/wav/test/S0765/BAC009S0765W0178.wav", "txt": "这些税费均无法避免"} -{"key": "BAC009S0765W0179", "wav": "./aishell/wav/test/S0765/BAC009S0765W0179.wav", "txt": "他认为跟后辈姻缘风险相比"} -{"key": "BAC009S0765W0180", "wav": "./aishell/wav/test/S0765/BAC009S0765W0180.wav", "txt": "点击进入股友会参与讨论"} -{"key": "BAC009S0765W0181", "wav": "./aishell/wav/test/S0765/BAC009S0765W0181.wav", "txt": "本世纪经济报道"} -{"key": "BAC009S0765W0182", "wav": "./aishell/wav/test/S0765/BAC009S0765W0182.wav", "txt": "为了将后辈的婚姻风险隔断"} -{"key": "BAC009S0765W0183", "wav": "./aishell/wav/test/S0765/BAC009S0765W0183.wav", "txt": "并将受益人定为直系血亲后代非配偶继承人"} -{"key": "BAC009S0765W0184", "wav": "./aishell/wav/test/S0765/BAC009S0765W0184.wav", "txt": 
"今久整合营销集团迎来了它的生日"} -{"key": "BAC009S0765W0185", "wav": "./aishell/wav/test/S0765/BAC009S0765W0185.wav", "txt": "从最初的几十人"} -{"key": "BAC009S0765W0186", "wav": "./aishell/wav/test/S0765/BAC009S0765W0186.wav", "txt": "事业版图遍布全中国的集团化整合营销公司"} -{"key": "BAC009S0765W0187", "wav": "./aishell/wav/test/S0765/BAC009S0765W0187.wav", "txt": "无序竞争甚至恶意竞争时常发生"} -{"key": "BAC009S0765W0188", "wav": "./aishell/wav/test/S0765/BAC009S0765W0188.wav", "txt": "将发挥部际会议联席制度作用"} -{"key": "BAC009S0765W0189", "wav": "./aishell/wav/test/S0765/BAC009S0765W0189.wav", "txt": "制订境外投资总体战略"} -{"key": "BAC009S0765W0190", "wav": "./aishell/wav/test/S0765/BAC009S0765W0190.wav", "txt": "对重大项目和重大问题进行协调"} -{"key": "BAC009S0765W0191", "wav": "./aishell/wav/test/S0765/BAC009S0765W0191.wav", "txt": "引导企业围绕重点国家和地区在重点领域展开投资"} -{"key": "BAC009S0765W0192", "wav": "./aishell/wav/test/S0765/BAC009S0765W0192.wav", "txt": "鼓励本土中介机构提供服务"} -{"key": "BAC009S0765W0193", "wav": "./aishell/wav/test/S0765/BAC009S0765W0193.wav", "txt": "建立起政府部门企业和中介机构各司其职的组织架构"} -{"key": "BAC009S0765W0194", "wav": "./aishell/wav/test/S0765/BAC009S0765W0194.wav", "txt": "北京科技大学教授刘澄表示"} -{"key": "BAC009S0765W0195", "wav": "./aishell/wav/test/S0765/BAC009S0765W0195.wav", "txt": "主要是如何提供服务及做好监管"} -{"key": "BAC009S0765W0196", "wav": "./aishell/wav/test/S0765/BAC009S0765W0196.wav", "txt": "规划提出诸多想法"} -{"key": "BAC009S0765W0197", "wav": "./aishell/wav/test/S0765/BAC009S0765W0197.wav", "txt": "政府提供的服务企业是否需要"} -{"key": "BAC009S0765W0198", "wav": "./aishell/wav/test/S0765/BAC009S0765W0198.wav", "txt": "如何为企业提供信息避免海外投资风险"} -{"key": "BAC009S0765W0199", "wav": "./aishell/wav/test/S0765/BAC009S0765W0199.wav", "txt": "避免海外一窝蜂上项目等"} -{"key": "BAC009S0765W0200", "wav": "./aishell/wav/test/S0765/BAC009S0765W0200.wav", "txt": "发改委将尽快建立地方政府债务管理体系到"} -{"key": "BAC009S0765W0201", "wav": "./aishell/wav/test/S0765/BAC009S0765W0201.wav", "txt": "他就上述关注问题指出"} -{"key": "BAC009S0765W0202", "wav": "./aishell/wav/test/S0765/BAC009S0765W0202.wav", "txt": "目前我国出现政府性债务违约可能性并不大"} -{"key": "BAC009S0765W0203", "wav": "./aishell/wav/test/S0765/BAC009S0765W0203.wav", "txt": "下一步将进一步完善城投债券发行制度和防范风险机制"} -{"key": "BAC009S0765W0204", "wav": "./aishell/wav/test/S0765/BAC009S0765W0204.wav", "txt": "并尽快建立我国地方政府债务管理体系等"} -{"key": "BAC009S0765W0205", "wav": "./aishell/wav/test/S0765/BAC009S0765W0205.wav", "txt": "政府性违约可能性不大"} -{"key": "BAC009S0765W0206", "wav": "./aishell/wav/test/S0765/BAC009S0765W0206.wav", "txt": "中国证券报随着欧美等国主权债务危机陆续爆发"} -{"key": "BAC009S0765W0207", "wav": "./aishell/wav/test/S0765/BAC009S0765W0207.wav", "txt": "您如何看待政府的举债行为和债务风险"} -{"key": "BAC009S0765W0208", "wav": "./aishell/wav/test/S0765/BAC009S0765W0208.wav", "txt": "徐林吸取欧美等国主权债务危机的教训"} -{"key": "BAC009S0765W0209", "wav": "./aishell/wav/test/S0765/BAC009S0765W0209.wav", "txt": "采取必要措施加强政府债务管理"} -{"key": "BAC009S0765W0210", "wav": "./aishell/wav/test/S0765/BAC009S0765W0210.wav", "txt": "防范我国政府债务风险"} -{"key": "BAC009S0765W0211", "wav": "./aishell/wav/test/S0765/BAC009S0765W0211.wav", "txt": "但在具体评估我国地方政府债务风险程度时"} -{"key": "BAC009S0765W0212", "wav": "./aishell/wav/test/S0765/BAC009S0765W0212.wav", "txt": "也要看到我国与欧美国家的不同之处"} -{"key": "BAC009S0765W0213", "wav": "./aishell/wav/test/S0765/BAC009S0765W0213.wav", "txt": "我国地方政府性债务"} -{"key": "BAC009S0765W0214", "wav": "./aishell/wav/test/S0765/BAC009S0765W0214.wav", "txt": "特别是地方投融资平台公司形成的债务"} -{"key": "BAC009S0765W0215", "wav": "./aishell/wav/test/S0765/BAC009S0765W0215.wav", "txt": "主要用于各地基础设施的投资建设"} -{"key": "BAC009S0765W0216", "wav": "./aishell/wav/test/S0765/BAC009S0765W0216.wav", "txt": 
"当代人和后代人共同承担债务还本付息责任"} -{"key": "BAC009S0765W0217", "wav": "./aishell/wav/test/S0765/BAC009S0765W0217.wav", "txt": "可以更好地体现代际公平"} -{"key": "BAC009S0765W0218", "wav": "./aishell/wav/test/S0765/BAC009S0765W0218.wav", "txt": "克服当期建设资金不足的瓶颈制约"} -{"key": "BAC009S0765W0219", "wav": "./aishell/wav/test/S0765/BAC009S0765W0219.wav", "txt": "有利于加快完善基础设施和投资环境"} -{"key": "BAC009S0765W0220", "wav": "./aishell/wav/test/S0765/BAC009S0765W0220.wav", "txt": "是一种合理的基础设施投融资建设行为"} -{"key": "BAC009S0765W0221", "wav": "./aishell/wav/test/S0765/BAC009S0765W0221.wav", "txt": "政府举债建设形成了大量资产"} -{"key": "BAC009S0765W0222", "wav": "./aishell/wav/test/S0765/BAC009S0765W0222.wav", "txt": "相当部分资产具有长期的直接收益"} -{"key": "BAC009S0765W0223", "wav": "./aishell/wav/test/S0765/BAC009S0765W0223.wav", "txt": "一些没有直接收益的项目"} -{"key": "BAC009S0765W0224", "wav": "./aishell/wav/test/S0765/BAC009S0765W0224.wav", "txt": "也具有间接的经济效益或社会效益"} -{"key": "BAC009S0765W0225", "wav": "./aishell/wav/test/S0765/BAC009S0765W0225.wav", "txt": "对促进当地经济增长和政府财力的增长"} -{"key": "BAC009S0765W0226", "wav": "./aishell/wav/test/S0765/BAC009S0765W0226.wav", "txt": "不能简单地用寅吃卯粮来作价值判断"} -{"key": "BAC009S0765W0227", "wav": "./aishell/wav/test/S0765/BAC009S0765W0227.wav", "txt": "这并不意味着政府可以无节制地借债"} -{"key": "BAC009S0765W0228", "wav": "./aishell/wav/test/S0765/BAC009S0765W0228.wav", "txt": "关键是要把投资规模和债务规模"} -{"key": "BAC009S0765W0229", "wav": "./aishell/wav/test/S0765/BAC009S0765W0229.wav", "txt": "控制在合理的范围内"} -{"key": "BAC009S0765W0230", "wav": "./aishell/wav/test/S0765/BAC009S0765W0230.wav", "txt": "防止出现系统性的偿债风险"} -{"key": "BAC009S0765W0231", "wav": "./aishell/wav/test/S0765/BAC009S0765W0231.wav", "txt": "国务院高度重视防范地方政府债务风险"} -{"key": "BAC009S0765W0232", "wav": "./aishell/wav/test/S0765/BAC009S0765W0232.wav", "txt": "从二零零九年下半年就开始要求有关部门调研这一问题"} -{"key": "BAC009S0765W0233", "wav": "./aishell/wav/test/S0765/BAC009S0765W0233.wav", "txt": "国家审计署还专门组织力量"} -{"key": "BAC009S0765W0234", "wav": "./aishell/wav/test/S0765/BAC009S0765W0234.wav", "txt": "对全国各地的政府债务进行了严格审计"} -{"key": "BAC009S0765W0235", "wav": "./aishell/wav/test/S0765/BAC009S0765W0235.wav", "txt": "审计署的审计结论表明"} -{"key": "BAC009S0765W0236", "wav": "./aishell/wav/test/S0765/BAC009S0765W0236.wav", "txt": "我国地方政府的累积债务相对于偿付能力来看"} -{"key": "BAC009S0765W0237", "wav": "./aishell/wav/test/S0765/BAC009S0765W0237.wav", "txt": "远低于发生债务危机的欧美国家"} -{"key": "BAC009S0765W0238", "wav": "./aishell/wav/test/S0765/BAC009S0765W0238.wav", "txt": "考虑到我国正处在经济快速增长期"} -{"key": "BAC009S0765W0239", "wav": "./aishell/wav/test/S0765/BAC009S0765W0239.wav", "txt": "政府财力增长也相应较快"} -{"key": "BAC009S0765W0240", "wav": "./aishell/wav/test/S0765/BAC009S0765W0240.wav", "txt": "政府还拥有较多的可变现资产"} -{"key": "BAC009S0765W0241", "wav": "./aishell/wav/test/S0765/BAC009S0765W0241.wav", "txt": "相对于目前的负债规模"} -{"key": "BAC009S0765W0242", "wav": "./aishell/wav/test/S0765/BAC009S0765W0242.wav", "txt": "政府总体上具有较强的偿债能力"} -{"key": "BAC009S0765W0243", "wav": "./aishell/wav/test/S0765/BAC009S0765W0243.wav", "txt": "采取积极有效的措施化解部分地区和领域的债务风险"} -{"key": "BAC009S0765W0244", "wav": "./aishell/wav/test/S0765/BAC009S0765W0244.wav", "txt": "在我国出现政府性债务违约的可能性是不大的"} -{"key": "BAC009S0765W0245", "wav": "./aishell/wav/test/S0765/BAC009S0765W0245.wav", "txt": "债券市场城投债券发行不畅"} -{"key": "BAC009S0765W0246", "wav": "./aishell/wav/test/S0765/BAC009S0765W0246.wav", "txt": "从城投债券发行监管部门的角度"} -{"key": "BAC009S0765W0247", "wav": "./aishell/wav/test/S0765/BAC009S0765W0247.wav", "txt": "您如何看待这一现象"} -{"key": "BAC009S0765W0248", "wav": "./aishell/wav/test/S0765/BAC009S0765W0248.wav", "txt": 
"徐林出于对地方政府债务风险的担忧"} -{"key": "BAC009S0765W0249", "wav": "./aishell/wav/test/S0765/BAC009S0765W0249.wav", "txt": "投资者采取措施防范风险是成熟的表现"} -{"key": "BAC009S0765W0250", "wav": "./aishell/wav/test/S0765/BAC009S0765W0250.wav", "txt": "但出于对我国地方政府债务风险的不合理判断"} -{"key": "BAC009S0765W0251", "wav": "./aishell/wav/test/S0765/BAC009S0765W0251.wav", "txt": "并进而对城投债券进行唱空或做空"} -{"key": "BAC009S0765W0252", "wav": "./aishell/wav/test/S0765/BAC009S0765W0252.wav", "txt": "最近企业债券特别是城投债券的发行难度加大"} -{"key": "BAC009S0765W0253", "wav": "./aishell/wav/test/S0765/BAC009S0765W0253.wav", "txt": "其在纳斯达克上市时的发行价为一六美元"} -{"key": "BAC009S0765W0254", "wav": "./aishell/wav/test/S0765/BAC009S0765W0254.wav", "txt": "其股票价格在十五点七六美元上下徘徊"} -{"key": "BAC009S0765W0255", "wav": "./aishell/wav/test/S0765/BAC009S0765W0255.wav", "txt": "中国手游在退市之前的市盈率在十六十七倍左右"} -{"key": "BAC009S0765W0256", "wav": "./aishell/wav/test/S0765/BAC009S0765W0256.wav", "txt": "掌趣科技三零零三一五一度超过二百倍"} -{"key": "BAC009S0765W0257", "wav": "./aishell/wav/test/S0765/BAC009S0765W0257.wav", "txt": "华尔街并不认可游戏这种商业模式"} -{"key": "BAC009S0765W0258", "wav": "./aishell/wav/test/S0765/BAC009S0765W0258.wav", "txt": "并非仅仅针对中国公司"} -{"key": "BAC009S0765W0260", "wav": "./aishell/wav/test/S0765/BAC009S0765W0260.wav", "txt": "作为美国本土著名社交游戏开发商"} -{"key": "BAC009S0765W0261", "wav": "./aishell/wav/test/S0765/BAC009S0765W0261.wav", "txt": "在当年社交游戏风靡的时候"} -{"key": "BAC009S0765W0262", "wav": "./aishell/wav/test/S0765/BAC009S0765W0262.wav", "txt": "因为快速发展的业务和不断膨胀的营收受资本市场亲睐"} -{"key": "BAC009S0765W0265", "wav": "./aishell/wav/test/S0765/BAC009S0765W0265.wav", "txt": "亏损二千六百九十万美元相比上一季度"} -{"key": "BAC009S0765W0266", "wav": "./aishell/wav/test/S0765/BAC009S0765W0266.wav", "txt": "这一亏损已经收窄了百分之五十七"} -{"key": "BAC009S0765W0267", "wav": "./aishell/wav/test/S0765/BAC009S0765W0267.wav", "txt": "成熟的资本市场相对公平"} -{"key": "BAC009S0765W0268", "wav": "./aishell/wav/test/S0765/BAC009S0765W0268.wav", "txt": "这些被市场唱空的游戏公司本身业务模式遇到了困境"} -{"key": "BAC009S0765W0269", "wav": "./aishell/wav/test/S0765/BAC009S0765W0269.wav", "txt": "游戏公司往往靠一款游戏在市场上火爆"} -{"key": "BAC009S0765W0270", "wav": "./aishell/wav/test/S0765/BAC009S0765W0270.wav", "txt": "大多数游戏产品往往病毒式地成长"} -{"key": "BAC009S0765W0272", "wav": "./aishell/wav/test/S0765/BAC009S0765W0272.wav", "txt": "这些中国游戏公司大多成长于中国市场"} -{"key": "BAC009S0765W0273", "wav": "./aishell/wav/test/S0765/BAC009S0765W0273.wav", "txt": "他们的产品在海外市场也极少被认可"} -{"key": "BAC009S0765W0274", "wav": "./aishell/wav/test/S0765/BAC009S0765W0274.wav", "txt": "这些公司在海外市场上市往往除了获得融资机会"} -{"key": "BAC009S0765W0275", "wav": "./aishell/wav/test/S0765/BAC009S0765W0275.wav", "txt": "并未能给这些公司带来其他的效应"} -{"key": "BAC009S0765W0277", "wav": "./aishell/wav/test/S0765/BAC009S0765W0277.wav", "txt": "中国游戏产品和美国产品极为不同"} -{"key": "BAC009S0765W0278", "wav": "./aishell/wav/test/S0765/BAC009S0765W0278.wav", "txt": "美国玩家对游戏难度创造性要求较高"} -{"key": "BAC009S0765W0279", "wav": "./aishell/wav/test/S0765/BAC009S0765W0279.wav", "txt": "中国产品不可能照搬到美国市场"} -{"key": "BAC009S0765W0280", "wav": "./aishell/wav/test/S0765/BAC009S0765W0280.wav", "txt": "在融资和发展海外市场上"} -{"key": "BAC009S0765W0281", "wav": "./aishell/wav/test/S0765/BAC009S0765W0281.wav", "txt": "还希望拓展市场的话"} -{"key": "BAC009S0765W0282", "wav": "./aishell/wav/test/S0765/BAC009S0765W0282.wav", "txt": "触控科技全资韩国子公司在韩国上市"} -{"key": "BAC009S0765W0283", "wav": "./aishell/wav/test/S0765/BAC009S0765W0283.wav", "txt": "而从二零一三年开始"} -{"key": "BAC009S0765W0284", "wav": "./aishell/wav/test/S0765/BAC009S0765W0284.wav", "txt": "这家公司就在挖角当地游戏公司高管建立分公司"} -{"key": "BAC009S0765W0285", "wav": 
"./aishell/wav/test/S0765/BAC009S0765W0285.wav", "txt": "打造适合当地市场的产品"} -{"key": "BAC009S0765W0286", "wav": "./aishell/wav/test/S0765/BAC009S0765W0286.wav", "txt": "根据陈昊芝在二零一四年八月提供的数据"} -{"key": "BAC009S0765W0287", "wav": "./aishell/wav/test/S0765/BAC009S0765W0287.wav", "txt": "市场份额做到了前十位"} -{"key": "BAC009S0765W0288", "wav": "./aishell/wav/test/S0765/BAC009S0765W0288.wav", "txt": "未来上市能够放大公司的品牌效益"} -{"key": "BAC009S0765W0289", "wav": "./aishell/wav/test/S0765/BAC009S0765W0289.wav", "txt": "让当地更多的人知道这家公司"} -{"key": "BAC009S0765W0290", "wav": "./aishell/wav/test/S0765/BAC009S0765W0290.wav", "txt": "在韩国股市低迷情况下"} -{"key": "BAC009S0765W0291", "wav": "./aishell/wav/test/S0765/BAC009S0765W0291.wav", "txt": "触控科技子公司涨幅居前"} -{"key": "BAC009S0765W0292", "wav": "./aishell/wav/test/S0765/BAC009S0765W0292.wav", "txt": "对于游戏这种地域属性较重的产品"} -{"key": "BAC009S0765W0293", "wav": "./aishell/wav/test/S0765/BAC009S0765W0293.wav", "txt": "应慎重考虑上市时机和地点"} -{"key": "BAC009S0765W0294", "wav": "./aishell/wav/test/S0765/BAC009S0765W0294.wav", "txt": "反复检视自身商业模式"} -{"key": "BAC009S0765W0295", "wav": "./aishell/wav/test/S0765/BAC009S0765W0295.wav", "txt": "而不是迫不及待抓住一切可以上市圈钱的机会"} -{"key": "BAC009S0765W0296", "wav": "./aishell/wav/test/S0765/BAC009S0765W0296.wav", "txt": "往往连最直接的目标都无法达成"} -{"key": "BAC009S0765W0297", "wav": "./aishell/wav/test/S0765/BAC009S0765W0297.wav", "txt": "因为难以忍受股价长期被低估"} -{"key": "BAC009S0765W0298", "wav": "./aishell/wav/test/S0765/BAC009S0765W0298.wav", "txt": "中国游戏公司纷纷忙着退市"} -{"key": "BAC009S0765W0299", "wav": "./aishell/wav/test/S0765/BAC009S0765W0299.wav", "txt": "最近都在流行做预测"} -{"key": "BAC009S0765W0300", "wav": "./aishell/wav/test/S0765/BAC009S0765W0300.wav", "txt": "于是他也来凑凑热闹"} -{"key": "BAC009S0765W0301", "wav": "./aishell/wav/test/S0765/BAC009S0765W0301.wav", "txt": "他的预测有点毒基本上是在讨论谁会下台"} -{"key": "BAC009S0765W0303", "wav": "./aishell/wav/test/S0765/BAC009S0765W0303.wav", "txt": "每日经济新闻记者杨建江南嘉捷六万"} -{"key": "BAC009S0765W0304", "wav": "./aishell/wav/test/S0765/BAC009S0765W0304.wav", "txt": "收盘价十三点六五元于七月八日发布公告"} -{"key": "BAC009S0765W0305", "wav": "./aishell/wav/test/S0765/BAC009S0765W0305.wav", "txt": "为使股价与公司价值匹配"} -{"key": "BAC009S0765W0306", "wav": "./aishell/wav/test/S0765/BAC009S0765W0306.wav", "txt": "公司拟计划通过集中竞价交易方式回购公司股份"} -{"key": "BAC009S0765W0307", "wav": "./aishell/wav/test/S0765/BAC009S0765W0307.wav", "txt": "公司此次回购股份的价格不超过十五点一零七元股"} -{"key": "BAC009S0765W0308", "wav": "./aishell/wav/test/S0765/BAC009S0765W0308.wav", "txt": "用于回购的资金总额不超过一点五一七亿元"} -{"key": "BAC009S0765W0309", "wav": "./aishell/wav/test/S0765/BAC009S0765W0309.wav", "txt": "预计回购股份约一千万股"} -{"key": "BAC009S0765W0310", "wav": "./aishell/wav/test/S0765/BAC009S0765W0310.wav", "txt": "占公司总股本约二点百分之五十"} -{"key": "BAC009S0765W0311", "wav": "./aishell/wav/test/S0765/BAC009S0765W0311.wav", "txt": "公司股票于二零一五年七月八日复牌"} -{"key": "BAC009S0765W0312", "wav": "./aishell/wav/test/S0765/BAC009S0765W0312.wav", "txt": "每日经济新闻记者注意到"} -{"key": "BAC009S0765W0313", "wav": "./aishell/wav/test/S0765/BAC009S0765W0313.wav", "txt": "截至二零一四年十二月三十一日"} -{"key": "BAC009S0765W0314", "wav": "./aishell/wav/test/S0765/BAC009S0765W0314.wav", "txt": "资金来源为自有资金"} -{"key": "BAC009S0765W0315", "wav": "./aishell/wav/test/S0765/BAC009S0765W0315.wav", "txt": "回购期限为自回购股份方案之日起至今年底"} -{"key": "BAC009S0765W0317", "wav": "./aishell/wav/test/S0765/BAC009S0765W0317.wav", "txt": "收盘价四点九九元也于七月八日公告"} -{"key": "BAC009S0765W0321", "wav": "./aishell/wav/test/S0765/BAC009S0765W0321.wav", "txt": "其目前的股票市值已经不能完全反映公司价值"} -{"key": "BAC009S0765W0323", "wav": 
"./aishell/wav/test/S0765/BAC009S0765W0323.wav", "txt": "增持后持股比例为六十二点百分之二十三"} -{"key": "BAC009S0765W0325", "wav": "./aishell/wav/test/S0765/BAC009S0765W0325.wav", "txt": "拟在二零一五年二零一七年先行推出两期回购方案"} -{"key": "BAC009S0765W0326", "wav": "./aishell/wav/test/S0765/BAC009S0765W0326.wav", "txt": "其中第一期回购资金上限为二零一四年净利润的百分之二十五"} -{"key": "BAC009S0765W0327", "wav": "./aishell/wav/test/S0765/BAC009S0765W0327.wav", "txt": "第二期股票回购方案不晚于二零一七年六月三十日推出"} -{"key": "BAC009S0765W0328", "wav": "./aishell/wav/test/S0765/BAC009S0765W0328.wav", "txt": "回购期限为股东大会通过后不超过十二个月"} -{"key": "BAC009S0765W0329", "wav": "./aishell/wav/test/S0765/BAC009S0765W0329.wav", "txt": "预计可回购不少于七百九十一万股"} -{"key": "BAC009S0765W0330", "wav": "./aishell/wav/test/S0765/BAC009S0765W0330.wav", "txt": "每日经济新闻记者杨建江南嘉捷六万一千一百三十一三"} -{"key": "BAC009S0765W0331", "wav": "./aishell/wav/test/S0765/BAC009S0765W0331.wav", "txt": "收盘价十三点六五元于七月八日发布公告"} -{"key": "BAC009S0765W0332", "wav": "./aishell/wav/test/S0765/BAC009S0765W0332.wav", "txt": "为使股价与公司价值匹配"} -{"key": "BAC009S0765W0333", "wav": "./aishell/wav/test/S0765/BAC009S0765W0333.wav", "txt": "公司拟计划通过集中竞价交易方式回购公司股份"} -{"key": "BAC009S0765W0334", "wav": "./aishell/wav/test/S0765/BAC009S0765W0334.wav", "txt": "锂电池在今年上半年成为诸多上市公司的业绩功臣"} -{"key": "BAC009S0765W0335", "wav": "./aishell/wav/test/S0765/BAC009S0765W0335.wav", "txt": "成飞集成百二十一九十"} -{"key": "BAC009S0765W0337", "wav": "./aishell/wav/test/S0765/BAC009S0765W0337.wav", "txt": "公司上半年营业收入六点三四亿元"} -{"key": "BAC009S0765W0338", "wav": "./aishell/wav/test/S0765/BAC009S0765W0338.wav", "txt": "折合人民币大概二千四八零亿"} -{"key": "BAC009S0765W0339", "wav": "./aishell/wav/test/S0765/BAC009S0765W0339.wav", "txt": "而与经济实力相关的一些基础设施建设方面"} -{"key": "BAC009S0765W0340", "wav": "./aishell/wav/test/S0765/BAC009S0765W0340.wav", "txt": "无论是城市交通建设还是机场运力"} -{"key": "BAC009S0765W0341", "wav": "./aishell/wav/test/S0765/BAC009S0765W0341.wav", "txt": "阿拉木图都无法和北京相比"} -{"key": "BAC009S0765W0342", "wav": "./aishell/wav/test/S0765/BAC009S0765W0342.wav", "txt": "远远无法和北京相提并论"} -{"key": "BAC009S0765W0343", "wav": "./aishell/wav/test/S0765/BAC009S0765W0343.wav", "txt": "从申办冬奥会的硬件基础上看"} -{"key": "BAC009S0765W0344", "wav": "./aishell/wav/test/S0765/BAC009S0765W0344.wav", "txt": "北京冬奥会的硬件基础要强于阿拉木图"} -{"key": "BAC009S0765W0345", "wav": "./aishell/wav/test/S0765/BAC009S0765W0345.wav", "txt": "北京张家口计划启用一二个竞赛场馆"} -{"key": "BAC009S0765W0346", "wav": "./aishell/wav/test/S0765/BAC009S0765W0346.wav", "txt": "其中五个场馆需要新建"} -{"key": "BAC009S0765W0347", "wav": "./aishell/wav/test/S0765/BAC009S0765W0347.wav", "txt": "其馀场馆改扩建后可以满足赛事需要"} -{"key": "BAC009S0765W0348", "wav": "./aishell/wav/test/S0765/BAC009S0765W0348.wav", "txt": "其中北京市区仅需要新建一座速滑场馆"} -{"key": "BAC009S0765W0349", "wav": "./aishell/wav/test/S0765/BAC009S0765W0349.wav", "txt": "阿拉木图方面将会使用十四个场馆作为比赛之用"} -{"key": "BAC009S0765W0350", "wav": "./aishell/wav/test/S0765/BAC009S0765W0350.wav", "txt": "目前八座为已有场馆并在使用中"} -{"key": "BAC009S0765W0351", "wav": "./aishell/wav/test/S0765/BAC009S0765W0351.wav", "txt": "其于六个场馆都需要新建"} -{"key": "BAC009S0765W0352", "wav": "./aishell/wav/test/S0765/BAC009S0765W0352.wav", "txt": "从举办大型体育赛事的经验来看"} -{"key": "BAC009S0765W0353", "wav": "./aishell/wav/test/S0765/BAC009S0765W0353.wav", "txt": "北京的经验比阿拉木图丰富"} -{"key": "BAC009S0765W0354", "wav": "./aishell/wav/test/S0765/BAC009S0765W0354.wav", "txt": "还有二零一五年的田径世锦赛"} -{"key": "BAC009S0765W0355", "wav": "./aishell/wav/test/S0765/BAC009S0765W0355.wav", "txt": "这些使得北京积累了大量的举办与运营经验"} -{"key": "BAC009S0765W0356", "wav": "./aishell/wav/test/S0765/BAC009S0765W0356.wav", "txt": 
"也证明了北京举办大型体育赛事的能力"} -{"key": "BAC009S0765W0357", "wav": "./aishell/wav/test/S0765/BAC009S0765W0357.wav", "txt": "自从哈萨克斯坦独立"} -{"key": "BAC009S0765W0358", "wav": "./aishell/wav/test/S0765/BAC009S0765W0358.wav", "txt": "二零一一年的亚冬会是其举办的第一个国际性综合赛事"} -{"key": "BAC009S0765W0359", "wav": "./aishell/wav/test/S0765/BAC009S0765W0359.wav", "txt": "之后就没有举办过的大型体育赛事"} -{"key": "BAC009S0765W0360", "wav": "./aishell/wav/test/S0765/BAC009S0765W0360.wav", "txt": "花样滑冰大奖赛中国杯常年在北京和上海之间轮换"} -{"key": "BAC009S0765W0361", "wav": "./aishell/wav/test/S0765/BAC009S0765W0361.wav", "txt": "二零一四年的冰壶世锦赛也在北京举行"} -{"key": "BAC009S0765W0362", "wav": "./aishell/wav/test/S0765/BAC009S0765W0362.wav", "txt": "一系列大型赛事的承办"} -{"key": "BAC009S0765W0363", "wav": "./aishell/wav/test/S0765/BAC009S0765W0363.wav", "txt": "让北京具备了承办冬奥会这种顶级赛事的经验和能力"} -{"key": "BAC009S0765W0364", "wav": "./aishell/wav/test/S0765/BAC009S0765W0364.wav", "txt": "北京申办冬奥影响远超承办本身"} -{"key": "BAC009S0765W0365", "wav": "./aishell/wav/test/S0765/BAC009S0765W0365.wav", "txt": "此次北京申办冬奥会"} -{"key": "BAC009S0765W0366", "wav": "./aishell/wav/test/S0765/BAC009S0765W0366.wav", "txt": "也让我们看到了自身存在着的不足"} -{"key": "BAC009S0765W0367", "wav": "./aishell/wav/test/S0765/BAC009S0765W0367.wav", "txt": "其中主要集中于冰雪运动本身实力上的有所欠缺"} -{"key": "BAC009S0765W0368", "wav": "./aishell/wav/test/S0765/BAC009S0765W0368.wav", "txt": "二零二二年冬奥会的举行"} -{"key": "BAC009S0765W0369", "wav": "./aishell/wav/test/S0765/BAC009S0765W0369.wav", "txt": "对于我国冰雪运动实力的提升会有巨大的推动作用"} -{"key": "BAC009S0765W0370", "wav": "./aishell/wav/test/S0765/BAC009S0765W0370.wav", "txt": "和夏季奥运会上的斩金夺银不同"} -{"key": "BAC009S0765W0371", "wav": "./aishell/wav/test/S0765/BAC009S0765W0371.wav", "txt": "中国的冬季运动还处于半起步阶段"} -{"key": "BAC009S0765W0372", "wav": "./aishell/wav/test/S0765/BAC009S0765W0372.wav", "txt": "这在我国体育发展史上具有划时代的意义"} -{"key": "BAC009S0765W0373", "wav": "./aishell/wav/test/S0765/BAC009S0765W0373.wav", "txt": "标志着我国体育开始走向国际化"} -{"key": "BAC009S0765W0374", "wav": "./aishell/wav/test/S0765/BAC009S0765W0374.wav", "txt": "成为了国际体育运动大家庭中的一员"} -{"key": "BAC009S0765W0375", "wav": "./aishell/wav/test/S0765/BAC009S0765W0375.wav", "txt": "但直到一二年后的法国阿尔贝维尔冬奥会上"} -{"key": "BAC009S0765W0376", "wav": "./aishell/wav/test/S0765/BAC009S0765W0376.wav", "txt": "我国选手才实现了冬奥奖牌零的突破"} -{"key": "BAC009S0765W0377", "wav": "./aishell/wav/test/S0765/BAC009S0765W0377.wav", "txt": "取得这一突破的领军人就包括轮椅英雄叶乔波"} -{"key": "BAC009S0765W0378", "wav": "./aishell/wav/test/S0765/BAC009S0765W0378.wav", "txt": "又是十年的空白期"} -{"key": "BAC009S0765W0379", "wav": "./aishell/wav/test/S0765/BAC009S0765W0379.wav", "txt": "二零零二年的美国盐湖城冬奥会上"} -{"key": "BAC009S0765W0380", "wav": "./aishell/wav/test/S0765/BAC009S0765W0380.wav", "txt": "杨扬拿到了五百米和一千米两项短道速的金牌"} -{"key": "BAC009S0765W0381", "wav": "./aishell/wav/test/S0765/BAC009S0765W0381.wav", "txt": "更具历史性意义的是"} -{"key": "BAC009S0765W0382", "wav": "./aishell/wav/test/S0765/BAC009S0765W0382.wav", "txt": "这是中国奥运代表团在冬季奥运会上取得的首枚金牌"} -{"key": "BAC009S0765W0383", "wav": "./aishell/wav/test/S0765/BAC009S0765W0383.wav", "txt": "经过二十多年的努力"} -{"key": "BAC009S0765W0384", "wav": "./aishell/wav/test/S0765/BAC009S0765W0384.wav", "txt": "中国冰雪健儿终于站到了冬奥会的最高领奖台"} -{"key": "BAC009S0765W0385", "wav": "./aishell/wav/test/S0765/BAC009S0765W0385.wav", "txt": "以及拥有陈露的女单项目"} -{"key": "BAC009S0765W0386", "wav": "./aishell/wav/test/S0765/BAC009S0765W0386.wav", "txt": "可以在世界范围内立足"} -{"key": "BAC009S0765W0387", "wav": "./aishell/wav/test/S0765/BAC009S0765W0387.wav", "txt": "但随着这一系列名将的退役"} -{"key": "BAC009S0765W0388", "wav": 
"./aishell/wav/test/S0765/BAC009S0765W0388.wav", "txt": "在中国的这一传统优势项目上"} -{"key": "BAC009S0765W0389", "wav": "./aishell/wav/test/S0765/BAC009S0765W0389.wav", "txt": "我们可以说已经输给了其他强敌"} -{"key": "BAC009S0765W0390", "wav": "./aishell/wav/test/S0765/BAC009S0765W0390.wav", "txt": "更直观的体现是在冰雪运动的核心项目冰球上"} -{"key": "BAC009S0765W0391", "wav": "./aishell/wav/test/S0765/BAC009S0765W0391.wav", "txt": "竞争对手哈萨克斯坦在这一点上要强过我们"} -{"key": "BAC009S0765W0392", "wav": "./aishell/wav/test/S0765/BAC009S0765W0392.wav", "txt": "中国国家男子冰球队目前排名第三十二位"} -{"key": "BAC009S0765W0393", "wav": "./aishell/wav/test/S0765/BAC009S0765W0393.wav", "txt": "而哈萨克斯坦则是第十六位"} -{"key": "BAC009S0765W0394", "wav": "./aishell/wav/test/S0765/BAC009S0765W0394.wav", "txt": "所有主办国的男子冰球成绩排位均在二十位之内"} -{"key": "BAC009S0765W0395", "wav": "./aishell/wav/test/S0765/BAC009S0765W0395.wav", "txt": "二零一八年冬奥会的主办地韩国平昌是一个绝好的例子"} -{"key": "BAC009S0765W0396", "wav": "./aishell/wav/test/S0765/BAC009S0765W0396.wav", "txt": "平昌曾经三次申办冬奥会"} -{"key": "BAC009S0765W0397", "wav": "./aishell/wav/test/S0765/BAC009S0765W0397.wav", "txt": "前两次申办的过程中"} -{"key": "BAC009S0765W0398", "wav": "./aishell/wav/test/S0765/BAC009S0765W0398.wav", "txt": "男子冰球的战绩均在二十五名左右"} -{"key": "BAC009S0765W0399", "wav": "./aishell/wav/test/S0765/BAC009S0765W0399.wav", "txt": "而第三次申办周期内"} -{"key": "BAC009S0765W0401", "wav": "./aishell/wav/test/S0765/BAC009S0765W0401.wav", "txt": "几乎帮助了平昌拿下二零一八年冬奥会的主办权"} -{"key": "BAC009S0765W0402", "wav": "./aishell/wav/test/S0765/BAC009S0765W0402.wav", "txt": "由于韩国冰球协会的四年规划"} -{"key": "BAC009S0765W0403", "wav": "./aishell/wav/test/S0765/BAC009S0765W0403.wav", "txt": "保证国家队水平不会被其他球队相差太远的承诺下"} -{"key": "BAC009S0765W0404", "wav": "./aishell/wav/test/S0765/BAC009S0765W0404.wav", "txt": "业已正式启动斯诺登事件电影的拍摄"} -{"key": "BAC009S0765W0405", "wav": "./aishell/wav/test/S0765/BAC009S0765W0405.wav", "txt": "影片发布了第一批定装照"} -{"key": "BAC009S0765W0406", "wav": "./aishell/wav/test/S0765/BAC009S0765W0406.wav", "txt": "以一身越野军装黑框眼镜的造型出现"} -{"key": "BAC009S0765W0407", "wav": "./aishell/wav/test/S0765/BAC009S0765W0407.wav", "txt": "看上去和人物原型相当贴合"} -{"key": "BAC009S0765W0408", "wav": "./aishell/wav/test/S0765/BAC009S0765W0408.wav", "txt": "演技也日渐精湛"} -{"key": "BAC009S0765W0409", "wav": "./aishell/wav/test/S0765/BAC009S0765W0409.wav", "txt": "更有一手好厨艺"} -{"key": "BAC009S0765W0410", "wav": "./aishell/wav/test/S0765/BAC009S0765W0410.wav", "txt": "可说是超完美女神"} -{"key": "BAC009S0765W0411", "wav": "./aishell/wav/test/S0765/BAC009S0765W0411.wav", "txt": "男友却仍然劈腿偷吃"} -{"key": "BAC009S0765W0412", "wav": "./aishell/wav/test/S0765/BAC009S0765W0412.wav", "txt": "好友林心如也心疼喊话我会陪她"} -{"key": "BAC009S0765W0413", "wav": "./aishell/wav/test/S0765/BAC009S0765W0413.wav", "txt": "中新网五月六日电据台湾媒体报道"} -{"key": "BAC009S0765W0414", "wav": "./aishell/wav/test/S0765/BAC009S0765W0414.wav", "txt": "刚与阮经天传出情变不久的许玮甯近日接拍恐怖片"} -{"key": "BAC009S0765W0415", "wav": "./aishell/wav/test/S0765/BAC009S0765W0415.wav", "txt": "称为了演好戏"} -{"key": "BAC009S0765W0416", "wav": "./aishell/wav/test/S0765/BAC009S0765W0416.wav", "txt": "她看了不少恐怖片"} -{"key": "BAC009S0765W0417", "wav": "./aishell/wav/test/S0765/BAC009S0765W0417.wav", "txt": "看完片后会睡不好做恶梦"} -{"key": "BAC009S0765W0418", "wav": "./aishell/wav/test/S0765/BAC009S0765W0418.wav", "txt": "上厕所都要把灯全部打开"} -{"key": "BAC009S0765W0419", "wav": "./aishell/wav/test/S0765/BAC009S0765W0419.wav", "txt": "搜狐娱乐讯日前"} -{"key": "BAC009S0765W0420", "wav": "./aishell/wav/test/S0765/BAC009S0765W0420.wav", "txt": "引发众多粉丝围堵"} -{"key": "BAC009S0765W0421", "wav": "./aishell/wav/test/S0765/BAC009S0765W0421.wav", 
"txt": "玩心大起的许绍洋与玩家一起比拼游戏"} -{"key": "BAC009S0765W0422", "wav": "./aishell/wav/test/S0765/BAC009S0765W0422.wav", "txt": "没想竟然惨败"} -{"key": "BAC009S0765W0423", "wav": "./aishell/wav/test/S0765/BAC009S0765W0423.wav", "txt": "这让自称游戏达人的他颇有些不好意思"} -{"key": "BAC009S0765W0424", "wav": "./aishell/wav/test/S0765/BAC009S0765W0424.wav", "txt": "金陵晚报八月十二日报道二零一四年"} -{"key": "BAC009S0765W0425", "wav": "./aishell/wav/test/S0765/BAC009S0765W0425.wav", "txt": "许茹芸与韩籍男友举行了婚礼"} -{"key": "BAC009S0765W0426", "wav": "./aishell/wav/test/S0765/BAC009S0765W0426.wav", "txt": "迎来了人生崭新阶段"} -{"key": "BAC009S0765W0427", "wav": "./aishell/wav/test/S0765/BAC009S0765W0427.wav", "txt": "不同于大家心中按部就班的乖乖女形象"} -{"key": "BAC009S0765W0428", "wav": "./aishell/wav/test/S0765/BAC009S0765W0428.wav", "txt": "许茹芸突然闪婚让当时的娱乐圈也惊起了一阵小波澜"} -{"key": "BAC009S0765W0429", "wav": "./aishell/wav/test/S0765/BAC009S0765W0429.wav", "txt": "在许茹芸看来"} -{"key": "BAC009S0765W0430", "wav": "./aishell/wav/test/S0765/BAC009S0765W0430.wav", "txt": "但几乎一个都没有实现"} -{"key": "BAC009S0765W0431", "wav": "./aishell/wav/test/S0765/BAC009S0765W0431.wav", "txt": "一四年前轰动东莞沙田的一起命案"} -{"key": "BAC009S0765W0432", "wav": "./aishell/wav/test/S0765/BAC009S0765W0432.wav", "txt": "日前因为广东省高院作出的无罪判决"} -{"key": "BAC009S0765W0433", "wav": "./aishell/wav/test/S0765/BAC009S0765W0433.wav", "txt": "再次吸引了众人的目光"} -{"key": "BAC009S0765W0434", "wav": "./aishell/wav/test/S0765/BAC009S0765W0434.wav", "txt": "八月一七日上午一一时"} -{"key": "BAC009S0765W0435", "wav": "./aishell/wav/test/S0765/BAC009S0765W0435.wav", "txt": "陈传钧从东莞市第二看守所出来"} -{"key": "BAC009S0765W0436", "wav": "./aishell/wav/test/S0765/BAC009S0765W0436.wav", "txt": "这是二零一零年四月二三日以来"} -{"key": "BAC009S0765W0437", "wav": "./aishell/wav/test/S0765/BAC009S0765W0437.wav", "txt": "杀人犯出狱后喊冤被驳回供述与鉴定相印证"} -{"key": "BAC009S0765W0438", "wav": "./aishell/wav/test/S0765/BAC009S0765W0438.wav", "txt": "丈夫关某身负多处刀伤"} -{"key": "BAC009S0765W0439", "wav": "./aishell/wav/test/S0765/BAC009S0765W0439.wav", "txt": "呼救报警时称有人入屋行凶"} -{"key": "BAC009S0765W0440", "wav": "./aishell/wav/test/S0765/BAC009S0765W0440.wav", "txt": "又供称是自己失手杀妻"} -{"key": "BAC009S0765W0441", "wav": "./aishell/wav/test/S0765/BAC009S0765W0441.wav", "txt": "关某先后被判死刑死缓"} -{"key": "BAC009S0765W0442", "wav": "./aishell/wav/test/S0765/BAC009S0765W0442.wav", "txt": "他向广东省高院申诉"} -{"key": "BAC009S0765W0443", "wav": "./aishell/wav/test/S0765/BAC009S0765W0443.wav", "txt": "广东高院审理后驳回了关某的申诉"} -{"key": "BAC009S0765W0444", "wav": "./aishell/wav/test/S0765/BAC009S0765W0444.wav", "txt": "杀人犯受民警感召行刑前捐器官谢罪"} -{"key": "BAC009S0765W0445", "wav": "./aishell/wav/test/S0765/BAC009S0765W0445.wav", "txt": "杀人犯抢劫获刑未查出旧案警方指纹识别有遗漏"} -{"key": "BAC009S0765W0446", "wav": "./aishell/wav/test/S0765/BAC009S0765W0446.wav", "txt": "京华时报记者蒲东峰摄二零零七年"} -{"key": "BAC009S0765W0447", "wav": "./aishell/wav/test/S0765/BAC009S0765W0447.wav", "txt": "时年二三岁的杨柱军在北京抢劫杀害了一名出租车司机"} -{"key": "BAC009S0765W0448", "wav": "./aishell/wav/test/S0765/BAC009S0765W0448.wav", "txt": "此后他没有隐姓埋名逃往外地"} -{"key": "BAC009S0765W0449", "wav": "./aishell/wav/test/S0765/BAC009S0765W0449.wav", "txt": "公安机关并未查出其身上还背着命案"} -{"key": "BAC009S0765W0450", "wav": "./aishell/wav/test/S0765/BAC009S0765W0450.wav", "txt": "并于二零一五年一月将其抓获"} -{"key": "BAC009S0765W0451", "wav": "./aishell/wav/test/S0765/BAC009S0765W0451.wav", "txt": "曾多次比对二零零七年命案现场匕首上的指纹"} -{"key": "BAC009S0765W0452", "wav": "./aishell/wav/test/S0765/BAC009S0765W0452.wav", "txt": "但指纹比对识别系统会出现一定概率的遗漏"} -{"key": "BAC009S0765W0453", "wav": "./aishell/wav/test/S0765/BAC009S0765W0453.wav", "txt": "杨柱军因涉嫌抢劫罪在市二中院受审"} 
-{"key": "BAC009S0765W0454", "wav": "./aishell/wav/test/S0765/BAC009S0765W0454.wav", "txt": "杀害中传失联女主嫌犯想找个无辜的人发泄"} -{"key": "BAC009S0765W0455", "wav": "./aishell/wav/test/S0765/BAC009S0765W0455.wav", "txt": "视频截图新京报快讯记者杨锋昨日"} -{"key": "BAC009S0765W0456", "wav": "./aishell/wav/test/S0765/BAC009S0765W0456.wav", "txt": "杀害中传女学生犯罪嫌疑人从小家庭教育严格"} -{"key": "BAC009S0765W0457", "wav": "./aishell/wav/test/S0765/BAC009S0765W0457.wav", "txt": "失联近两天的中传研究生周云露"} -{"key": "BAC009S0765W0458", "wav": "./aishell/wav/test/S0765/BAC009S0765W0458.wav", "txt": "李斯达表示自己跟周云露并没有深仇大恨"} -{"key": "BAC009S0765W0459", "wav": "./aishell/wav/test/S0765/BAC009S0765W0459.wav", "txt": "称就是想找个无辜的人"} -{"key": "BAC009S0765W0460", "wav": "./aishell/wav/test/S0765/BAC009S0765W0460.wav", "txt": "目前李斯达被关押在朝阳区看守所"} -{"key": "BAC009S0765W0461", "wav": "./aishell/wav/test/S0765/BAC009S0765W0461.wav", "txt": "周云露的父母在昨天上午去过朝阳刑警队"} -{"key": "BAC009S0765W0462", "wav": "./aishell/wav/test/S0765/BAC009S0765W0462.wav", "txt": "杀害中传女生嫌犯曾私藏刺刀同学称其特立独行"} -{"key": "BAC009S0765W0463", "wav": "./aishell/wav/test/S0765/BAC009S0765W0463.wav", "txt": "李斯达手持尖刀的自拍照"} -{"key": "BAC009S0765W0464", "wav": "./aishell/wav/test/S0765/BAC009S0765W0464.wav", "txt": "新京报快讯记者杨锋凌晨今日下午"} -{"key": "BAC009S0765W0465", "wav": "./aishell/wav/test/S0765/BAC009S0765W0465.wav", "txt": "中国传媒大学官网发布消息称"} -{"key": "BAC009S0765W0466", "wav": "./aishell/wav/test/S0765/BAC009S0765W0466.wav", "txt": "在朝阳区百子湾阳光嘉园小区遇害"} -{"key": "BAC009S0765W0467", "wav": "./aishell/wav/test/S0765/BAC009S0765W0467.wav", "txt": "犯罪嫌疑人已被抓获"} -{"key": "BAC009S0765W0468", "wav": "./aishell/wav/test/S0765/BAC009S0765W0468.wav", "txt": "学校正在全力配合公安机关和家属进行善后处理"} -{"key": "BAC009S0765W0469", "wav": "./aishell/wav/test/S0765/BAC009S0765W0469.wav", "txt": "杀害夜跑女子嫌犯不言不语拾荒者身份尚未确认"} -{"key": "BAC009S0765W0470", "wav": "./aishell/wav/test/S0765/BAC009S0765W0470.wav", "txt": "杀害女教师疑犯行凶后脸有伤警方悬赏五万缉拿"} -{"key": "BAC009S0765W0471", "wav": "./aishell/wav/test/S0765/BAC009S0765W0471.wav", "txt": "遇害女教师昨晚七时五七分"} -{"key": "BAC009S0765W0472", "wav": "./aishell/wav/test/S0765/BAC009S0765W0472.wav", "txt": "其作案后身上有大量血迹"} -{"key": "BAC009S0765W0473", "wav": "./aishell/wav/test/S0765/BAC009S0765W0473.wav", "txt": "双手背脸部等裸露部位有刺伤划伤"} -{"key": "BAC009S0765W0474", "wav": "./aishell/wav/test/S0765/BAC009S0765W0474.wav", "txt": "通告呼吁广大群众积极检举揭发提供线索"} -{"key": "BAC009S0765W0475", "wav": "./aishell/wav/test/S0765/BAC009S0765W0475.wav", "txt": "对提供重大线索协助破案者"} -{"key": "BAC009S0765W0476", "wav": "./aishell/wav/test/S0765/BAC009S0765W0476.wav", "txt": "我局将给予五万元奖励"} -{"key": "BAC009S0765W0477", "wav": "./aishell/wav/test/S0765/BAC009S0765W0477.wav", "txt": "杀害女童凶手被抓指认现场上千民众喊打"} -{"key": "BAC009S0765W0478", "wav": "./aishell/wav/test/S0765/BAC009S0765W0478.wav", "txt": "四川广安一一岁女孩的失踪"} -{"key": "BAC009S0765W0479", "wav": "./aishell/wav/test/S0765/BAC009S0765W0479.wav", "txt": "九日晚女孩尸体被找到"} -{"key": "BAC009S0765W0480", "wav": "./aishell/wav/test/S0765/BAC009S0765W0480.wav", "txt": "凶手在郫县安靖镇被抓"} -{"key": "BAC009S0765W0481", "wav": "./aishell/wav/test/S0765/BAC009S0765W0481.wav", "txt": "凶手到岳池县石垭镇指认骗走孩子的现场"} -{"key": "BAC009S0765W0482", "wav": "./aishell/wav/test/S0765/BAC009S0765W0482.wav", "txt": "数千围观人群高呼打死这个杂碎"} -{"key": "BAC009S0765W0483", "wav": "./aishell/wav/test/S0765/BAC009S0765W0483.wav", "txt": "现场喊打声持续不断"} -{"key": "BAC009S0765W0484", "wav": "./aishell/wav/test/S0765/BAC009S0765W0484.wav", "txt": "杀害宝鸡夜跑教师嫌犯落网是否为拾荒者尚无定论"} -{"key": "BAC009S0765W0485", "wav": "./aishell/wav/test/S0765/BAC009S0765W0485.wav", "txt": 
"吕某于一零月一四日晚从家中外出锻炼失踪"} -{"key": "BAC009S0765W0486", "wav": "./aishell/wav/test/S0765/BAC009S0765W0486.wav", "txt": "尸体于一零月二零日在宝鸡渭河公园被发现"} -{"key": "BAC009S0765W0487", "wav": "./aishell/wav/test/S0765/BAC009S0765W0487.wav", "txt": "李克强集众智汇众力攻坚克难激发活力"} -{"key": "BAC009S0765W0488", "wav": "./aishell/wav/test/S0765/BAC009S0765W0488.wav", "txt": "李彬彬喂大象喝水略显老态提醒网友夏天要补水"} -{"key": "BAC009S0765W0489", "wav": "./aishell/wav/test/S0765/BAC009S0765W0489.wav", "txt": "联合国官方微博晒出一张李彬彬喂大象喝水的照片"} -{"key": "BAC009S0765W0490", "wav": "./aishell/wav/test/S0765/BAC009S0765W0490.wav", "txt": "华西都市报讯记者杜恩湖一零月二四日中午一二时"} -{"key": "BAC009S0765W0491", "wav": "./aishell/wav/test/S0765/BAC009S0765W0491.wav", "txt": "一零月二三曰现身成都平乐古城"} -{"key": "BAC009S0765W0492", "wav": "./aishell/wav/test/S0765/BAC009S0765W0492.wav", "txt": "应邀参加第二届天府古镇艺术节"} -{"key": "BAC009S0765W0493", "wav": "./aishell/wav/test/S0765/BAC009S0765W0493.wav", "txt": "现场李双江受到了观众的热烈欢迎"} -{"key": "BAC009S0765W0494", "wav": "./aishell/wav/test/S0765/BAC009S0765W0494.wav", "txt": "二零零多幅珍贵油画抵达南京"} -{"key": "BAC009S0765W0495", "wav": "./aishell/wav/test/S0765/BAC009S0765W0495.wav", "txt": "李嘉诚军师抛售马云一五亿买香港最贵单价豪宅"} -{"key": "BAC009S0766W0121", "wav": "./aishell/wav/test/S0766/BAC009S0766W0121.wav", "txt": "实现数字化整合营销"} -{"key": "BAC009S0766W0122", "wav": "./aishell/wav/test/S0766/BAC009S0766W0122.wav", "txt": "是当今广告行业的需要"} -{"key": "BAC009S0766W0123", "wav": "./aishell/wav/test/S0766/BAC009S0766W0123.wav", "txt": "消费者行为的变化及技术的进步"} -{"key": "BAC009S0766W0124", "wav": "./aishell/wav/test/S0766/BAC009S0766W0124.wav", "txt": "催生了广告领域新的变革和创新"} -{"key": "BAC009S0766W0125", "wav": "./aishell/wav/test/S0766/BAC009S0766W0125.wav", "txt": "唯有实力雄厚又颇具现代创新意识的广告企业"} -{"key": "BAC009S0766W0126", "wav": "./aishell/wav/test/S0766/BAC009S0766W0126.wav", "txt": "今久整合营销集团就是如此"} -{"key": "BAC009S0766W0127", "wav": "./aishell/wav/test/S0766/BAC009S0766W0127.wav", "txt": "成为圈子里首屈一指的超大企业"} -{"key": "BAC009S0766W0128", "wav": "./aishell/wav/test/S0766/BAC009S0766W0128.wav", "txt": "自成立以来"} -{"key": "BAC009S0766W0129", "wav": "./aishell/wav/test/S0766/BAC009S0766W0129.wav", "txt": "服务项目几千个"} -{"key": "BAC009S0766W0130", "wav": "./aishell/wav/test/S0766/BAC009S0766W0130.wav", "txt": "开创了蔓延全国的青年社区概念"} -{"key": "BAC009S0766W0131", "wav": "./aishell/wav/test/S0766/BAC009S0766W0131.wav", "txt": "确立了无人撼动的行业老大地位"} -{"key": "BAC009S0766W0132", "wav": "./aishell/wav/test/S0766/BAC009S0766W0132.wav", "txt": "成为房地产最信任的营销公司"} -{"key": "BAC009S0766W0133", "wav": "./aishell/wav/test/S0766/BAC009S0766W0133.wav", "txt": "然而这家雄心勃勃的公司并未止步于此"} -{"key": "BAC009S0766W0134", "wav": "./aishell/wav/test/S0766/BAC009S0766W0134.wav", "txt": "一个以互联网和大数据为核心的时代已经到来"} -{"key": "BAC009S0766W0135", "wav": "./aishell/wav/test/S0766/BAC009S0766W0135.wav", "txt": "今久必须担当起引领时代潮流的重任"} -{"key": "BAC009S0766W0136", "wav": "./aishell/wav/test/S0766/BAC009S0766W0136.wav", "txt": "蓝色光标以几亿人民币收购今久"} -{"key": "BAC009S0766W0137", "wav": "./aishell/wav/test/S0766/BAC009S0766W0137.wav", "txt": "这成为今久转型的起点"} -{"key": "BAC009S0766W0138", "wav": "./aishell/wav/test/S0766/BAC009S0766W0138.wav", "txt": "依托蓝色光标强大的技术和资源优势"} -{"key": "BAC009S0766W0139", "wav": "./aishell/wav/test/S0766/BAC009S0766W0139.wav", "txt": "今久率先提出整合营销的概念"} -{"key": "BAC009S0766W0140", "wav": "./aishell/wav/test/S0766/BAC009S0766W0140.wav", "txt": "其核心在于利用数字化工具"} -{"key": "BAC009S0766W0141", "wav": "./aishell/wav/test/S0766/BAC009S0766W0141.wav", "txt": "为房地产商提供系统化的服务"} -{"key": "BAC009S0766W0142", "wav": "./aishell/wav/test/S0766/BAC009S0766W0142.wav", "txt": 
"整合营销实现了从策略到执行的系统化服务"} -{"key": "BAC009S0766W0143", "wav": "./aishell/wav/test/S0766/BAC009S0766W0143.wav", "txt": "当地产商的效果预期不断提高"} -{"key": "BAC009S0766W0144", "wav": "./aishell/wav/test/S0766/BAC009S0766W0144.wav", "txt": "这时候更要求服务商具备思考和行动的一致性"} -{"key": "BAC009S0766W0145", "wav": "./aishell/wav/test/S0766/BAC009S0766W0145.wav", "txt": "这样也为开发商节省了运营成本"} -{"key": "BAC009S0766W0146", "wav": "./aishell/wav/test/S0766/BAC009S0766W0146.wav", "txt": "整合营销是利用全案思维和大数据技术"} -{"key": "BAC009S0766W0147", "wav": "./aishell/wav/test/S0766/BAC009S0766W0147.wav", "txt": "市场上就出现了各类新型技术软件"} -{"key": "BAC009S0766W0148", "wav": "./aishell/wav/test/S0766/BAC009S0766W0148.wav", "txt": "但大多是雷声大雨点小"} -{"key": "BAC009S0766W0149", "wav": "./aishell/wav/test/S0766/BAC009S0766W0149.wav", "txt": "与房地产商的需求相去甚远"} -{"key": "BAC009S0766W0150", "wav": "./aishell/wav/test/S0766/BAC009S0766W0150.wav", "txt": "大数据营销需要的是强大的技术实力"} -{"key": "BAC009S0766W0151", "wav": "./aishell/wav/test/S0766/BAC009S0766W0151.wav", "txt": "而非某些功能的简单嫁接"} -{"key": "BAC009S0766W0152", "wav": "./aishell/wav/test/S0766/BAC009S0766W0152.wav", "txt": "蓝色光标作为全球首屈一指的广告服务商"} -{"key": "BAC009S0766W0153", "wav": "./aishell/wav/test/S0766/BAC009S0766W0153.wav", "txt": "在大数据上的技术优势无可匹敌"} -{"key": "BAC009S0766W0154", "wav": "./aishell/wav/test/S0766/BAC009S0766W0154.wav", "txt": "今久正是在蓝色光标的技术支持下"} -{"key": "BAC009S0766W0155", "wav": "./aishell/wav/test/S0766/BAC009S0766W0155.wav", "txt": "实现了大数据营销的创新"} -{"key": "BAC009S0766W0156", "wav": "./aishell/wav/test/S0766/BAC009S0766W0156.wav", "txt": "许多数字新产品"} -{"key": "BAC009S0766W0157", "wav": "./aishell/wav/test/S0766/BAC009S0766W0157.wav", "txt": "广泛应用于移动端"} -{"key": "BAC009S0766W0158", "wav": "./aishell/wav/test/S0766/BAC009S0766W0158.wav", "txt": "分析用户的消费行为和生活方式"} -{"key": "BAC009S0766W0159", "wav": "./aishell/wav/test/S0766/BAC009S0766W0159.wav", "txt": "帮助广告主找出目标用户"} -{"key": "BAC009S0766W0160", "wav": "./aishell/wav/test/S0766/BAC009S0766W0160.wav", "txt": "然后对广告信息进行精确匹配"} -{"key": "BAC009S0766W0161", "wav": "./aishell/wav/test/S0766/BAC009S0766W0161.wav", "txt": "达到降低成本提升营销效果的目的"} -{"key": "BAC009S0766W0162", "wav": "./aishell/wav/test/S0766/BAC009S0766W0162.wav", "txt": "今久在大举创新的同时"} -{"key": "BAC009S0766W0163", "wav": "./aishell/wav/test/S0766/BAC009S0766W0163.wav", "txt": "保持原有业务的正常运作"} -{"key": "BAC009S0766W0164", "wav": "./aishell/wav/test/S0766/BAC009S0766W0164.wav", "txt": "这才是一个大企业应该有的战略方向"} -{"key": "BAC009S0766W0165", "wav": "./aishell/wav/test/S0766/BAC009S0766W0165.wav", "txt": "带动了区域板块的扩张"} -{"key": "BAC009S0766W0166", "wav": "./aishell/wav/test/S0766/BAC009S0766W0166.wav", "txt": "在海南成立了分公司"} -{"key": "BAC009S0766W0167", "wav": "./aishell/wav/test/S0766/BAC009S0766W0167.wav", "txt": "现在已经是海南本土最大的房地产推广公司"} -{"key": "BAC009S0766W0168", "wav": "./aishell/wav/test/S0766/BAC009S0766W0168.wav", "txt": "拥有许多优质客户"} -{"key": "BAC009S0766W0169", "wav": "./aishell/wav/test/S0766/BAC009S0766W0169.wav", "txt": "今久上海分公司又悄无声息地开张了"} -{"key": "BAC009S0766W0170", "wav": "./aishell/wav/test/S0766/BAC009S0766W0170.wav", "txt": "新媒体推广的业务扩张"} -{"key": "BAC009S0766W0171", "wav": "./aishell/wav/test/S0766/BAC009S0766W0171.wav", "txt": "逐渐地撬开了上海这个外来公司很难生根的大都市"} -{"key": "BAC009S0766W0172", "wav": "./aishell/wav/test/S0766/BAC009S0766W0172.wav", "txt": "郑州长春和哈尔滨三地办事处"} -{"key": "BAC009S0766W0173", "wav": "./aishell/wav/test/S0766/BAC009S0766W0173.wav", "txt": "用蓝色光标强大的新媒体技术和资源"} -{"key": "BAC009S0766W0174", "wav": "./aishell/wav/test/S0766/BAC009S0766W0174.wav", "txt": "搭起了全国地产推广新媒体的版图"} -{"key": "BAC009S0766W0175", 
"wav": "./aishell/wav/test/S0766/BAC009S0766W0175.wav", "txt": "今久又出高价"} -{"key": "BAC009S0766W0176", "wav": "./aishell/wav/test/S0766/BAC009S0766W0176.wav", "txt": "收购了房地产互联网营销公司沈阳新维一半股份"} -{"key": "BAC009S0766W0177", "wav": "./aishell/wav/test/S0766/BAC009S0766W0177.wav", "txt": "今久又一次利用资本市场"} -{"key": "BAC009S0766W0178", "wav": "./aishell/wav/test/S0766/BAC009S0766W0178.wav", "txt": "实现区域扩张"} -{"key": "BAC009S0766W0179", "wav": "./aishell/wav/test/S0766/BAC009S0766W0179.wav", "txt": "区域产品和业务三大层面"} -{"key": "BAC009S0766W0180", "wav": "./aishell/wav/test/S0766/BAC009S0766W0180.wav", "txt": "今久成功实现了转型"} -{"key": "BAC009S0766W0181", "wav": "./aishell/wav/test/S0766/BAC009S0766W0181.wav", "txt": "后今久时代正式到来"} -{"key": "BAC009S0766W0182", "wav": "./aishell/wav/test/S0766/BAC009S0766W0182.wav", "txt": "转型后的今久整合营销集团"} -{"key": "BAC009S0766W0183", "wav": "./aishell/wav/test/S0766/BAC009S0766W0183.wav", "txt": "在全球大数据浪潮中"} -{"key": "BAC009S0766W0184", "wav": "./aishell/wav/test/S0766/BAC009S0766W0184.wav", "txt": "依托蓝色光标的强大平台"} -{"key": "BAC009S0766W0185", "wav": "./aishell/wav/test/S0766/BAC009S0766W0185.wav", "txt": "助力中国房地产开发企业发掘并实现更大的价值需求"} -{"key": "BAC009S0766W0186", "wav": "./aishell/wav/test/S0766/BAC009S0766W0186.wav", "txt": "在机遇与挑战共存的互联网时代"} -{"key": "BAC009S0766W0187", "wav": "./aishell/wav/test/S0766/BAC009S0766W0187.wav", "txt": "发行利率也有较大幅度上升"} -{"key": "BAC009S0766W0188", "wav": "./aishell/wav/test/S0766/BAC009S0766W0188.wav", "txt": "人民银行多次提高存款准备金率和存贷款基准利率"} -{"key": "BAC009S0766W0189", "wav": "./aishell/wav/test/S0766/BAC009S0766W0189.wav", "txt": "不仅是城投债券发行利率"} -{"key": "BAC009S0766W0190", "wav": "./aishell/wav/test/S0766/BAC009S0766W0190.wav", "txt": "债券市场所有品种发行利率整体上都表现出向上的走向"} -{"key": "BAC009S0766W0191", "wav": "./aishell/wav/test/S0766/BAC009S0766W0191.wav", "txt": "导致城投债券发行产生较高的风险溢价"} -{"key": "BAC009S0766W0192", "wav": "./aishell/wav/test/S0766/BAC009S0766W0192.wav", "txt": "城投债券收益率上升"} -{"key": "BAC009S0766W0193", "wav": "./aishell/wav/test/S0766/BAC009S0766W0193.wav", "txt": "对债券投资人来说不是坏事"} -{"key": "BAC009S0766W0194", "wav": "./aishell/wav/test/S0766/BAC009S0766W0194.wav", "txt": "有利于提升城投债券的资产配置价值"} -{"key": "BAC009S0766W0195", "wav": "./aishell/wav/test/S0766/BAC009S0766W0195.wav", "txt": "则需要在发债时机和发债规模上进行合理的把握"} -{"key": "BAC009S0766W0196", "wav": "./aishell/wav/test/S0766/BAC009S0766W0196.wav", "txt": "我个人不赞成这一判断"} -{"key": "BAC009S0766W0197", "wav": "./aishell/wav/test/S0766/BAC009S0766W0197.wav", "txt": "债券发行人是优质的"} -{"key": "BAC009S0766W0198", "wav": "./aishell/wav/test/S0766/BAC009S0766W0198.wav", "txt": "还本付息也是正常的"} -{"key": "BAC009S0766W0199", "wav": "./aishell/wav/test/S0766/BAC009S0766W0199.wav", "txt": "投资者对城投债券风险表现出的恐慌"} -{"key": "BAC009S0766W0200", "wav": "./aishell/wav/test/S0766/BAC009S0766W0200.wav", "txt": "加强城投债监管完善制度建设"} -{"key": "BAC009S0766W0201", "wav": "./aishell/wav/test/S0766/BAC009S0766W0201.wav", "txt": "有的媒体甚至用井喷来描述"} -{"key": "BAC009S0766W0202", "wav": "./aishell/wav/test/S0766/BAC009S0766W0202.wav", "txt": "您如何看待城投债券这几年的发展和作用"} -{"key": "BAC009S0766W0203", "wav": "./aishell/wav/test/S0766/BAC009S0766W0203.wav", "txt": "徐林这几年城投债券发行数量的确有所增加"} -{"key": "BAC009S0766W0204", "wav": "./aishell/wav/test/S0766/BAC009S0766W0204.wav", "txt": "地方投融资平台公司通过发行债券进行融资"} -{"key": "BAC009S0766W0205", "wav": "./aishell/wav/test/S0766/BAC009S0766W0205.wav", "txt": "符合提高直接融资比重的要求"} -{"key": "BAC009S0766W0206", "wav": "./aishell/wav/test/S0766/BAC009S0766W0206.wav", "txt": "城投债券也适应了发行人和投资人的需要"} -{"key": "BAC009S0766W0207", "wav": 
"./aishell/wav/test/S0766/BAC009S0766W0207.wav", "txt": "这是这几年城投债券发行规模不断扩大的主要原因"} -{"key": "BAC009S0766W0208", "wav": "./aishell/wav/test/S0766/BAC009S0766W0208.wav", "txt": "我委核准发行的企业债券累计为七千亿元"} -{"key": "BAC009S0766W0209", "wav": "./aishell/wav/test/S0766/BAC009S0766W0209.wav", "txt": "其中城投债券共发行七千亿元"} -{"key": "BAC009S0766W0210", "wav": "./aishell/wav/test/S0766/BAC009S0766W0210.wav", "txt": "占比只有百分之七"} -{"key": "BAC009S0766W0211", "wav": "./aishell/wav/test/S0766/BAC009S0766W0211.wav", "txt": "城投债券的发行有比较严格的条件"} -{"key": "BAC009S0766W0212", "wav": "./aishell/wav/test/S0766/BAC009S0766W0212.wav", "txt": "从已发行的城投债券用途看"} -{"key": "BAC009S0766W0213", "wav": "./aishell/wav/test/S0766/BAC009S0766W0213.wav", "txt": "保障房建设和棚户区改造"} -{"key": "BAC009S0766W0214", "wav": "./aishell/wav/test/S0766/BAC009S0766W0214.wav", "txt": "城市文化和体育设施"} -{"key": "BAC009S0766W0215", "wav": "./aishell/wav/test/S0766/BAC009S0766W0215.wav", "txt": "地震灾后重建等领域"} -{"key": "BAC009S0766W0216", "wav": "./aishell/wav/test/S0766/BAC009S0766W0216.wav", "txt": "都起到了积极的作用"} -{"key": "BAC009S0766W0217", "wav": "./aishell/wav/test/S0766/BAC009S0766W0217.wav", "txt": "随着我国资本市场的进一步发展"} -{"key": "BAC009S0766W0218", "wav": "./aishell/wav/test/S0766/BAC009S0766W0218.wav", "txt": "城投债券作为中国债券市场的准市政债"} -{"key": "BAC009S0766W0219", "wav": "./aishell/wav/test/S0766/BAC009S0766W0219.wav", "txt": "发行规模还会稳步扩大"} -{"key": "BAC009S0766W0220", "wav": "./aishell/wav/test/S0766/BAC009S0766W0220.wav", "txt": "中国证券报面对市场对城投债券风险的担忧"} -{"key": "BAC009S0766W0221", "wav": "./aishell/wav/test/S0766/BAC009S0766W0221.wav", "txt": "是如何更好地防范城投债券可能出现的风险的"} -{"key": "BAC009S0766W0222", "wav": "./aishell/wav/test/S0766/BAC009S0766W0222.wav", "txt": "虽然已发行的城投债券的还本付息都是正常的"} -{"key": "BAC009S0766W0223", "wav": "./aishell/wav/test/S0766/BAC009S0766W0223.wav", "txt": "城投债作为一个信用产品"} -{"key": "BAC009S0766W0224", "wav": "./aishell/wav/test/S0766/BAC009S0766W0224.wav", "txt": "不可能是完全无风险的"} -{"key": "BAC009S0766W0225", "wav": "./aishell/wav/test/S0766/BAC009S0766W0225.wav", "txt": "我看了以后很受震动"} -{"key": "BAC009S0766W0226", "wav": "./aishell/wav/test/S0766/BAC009S0766W0226.wav", "txt": "虽然报道内容并没有具体的城投债券还本付息违约案"} -{"key": "BAC009S0766W0227", "wav": "./aishell/wav/test/S0766/BAC009S0766W0227.wav", "txt": "但却提醒了我们要更加关注城投债券可能出现的风险"} -{"key": "BAC009S0766W0228", "wav": "./aishell/wav/test/S0766/BAC009S0766W0228.wav", "txt": "并采取措施切实保护债券投资人的合法权益"} -{"key": "BAC009S0766W0229", "wav": "./aishell/wav/test/S0766/BAC009S0766W0229.wav", "txt": "作为城投债券发行监管部门"} -{"key": "BAC009S0766W0230", "wav": "./aishell/wav/test/S0766/BAC009S0766W0230.wav", "txt": "我们对城投债券发行人的审核一直是比较严格的"} -{"key": "BAC009S0766W0231", "wav": "./aishell/wav/test/S0766/BAC009S0766W0231.wav", "txt": "地方投融资平台公司申请发行债券"} -{"key": "BAC009S0766W0232", "wav": "./aishell/wav/test/S0766/BAC009S0766W0232.wav", "txt": "必须符合一些基本的条件企业必须连续三年盈利"} -{"key": "BAC009S0766W0233", "wav": "./aishell/wav/test/S0766/BAC009S0766W0233.wav", "txt": "所投项目必须经过合规性审查"} -{"key": "BAC009S0766W0234", "wav": "./aishell/wav/test/S0766/BAC009S0766W0234.wav", "txt": "我们还控制了投融资平台公司发债的范围"} -{"key": "BAC009S0766W0235", "wav": "./aishell/wav/test/S0766/BAC009S0766W0235.wav", "txt": "才能申请发行城投债券"} -{"key": "BAC009S0766W0236", "wav": "./aishell/wav/test/S0766/BAC009S0766W0236.wav", "txt": "就不得再通过发行城投债券新增政府性债务"} -{"key": "BAC009S0766W0237", "wav": "./aishell/wav/test/S0766/BAC009S0766W0237.wav", "txt": "正是有了这样一些严格的规定"} -{"key": "BAC009S0766W0238", "wav": "./aishell/wav/test/S0766/BAC009S0766W0238.wav", "txt": "使得很多投融资平台公司"} -{"key": "BAC009S0766W0239", "wav": 
"./aishell/wav/test/S0766/BAC009S0766W0239.wav", "txt": "难以满足发行城投债券的资格和条件"} -{"key": "BAC009S0766W0240", "wav": "./aishell/wav/test/S0766/BAC009S0766W0240.wav", "txt": "这在相当程度上控制了城投债券的发行规模"} -{"key": "BAC009S0766W0241", "wav": "./aishell/wav/test/S0766/BAC009S0766W0241.wav", "txt": "也降低了城投债券的风险"} -{"key": "BAC009S0766W0242", "wav": "./aishell/wav/test/S0766/BAC009S0766W0242.wav", "txt": "为了控制地方政府本届发债下届还钱的道德风险"} -{"key": "BAC009S0766W0243", "wav": "./aishell/wav/test/S0766/BAC009S0766W0243.wav", "txt": "我们还安排了专门的偿债均摊机制"} -{"key": "BAC009S0766W0244", "wav": "./aishell/wav/test/S0766/BAC009S0766W0244.wav", "txt": "也就是将债券还本压力在债券存续期内进行合理分摊"} -{"key": "BAC009S0766W0245", "wav": "./aishell/wav/test/S0766/BAC009S0766W0245.wav", "txt": "避免在最后一年累积过大的还本压力和风险"} -{"key": "BAC009S0766W0246", "wav": "./aishell/wav/test/S0766/BAC009S0766W0246.wav", "txt": "有媒体报道了云投集团等发债企业转移核心资产"} -{"key": "BAC009S0766W0247", "wav": "./aishell/wav/test/S0766/BAC009S0766W0247.wav", "txt": "损害债券持有人利益的事件"} -{"key": "BAC009S0766W0248", "wav": "./aishell/wav/test/S0766/BAC009S0766W0248.wav", "txt": "并对债券市场形成了不小的冲击"} -{"key": "BAC009S0766W0249", "wav": "./aishell/wav/test/S0766/BAC009S0766W0249.wav", "txt": "我们如何考虑防止这类事件再次发生"} -{"key": "BAC009S0766W0250", "wav": "./aishell/wav/test/S0766/BAC009S0766W0250.wav", "txt": "更好地保护债券投资人的利益"} -{"key": "BAC009S0766W0251", "wav": "./aishell/wav/test/S0766/BAC009S0766W0251.wav", "txt": "徐林发债企业在债券存续期内进行资产转移"} -{"key": "BAC009S0766W0252", "wav": "./aishell/wav/test/S0766/BAC009S0766W0252.wav", "txt": "极有可能对债券持有人利益构成不利影响"} -{"key": "BAC009S0766W0253", "wav": "./aishell/wav/test/S0766/BAC009S0766W0253.wav", "txt": "华尔街的半兽人已经为他的离开紧锣密鼓地敲起退堂鼓"} -{"key": "BAC009S0766W0257", "wav": "./aishell/wav/test/S0766/BAC009S0766W0257.wav", "txt": "问题是他也想不出谁能干得更好"} -{"key": "BAC009S0766W0260", "wav": "./aishell/wav/test/S0766/BAC009S0766W0260.wav", "txt": "但亏损却达到了一点八亿美元"} -{"key": "BAC009S0766W0262", "wav": "./aishell/wav/test/S0766/BAC009S0766W0262.wav", "txt": "不应从用户身上榨取广告收入"} -{"key": "BAC009S0766W0263", "wav": "./aishell/wav/test/S0766/BAC009S0766W0263.wav", "txt": "试问又有哪位有魔法能挽回巨额亏损呢"} -{"key": "BAC009S0766W0268", "wav": "./aishell/wav/test/S0766/BAC009S0766W0268.wav", "txt": "对于高中生来说这会有点令人尴尬罢了"} -{"key": "BAC009S0766W0269", "wav": "./aishell/wav/test/S0766/BAC009S0766W0269.wav", "txt": "可对于一个成年人来说算什么"} -{"key": "BAC009S0766W0270", "wav": "./aishell/wav/test/S0766/BAC009S0766W0270.wav", "txt": "还有他对日本文化的迷恋"} -{"key": "BAC009S0766W0271", "wav": "./aishell/wav/test/S0766/BAC009S0766W0271.wav", "txt": "然后又要去竞选纽约州长"} -{"key": "BAC009S0766W0273", "wav": "./aishell/wav/test/S0766/BAC009S0766W0273.wav", "txt": "跟星巴克的合作就是灾难"} -{"key": "BAC009S0766W0285", "wav": "./aishell/wav/test/S0766/BAC009S0766W0285.wav", "txt": "梅姐待的已算够长了"} -{"key": "BAC009S0766W0287", "wav": "./aishell/wav/test/S0766/BAC009S0766W0287.wav", "txt": "但是至少会给股价刺激一下"} -{"key": "BAC009S0766W0288", "wav": "./aishell/wav/test/S0766/BAC009S0766W0288.wav", "txt": "而梅姐则可以陪陪小孩或者去搞搞政治"} -{"key": "BAC009S0766W0293", "wav": "./aishell/wav/test/S0766/BAC009S0766W0293.wav", "txt": "但亏损达到了一点七亿美元"} -{"key": "BAC009S0766W0294", "wav": "./aishell/wav/test/S0766/BAC009S0766W0294.wav", "txt": "这样的成绩已经比二零一三年要好"} -{"key": "BAC009S0766W0296", "wav": "./aishell/wav/test/S0766/BAC009S0766W0296.wav", "txt": "十年都还没赚钱的话"} -{"key": "BAC009S0766W0299", "wav": "./aishell/wav/test/S0766/BAC009S0766W0299.wav", "txt": "它已经失去了作为独立公司的存在意义"} -{"key": "BAC009S0766W0303", "wav": "./aishell/wav/test/S0766/BAC009S0766W0303.wav", "txt": "同比增长一百三十四点七百分之三"} -{"key": "BAC009S0766W0304", 
"wav": "./aishell/wav/test/S0766/BAC009S0766W0304.wav", "txt": "归属于上市公司股东的净利润二十五十二万元"} -{"key": "BAC009S0766W0305", "wav": "./aishell/wav/test/S0766/BAC009S0766W0305.wav", "txt": "去年同期则是亏损二百四十二万元"} -{"key": "BAC009S0766W0306", "wav": "./aishell/wav/test/S0766/BAC009S0766W0306.wav", "txt": "同比增长十一五十六点四百分之二"} -{"key": "BAC009S0766W0307", "wav": "./aishell/wav/test/S0766/BAC009S0766W0307.wav", "txt": "公司锂电池业务实现营业收入四点零一亿元"} -{"key": "BAC009S0766W0308", "wav": "./aishell/wav/test/S0766/BAC009S0766W0308.wav", "txt": "同比增长二百六十八点五百分之一"} -{"key": "BAC009S0766W0309", "wav": "./aishell/wav/test/S0766/BAC009S0766W0309.wav", "txt": "成飞集成相关人士告诉每日经济新闻记者"} -{"key": "BAC009S0766W0310", "wav": "./aishell/wav/test/S0766/BAC009S0766W0310.wav", "txt": "前两年锂电池行业整体处于市场培育期"} -{"key": "BAC009S0766W0311", "wav": "./aishell/wav/test/S0766/BAC009S0766W0311.wav", "txt": "虽然国家在二零一零年就颁布了新能源补贴政策"} -{"key": "BAC009S0766W0312", "wav": "./aishell/wav/test/S0766/BAC009S0766W0312.wav", "txt": "但是市场启动不像预期那么快"} -{"key": "BAC009S0766W0313", "wav": "./aishell/wav/test/S0766/BAC009S0766W0313.wav", "txt": "基本上是从二零一四年下半年才有明显的感觉"} -{"key": "BAC009S0766W0314", "wav": "./aishell/wav/test/S0766/BAC009S0766W0314.wav", "txt": "目前公司锂电池订单比较充足"} -{"key": "BAC009S0766W0315", "wav": "./aishell/wav/test/S0766/BAC009S0766W0315.wav", "txt": "由于传统汽车产业步入寒冬"} -{"key": "BAC009S0766W0316", "wav": "./aishell/wav/test/S0766/BAC009S0766W0316.wav", "txt": "汽车厂商纷纷转型新能源汽车"} -{"key": "BAC009S0766W0317", "wav": "./aishell/wav/test/S0766/BAC009S0766W0317.wav", "txt": "新能源汽车的爆发使得锂电池供不应求"} -{"key": "BAC009S0766W0318", "wav": "./aishell/wav/test/S0766/BAC009S0766W0318.wav", "txt": "随着锂电池产业链迎来井喷"} -{"key": "BAC009S0766W0319", "wav": "./aishell/wav/test/S0766/BAC009S0766W0319.wav", "txt": "锂电需求带动业绩增长"} -{"key": "BAC009S0766W0320", "wav": "./aishell/wav/test/S0766/BAC009S0766W0320.wav", "txt": "成飞集成的锂电池业务在前两年情况并不好"} -{"key": "BAC009S0766W0321", "wav": "./aishell/wav/test/S0766/BAC009S0766W0321.wav", "txt": "新能源汽车市场在逐步启动"} -{"key": "BAC009S0766W0322", "wav": "./aishell/wav/test/S0766/BAC009S0766W0322.wav", "txt": "锂电池市场也在向好"} -{"key": "BAC009S0766W0323", "wav": "./aishell/wav/test/S0766/BAC009S0766W0323.wav", "txt": "成飞集成相关人士告诉记者"} -{"key": "BAC009S0766W0324", "wav": "./aishell/wav/test/S0766/BAC009S0766W0324.wav", "txt": "这是今年上半年锂电池业务爆发的原因"} -{"key": "BAC009S0766W0325", "wav": "./aishell/wav/test/S0766/BAC009S0766W0325.wav", "txt": "成飞集成的其他主营业务中"} -{"key": "BAC009S0766W0326", "wav": "./aishell/wav/test/S0766/BAC009S0766W0326.wav", "txt": "汽车模具以及汽车零部件表现一般"} -{"key": "BAC009S0766W0327", "wav": "./aishell/wav/test/S0766/BAC009S0766W0327.wav", "txt": "汽车模具实现营收一点一零一亿元"} -{"key": "BAC009S0766W0328", "wav": "./aishell/wav/test/S0766/BAC009S0766W0328.wav", "txt": "毛利率为十八点百分之三"} -{"key": "BAC009S0766W0329", "wav": "./aishell/wav/test/S0766/BAC009S0766W0329.wav", "txt": "毛利率为十九点六百分之六"} -{"key": "BAC009S0766W0330", "wav": "./aishell/wav/test/S0766/BAC009S0766W0330.wav", "txt": "同比增长四点百分之三十五"} -{"key": "BAC009S0766W0331", "wav": "./aishell/wav/test/S0766/BAC009S0766W0331.wav", "txt": "但是由于该项业务占比较小"} -{"key": "BAC009S0766W0332", "wav": "./aishell/wav/test/S0766/BAC009S0766W0332.wav", "txt": "所以对业绩的影响有限"} -{"key": "BAC009S0766W0333", "wav": "./aishell/wav/test/S0766/BAC009S0766W0333.wav", "txt": "汽车零部件总体规模不大"} -{"key": "BAC009S0766W0334", "wav": "./aishell/wav/test/S0766/BAC009S0766W0334.wav", "txt": "汽车模具毛利率下滑"} -{"key": "BAC009S0766W0335", "wav": "./aishell/wav/test/S0766/BAC009S0766W0335.wav", "txt": "一方面是由于上半年模具的比较基数较低"} -{"key": "BAC009S0766W0336", "wav": 
"./aishell/wav/test/S0766/BAC009S0766W0336.wav", "txt": "也就是去年和今年上半年营收总额都不高"} -{"key": "BAC009S0766W0337", "wav": "./aishell/wav/test/S0766/BAC009S0766W0337.wav", "txt": "另外今年上半年个别订单的价格也比较低"} -{"key": "BAC009S0766W0339", "wav": "./aishell/wav/test/S0766/BAC009S0766W0339.wav", "txt": "这一状况有望发生改变"} -{"key": "BAC009S0766W0340", "wav": "./aishell/wav/test/S0766/BAC009S0766W0340.wav", "txt": "七年之后宋安东二十五岁"} -{"key": "BAC009S0766W0341", "wav": "./aishell/wav/test/S0766/BAC009S0766W0341.wav", "txt": "正是冰球运动员的黄金年龄"} -{"key": "BAC009S0766W0342", "wav": "./aishell/wav/test/S0766/BAC009S0766W0342.wav", "txt": "年少成名的他带领国家队出征冬奥会"} -{"key": "BAC009S0766W0343", "wav": "./aishell/wav/test/S0766/BAC009S0766W0343.wav", "txt": "铁定会有助于提升我国的冰球水平"} -{"key": "BAC009S0766W0344", "wav": "./aishell/wav/test/S0766/BAC009S0766W0344.wav", "txt": "进而提升我国在冬奥会申办过程中的竞争力"} -{"key": "BAC009S0766W0345", "wav": "./aishell/wav/test/S0766/BAC009S0766W0345.wav", "txt": "二零二二年冬奥会在北京举行"} -{"key": "BAC009S0766W0346", "wav": "./aishell/wav/test/S0766/BAC009S0766W0346.wav", "txt": "以宋安东为首的运动员们可以说是鲜活的冬奥名片"} -{"key": "BAC009S0766W0347", "wav": "./aishell/wav/test/S0766/BAC009S0766W0347.wav", "txt": "让越来越多人关注并参与到其中来"} -{"key": "BAC009S0766W0348", "wav": "./aishell/wav/test/S0766/BAC009S0766W0348.wav", "txt": "建设三个相对集聚的场馆群"} -{"key": "BAC009S0766W0349", "wav": "./aishell/wav/test/S0766/BAC009S0766W0349.wav", "txt": "申奥过程本身已经推动了城际交通建设"} -{"key": "BAC009S0766W0350", "wav": "./aishell/wav/test/S0766/BAC009S0766W0350.wav", "txt": "因此对于北京申办冬奥会的最终结果"} -{"key": "BAC009S0766W0351", "wav": "./aishell/wav/test/S0766/BAC009S0766W0351.wav", "txt": "我们应该抱着更加长远和开阔的视角来看待"} -{"key": "BAC009S0766W0352", "wav": "./aishell/wav/test/S0766/BAC009S0766W0352.wav", "txt": "更要期待着中国冰雪运动真正强大起来的那一天"} -{"key": "BAC009S0766W0353", "wav": "./aishell/wav/test/S0766/BAC009S0766W0353.wav", "txt": "法国冰协同于放人五度世界冠军即将复出搜狐体育"} -{"key": "BAC009S0766W0354", "wav": "./aishell/wav/test/S0766/BAC009S0766W0354.wav", "txt": "北京时间十月二十七日"} -{"key": "BAC009S0766W0355", "wav": "./aishell/wav/test/S0766/BAC009S0766W0355.wav", "txt": "经过将近一年时间的漫长谈判"} -{"key": "BAC009S0766W0356", "wav": "./aishell/wav/test/S0766/BAC009S0766W0356.wav", "txt": "法国花样滑冰联合会终于同意"} -{"key": "BAC009S0766W0357", "wav": "./aishell/wav/test/S0766/BAC009S0766W0357.wav", "txt": "允许布鲁诺马塞洛特代表德国"} -{"key": "BAC009S0766W0358", "wav": "./aishell/wav/test/S0766/BAC009S0766W0358.wav", "txt": "两人的更改国籍禁赛期即将开始"} -{"key": "BAC009S0766W0359", "wav": "./aishell/wav/test/S0766/BAC009S0766W0359.wav", "txt": "这也意味着最晚在明年的各项赛事中"} -{"key": "BAC009S0766W0360", "wav": "./aishell/wav/test/S0766/BAC009S0766W0360.wav", "txt": "我们就能看到这对强大组合的身影"} -{"key": "BAC009S0766W0361", "wav": "./aishell/wav/test/S0766/BAC009S0766W0361.wav", "txt": "在金牌搭档罗宾索尔科维退役之后"} -{"key": "BAC009S0766W0362", "wav": "./aishell/wav/test/S0766/BAC009S0766W0362.wav", "txt": "萨维申科宣布会再坚持一个冬奥会周期"} -{"key": "BAC009S0766W0363", "wav": "./aishell/wav/test/S0766/BAC009S0766W0363.wav", "txt": "她所选择的新搭档就是法国猛男马塞洛特"} -{"key": "BAC009S0766W0364", "wav": "./aishell/wav/test/S0766/BAC009S0766W0364.wav", "txt": "但是因为涉及到男伴更改国籍问题"} -{"key": "BAC009S0766W0365", "wav": "./aishell/wav/test/S0766/BAC009S0766W0365.wav", "txt": "两人的联手十分不顺利"} -{"key": "BAC009S0766W0366", "wav": "./aishell/wav/test/S0766/BAC009S0766W0366.wav", "txt": "这也让他们虽然可以参加小型赛事"} -{"key": "BAC009S0766W0367", "wav": "./aishell/wav/test/S0766/BAC009S0766W0367.wav", "txt": "但是由于国籍不统一"} -{"key": "BAC009S0766W0368", "wav": "./aishell/wav/test/S0766/BAC009S0766W0368.wav", "txt": "无法参加奥运会的比赛"} -{"key": "BAC009S0766W0369", "wav": 
"./aishell/wav/test/S0766/BAC009S0766W0369.wav", "txt": "对于法国冰协的行为"} -{"key": "BAC009S0766W0370", "wav": "./aishell/wav/test/S0766/BAC009S0766W0370.wav", "txt": "不少粉丝都表达了谴责"} -{"key": "BAC009S0766W0371", "wav": "./aishell/wav/test/S0766/BAC009S0766W0371.wav", "txt": "支持他们继续训练参加比赛"} -{"key": "BAC009S0766W0372", "wav": "./aishell/wav/test/S0766/BAC009S0766W0372.wav", "txt": "显然处于最艰难时期的两人丝毫没有放弃"} -{"key": "BAC009S0766W0373", "wav": "./aishell/wav/test/S0766/BAC009S0766W0373.wav", "txt": "休赛期内他们参加了小型赛事"} -{"key": "BAC009S0766W0374", "wav": "./aishell/wav/test/S0766/BAC009S0766W0374.wav", "txt": "从目前已经传出的视频来看"} -{"key": "BAC009S0766W0375", "wav": "./aishell/wav/test/S0766/BAC009S0766W0375.wav", "txt": "男伴更是在最新公布的视频中"} -{"key": "BAC009S0766W0376", "wav": "./aishell/wav/test/S0766/BAC009S0766W0376.wav", "txt": "他们的不放弃换来了成功"} -{"key": "BAC009S0766W0377", "wav": "./aishell/wav/test/S0766/BAC009S0766W0377.wav", "txt": "马塞洛特的母亲表示"} -{"key": "BAC009S0766W0378", "wav": "./aishell/wav/test/S0766/BAC009S0766W0378.wav", "txt": "法国冰协方面的态度有了缓和"} -{"key": "BAC009S0766W0379", "wav": "./aishell/wav/test/S0766/BAC009S0766W0379.wav", "txt": "法国冰协提出最后要求"} -{"key": "BAC009S0766W0380", "wav": "./aishell/wav/test/S0766/BAC009S0766W0380.wav", "txt": "要求马塞洛特缴纳七万欧元的转国籍费用"} -{"key": "BAC009S0766W0381", "wav": "./aishell/wav/test/S0766/BAC009S0766W0381.wav", "txt": "随后冰迷们自发为其网上募集资金"} -{"key": "BAC009S0766W0382", "wav": "./aishell/wav/test/S0766/BAC009S0766W0382.wav", "txt": "马塞洛特来到法国花样滑冰联合会"} -{"key": "BAC009S0766W0383", "wav": "./aishell/wav/test/S0766/BAC009S0766W0383.wav", "txt": "双方进行了最后一次也是最成功的一次洽谈"} -{"key": "BAC009S0766W0384", "wav": "./aishell/wav/test/S0766/BAC009S0766W0384.wav", "txt": "能够保障他的职业生涯发展是我的荣幸"} -{"key": "BAC009S0766W0385", "wav": "./aishell/wav/test/S0766/BAC009S0766W0385.wav", "txt": "恭喜他与萨维申科走上正确的道路"} -{"key": "BAC009S0766W0386", "wav": "./aishell/wav/test/S0766/BAC009S0766W0386.wav", "txt": "我们的朋友将代表德国"} -{"key": "BAC009S0766W0387", "wav": "./aishell/wav/test/S0766/BAC009S0766W0387.wav", "txt": "继续征战花样滑冰的比赛"} -{"key": "BAC009S0766W0388", "wav": "./aishell/wav/test/S0766/BAC009S0766W0388.wav", "txt": "今年第二位离开法国冰协更换国籍的选手"} -{"key": "BAC009S0766W0389", "wav": "./aishell/wav/test/S0766/BAC009S0766W0389.wav", "txt": "对于这个万众期待的消息"} -{"key": "BAC009S0766W0390", "wav": "./aishell/wav/test/S0766/BAC009S0766W0390.wav", "txt": "我可以带着它回家了"} -{"key": "BAC009S0766W0391", "wav": "./aishell/wav/test/S0766/BAC009S0766W0391.wav", "txt": "谢谢每一个支持我们的人"} -{"key": "BAC009S0766W0392", "wav": "./aishell/wav/test/S0766/BAC009S0766W0392.wav", "txt": "没有你们的支持我们该如何度过最挣扎的时期呢"} -{"key": "BAC009S0766W0393", "wav": "./aishell/wav/test/S0766/BAC009S0766W0393.wav", "txt": "是时候去努力工作了"} -{"key": "BAC009S0766W0394", "wav": "./aishell/wav/test/S0766/BAC009S0766W0394.wav", "txt": "他们的禁赛期即将开始"} -{"key": "BAC009S0766W0395", "wav": "./aishell/wav/test/S0766/BAC009S0766W0395.wav", "txt": "我们或许就将看到他们征战各类大型赛事的身影"} -{"key": "BAC009S0766W0396", "wav": "./aishell/wav/test/S0766/BAC009S0766W0396.wav", "txt": "对于隋文静韩聪彭程张昊领衔的中国双人滑军团"} -{"key": "BAC009S0766W0397", "wav": "./aishell/wav/test/S0766/BAC009S0766W0397.wav", "txt": "五度世锦赛冠军萨维申科联手年轻新搭档马塞洛特"} -{"key": "BAC009S0766W0398", "wav": "./aishell/wav/test/S0766/BAC009S0766W0398.wav", "txt": "这会是一对绝对强大的对手"} -{"key": "BAC009S0766W0399", "wav": "./aishell/wav/test/S0766/BAC009S0766W0399.wav", "txt": "经过近两个星期的漫长等待"} -{"key": "BAC009S0766W0400", "wav": "./aishell/wav/test/S0766/BAC009S0766W0400.wav", "txt": "北京时间八月九日一五三零"} -{"key": "BAC009S0766W0401", "wav": 
"./aishell/wav/test/S0766/BAC009S0766W0401.wav", "txt": "为观众们奉上昆仑决鏖战香江的精彩赛事"} -{"key": "BAC009S0766W0402", "wav": "./aishell/wav/test/S0766/BAC009S0766W0402.wav", "txt": "泰拳黑王子播求无疑同小皇帝詹姆斯最为相似"} -{"key": "BAC009S0766W0403", "wav": "./aishell/wav/test/S0766/BAC009S0766W0403.wav", "txt": "并在各自领域中享受着各自粉丝们帝王般的顶礼膜拜"} -{"key": "BAC009S0766W0404", "wav": "./aishell/wav/test/S0766/BAC009S0766W0404.wav", "txt": "曾以分歧者星运里的错窜红的谢琳伍德蕾"} -{"key": "BAC009S0766W0405", "wav": "./aishell/wav/test/S0766/BAC009S0766W0405.wav", "txt": "将出演影片的女主角"} -{"key": "BAC009S0766W0406", "wav": "./aishell/wav/test/S0766/BAC009S0766W0406.wav", "txt": "搜狐娱乐讯文耷子备受关注的重拍版乌鸦"} -{"key": "BAC009S0766W0407", "wav": "./aishell/wav/test/S0766/BAC009S0766W0407.wav", "txt": "在经历了无数次的导演和演员更换之后"} -{"key": "BAC009S0766W0408", "wav": "./aishell/wav/test/S0766/BAC009S0766W0408.wav", "txt": "除了去年结婚"} -{"key": "BAC009S0766W0409", "wav": "./aishell/wav/test/S0766/BAC009S0766W0409.wav", "txt": "有时候人生是计划赶不上变化的"} -{"key": "BAC009S0766W0410", "wav": "./aishell/wav/test/S0766/BAC009S0766W0410.wav", "txt": "就顺着你的感觉走就好了"} -{"key": "BAC009S0766W0411", "wav": "./aishell/wav/test/S0766/BAC009S0766W0411.wav", "txt": "日前在初赛收官战中返场的她加盟猜评团"} -{"key": "BAC009S0766W0412", "wav": "./aishell/wav/test/S0766/BAC009S0766W0412.wav", "txt": "一袭土豪金西装简直潮爆"} -{"key": "BAC009S0766W0413", "wav": "./aishell/wav/test/S0766/BAC009S0766W0413.wav", "txt": "有眼尖的网友发现"} -{"key": "BAC009S0766W0414", "wav": "./aishell/wav/test/S0766/BAC009S0766W0414.wav", "txt": "与孙楠巫启贤共同起立鼓掌的许茹芸小腹凸起"} -{"key": "BAC009S0766W0415", "wav": "./aishell/wav/test/S0766/BAC009S0766W0415.wav", "txt": "搜狐娱乐讯九月十二日"} -{"key": "BAC009S0766W0416", "wav": "./aishell/wav/test/S0766/BAC009S0766W0416.wav", "txt": "许茹芸与韩国丈夫崔栽诚迎来结婚一周年纪念日"} -{"key": "BAC009S0766W0417", "wav": "./aishell/wav/test/S0766/BAC009S0766W0417.wav", "txt": "许茹芸特地发微博感谢婚姻带来的幸福"} -{"key": "BAC009S0766W0418", "wav": "./aishell/wav/test/S0766/BAC009S0766W0418.wav", "txt": "许茹芸重回舞台不做苦情女娱乐频道"} -{"key": "BAC009S0766W0419", "wav": "./aishell/wav/test/S0766/BAC009S0766W0419.wav", "txt": "华西都市报讯闪婚欧巴一年后二零一四年"} -{"key": "BAC009S0766W0420", "wav": "./aishell/wav/test/S0766/BAC009S0766W0420.wav", "txt": "许茹芸与韩籍男朋友举行了婚礼"} -{"key": "BAC009S0766W0421", "wav": "./aishell/wav/test/S0766/BAC009S0766W0421.wav", "txt": "迎来了人生崭新阶段"} -{"key": "BAC009S0766W0422", "wav": "./aishell/wav/test/S0766/BAC009S0766W0422.wav", "txt": "不同于大家心中按部就班的乖乖女形象"} -{"key": "BAC009S0766W0423", "wav": "./aishell/wav/test/S0766/BAC009S0766W0423.wav", "txt": "许茹芸突然闪婚让当时的娱乐圈也惊起了一阵小波澜"} -{"key": "BAC009S0766W0424", "wav": "./aishell/wav/test/S0766/BAC009S0766W0424.wav", "txt": "此后便鲜有消息"} -{"key": "BAC009S0766W0425", "wav": "./aishell/wav/test/S0766/BAC009S0766W0425.wav", "txt": "和往日的端庄淑女形象大有不同"} -{"key": "BAC009S0766W0426", "wav": "./aishell/wav/test/S0766/BAC009S0766W0426.wav", "txt": "对于重回舞台夺下当日歌王"} -{"key": "BAC009S0766W0427", "wav": "./aishell/wav/test/S0766/BAC009S0766W0427.wav", "txt": "她也坦言内心感触很多"} -{"key": "BAC009S0766W0428", "wav": "./aishell/wav/test/S0766/BAC009S0766W0428.wav", "txt": "论眉毛重要性"} -{"key": "BAC009S0766W0430", "wav": "./aishell/wav/test/S0766/BAC009S0766W0430.wav", "txt": "中新网五月七日电据台湾中国时报消息"} -{"key": "BAC009S0766W0431", "wav": "./aishell/wav/test/S0766/BAC009S0766W0431.wav", "txt": "李嘉诚回应撤资不爱国指控完全不成立"} -{"key": "BAC009S0766W0432", "wav": "./aishell/wav/test/S0766/BAC009S0766W0432.wav", "txt": "李嘉诚首次公开回应撤资不爱国等质疑"} -{"key": "BAC009S0766W0433", "wav": "./aishell/wav/test/S0766/BAC009S0766W0433.wav", "txt": "称一篇似是而非的文章"} -{"key": "BAC009S0766W0434", "wav": 
"./aishell/wav/test/S0766/BAC009S0766W0434.wav", "txt": "在其发给记者的新闻稿中说"} -{"key": "BAC009S0766W0435", "wav": "./aishell/wav/test/S0766/BAC009S0766W0435.wav", "txt": "所谓撤资指控完全不成立"} -{"key": "BAC009S0766W0436", "wav": "./aishell/wav/test/S0766/BAC009S0766W0436.wav", "txt": "其集团在全球拥有一三零零零间店铺"} -{"key": "BAC009S0766W0437", "wav": "./aishell/wav/test/S0766/BAC009S0766W0437.wav", "txt": "其中内地由两年前的一三零零间增至今天的二三零零间"} -{"key": "BAC009S0766W0438", "wav": "./aishell/wav/test/S0766/BAC009S0766W0438.wav", "txt": "李嘉诚怎么回答与中央关系有变"} -{"key": "BAC009S0766W0439", "wav": "./aishell/wav/test/S0766/BAC009S0766W0439.wav", "txt": "李嘉诚或再抛售内地地产项目拟出售上海办公楼"} -{"key": "BAC009S0766W0440", "wav": "./aishell/wav/test/S0766/BAC009S0766W0440.wav", "txt": "中国日报网八月三日电据华尔街日报三日报道"} -{"key": "BAC009S0766W0441", "wav": "./aishell/wav/test/S0766/BAC009S0766W0441.wav", "txt": "据两名知情人透露"} -{"key": "BAC009S0766W0442", "wav": "./aishell/wav/test/S0766/BAC009S0766W0442.wav", "txt": "李嘉诚正式回应撤资指控不相信文革式思维复苏"} -{"key": "BAC009S0766W0443", "wav": "./aishell/wav/test/S0766/BAC009S0766W0443.wav", "txt": "李嘉诚首次对撤资做出回应"} -{"key": "BAC009S0766W0444", "wav": "./aishell/wav/test/S0766/BAC009S0766W0444.wav", "txt": "我明白言论自由是一把两刃刀"} -{"key": "BAC009S0766W0445", "wav": "./aishell/wav/test/S0766/BAC009S0766W0445.wav", "txt": "因此一篇似是而非的文章"} -{"key": "BAC009S0766W0446", "wav": "./aishell/wav/test/S0766/BAC009S0766W0446.wav", "txt": "李嘉诚首次回应撤资传闻对中国发展充满信心"} -{"key": "BAC009S0766W0447", "wav": "./aishell/wav/test/S0766/BAC009S0766W0447.wav", "txt": "中新网九月三零日电据香港文汇报报道"} -{"key": "BAC009S0766W0448", "wav": "./aishell/wav/test/S0766/BAC009S0766W0448.wav", "txt": "对中央坚定不移继续改革开放"} -{"key": "BAC009S0766W0449", "wav": "./aishell/wav/test/S0766/BAC009S0766W0449.wav", "txt": "致力优化营商环境有信心"} -{"key": "BAC009S0766W0450", "wav": "./aishell/wav/test/S0766/BAC009S0766W0450.wav", "txt": "对中国发展充满信心"} -{"key": "BAC009S0766W0451", "wav": "./aishell/wav/test/S0766/BAC009S0766W0451.wav", "txt": "李娜产女后首次亮相运动员掌握英语很重要"} -{"key": "BAC009S0766W0452", "wav": "./aishell/wav/test/S0766/BAC009S0766W0452.wav", "txt": "李娜在一个商业活动中谈退役后的生活"} -{"key": "BAC009S0766W0453", "wav": "./aishell/wav/test/S0766/BAC009S0766W0453.wav", "txt": "李娜不想大家一直记得我那说明中国网球没突破"} -{"key": "BAC009S0766W0454", "wav": "./aishell/wav/test/S0766/BAC009S0766W0454.wav", "txt": "虽然已经退役近一年"} -{"key": "BAC009S0766W0455", "wav": "./aishell/wav/test/S0766/BAC009S0766W0455.wav", "txt": "但曾经的中国网球一姐李娜仍然没有淡出媒体的关注"} -{"key": "BAC009S0766W0456", "wav": "./aishell/wav/test/S0766/BAC009S0766W0456.wav", "txt": "李娜媒体用一次性参赛是对运动员的侮辱"} -{"key": "BAC009S0766W0457", "wav": "./aishell/wav/test/S0766/BAC009S0766W0457.wav", "txt": "长江商报消息本报记者张萌昨日"} -{"key": "BAC009S0766W0458", "wav": "./aishell/wav/test/S0766/BAC009S0766W0458.wav", "txt": "家居养娃的李娜又重新出现在媒体大众的面前"} -{"key": "BAC009S0766W0459", "wav": "./aishell/wav/test/S0766/BAC009S0766W0459.wav", "txt": "带着辛吉斯逛完了黄鹤楼"} -{"key": "BAC009S0766W0460", "wav": "./aishell/wav/test/S0766/BAC009S0766W0460.wav", "txt": "当日的新闻发布会上"} -{"key": "BAC009S0766W0461", "wav": "./aishell/wav/test/S0766/BAC009S0766W0461.wav", "txt": "李娜一身素色休闲装"} -{"key": "BAC009S0766W0462", "wav": "./aishell/wav/test/S0766/BAC009S0766W0462.wav", "txt": "走进了武网的新闻大厅"} -{"key": "BAC009S0766W0463", "wav": "./aishell/wav/test/S0766/BAC009S0766W0463.wav", "txt": "她身上少了些以往的悍将拼劲"} -{"key": "BAC009S0766W0464", "wav": "./aishell/wav/test/S0766/BAC009S0766W0464.wav", "txt": "多了初为人母的幸福光彩"} -{"key": "BAC009S0766W0465", "wav": "./aishell/wav/test/S0766/BAC009S0766W0465.wav", "txt": "看似犀利不再的娜姐老将气场立刻显出"} -{"key": "BAC009S0766W0466", "wav": 
"./aishell/wav/test/S0766/BAC009S0766W0466.wav", "txt": "一语回击一次性参赛这种说法是一种侮辱"} -{"key": "BAC009S0766W0467", "wav": "./aishell/wav/test/S0766/BAC009S0766W0467.wav", "txt": "希望媒体不要用这样的词来形容所有网球运动员"} -{"key": "BAC009S0766W0468", "wav": "./aishell/wav/test/S0766/BAC009S0766W0468.wav", "txt": "因为没有哪个运动员不想表现出最好的自己"} -{"key": "BAC009S0766W0469", "wav": "./aishell/wav/test/S0766/BAC009S0766W0469.wav", "txt": "李岚清座谈戏称自己八零后退休不等于生命终结"} -{"key": "BAC009S0766W0470", "wav": "./aishell/wav/test/S0766/BAC009S0766W0470.wav", "txt": "不知不觉我成为一个八零后的老头"} -{"key": "BAC009S0766W0471", "wav": "./aishell/wav/test/S0766/BAC009S0766W0471.wav", "txt": "退休后不在其位不谋其政"} -{"key": "BAC009S0766W0472", "wav": "./aishell/wav/test/S0766/BAC009S0766W0472.wav", "txt": "退休并不等于生命的终结"} -{"key": "BAC009S0766W0473", "wav": "./aishell/wav/test/S0766/BAC009S0766W0473.wav", "txt": "如果放弃学习没有追求"} -{"key": "BAC009S0766W0474", "wav": "./aishell/wav/test/S0766/BAC009S0766W0474.wav", "txt": "一个人的精神生命就将走向衰老"} -{"key": "BAC009S0766W0475", "wav": "./aishell/wav/test/S0766/BAC009S0766W0475.wav", "txt": "因此我给自己规划了八个字的退休生活"} -{"key": "BAC009S0766W0476", "wav": "./aishell/wav/test/S0766/BAC009S0766W0476.wav", "txt": "戏称自己年过八零当为八零后"} -{"key": "BAC009S0766W0477", "wav": "./aishell/wav/test/S0766/BAC009S0766W0477.wav", "txt": "李开复经历死亡这一课学会看透和放下"} -{"key": "BAC009S0766W0478", "wav": "./aishell/wav/test/S0766/BAC009S0766W0478.wav", "txt": "李开复被医生宣判为第四期淋巴癌"} -{"key": "BAC009S0766W0479", "wav": "./aishell/wav/test/S0766/BAC009S0766W0479.wav", "txt": "不期而至的阴霾让他被迫抛下工作"} -{"key": "BAC009S0766W0480", "wav": "./aishell/wav/test/S0766/BAC009S0766W0480.wav", "txt": "在新书向死而生我修的死亡学分中"} -{"key": "BAC009S0766W0481", "wav": "./aishell/wav/test/S0766/BAC009S0766W0481.wav", "txt": "我从没想过自己竟会出版一本这样的书"} -{"key": "BAC009S0766W0482", "wav": "./aishell/wav/test/S0766/BAC009S0766W0482.wav", "txt": "李晨马震就是玩笑任何情况都力挺范冰冰"} -{"key": "BAC009S0766W0483", "wav": "./aishell/wav/test/S0766/BAC009S0766W0483.wav", "txt": "新京报快讯记者刘玮近日"} -{"key": "BAC009S0766W0484", "wav": "./aishell/wav/test/S0766/BAC009S0766W0484.wav", "txt": "由于电影王朝的女人杨贵妃中的一场激情戏"} -{"key": "BAC009S0766W0485", "wav": "./aishell/wav/test/S0766/BAC009S0766W0485.wav", "txt": "范冰冰承包了娱乐头条"} -{"key": "BAC009S0766W0486", "wav": "./aishell/wav/test/S0766/BAC009S0766W0486.wav", "txt": "出席活动时笑称今后拍激情戏会征求男友李晨的意见"} -{"key": "BAC009S0766W0487", "wav": "./aishell/wav/test/S0766/BAC009S0766W0487.wav", "txt": "李晨秀才遇到兵发布会后回应称"} -{"key": "BAC009S0766W0488", "wav": "./aishell/wav/test/S0766/BAC009S0766W0488.wav", "txt": "如果这个事情反过来"} -{"key": "BAC009S0766W0489", "wav": "./aishell/wav/test/S0766/BAC009S0766W0489.wav", "txt": "演员这个职业就是这样"} -{"key": "BAC009S0766W0490", "wav": "./aishell/wav/test/S0766/BAC009S0766W0490.wav", "txt": "李玉刚张学友黄琦雯入选一零大最涨姿势歌曲"} -{"key": "BAC009S0766W0491", "wav": "./aishell/wav/test/S0766/BAC009S0766W0491.wav", "txt": "李玉刚新歌点击逾一亿网友李家每人只需半次"} -{"key": "BAC009S0766W0492", "wav": "./aishell/wav/test/S0766/BAC009S0766W0492.wav", "txt": "李玉刚饰演的杨贵妃被指芳华绝代说到神曲"} -{"key": "BAC009S0766W0493", "wav": "./aishell/wav/test/S0766/BAC009S0766W0493.wav", "txt": "该歌曲二零零字的歌词用典竟达三六处之多"} -{"key": "BAC009S0766W0494", "wav": "./aishell/wav/test/S0766/BAC009S0766W0494.wav", "txt": "让一些网友有如猜谜"} -{"key": "BAC009S0766W0495", "wav": "./aishell/wav/test/S0766/BAC009S0766W0495.wav", "txt": "被称为二零一五年第一神曲"} -{"key": "BAC009S0767W0121", "wav": "./aishell/wav/test/S0767/BAC009S0767W0121.wav", "txt": "时刻保持创新和变革意识"} -{"key": "BAC009S0767W0122", "wav": "./aishell/wav/test/S0767/BAC009S0767W0122.wav", "txt": "引领中国房地产广告行业走向新的黄金时代"} -{"key": 
"BAC009S0767W0123", "wav": "./aishell/wav/test/S0767/BAC009S0767W0123.wav", "txt": "今久整合营销集团迎来了它的十岁生日"} -{"key": "BAC009S0767W0124", "wav": "./aishell/wav/test/S0767/BAC009S0767W0124.wav", "txt": "今久从最初的几十个人"} -{"key": "BAC009S0767W0125", "wav": "./aishell/wav/test/S0767/BAC009S0767W0125.wav", "txt": "今久商品房销售额首次上涨"} -{"key": "BAC009S0767W0126", "wav": "./aishell/wav/test/S0767/BAC009S0767W0126.wav", "txt": "房地产投资增速仍下降"} -{"key": "BAC009S0767W0127", "wav": "./aishell/wav/test/S0767/BAC009S0767W0127.wav", "txt": "大智慧阿思达克通讯社"} -{"key": "BAC009S0767W0128", "wav": "./aishell/wav/test/S0767/BAC009S0767W0128.wav", "txt": "一五年一月份"} -{"key": "BAC009S0767W0129", "wav": "./aishell/wav/test/S0767/BAC009S0767W0129.wav", "txt": "全国房地产开发投资三万亿元"} -{"key": "BAC009S0767W0130", "wav": "./aishell/wav/test/S0767/BAC009S0767W0130.wav", "txt": "同比名义增长许多"} -{"key": "BAC009S0767W0131", "wav": "./aishell/wav/test/S0767/BAC009S0767W0131.wav", "txt": "增速比一月份回落零点九个百分点"} -{"key": "BAC009S0767W0132", "wav": "./aishell/wav/test/S0767/BAC009S0767W0132.wav", "txt": "全国商品房销售额两万亿元"} -{"key": "BAC009S0767W0133", "wav": "./aishell/wav/test/S0767/BAC009S0767W0133.wav", "txt": "年内首次出现同比增长"} -{"key": "BAC009S0767W0134", "wav": "./aishell/wav/test/S0767/BAC009S0767W0134.wav", "txt": "住宅销售额也增长了"} -{"key": "BAC009S0767W0135", "wav": "./aishell/wav/test/S0767/BAC009S0767W0135.wav", "txt": "办公楼销售额下降了"} -{"key": "BAC009S0767W0136", "wav": "./aishell/wav/test/S0767/BAC009S0767W0136.wav", "txt": "商业营业用房销售额下降了"} -{"key": "BAC009S0767W0137", "wav": "./aishell/wav/test/S0767/BAC009S0767W0137.wav", "txt": "住宅成为全国房地产销售金额唯一增长的板块"} -{"key": "BAC009S0767W0138", "wav": "./aishell/wav/test/S0767/BAC009S0767W0138.wav", "txt": "一系列楼市新政效果逐步显现"} -{"key": "BAC009S0767W0139", "wav": "./aishell/wav/test/S0767/BAC009S0767W0139.wav", "txt": "德佑链家市场研究部总监陆骑麟表示"} -{"key": "BAC009S0767W0140", "wav": "./aishell/wav/test/S0767/BAC009S0767W0140.wav", "txt": "全国房地产开发投资增速仍然延续了增速放缓的渠势"} -{"key": "BAC009S0767W0141", "wav": "./aishell/wav/test/S0767/BAC009S0767W0141.wav", "txt": "尽管有央行降息等各方利好刺激"} -{"key": "BAC009S0767W0142", "wav": "./aishell/wav/test/S0767/BAC009S0767W0142.wav", "txt": "尤其是库存高企的三四线城市"} -{"key": "BAC009S0767W0143", "wav": "./aishell/wav/test/S0767/BAC009S0767W0143.wav", "txt": "开发商仍然面临着较大的销售压力"} -{"key": "BAC009S0767W0144", "wav": "./aishell/wav/test/S0767/BAC009S0767W0144.wav", "txt": "国家统计局公布的数据显示"} -{"key": "BAC009S0767W0145", "wav": "./aishell/wav/test/S0767/BAC009S0767W0145.wav", "txt": "无论是东部中部还是西部地区"} -{"key": "BAC009S0767W0146", "wav": "./aishell/wav/test/S0767/BAC009S0767W0146.wav", "txt": "商品房房的销售面积同比数据出现好转"} -{"key": "BAC009S0767W0147", "wav": "./aishell/wav/test/S0767/BAC009S0767W0147.wav", "txt": "商品房销售面积三亿平方米"} -{"key": "BAC009S0767W0148", "wav": "./aishell/wav/test/S0767/BAC009S0767W0148.wav", "txt": "降幅比四月份收窄六个百分点"} -{"key": "BAC009S0767W0149", "wav": "./aishell/wav/test/S0767/BAC009S0767W0149.wav", "txt": "在公积金松绑等作用的刺激下"} -{"key": "BAC009S0767W0150", "wav": "./aishell/wav/test/S0767/BAC009S0767W0150.wav", "txt": "五月份商品房销售的面积同比数据由负转正"} -{"key": "BAC009S0767W0151", "wav": "./aishell/wav/test/S0767/BAC009S0767W0151.wav", "txt": "作为三四线城市最为集中的中部地区来说"} -{"key": "BAC009S0767W0152", "wav": "./aishell/wav/test/S0767/BAC009S0767W0152.wav", "txt": "房地产开发企业土地购置面积很大"} -{"key": "BAC009S0767W0153", "wav": "./aishell/wav/test/S0767/BAC009S0767W0153.wav", "txt": "同比下降不少"} -{"key": "BAC009S0767W0154", "wav": "./aishell/wav/test/S0767/BAC009S0767W0154.wav", "txt": "降幅收窄三个版百分点"} -{"key": "BAC009S0767W0155", "wav": 
"./aishell/wav/test/S0767/BAC009S0767W0155.wav", "txt": "各方原因的叠加导致了房企拿地量的明显减少"} -{"key": "BAC009S0767W0156", "wav": "./aishell/wav/test/S0767/BAC009S0767W0156.wav", "txt": "今年一线城市住宅用地价格涨五成"} -{"key": "BAC009S0767W0157", "wav": "./aishell/wav/test/S0767/BAC009S0767W0157.wav", "txt": "今年商品房销售一度低迷"} -{"key": "BAC009S0767W0158", "wav": "./aishell/wav/test/S0767/BAC009S0767W0158.wav", "txt": "一线城市土地市场成交火热"} -{"key": "BAC009S0767W0159", "wav": "./aishell/wav/test/S0767/BAC009S0767W0159.wav", "txt": "中介机构统计数据显示"} -{"key": "BAC009S0767W0160", "wav": "./aishell/wav/test/S0767/BAC009S0767W0160.wav", "txt": "平均价格为十万元每平方米"} -{"key": "BAC009S0767W0161", "wav": "./aishell/wav/test/S0767/BAC009S0767W0161.wav", "txt": "同比上涨五成"} -{"key": "BAC009S0767W0162", "wav": "./aishell/wav/test/S0767/BAC009S0767W0162.wav", "txt": "随着一线城市楼市企温回升"} -{"key": "BAC009S0767W0163", "wav": "./aishell/wav/test/S0767/BAC009S0767W0163.wav", "txt": "房企在一线城市拿地的热情还将提高"} -{"key": "BAC009S0767W0164", "wav": "./aishell/wav/test/S0767/BAC009S0767W0164.wav", "txt": "土地市场热度可能有所下降"} -{"key": "BAC009S0767W0165", "wav": "./aishell/wav/test/S0767/BAC009S0767W0165.wav", "txt": "大型房企低迷期拿地"} -{"key": "BAC009S0767W0166", "wav": "./aishell/wav/test/S0767/BAC009S0767W0166.wav", "txt": "中原地产市场研究部统计数据显示"} -{"key": "BAC009S0767W0167", "wav": "./aishell/wav/test/S0767/BAC009S0767W0167.wav", "txt": "土地成交价款三千亿元"} -{"key": "BAC009S0767W0168", "wav": "./aishell/wav/test/S0767/BAC009S0767W0168.wav", "txt": "预计全年有望突破四千亿元"} -{"key": "BAC009S0767W0169", "wav": "./aishell/wav/test/S0767/BAC009S0767W0169.wav", "txt": "一线城市住宅用地平均价格为十一万元每平方米"} -{"key": "BAC009S0767W0170", "wav": "./aishell/wav/test/S0767/BAC009S0767W0170.wav", "txt": "同比上涨约六成"} -{"key": "BAC009S0767W0171", "wav": "./aishell/wav/test/S0767/BAC009S0767W0171.wav", "txt": "中原地产首席分析师张大伟认为"} -{"key": "BAC009S0767W0172", "wav": "./aishell/wav/test/S0767/BAC009S0767W0172.wav", "txt": "住宅市场交易明显升温"} -{"key": "BAC009S0767W0173", "wav": "./aishell/wav/test/S0767/BAC009S0767W0173.wav", "txt": "迅速带动一线城市土地市场的整体成交"} -{"key": "BAC009S0767W0174", "wav": "./aishell/wav/test/S0767/BAC009S0767W0174.wav", "txt": "房地产业正经历一个调整阶段"} -{"key": "BAC009S0767W0175", "wav": "./aishell/wav/test/S0767/BAC009S0767W0175.wav", "txt": "大型房企实施低迷期拿地的策略"} -{"key": "BAC009S0767W0176", "wav": "./aishell/wav/test/S0767/BAC009S0767W0176.wav", "txt": "在整体市场供大于求区域分化严重的情况下"} -{"key": "BAC009S0767W0177", "wav": "./aishell/wav/test/S0767/BAC009S0767W0177.wav", "txt": "房企更加愿意扎堆一线城市"} -{"key": "BAC009S0767W0178", "wav": "./aishell/wav/test/S0767/BAC009S0767W0178.wav", "txt": "而非在三四线城市深耕"} -{"key": "BAC009S0767W0179", "wav": "./aishell/wav/test/S0767/BAC009S0767W0179.wav", "txt": "这使得一线城市的土地竞争激烈"} -{"key": "BAC009S0767W0180", "wav": "./aishell/wav/test/S0767/BAC009S0767W0180.wav", "txt": "今年一线城市宅地成交的溢价率不高"} -{"key": "BAC009S0767W0181", "wav": "./aishell/wav/test/S0767/BAC009S0767W0181.wav", "txt": "平均溢价率较低"} -{"key": "BAC009S0767W0182", "wav": "./aishell/wav/test/S0767/BAC009S0767W0182.wav", "txt": "较去年明显下降"} -{"key": "BAC009S0767W0183", "wav": "./aishell/wav/test/S0767/BAC009S0767W0183.wav", "txt": "这是因为土地一级开发成本提高"} -{"key": "BAC009S0767W0184", "wav": "./aishell/wav/test/S0767/BAC009S0767W0184.wav", "txt": "一线城市住宅用地的低价不断抬升"} -{"key": "BAC009S0767W0185", "wav": "./aishell/wav/test/S0767/BAC009S0767W0185.wav", "txt": "北京等城市在土地出让中"} -{"key": "BAC009S0767W0186", "wav": "./aishell/wav/test/S0767/BAC009S0767W0186.wav", "txt": "将保障房地块和商品房地块捆绑出让"} -{"key": "BAC009S0767W0187", "wav": "./aishell/wav/test/S0767/BAC009S0767W0187.wav", "txt": 
"直接涉及到债券持有人利益的保护问题"} -{"key": "BAC009S0767W0188", "wav": "./aishell/wav/test/S0767/BAC009S0767W0188.wav", "txt": "我们立即与云投集团进行了沟通"} -{"key": "BAC009S0767W0189", "wav": "./aishell/wav/test/S0767/BAC009S0767W0189.wav", "txt": "并严格按照合规程序进行"} -{"key": "BAC009S0767W0190", "wav": "./aishell/wav/test/S0767/BAC009S0767W0190.wav", "txt": "我委也注意到在企业债券存续期间"} -{"key": "BAC009S0767W0191", "wav": "./aishell/wav/test/S0767/BAC009S0767W0191.wav", "txt": "需要对发行人资产重组等重大事宜加强监管"} -{"key": "BAC009S0767W0192", "wav": "./aishell/wav/test/S0767/BAC009S0767W0192.wav", "txt": "在制度上对债券持有人的合法权益进行保护"} -{"key": "BAC009S0767W0193", "wav": "./aishell/wav/test/S0767/BAC009S0767W0193.wav", "txt": "建立地方政府债务管理体系"} -{"key": "BAC009S0767W0194", "wav": "./aishell/wav/test/S0767/BAC009S0767W0194.wav", "txt": "中国证券报从您刚才的介绍中我们了解到"} -{"key": "BAC009S0767W0195", "wav": "./aishell/wav/test/S0767/BAC009S0767W0195.wav", "txt": "城投债券对推动城市基础设施和市政设施的建设"} -{"key": "BAC009S0767W0196", "wav": "./aishell/wav/test/S0767/BAC009S0767W0196.wav", "txt": "起到了非常积极的作用"} -{"key": "BAC009S0767W0197", "wav": "./aishell/wav/test/S0767/BAC009S0767W0197.wav", "txt": "对丰富债券市场品种也具有积极意义"} -{"key": "BAC009S0767W0198", "wav": "./aishell/wav/test/S0767/BAC009S0767W0198.wav", "txt": "结合地方政府债务管理制度的完善"} -{"key": "BAC009S0767W0199", "wav": "./aishell/wav/test/S0767/BAC009S0767W0199.wav", "txt": "下一步我国的城投债券还需要做哪些完善"} -{"key": "BAC009S0767W0200", "wav": "./aishell/wav/test/S0767/BAC009S0767W0200.wav", "txt": "徐林这个问题涉及到一系列的制度完善"} -{"key": "BAC009S0767W0201", "wav": "./aishell/wav/test/S0767/BAC009S0767W0201.wav", "txt": "是一个比较复杂的问题"} -{"key": "BAC009S0767W0202", "wav": "./aishell/wav/test/S0767/BAC009S0767W0202.wav", "txt": "我个人是这么认认识的"} -{"key": "BAC009S0767W0203", "wav": "./aishell/wav/test/S0767/BAC009S0767W0203.wav", "txt": "我国还处于城市化快速发展期"} -{"key": "BAC009S0767W0204", "wav": "./aishell/wav/test/S0767/BAC009S0767W0204.wav", "txt": "需要为各地的城市建设提供规范的融资渠道"} -{"key": "BAC009S0767W0205", "wav": "./aishell/wav/test/S0767/BAC009S0767W0205.wav", "txt": "农业与非农产业之间劳动生产率的差距也很大"} -{"key": "BAC009S0767W0206", "wav": "./aishell/wav/test/S0767/BAC009S0767W0206.wav", "txt": "这决定了我国城市化动力十分强劲"} -{"key": "BAC009S0767W0207", "wav": "./aishell/wav/test/S0767/BAC009S0767W0207.wav", "txt": "城市化进程远未结束"} -{"key": "BAC009S0767W0208", "wav": "./aishell/wav/test/S0767/BAC009S0767W0208.wav", "txt": "城市化快速发展期的重要特征就是基础设施投资需求大"} -{"key": "BAC009S0767W0209", "wav": "./aishell/wav/test/S0767/BAC009S0767W0209.wav", "txt": "这是我国所处的发展阶段决定的"} -{"key": "BAC009S0767W0210", "wav": "./aishell/wav/test/S0767/BAC009S0767W0210.wav", "txt": "政府通过债务融资从事基础设施建设"} -{"key": "BAC009S0767W0211", "wav": "./aishell/wav/test/S0767/BAC009S0767W0211.wav", "txt": "我们应该建立风险可控的规范化的地方政府融资机制"} -{"key": "BAC009S0767W0212", "wav": "./aishell/wav/test/S0767/BAC009S0767W0212.wav", "txt": "为各地的基础设施建设提供有制度保障的融资渠道"} -{"key": "BAC009S0767W0213", "wav": "./aishell/wav/test/S0767/BAC009S0767W0213.wav", "txt": "城投债劵作为准市政债劵仍将是有效的融资工具"} -{"key": "BAC009S0767W0214", "wav": "./aishell/wav/test/S0767/BAC009S0767W0214.wav", "txt": "但还需要进一步改进"} -{"key": "BAC009S0767W0215", "wav": "./aishell/wav/test/S0767/BAC009S0767W0215.wav", "txt": "在政府投融资体制改革过程中"} -{"key": "BAC009S0767W0216", "wav": "./aishell/wav/test/S0767/BAC009S0767W0216.wav", "txt": "从事当地的基础设施建设"} -{"key": "BAC009S0767W0217", "wav": "./aishell/wav/test/S0767/BAC009S0767W0217.wav", "txt": "相对于过去的体制而言是更加市场化的"} -{"key": "BAC009S0767W0218", "wav": "./aishell/wav/test/S0767/BAC009S0767W0218.wav", "txt": "城投债劵作为投融资平台公司最透明的直接融资工具"} -{"key": "BAC009S0767W0219", "wav": 
"./aishell/wav/test/S0767/BAC009S0767W0219.wav", "txt": "仍然会存在并具有发展空间"} -{"key": "BAC009S0767W0220", "wav": "./aishell/wav/test/S0767/BAC009S0767W0220.wav", "txt": "由于目前城投债劵的发行需要符合企业债劵发行的条件"} -{"key": "BAC009S0767W0221", "wav": "./aishell/wav/test/S0767/BAC009S0767W0221.wav", "txt": "这使得我国城投债劵的发行利率相对偏高"} -{"key": "BAC009S0767W0222", "wav": "./aishell/wav/test/S0767/BAC009S0767W0222.wav", "txt": "城投债劵的发行期限和利率"} -{"key": "BAC009S0767W0223", "wav": "./aishell/wav/test/S0767/BAC009S0767W0223.wav", "txt": "未来应该在制度上作进一步完善"} -{"key": "BAC009S0767W0224", "wav": "./aishell/wav/test/S0767/BAC009S0767W0224.wav", "txt": "使得城投公司能够发行真正意义上的长期市政债劵"} -{"key": "BAC009S0767W0225", "wav": "./aishell/wav/test/S0767/BAC009S0767W0225.wav", "txt": "要尽快建立我国的地方政府债务管理体系"} -{"key": "BAC009S0767W0226", "wav": "./aishell/wav/test/S0767/BAC009S0767W0226.wav", "txt": "对于如何建立规范的地方政府融资渠道"} -{"key": "BAC009S0767W0227", "wav": "./aishell/wav/test/S0767/BAC009S0767W0227.wav", "txt": "加强地方政府债务管理和风险防控"} -{"key": "BAC009S0767W0228", "wav": "./aishell/wav/test/S0767/BAC009S0767W0228.wav", "txt": "一些专家学者提出了许多好的建议"} -{"key": "BAC009S0767W0229", "wav": "./aishell/wav/test/S0767/BAC009S0767W0229.wav", "txt": "如建立规范透明的地方政府融资渠道"} -{"key": "BAC009S0767W0230", "wav": "./aishell/wav/test/S0767/BAC009S0767W0230.wav", "txt": "并对地方政府债务进行监控和风险防范等"} -{"key": "BAC009S0767W0231", "wav": "./aishell/wav/test/S0767/BAC009S0767W0231.wav", "txt": "由于我国还没有建立统一的地方政府债务风险管理制度"} -{"key": "BAC009S0767W0232", "wav": "./aishell/wav/test/S0767/BAC009S0767W0232.wav", "txt": "设定政府性债务风险控制指标和标准"} -{"key": "BAC009S0767W0233", "wav": "./aishell/wav/test/S0767/BAC009S0767W0233.wav", "txt": "并对政府性债务实行馀额管理"} -{"key": "BAC009S0767W0234", "wav": "./aishell/wav/test/S0767/BAC009S0767W0234.wav", "txt": "使地方政府的债务融资规模控制在安全范围内"} -{"key": "BAC009S0767W0235", "wav": "./aishell/wav/test/S0767/BAC009S0767W0235.wav", "txt": "远低于发生债务危机的欧美国家"} -{"key": "BAC009S0767W0236", "wav": "./aishell/wav/test/S0767/BAC009S0767W0236.wav", "txt": "债券发行人是优质的"} -{"key": "BAC009S0767W0237", "wav": "./aishell/wav/test/S0767/BAC009S0767W0237.wav", "txt": "还本付息也是正常的"} -{"key": "BAC009S0767W0238", "wav": "./aishell/wav/test/S0767/BAC009S0767W0238.wav", "txt": "应该建立风险可控的规范化地方政府融资机制"} -{"key": "BAC009S0767W0239", "wav": "./aishell/wav/test/S0767/BAC009S0767W0239.wav", "txt": "为各地的基础设施建设提供有制度保障的融资渠道"} -{"key": "BAC009S0767W0240", "wav": "./aishell/wav/test/S0767/BAC009S0767W0240.wav", "txt": "本报记者曹志为王婷王颖春来源中国证券报"} -{"key": "BAC009S0767W0241", "wav": "./aishell/wav/test/S0767/BAC009S0767W0241.wav", "txt": "责任编辑廖一宁"} -{"key": "BAC009S0767W0242", "wav": "./aishell/wav/test/S0767/BAC009S0767W0242.wav", "txt": "据国家发改委网站消息"} -{"key": "BAC009S0767W0243", "wav": "./aishell/wav/test/S0767/BAC009S0767W0243.wav", "txt": "将考试费标准由各地自行制定改为实行上限管理"} -{"key": "BAC009S0767W0244", "wav": "./aishell/wav/test/S0767/BAC009S0767W0244.wav", "txt": "价格主管部门将按统一合理的平均成本确定考试费用"} -{"key": "BAC009S0767W0245", "wav": "./aishell/wav/test/S0767/BAC009S0767W0245.wav", "txt": "将切实减轻考生经济负担"} -{"key": "BAC009S0767W0246", "wav": "./aishell/wav/test/S0767/BAC009S0767W0246.wav", "txt": "针对目前职业资格考试收费项目增加"} -{"key": "BAC009S0767W0247", "wav": "./aishell/wav/test/S0767/BAC009S0767W0247.wav", "txt": "一些考试单位考务成本偏高"} -{"key": "BAC009S0767W0248", "wav": "./aishell/wav/test/S0767/BAC009S0767W0248.wav", "txt": "有的考试在不同地区收费标准差异较大"} -{"key": "BAC009S0767W0249", "wav": "./aishell/wav/test/S0767/BAC009S0767W0249.wav", "txt": "考生对考试收费问题反映较多等问题"} -{"key": "BAC009S0767W0250", "wav": "./aishell/wav/test/S0767/BAC009S0767W0250.wav", "txt": "改革了职业资格考试收费管理方式"} 
-{"key": "BAC009S0767W0251", "wav": "./aishell/wav/test/S0767/BAC009S0767W0251.wav", "txt": "对考务费标准实行统一标准化管理"} -{"key": "BAC009S0767W0252", "wav": "./aishell/wav/test/S0767/BAC009S0767W0252.wav", "txt": "通知按照不同考生规模考试类类别的合理平均成本"} -{"key": "BAC009S0767W0254", "wav": "./aishell/wav/test/S0767/BAC009S0767W0254.wav", "txt": "无疑也会成为投资者的宠儿"} -{"key": "BAC009S0767W0260", "wav": "./aishell/wav/test/S0767/BAC009S0767W0260.wav", "txt": "而苹果虽有可能卖出不少手表给忠实的粉丝"} -{"key": "BAC009S0767W0264", "wav": "./aishell/wav/test/S0767/BAC009S0767W0264.wav", "txt": "就开始追寻打造真正的机器人的梦想"} -{"key": "BAC009S0767W0265", "wav": "./aishell/wav/test/S0767/BAC009S0767W0265.wav", "txt": "但是过去整整一年他都在秘密工作"} -{"key": "BAC009S0767W0266", "wav": "./aishell/wav/test/S0767/BAC009S0767W0266.wav", "txt": "没人知道他在干什么"} -{"key": "BAC009S0767W0269", "wav": "./aishell/wav/test/S0767/BAC009S0767W0269.wav", "txt": "无论他做的什么都是什么都会引人注目的"} -{"key": "BAC009S0767W0270", "wav": "./aishell/wav/test/S0767/BAC009S0767W0270.wav", "txt": "像索尼被黑那样的事"} -{"key": "BAC009S0767W0271", "wav": "./aishell/wav/test/S0767/BAC009S0767W0271.wav", "txt": "也可能会是受到国家支持的攻击"} -{"key": "BAC009S0767W0272", "wav": "./aishell/wav/test/S0767/BAC009S0767W0272.wav", "txt": "未来的战争形态有可能就是计算机对抗计算机"} -{"key": "BAC009S0767W0273", "wav": "./aishell/wav/test/S0767/BAC009S0767W0273.wav", "txt": "当年泡沫破裂前也是这样的情景"} -{"key": "BAC009S0767W0274", "wav": "./aishell/wav/test/S0767/BAC009S0767W0274.wav", "txt": "一堆不赚钱的公司赶着上市当然不是什么好事"} -{"key": "BAC009S0767W0276", "wav": "./aishell/wav/test/S0767/BAC009S0767W0276.wav", "txt": "疯狂估值局限于私有公司内"} -{"key": "BAC009S0767W0277", "wav": "./aishell/wav/test/S0767/BAC009S0767W0277.wav", "txt": "可现在那些公司纷纷上市后疯狂是不是就暴露了呢"} -{"key": "BAC009S0767W0278", "wav": "./aishell/wav/test/S0767/BAC009S0767W0278.wav", "txt": "而现在的股票市场也已经达到创纪录的新高"} -{"key": "BAC009S0767W0282", "wav": "./aishell/wav/test/S0767/BAC009S0767W0282.wav", "txt": "称有些技术公司烧钱太快可能会人间蒸发"} -{"key": "BAC009S0767W0284", "wav": "./aishell/wav/test/S0767/BAC009S0767W0284.wav", "txt": "连这些人都预测不准的话"} -{"key": "BAC009S0767W0285", "wav": "./aishell/wav/test/S0767/BAC009S0767W0285.wav", "txt": "还有谁能预测得准呢"} -{"key": "BAC009S0767W0287", "wav": "./aishell/wav/test/S0767/BAC009S0767W0287.wav", "txt": "但是需记住对风向保持敏感"} -{"key": "BAC009S0767W0288", "wav": "./aishell/wav/test/S0767/BAC009S0767W0288.wav", "txt": "高空飞航时战略无人机"} -{"key": "BAC009S0767W0289", "wav": "./aishell/wav/test/S0767/BAC009S0767W0289.wav", "txt": "全球鹰并不能独占鳌头"} -{"key": "BAC009S0767W0290", "wav": "./aishell/wav/test/S0767/BAC009S0767W0290.wav", "txt": "继二零一一年出现独特的连翼造型的翔龙无人机以后"} -{"key": "BAC009S0767W0291", "wav": "./aishell/wav/test/S0767/BAC009S0767W0291.wav", "txt": "又一款个性十足的双机身气动外形的大型无人机神雕"} -{"key": "BAC009S0767W0292", "wav": "./aishell/wav/test/S0767/BAC009S0767W0292.wav", "txt": "又一次引爆坊间议论"} -{"key": "BAC009S0767W0293", "wav": "./aishell/wav/test/S0767/BAC009S0767W0293.wav", "txt": "今年三月美国大众科学杂志刊文称"} -{"key": "BAC009S0767W0294", "wav": "./aishell/wav/test/S0767/BAC009S0767W0294.wav", "txt": "中国正在研制一种世界上尺寸最大的无人机"} -{"key": "BAC009S0767W0295", "wav": "./aishell/wav/test/S0767/BAC009S0767W0295.wav", "txt": "发表的想象图与最近曝光的飞机布局很像"} -{"key": "BAC009S0767W0296", "wav": "./aishell/wav/test/S0767/BAC009S0767W0296.wav", "txt": "这使得神雕在全球也成为最大的无人机之一"} -{"key": "BAC009S0767W0297", "wav": "./aishell/wav/test/S0767/BAC009S0767W0297.wav", "txt": "据可靠的网络消息源称"} -{"key": "BAC009S0767W0298", "wav": "./aishell/wav/test/S0767/BAC009S0767W0298.wav", "txt": "其相应的对手不是全球鹰"} -{"key": "BAC009S0767W0300", "wav": "./aishell/wav/test/S0767/BAC009S0767W0300.wav", "txt": 
"神雕的两个机身前后装有两对机翼"} -{"key": "BAC009S0767W0301", "wav": "./aishell/wav/test/S0767/BAC009S0767W0301.wav", "txt": "位于后方的主翼中央挂着两具涡轮风扇发动机"} -{"key": "BAC009S0767W0303", "wav": "./aishell/wav/test/S0767/BAC009S0767W0303.wav", "txt": "上述公司人士对每日经济新闻记者表示"} -{"key": "BAC009S0767W0304", "wav": "./aishell/wav/test/S0767/BAC009S0767W0304.wav", "txt": "成飞机成业绩增长主要是由于锂电需求增长"} -{"key": "BAC009S0767W0305", "wav": "./aishell/wav/test/S0767/BAC009S0767W0305.wav", "txt": "传统汽车业务并没有太大起色"} -{"key": "BAC009S0767W0306", "wav": "./aishell/wav/test/S0767/BAC009S0767W0306.wav", "txt": "现在汽车市场也在下滑"} -{"key": "BAC009S0767W0307", "wav": "./aishell/wav/test/S0767/BAC009S0767W0307.wav", "txt": "整个汽车体系都是随着汽车销量在变动"} -{"key": "BAC009S0767W0308", "wav": "./aishell/wav/test/S0767/BAC009S0767W0308.wav", "txt": "在锂电业务爆发的情况下"} -{"key": "BAC009S0767W0309", "wav": "./aishell/wav/test/S0767/BAC009S0767W0309.wav", "txt": "公司的汽车业务应该会有一些弱化"} -{"key": "BAC009S0767W0310", "wav": "./aishell/wav/test/S0767/BAC009S0767W0310.wav", "txt": "新能源汽车的爆发带动了锂电池供不应求"} -{"key": "BAC009S0767W0311", "wav": "./aishell/wav/test/S0767/BAC009S0767W0311.wav", "txt": "几乎所有锂电厂商都在满负荷生产"} -{"key": "BAC009S0767W0312", "wav": "./aishell/wav/test/S0767/BAC009S0767W0312.wav", "txt": "上述成飞集成人士表示"} -{"key": "BAC009S0767W0313", "wav": "./aishell/wav/test/S0767/BAC009S0767W0313.wav", "txt": "产能现在已经满足不了订单需求"} -{"key": "BAC009S0767W0314", "wav": "./aishell/wav/test/S0767/BAC009S0767W0314.wav", "txt": "八月初公司通过了增加产能建设的决议"} -{"key": "BAC009S0767W0315", "wav": "./aishell/wav/test/S0767/BAC009S0767W0315.wav", "txt": "今年初也在原来厂里新增了生产线"} -{"key": "BAC009S0767W0316", "wav": "./aishell/wav/test/S0767/BAC009S0767W0316.wav", "txt": "预计在三四季度会有陆续新增产能"} -{"key": "BAC009S0767W0317", "wav": "./aishell/wav/test/S0767/BAC009S0767W0317.wav", "txt": "每日经济新闻记者注意到"} -{"key": "BAC009S0767W0318", "wav": "./aishell/wav/test/S0767/BAC009S0767W0318.wav", "txt": "项目总投资十四点五亿元"} -{"key": "BAC009S0767W0319", "wav": "./aishell/wav/test/S0767/BAC009S0767W0319.wav", "txt": "总投资预计一百二十五亿元"} -{"key": "BAC009S0767W0320", "wav": "./aishell/wav/test/S0767/BAC009S0767W0320.wav", "txt": "上述成飞集成人士告诉记者"} -{"key": "BAC009S0767W0321", "wav": "./aishell/wav/test/S0767/BAC009S0767W0321.wav", "txt": "公司目前看好锂电池行业的发展渠势"} -{"key": "BAC009S0767W0322", "wav": "./aishell/wav/test/S0767/BAC009S0767W0322.wav", "txt": "但其并未向记者透露项目盈利水平预测"} -{"key": "BAC009S0767W0323", "wav": "./aishell/wav/test/S0767/BAC009S0767W0323.wav", "txt": "洛阳本部项目是一个增量投资"} -{"key": "BAC009S0767W0324", "wav": "./aishell/wav/test/S0767/BAC009S0767W0324.wav", "txt": "有一部分研发楼办公楼是利用现成的"} -{"key": "BAC009S0767W0325", "wav": "./aishell/wav/test/S0767/BAC009S0767W0325.wav", "txt": "包括管理人员等并不会因为新增生产线而增加"} -{"key": "BAC009S0767W0326", "wav": "./aishell/wav/test/S0767/BAC009S0767W0326.wav", "txt": "这个项目是自有资金投入"} -{"key": "BAC009S0767W0327", "wav": "./aishell/wav/test/S0767/BAC009S0767W0327.wav", "txt": "就没有要求专业机构做可研报告"} -{"key": "BAC009S0767W0328", "wav": "./aishell/wav/test/S0767/BAC009S0767W0328.wav", "txt": "我们内部做的盈利测算数据暂时无法公告"} -{"key": "BAC009S0767W0329", "wav": "./aishell/wav/test/S0767/BAC009S0767W0329.wav", "txt": "成飞集成与常州市金坛区政府合作的项目将分三期完成"} -{"key": "BAC009S0767W0330", "wav": "./aishell/wav/test/S0767/BAC009S0767W0330.wav", "txt": "一期投资额为二十五亿元"} -{"key": "BAC009S0767W0331", "wav": "./aishell/wav/test/S0767/BAC009S0767W0331.wav", "txt": "上述成飞集成人士告诉记者"} -{"key": "BAC009S0767W0332", "wav": "./aishell/wav/test/S0767/BAC009S0767W0332.wav", "txt": "随着国内新能源车产业的迅猛发展"} -{"key": "BAC009S0767W0333", "wav": "./aishell/wav/test/S0767/BAC009S0767W0333.wav", "txt": 
"锂电池作为新能源车的重要部件"} -{"key": "BAC009S0767W0334", "wav": "./aishell/wav/test/S0767/BAC009S0767W0334.wav", "txt": "锂电池生产企业将迎来业绩持续高增长阶段"} -{"key": "BAC009S0767W0335", "wav": "./aishell/wav/test/S0767/BAC009S0767W0335.wav", "txt": "二零一四年其市场规模已达七十一五亿元"} -{"key": "BAC009S0767W0336", "wav": "./aishell/wav/test/S0767/BAC009S0767W0336.wav", "txt": "随着锂电池产业链迎来井喷"} -{"key": "BAC009S0767W0337", "wav": "./aishell/wav/test/S0767/BAC009S0767W0337.wav", "txt": "锂电池在今年上半年成为诸多上市公司的业绩功臣"} -{"key": "BAC009S0767W0338", "wav": "./aishell/wav/test/S0767/BAC009S0767W0338.wav", "txt": "二者虽从事项目不同"} -{"key": "BAC009S0767W0339", "wav": "./aishell/wav/test/S0767/BAC009S0767W0339.wav", "txt": "也恰恰符合播求的个人风格"} -{"key": "BAC009S0767W0340", "wav": "./aishell/wav/test/S0767/BAC009S0767W0340.wav", "txt": "擂台上的黑王子肌肉强健"} -{"key": "BAC009S0767W0341", "wav": "./aishell/wav/test/S0767/BAC009S0767W0341.wav", "txt": "这也是他一次次在擂台上打出恐怖重击的最大资本"} -{"key": "BAC009S0767W0342", "wav": "./aishell/wav/test/S0767/BAC009S0767W0342.wav", "txt": "却可以演绎出撼人心魄的体育大美"} -{"key": "BAC009S0767W0343", "wav": "./aishell/wav/test/S0767/BAC009S0767W0343.wav", "txt": "此次播求面对的强敌"} -{"key": "BAC009S0767W0344", "wav": "./aishell/wav/test/S0767/BAC009S0767W0344.wav", "txt": "恰恰在风格打法和比赛理念上"} -{"key": "BAC009S0767W0345", "wav": "./aishell/wav/test/S0767/BAC009S0767W0345.wav", "txt": "同詹姆斯昔年头号强敌卡梅隆安东尼如出一辙"} -{"key": "BAC009S0767W0346", "wav": "./aishell/wav/test/S0767/BAC009S0767W0346.wav", "txt": "丰富的战斗技巧是我的特色"} -{"key": "BAC009S0767W0347", "wav": "./aishell/wav/test/S0767/BAC009S0767W0347.wav", "txt": "我希望自己可以像卡梅隆一样"} -{"key": "BAC009S0767W0348", "wav": "./aishell/wav/test/S0767/BAC009S0767W0348.wav", "txt": "在比赛中展示出更多击败对手的手段"} -{"key": "BAC009S0767W0349", "wav": "./aishell/wav/test/S0767/BAC009S0767W0349.wav", "txt": "对于我的对手制造更多的麻烦"} -{"key": "BAC009S0767W0350", "wav": "./aishell/wav/test/S0767/BAC009S0767W0350.wav", "txt": "俄罗斯搏击新生代旗帜性天才高手哈亚在接受采访时"} -{"key": "BAC009S0767W0351", "wav": "./aishell/wav/test/S0767/BAC009S0767W0351.wav", "txt": "而哈亚的表现也正如其自己所言"} -{"key": "BAC009S0767W0352", "wav": "./aishell/wav/test/S0767/BAC009S0767W0352.wav", "txt": "展示出了如同其偶像安东尼一样的全面犀利"} -{"key": "BAC009S0767W0353", "wav": "./aishell/wav/test/S0767/BAC009S0767W0353.wav", "txt": "直接将威瑟里诺夫击倒"} -{"key": "BAC009S0767W0354", "wav": "./aishell/wav/test/S0767/BAC009S0767W0354.wav", "txt": "其搏击技能之全面精湛格斗天赋之卓越令人惊叹"} -{"key": "BAC009S0767W0355", "wav": "./aishell/wav/test/S0767/BAC009S0767W0355.wav", "txt": "当搏击界的勒布朗与卡梅隆狭路相逢"} -{"key": "BAC009S0767W0356", "wav": "./aishell/wav/test/S0767/BAC009S0767W0356.wav", "txt": "激情战火必将以燎原之势"} -{"key": "BAC009S0767W0357", "wav": "./aishell/wav/test/S0767/BAC009S0767W0357.wav", "txt": "彭博一英里接力赛将在十月十五日首次登陆上海"} -{"key": "BAC009S0767W0358", "wav": "./aishell/wav/test/S0767/BAC009S0767W0358.wav", "txt": "今天赛事举行了赛前新闻发布会"} -{"key": "BAC009S0767W0359", "wav": "./aishell/wav/test/S0767/BAC009S0767W0359.wav", "txt": "宣告彭博一英里接力赛上海站全面启动"} -{"key": "BAC009S0767W0360", "wav": "./aishell/wav/test/S0767/BAC009S0767W0360.wav", "txt": "让他们能在工作之馀释放对于体育的热情"} -{"key": "BAC009S0767W0361", "wav": "./aishell/wav/test/S0767/BAC009S0767W0361.wav", "txt": "从二零零七年在伦敦创办至今"} -{"key": "BAC009S0767W0362", "wav": "./aishell/wav/test/S0767/BAC009S0767W0362.wav", "txt": "已在新加坡香港等城市成功落地"} -{"key": "BAC009S0767W0363", "wav": "./aishell/wav/test/S0767/BAC009S0767W0363.wav", "txt": "得到当地企业的强烈积极响应"} -{"key": "BAC009S0767W0364", "wav": "./aishell/wav/test/S0767/BAC009S0767W0364.wav", "txt": "在各城市都有至少百支队伍报名参加"} -{"key": "BAC009S0767W0365", "wav": "./aishell/wav/test/S0767/BAC009S0767W0365.wav", 
"txt": "彭博一英里接力赛区别于其他跑步活动的是"} -{"key": "BAC009S0767W0366", "wav": "./aishell/wav/test/S0767/BAC009S0767W0366.wav", "txt": "每支报名队伍以企业为单位每队十名成员"} -{"key": "BAC009S0767W0367", "wav": "./aishell/wav/test/S0767/BAC009S0767W0367.wav", "txt": "每人分别完成一英里即约一点六公里的路程"} -{"key": "BAC009S0767W0368", "wav": "./aishell/wav/test/S0767/BAC009S0767W0368.wav", "txt": "最后取全队用时最少者为胜"} -{"key": "BAC009S0767W0369", "wav": "./aishell/wav/test/S0767/BAC009S0767W0369.wav", "txt": "冠军皆由麦格理集团获得"} -{"key": "BAC009S0767W0370", "wav": "./aishell/wav/test/S0767/BAC009S0767W0370.wav", "txt": "现场参赛企业誓言要打破这项记录"} -{"key": "BAC009S0767W0371", "wav": "./aishell/wav/test/S0767/BAC009S0767W0371.wav", "txt": "本次赛事已经开始接受团队报名"} -{"key": "BAC009S0767W0372", "wav": "./aishell/wav/test/S0767/BAC009S0767W0372.wav", "txt": "目前报名仍在进行中"} -{"key": "BAC009S0767W0373", "wav": "./aishell/wav/test/S0767/BAC009S0767W0373.wav", "txt": "这一项目将为神农架林区的孩子筹建开放式体育空间"} -{"key": "BAC009S0767W0374", "wav": "./aishell/wav/test/S0767/BAC009S0767W0374.wav", "txt": "为他们搭建一个特色自由的体育室加户外体育课堂"} -{"key": "BAC009S0767W0375", "wav": "./aishell/wav/test/S0767/BAC009S0767W0375.wav", "txt": "让他们也可以有机会参与体育运动"} -{"key": "BAC009S0767W0376", "wav": "./aishell/wav/test/S0767/BAC009S0767W0376.wav", "txt": "高清女排金花手捧奖杯庆夺冠"} -{"key": "BAC009S0767W0377", "wav": "./aishell/wav/test/S0767/BAC009S0767W0377.wav", "txt": "刚刚在日本女排世界杯上夺冠的中国女排载誉回京"} -{"key": "BAC009S0767W0378", "wav": "./aishell/wav/test/S0767/BAC009S0767W0378.wav", "txt": "在首都国际机场受到了各界人士的欢迎"} -{"key": "BAC009S0767W0379", "wav": "./aishell/wav/test/S0767/BAC009S0767W0379.wav", "txt": "这其中一位身材高挑的女孩子颇为引人关注"} -{"key": "BAC009S0767W0380", "wav": "./aishell/wav/test/S0767/BAC009S0767W0380.wav", "txt": "她就是因伤未能随队参加本次赛事的徐云丽"} -{"key": "BAC009S0767W0381", "wav": "./aishell/wav/test/S0767/BAC009S0767W0381.wav", "txt": "我觉得这冠军来之不易"} -{"key": "BAC009S0767W0382", "wav": "./aishell/wav/test/S0767/BAC009S0767W0382.wav", "txt": "特别是我们今年刚开始的时候特别艰难"} -{"key": "BAC009S0767W0383", "wav": "./aishell/wav/test/S0767/BAC009S0767W0383.wav", "txt": "整个队伍承受了很大的困难和考验"} -{"key": "BAC009S0767W0384", "wav": "./aishell/wav/test/S0767/BAC009S0767W0384.wav", "txt": "最后顶住困难和压力拿到冠军"} -{"key": "BAC009S0767W0385", "wav": "./aishell/wav/test/S0767/BAC009S0767W0385.wav", "txt": "我为她们感到骄傲和自豪"} -{"key": "BAC009S0767W0386", "wav": "./aishell/wav/test/S0767/BAC009S0767W0386.wav", "txt": "那就是姑娘们高举起惠若琪徐云丽和杨方旭的球衣"} -{"key": "BAC009S0767W0387", "wav": "./aishell/wav/test/S0767/BAC009S0767W0387.wav", "txt": "感谢这三位因伤未能参赛的姐妹对球队做出的巨大贡献"} -{"key": "BAC009S0767W0388", "wav": "./aishell/wav/test/S0767/BAC009S0767W0388.wav", "txt": "徐云丽透露在赛前队友曾经给自己发了一条短信"} -{"key": "BAC009S0767W0389", "wav": "./aishell/wav/test/S0767/BAC009S0767W0389.wav", "txt": "就此事征求她的意见"} -{"key": "BAC009S0767W0390", "wav": "./aishell/wav/test/S0767/BAC009S0767W0390.wav", "txt": "看到这一幕我控制不住自己了"} -{"key": "BAC009S0767W0391", "wav": "./aishell/wav/test/S0767/BAC009S0767W0391.wav", "txt": "一切都难以用言语来表达"} -{"key": "BAC009S0767W0392", "wav": "./aishell/wav/test/S0767/BAC009S0767W0392.wav", "txt": "自己此时此刻特别迫切地想要尽快恢复"} -{"key": "BAC009S0767W0393", "wav": "./aishell/wav/test/S0767/BAC009S0767W0393.wav", "txt": "希望能够跟大家一起再次站在领奖台上"} -{"key": "BAC009S0767W0394", "wav": "./aishell/wav/test/S0767/BAC009S0767W0394.wav", "txt": "徐云丽最后动情地说"} -{"key": "BAC009S0767W0395", "wav": "./aishell/wav/test/S0767/BAC009S0767W0395.wav", "txt": "搜狐体育郭健文"} -{"key": "BAC009S0767W0396", "wav": "./aishell/wav/test/S0767/BAC009S0767W0396.wav", "txt": "二零一五年八月十五"} -{"key": "BAC009S0767W0397", "wav": 
"./aishell/wav/test/S0767/BAC009S0767W0397.wav", "txt": "这次赛事是昆仑决二零一五欧洲之旅的第三站"} -{"key": "BAC009S0767W0398", "wav": "./aishell/wav/test/S0767/BAC009S0767W0398.wav", "txt": "四季如春的俄罗斯黑海东部沿岸"} -{"key": "BAC009S0767W0399", "wav": "./aishell/wav/test/S0767/BAC009S0767W0399.wav", "txt": "新一轮激战烽火即将炽烈点燃"} -{"key": "BAC009S0767W0400", "wav": "./aishell/wav/test/S0767/BAC009S0767W0400.wav", "txt": "我是这次中俄对抗赛第一个出场的中方选手"} -{"key": "BAC009S0767W0401", "wav": "./aishell/wav/test/S0767/BAC009S0767W0401.wav", "txt": "一定要尽全力打一场漂亮的比赛"} -{"key": "BAC009S0767W0402", "wav": "./aishell/wav/test/S0767/BAC009S0767W0402.wav", "txt": "为中国战队打响第一枪"} -{"key": "BAC009S0767W0403", "wav": "./aishell/wav/test/S0767/BAC009S0767W0403.wav", "txt": "徐永昊的站立打击技术在该级别中堪称翘楚"} -{"key": "BAC009S0767W0404", "wav": "./aishell/wav/test/S0767/BAC009S0767W0404.wav", "txt": "如今再遭厄运"} -{"key": "BAC009S0767W0406", "wav": "./aishell/wav/test/S0767/BAC009S0767W0406.wav", "txt": "最终又确定为科林哈迪"} -{"key": "BAC009S0767W0407", "wav": "./aishell/wav/test/S0767/BAC009S0767W0407.wav", "txt": "科林也退出了剧组"} -{"key": "BAC009S0767W0408", "wav": "./aishell/wav/test/S0767/BAC009S0767W0408.wav", "txt": "柯震东去年经历吸毒风波"} -{"key": "BAC009S0767W0409", "wav": "./aishell/wav/test/S0767/BAC009S0767W0409.wav", "txt": "演艺事业受挫"} -{"key": "BAC009S0767W0410", "wav": "./aishell/wav/test/S0767/BAC009S0767W0410.wav", "txt": "沉寂一段时间后"} -{"key": "BAC009S0767W0411", "wav": "./aishell/wav/test/S0767/BAC009S0767W0411.wav", "txt": "近期他积极复出"} -{"key": "BAC009S0767W0412", "wav": "./aishell/wav/test/S0767/BAC009S0767W0412.wav", "txt": "再度经营他的粉丝团与粉丝互动六日昨晚"} -{"key": "BAC009S0767W0414", "wav": "./aishell/wav/test/S0767/BAC009S0767W0414.wav", "txt": "他调皮地将自己的眉毛抹掉"} -{"key": "BAC009S0767W0415", "wav": "./aishell/wav/test/S0767/BAC009S0767W0415.wav", "txt": "搜狐娱乐讯据香港明晚九月十日报道"} -{"key": "BAC009S0767W0416", "wav": "./aishell/wav/test/S0767/BAC009S0767W0416.wav", "txt": "诸葛紫岐十日晚出席活动时表示"} -{"key": "BAC009S0767W0417", "wav": "./aishell/wav/test/S0767/BAC009S0767W0417.wav", "txt": "一个月内暴瘦了九至十三斤"} -{"key": "BAC009S0767W0418", "wav": "./aishell/wav/test/S0767/BAC009S0767W0418.wav", "txt": "有时甚至忘记吃饭"} -{"key": "BAC009S0767W0419", "wav": "./aishell/wav/test/S0767/BAC009S0767W0419.wav", "txt": "也有情绪问题"} -{"key": "BAC009S0767W0420", "wav": "./aishell/wav/test/S0767/BAC009S0767W0420.wav", "txt": "打算看医生寻求纾缓方式"} -{"key": "BAC009S0767W0421", "wav": "./aishell/wav/test/S0767/BAC009S0767W0421.wav", "txt": "她说之前打电话给医生"} -{"key": "BAC009S0767W0422", "wav": "./aishell/wav/test/S0767/BAC009S0767W0422.wav", "txt": "医生说得好恐怖"} -{"key": "BAC009S0767W0423", "wav": "./aishell/wav/test/S0767/BAC009S0767W0423.wav", "txt": "但不至于要吃药"} -{"key": "BAC009S0767W0424", "wav": "./aishell/wav/test/S0767/BAC009S0767W0424.wav", "txt": "我叫他不要吓我"} -{"key": "BAC009S0767W0425", "wav": "./aishell/wav/test/S0767/BAC009S0767W0425.wav", "txt": "现在有点怕要见他"} -{"key": "BAC009S0767W0426", "wav": "./aishell/wav/test/S0767/BAC009S0767W0426.wav", "txt": "搜狐娱乐讯九月十六日二十点二十七分"} -{"key": "BAC009S0767W0428", "wav": "./aishell/wav/test/S0767/BAC009S0767W0428.wav", "txt": "并自嘲的写道自幼就走性感风格"} -{"key": "BAC009S0767W0429", "wav": "./aishell/wav/test/S0767/BAC009S0767W0429.wav", "txt": "谢依霖穿着白色吊带裙"} -{"key": "BAC009S0767W0430", "wav": "./aishell/wav/test/S0767/BAC009S0767W0430.wav", "txt": "嘟嘴作亲吻状"} -{"key": "BAC009S0767W0431", "wav": "./aishell/wav/test/S0767/BAC009S0767W0431.wav", "txt": "李玉刚离开蒙面歌王网友遗憾没听见神曲"} -{"key": "BAC009S0767W0432", "wav": "./aishell/wav/test/S0767/BAC009S0767W0432.wav", "txt": "蒙面歌王迎来初赛的收官之战"} -{"key": "BAC009S0767W0433", "wav": 
"./aishell/wav/test/S0767/BAC009S0767W0433.wav", "txt": "千面娇娃绝地反击拿下最后一席歌王头衔"} -{"key": "BAC009S0767W0434", "wav": "./aishell/wav/test/S0767/BAC009S0767W0434.wav", "txt": "而绝代歌姬李玉刚揭面引起了广泛热议"} -{"key": "BAC009S0767W0435", "wav": "./aishell/wav/test/S0767/BAC009S0767W0435.wav", "txt": "也有网友发出疑问若是李玉刚演唱李的话"} -{"key": "BAC009S0767W0436", "wav": "./aishell/wav/test/S0767/BAC009S0767W0436.wav", "txt": "登上歌王宝座的概率是不是会要大很多倍呢"} -{"key": "BAC009S0767W0437", "wav": "./aishell/wav/test/S0767/BAC009S0767W0437.wav", "txt": "短短二零零字歌词运用诗词典故三六处"} -{"key": "BAC009S0767W0438", "wav": "./aishell/wav/test/S0767/BAC009S0767W0438.wav", "txt": "每句歌词都蕴含一段李姓历史文化在里头"} -{"key": "BAC009S0767W0439", "wav": "./aishell/wav/test/S0767/BAC009S0767W0439.wav", "txt": "该歌曲今年一经在各大音乐网站上线便收获无数好评"} -{"key": "BAC009S0767W0440", "wav": "./aishell/wav/test/S0767/BAC009S0767W0440.wav", "txt": "更有全球李氏宗亲大会将其列为祭祖主题曲"} -{"key": "BAC009S0767W0441", "wav": "./aishell/wav/test/S0767/BAC009S0767W0441.wav", "txt": "同时李也俘获了无数中国大妈的芳心"} -{"key": "BAC009S0767W0442", "wav": "./aishell/wav/test/S0767/BAC009S0767W0442.wav", "txt": "成为各国各地广场舞今年最流行的背景音乐之一"} -{"key": "BAC009S0767W0443", "wav": "./aishell/wav/test/S0767/BAC009S0767W0443.wav", "txt": "李磊灭门案遗产纠纷终结八零零馀万三人有份"} -{"key": "BAC009S0767W0444", "wav": "./aishell/wav/test/S0767/BAC009S0767W0444.wav", "txt": "大兴灭门案的凶犯李磊被执行死刑后"} -{"key": "BAC009S0767W0445", "wav": "./aishell/wav/test/S0767/BAC009S0767W0445.wav", "txt": "其身后的财产分割问题尘埃落定"} -{"key": "BAC009S0767W0446", "wav": "./aishell/wav/test/S0767/BAC009S0767W0446.wav", "txt": "市二中院终审认定李家遗产共计八零零多万元"} -{"key": "BAC009S0767W0447", "wav": "./aishell/wav/test/S0767/BAC009S0767W0447.wav", "txt": "李磊的奶奶继承四三七万馀元"} -{"key": "BAC009S0767W0448", "wav": "./aishell/wav/test/S0767/BAC009S0767W0448.wav", "txt": "姥姥继承二六六万馀元"} -{"key": "BAC009S0767W0449", "wav": "./aishell/wav/test/S0767/BAC009S0767W0449.wav", "txt": "岳父母继承一零九万馀元"} -{"key": "BAC009S0767W0450", "wav": "./aishell/wav/test/S0767/BAC009S0767W0450.wav", "txt": "李谷一曾怒揭东方歌舞团腐败事后被调离岗位"} -{"key": "BAC009S0767W0451", "wav": "./aishell/wav/test/S0767/BAC009S0767W0451.wav", "txt": "顾欣资料图片昨早九号一零时"} -{"key": "BAC009S0767W0452", "wav": "./aishell/wav/test/S0767/BAC009S0767W0452.wav", "txt": "东方演艺集团大门口戒备森严"} -{"key": "BAC009S0767W0453", "wav": "./aishell/wav/test/S0767/BAC009S0767W0453.wav", "txt": "中纪委监察部网站发布消息"} -{"key": "BAC009S0767W0454", "wav": "./aishell/wav/test/S0767/BAC009S0767W0454.wav", "txt": "顾欣因涉嫌严重违纪违法"} -{"key": "BAC009S0767W0455", "wav": "./aishell/wav/test/S0767/BAC009S0767W0455.wav", "txt": "集团新领导已经上任"} -{"key": "BAC009S0767W0456", "wav": "./aishell/wav/test/S0767/BAC009S0767W0456.wav", "txt": "是原中国文化集团党委书记宋官林"} -{"key": "BAC009S0767W0457", "wav": "./aishell/wav/test/S0767/BAC009S0767W0457.wav", "txt": "李连杰名誉维权案一审胜诉网站被判至致歉赔偿一零万"} -{"key": "BAC009S0767W0458", "wav": "./aishell/wav/test/S0767/BAC009S0767W0458.wav", "txt": "新京报快讯记者林野记者今天傍晚获悉"} -{"key": "BAC009S0767W0459", "wav": "./aishell/wav/test/S0767/BAC009S0767W0459.wav", "txt": "李银河的文学梦将出版虐恋小说三卷本"} -{"key": "BAC009S0767W0460", "wav": "./aishell/wav/test/S0767/BAC009S0767W0460.wav", "txt": "权义澎湃资料李银河在现实中是柔软的"} -{"key": "BAC009S0767W0461", "wav": "./aishell/wav/test/S0767/BAC009S0767W0461.wav", "txt": "不像她发表的那些先锋的观点一样冲击人眼球"} -{"key": "BAC009S0767W0462", "wav": "./aishell/wav/test/S0767/BAC009S0767W0462.wav", "txt": "李银河写虐恋不会伤害小波"} -{"key": "BAC009S0767W0463", "wav": "./aishell/wav/test/S0767/BAC009S0767W0463.wav", "txt": "北京南三环附近一家茶楼里"} -{"key": "BAC009S0767W0464", "wav": "./aishell/wav/test/S0767/BAC009S0767W0464.wav", "txt": "六三岁的李银河拿着钥匙袋走了进来"} 
-{"key": "BAC009S0767W0465", "wav": "./aishell/wav/test/S0767/BAC009S0767W0465.wav", "txt": "她身着湖蓝色细纱短袖黑长裤白运动鞋"} -{"key": "BAC009S0767W0466", "wav": "./aishell/wav/test/S0767/BAC009S0767W0466.wav", "txt": "手腕上还戴着一块与之呼应的白色塑料腕表"} -{"key": "BAC009S0767W0467", "wav": "./aishell/wav/test/S0767/BAC009S0767W0467.wav", "txt": "出门前我拿了两套衣服"} -{"key": "BAC009S0767W0468", "wav": "./aishell/wav/test/S0767/BAC009S0767W0468.wav", "txt": "面对第一财经日报记者"} -{"key": "BAC009S0767W0469", "wav": "./aishell/wav/test/S0767/BAC009S0767W0469.wav", "txt": "说起自己的伴侣大侠"} -{"key": "BAC009S0767W0470", "wav": "./aishell/wav/test/S0767/BAC009S0767W0470.wav", "txt": "村中数百亩农田干旱村民质疑水库断了灌溉水"} -{"key": "BAC009S0767W0471", "wav": "./aishell/wav/test/S0767/BAC009S0767W0471.wav", "txt": "高新区钓渭镇疙瘩沟村村民称"} -{"key": "BAC009S0767W0472", "wav": "./aishell/wav/test/S0767/BAC009S0767W0472.wav", "txt": "却为了发电断了灌溉农田的水"} -{"key": "BAC009S0767W0473", "wav": "./aishell/wav/test/S0767/BAC009S0767W0473.wav", "txt": "导致数百亩农田干旱"} -{"key": "BAC009S0767W0474", "wav": "./aishell/wav/test/S0767/BAC009S0767W0474.wav", "txt": "该镇农办一名主管水利负责人介绍"} -{"key": "BAC009S0767W0475", "wav": "./aishell/wav/test/S0767/BAC009S0767W0475.wav", "txt": "政府曾多次叫停电站发电"} -{"key": "BAC009S0767W0476", "wav": "./aishell/wav/test/S0767/BAC009S0767W0476.wav", "txt": "但是干旱原因主要系降水减少"} -{"key": "BAC009S0767W0477", "wav": "./aishell/wav/test/S0767/BAC009S0767W0477.wav", "txt": "今后将加强水库管理"} -{"key": "BAC009S0767W0478", "wav": "./aishell/wav/test/S0767/BAC009S0767W0478.wav", "txt": "努力处理好灌溉与发电之间的关系"} -{"key": "BAC009S0767W0479", "wav": "./aishell/wav/test/S0767/BAC009S0767W0479.wav", "txt": "村主任发环卫工一六零零元工资含一四张假钞"} -{"key": "BAC009S0767W0480", "wav": "./aishell/wav/test/S0767/BAC009S0767W0480.wav", "txt": "村主任发账号给村文书想要继续当拿四八万元"} -{"key": "BAC009S0767W0481", "wav": "./aishell/wav/test/S0767/BAC009S0767W0481.wav", "txt": "华商报商洛讯记者白鹏飞近日"} -{"key": "BAC009S0767W0482", "wav": "./aishell/wav/test/S0767/BAC009S0767W0482.wav", "txt": "并向其发送银行账号"} -{"key": "BAC009S0767W0483", "wav": "./aishell/wav/test/S0767/BAC009S0767W0483.wav", "txt": "原因是有人愿为该村垫资四八万元费用修桥"} -{"key": "BAC009S0767W0484", "wav": "./aishell/wav/test/S0767/BAC009S0767W0484.wav", "txt": "村委会主任建议由垫资人担任村文书"} -{"key": "BAC009S0767W0485", "wav": "./aishell/wav/test/S0767/BAC009S0767W0485.wav", "txt": "唐寨子村党支部书记村委会主任已被全镇通报批评"} -{"key": "BAC009S0767W0486", "wav": "./aishell/wav/test/S0767/BAC009S0767W0486.wav", "txt": "村主任向开发商索贿五二零万村组干部几乎全参与分赃"} -{"key": "BAC009S0767W0487", "wav": "./aishell/wav/test/S0767/BAC009S0767W0487.wav", "txt": "城改拆迁对很多村民来说"} -{"key": "BAC009S0767W0488", "wav": "./aishell/wav/test/S0767/BAC009S0767W0488.wav", "txt": "意味着生活条件的改善"} -{"key": "BAC009S0767W0489", "wav": "./aishell/wav/test/S0767/BAC009S0767W0489.wav", "txt": "但对于部分村官及个别政府工作人员来说"} -{"key": "BAC009S0767W0490", "wav": "./aishell/wav/test/S0767/BAC009S0767W0490.wav", "txt": "却是一块大大的唐僧肉"} -{"key": "BAC009S0767W0491", "wav": "./aishell/wav/test/S0767/BAC009S0767W0491.wav", "txt": "想办法扑上去咬一口"} -{"key": "BAC009S0767W0492", "wav": "./aishell/wav/test/S0767/BAC009S0767W0492.wav", "txt": "村主任给狗盖房吞六万公款被判刑二年八个月"} -{"key": "BAC009S0767W0493", "wav": "./aishell/wav/test/S0767/BAC009S0767W0493.wav", "txt": "京华时报讯记者王晓飞在农村"} -{"key": "BAC009S0767W0494", "wav": "./aishell/wav/test/S0767/BAC009S0767W0494.wav", "txt": "几乎家家户户都会在院子里养狗"} -{"key": "BAC009S0767W0495", "wav": "./aishell/wav/test/S0767/BAC009S0767W0495.wav", "txt": "平时作为看家护院之用"} -{"key": "BAC009S0768W0121", "wav": "./aishell/wav/test/S0768/BAC009S0768W0121.wav", "txt": "成本的转嫁使得商品房用地成本更高"} -{"key": 
"BAC009S0768W0122", "wav": "./aishell/wav/test/S0768/BAC009S0768W0122.wav", "txt": "明年初料迎供应淡季"} -{"key": "BAC009S0768W0123", "wav": "./aishell/wav/test/S0768/BAC009S0768W0123.wav", "txt": "土地市场交易火热的局面可能降温"} -{"key": "BAC009S0768W0124", "wav": "./aishell/wav/test/S0768/BAC009S0768W0124.wav", "txt": "土地供应往往呈现前松后紧的态势"} -{"key": "BAC009S0768W0125", "wav": "./aishell/wav/test/S0768/BAC009S0768W0125.wav", "txt": "年初往往是土地供应的淡季"} -{"key": "BAC009S0768W0126", "wav": "./aishell/wav/test/S0768/BAC009S0768W0126.wav", "txt": "为完成全念土地供应计划"} -{"key": "BAC009S0768W0127", "wav": "./aishell/wav/test/S0768/BAC009S0768W0127.wav", "txt": "地方政府倾向于频繁推出优质地块"} -{"key": "BAC009S0768W0128", "wav": "./aishell/wav/test/S0768/BAC009S0768W0128.wav", "txt": "土地交易可能随着供应淡季的到来而降温"} -{"key": "BAC009S0768W0129", "wav": "./aishell/wav/test/S0768/BAC009S0768W0129.wav", "txt": "叠加春节因素的影响"} -{"key": "BAC009S0768W0130", "wav": "./aishell/wav/test/S0768/BAC009S0768W0130.wav", "txt": "这种情况在二月可能较明显"} -{"key": "BAC009S0768W0131", "wav": "./aishell/wav/test/S0768/BAC009S0768W0131.wav", "txt": "房地产企业的整体资金状况超紧"} -{"key": "BAC009S0768W0132", "wav": "./aishell/wav/test/S0768/BAC009S0768W0132.wav", "txt": "不利于继续大规模拿地"} -{"key": "BAC009S0768W0133", "wav": "./aishell/wav/test/S0768/BAC009S0768W0133.wav", "txt": "国家统计局数据显示"} -{"key": "BAC009S0768W0134", "wav": "./aishell/wav/test/S0768/BAC009S0768W0134.wav", "txt": "房地产开发企业到位资金十万亿元"} -{"key": "BAC009S0768W0135", "wav": "./aishell/wav/test/S0768/BAC009S0768W0135.wav", "txt": "增速比三月回落六个百分点"} -{"key": "BAC009S0768W0136", "wav": "./aishell/wav/test/S0768/BAC009S0768W0136.wav", "txt": "未来房企拿地投资新开工等指标可能受到影响"} -{"key": "BAC009S0768W0137", "wav": "./aishell/wav/test/S0768/BAC009S0768W0137.wav", "txt": "尽管降息等利好政策出台"} -{"key": "BAC009S0768W0138", "wav": "./aishell/wav/test/S0768/BAC009S0768W0138.wav", "txt": "但房地产市场仍处于调整期"} -{"key": "BAC009S0768W0139", "wav": "./aishell/wav/test/S0768/BAC009S0768W0139.wav", "txt": "预计不会在短期内迅速回暖"} -{"key": "BAC009S0768W0140", "wav": "./aishell/wav/test/S0768/BAC009S0768W0140.wav", "txt": "与之相联系的土地市场也会受到影响"} -{"key": "BAC009S0768W0141", "wav": "./aishell/wav/test/S0768/BAC009S0768W0141.wav", "txt": "中国证券报报道"} -{"key": "BAC009S0768W0142", "wav": "./aishell/wav/test/S0768/BAC009S0768W0142.wav", "txt": "今年商品房销售一度低日迷"} -{"key": "BAC009S0768W0143", "wav": "./aishell/wav/test/S0768/BAC009S0768W0143.wav", "txt": "一线城市土地市场成交火热"} -{"key": "BAC009S0768W0144", "wav": "./aishell/wav/test/S0768/BAC009S0768W0144.wav", "txt": "中介机构统计数据显示"} -{"key": "BAC009S0768W0145", "wav": "./aishell/wav/test/S0768/BAC009S0768W0145.wav", "txt": "五环内商品住宅的成交在市场中并非主流"} -{"key": "BAC009S0768W0146", "wav": "./aishell/wav/test/S0768/BAC009S0768W0146.wav", "txt": "一位房企人士认为五环内项目的稀缺性难以改变"} -{"key": "BAC009S0768W0147", "wav": "./aishell/wav/test/S0768/BAC009S0768W0147.wav", "txt": "新京报讯记者张徐报道"} -{"key": "BAC009S0768W0148", "wav": "./aishell/wav/test/S0768/BAC009S0768W0148.wav", "txt": "北京去年土地出让落下大幕"} -{"key": "BAC009S0768W0149", "wav": "./aishell/wav/test/S0768/BAC009S0768W0149.wav", "txt": "在丰台潘家村一宗商业用地低价成交后"} -{"key": "BAC009S0768W0150", "wav": "./aishell/wav/test/S0768/BAC009S0768W0150.wav", "txt": "北京今年的土地出让金锁定在两千亿元"} -{"key": "BAC009S0768W0151", "wav": "./aishell/wav/test/S0768/BAC009S0768W0151.wav", "txt": "同比前年增长五成"} -{"key": "BAC009S0768W0152", "wav": "./aishell/wav/test/S0768/BAC009S0768W0152.wav", "txt": "丰台区域潘家村危改三号地成为今年的收官地质块"} -{"key": "BAC009S0768W0153", "wav": "./aishell/wav/test/S0768/BAC009S0768W0153.wav", "txt": "这宗零售商业用地位于南三环外"} -{"key": "BAC009S0768W0154", "wav": 
"./aishell/wav/test/S0768/BAC009S0768W0154.wav", "txt": "邻近地铁十号线首竟贸站"} -{"key": "BAC009S0768W0155", "wav": "./aishell/wav/test/S0768/BAC009S0768W0155.wav", "txt": "规划建筑面积约五万平方米"} -{"key": "BAC009S0768W0156", "wav": "./aishell/wav/test/S0768/BAC009S0768W0156.wav", "txt": "潘家村地块体量较小"} -{"key": "BAC009S0768W0157", "wav": "./aishell/wav/test/S0768/BAC009S0768W0157.wav", "txt": "未必吸引太多擅长上规模开发的企业"} -{"key": "BAC009S0768W0158", "wav": "./aishell/wav/test/S0768/BAC009S0768W0158.wav", "txt": "因此最终仅有龙湖地产一家报价"} -{"key": "BAC009S0768W0159", "wav": "./aishell/wav/test/S0768/BAC009S0768W0159.wav", "txt": "龙湖即以低价五点五亿元拿地"} -{"key": "BAC009S0768W0160", "wav": "./aishell/wav/test/S0768/BAC009S0768W0160.wav", "txt": "楼面价折合约一万元每平方米"} -{"key": "BAC009S0768W0161", "wav": "./aishell/wav/test/S0768/BAC009S0768W0161.wav", "txt": "龙湖地产有关人士对记者表示"} -{"key": "BAC009S0768W0162", "wav": "./aishell/wav/test/S0768/BAC009S0768W0162.wav", "txt": "龙湖已经在丰台有土地储备"} -{"key": "BAC009S0768W0163", "wav": "./aishell/wav/test/S0768/BAC009S0768W0163.wav", "txt": "未来还将继续深耕丰台区域"} -{"key": "BAC009S0768W0164", "wav": "./aishell/wav/test/S0768/BAC009S0768W0164.wav", "txt": "龙湖在丰台西局撤资三十亿元拿地"} -{"key": "BAC009S0768W0165", "wav": "./aishell/wav/test/S0768/BAC009S0768W0165.wav", "txt": "纯商品房楼面价接近六万元每平方米"} -{"key": "BAC009S0768W0166", "wav": "./aishell/wav/test/S0768/BAC009S0768W0166.wav", "txt": "并不代表全年土地市场行情走低"} -{"key": "BAC009S0768W0167", "wav": "./aishell/wav/test/S0768/BAC009S0768W0167.wav", "txt": "今年北京土地市场仍然是高温状态"} -{"key": "BAC009S0768W0168", "wav": "./aishell/wav/test/S0768/BAC009S0768W0168.wav", "txt": "特别是一至四月土地出让金即破千亿元"} -{"key": "BAC009S0768W0169", "wav": "./aishell/wav/test/S0768/BAC009S0768W0169.wav", "txt": "根据北京中原地产统计"} -{"key": "BAC009S0768W0170", "wav": "./aishell/wav/test/S0768/BAC009S0768W0170.wav", "txt": "去年北京共出让五十宗居住楼用地"} -{"key": "BAC009S0768W0171", "wav": "./aishell/wav/test/S0768/BAC009S0768W0171.wav", "txt": "规划建筑面积合计九百万平方米"} -{"key": "BAC009S0768W0172", "wav": "./aishell/wav/test/S0768/BAC009S0768W0172.wav", "txt": "出让金合计一千亿元"} -{"key": "BAC009S0768W0173", "wav": "./aishell/wav/test/S0768/BAC009S0768W0173.wav", "txt": "整体平均楼面价折合一万元每平方米"} -{"key": "BAC009S0768W0174", "wav": "./aishell/wav/test/S0768/BAC009S0768W0174.wav", "txt": "这一平均楼面价较年前的九千元每平方米"} -{"key": "BAC009S0768W0175", "wav": "./aishell/wav/test/S0768/BAC009S0768W0175.wav", "txt": "北京中原地产首席分析师张大伟认为"} -{"key": "BAC009S0768W0176", "wav": "./aishell/wav/test/S0768/BAC009S0768W0176.wav", "txt": "一二线城市特别是京沪这样的核心城市"} -{"key": "BAC009S0768W0177", "wav": "./aishell/wav/test/S0768/BAC009S0768W0177.wav", "txt": "投资价值更好房企看好"} -{"key": "BAC009S0768W0178", "wav": "./aishell/wav/test/S0768/BAC009S0768W0178.wav", "txt": "加上优质地块的连续供应"} -{"key": "BAC009S0768W0179", "wav": "./aishell/wav/test/S0768/BAC009S0768W0179.wav", "txt": "促成了今年北京土地市场的走高"} -{"key": "BAC009S0768W0180", "wav": "./aishell/wav/test/S0768/BAC009S0768W0180.wav", "txt": "通州新城彩虹之门用地挂出三十日"} -{"key": "BAC009S0768W0181", "wav": "./aishell/wav/test/S0768/BAC009S0768W0181.wav", "txt": "记者从北京市国土局网站看到"} -{"key": "BAC009S0768W0182", "wav": "./aishell/wav/test/S0768/BAC009S0768W0182.wav", "txt": "通州运河核心区一宗多功能用地挂出"} -{"key": "BAC009S0768W0183", "wav": "./aishell/wav/test/S0768/BAC009S0768W0183.wav", "txt": "将于明年投标"} -{"key": "BAC009S0768W0184", "wav": "./aishell/wav/test/S0768/BAC009S0768W0184.wav", "txt": "该地块位于通州新城五河交汇处东南角"} -{"key": "BAC009S0768W0185", "wav": "./aishell/wav/test/S0768/BAC009S0768W0185.wav", "txt": "规划建筑面积为四十万平方米"} -{"key": "BAC009S0768W0186", "wav": "./aishell/wav/test/S0768/BAC009S0768W0186.wav", 
"txt": "据记者从多个渠道了解"} -{"key": "BAC009S0768W0187", "wav": "./aishell/wav/test/S0768/BAC009S0768W0187.wav", "txt": "分档制定了中央部门收取的考务费统一上限标准"} -{"key": "BAC009S0768W0188", "wav": "./aishell/wav/test/S0768/BAC009S0768W0188.wav", "txt": "考虑到地方组织考试的成本相对比较固定"} -{"key": "BAC009S0768W0189", "wav": "./aishell/wav/test/S0768/BAC009S0768W0189.wav", "txt": "即各省在考务费标准基础上"} -{"key": "BAC009S0768W0190", "wav": "./aishell/wav/test/S0768/BAC009S0768W0190.wav", "txt": "实践技能操作和面试类考试科目"} -{"key": "BAC009S0768W0191", "wav": "./aishell/wav/test/S0768/BAC009S0768W0191.wav", "txt": "需配备租赁精密仪器专业设备大型场地"} -{"key": "BAC009S0768W0192", "wav": "./aishell/wav/test/S0768/BAC009S0768W0192.wav", "txt": "考试过程需要消耗相关材料或需聘请专业面试考官的"} -{"key": "BAC009S0768W0193", "wav": "./aishell/wav/test/S0768/BAC009S0768W0193.wav", "txt": "由于影响成本的因素过多"} -{"key": "BAC009S0768W0194", "wav": "./aishell/wav/test/S0768/BAC009S0768W0194.wav", "txt": "由各省根据实际成本制定"} -{"key": "BAC009S0768W0195", "wav": "./aishell/wav/test/S0768/BAC009S0768W0195.wav", "txt": "三是促进考务成本降低和考试单位合并"} -{"key": "BAC009S0768W0196", "wav": "./aishell/wav/test/S0768/BAC009S0768W0196.wav", "txt": "对考务费实行统一标准化管理后"} -{"key": "BAC009S0768W0197", "wav": "./aishell/wav/test/S0768/BAC009S0768W0197.wav", "txt": "而是改按统一合理的平均成本确定"} -{"key": "BAC009S0768W0198", "wav": "./aishell/wav/test/S0768/BAC009S0768W0198.wav", "txt": "将切实减轻考生经济负担"} -{"key": "BAC009S0768W0199", "wav": "./aishell/wav/test/S0768/BAC009S0768W0199.wav", "txt": "改革将对考试单位的费用支出形成倒逼机制"} -{"key": "BAC009S0768W0200", "wav": "./aishell/wav/test/S0768/BAC009S0768W0200.wav", "txt": "促使考试单位自觉降低成本由于形不成规模效益"} -{"key": "BAC009S0768W0201", "wav": "./aishell/wav/test/S0768/BAC009S0768W0201.wav", "txt": "一些规模较小的考试机构也将自动寻求合并"} -{"key": "BAC009S0768W0202", "wav": "./aishell/wav/test/S0768/BAC009S0768W0202.wav", "txt": "利用价格杠杆促进考试单位向集约化发展"} -{"key": "BAC009S0768W0203", "wav": "./aishell/wav/test/S0768/BAC009S0768W0203.wav", "txt": "他就上述关注问题指出"} -{"key": "BAC009S0768W0204", "wav": "./aishell/wav/test/S0768/BAC009S0768W0204.wav", "txt": "目前我国出现政府性债务违约可能性并不大"} -{"key": "BAC009S0768W0205", "wav": "./aishell/wav/test/S0768/BAC009S0768W0205.wav", "txt": "下一步将进一步完善城投债卷发行制度和防范风险机制"} -{"key": "BAC009S0768W0206", "wav": "./aishell/wav/test/S0768/BAC009S0768W0206.wav", "txt": "并尽快建立我国地方政府债务管理体系等"} -{"key": "BAC009S0768W0207", "wav": "./aishell/wav/test/S0768/BAC009S0768W0207.wav", "txt": "政府性违约可能性不大"} -{"key": "BAC009S0768W0208", "wav": "./aishell/wav/test/S0768/BAC009S0768W0208.wav", "txt": "中国证卷报随着欧美等国主权债务危机陆续爆发"} -{"key": "BAC009S0768W0209", "wav": "./aishell/wav/test/S0768/BAC009S0768W0209.wav", "txt": "您如何看待政府的举债行为和债务风险"} -{"key": "BAC009S0768W0210", "wav": "./aishell/wav/test/S0768/BAC009S0768W0210.wav", "txt": "徐林吸取欧美等国主权债务危机的教训"} -{"key": "BAC009S0768W0211", "wav": "./aishell/wav/test/S0768/BAC009S0768W0211.wav", "txt": "采取必要措施加强政府债务管理"} -{"key": "BAC009S0768W0212", "wav": "./aishell/wav/test/S0768/BAC009S0768W0212.wav", "txt": "防范我国政府债务风险"} -{"key": "BAC009S0768W0213", "wav": "./aishell/wav/test/S0768/BAC009S0768W0213.wav", "txt": "但在具体评估我国地方政府债务风险程度时"} -{"key": "BAC009S0768W0214", "wav": "./aishell/wav/test/S0768/BAC009S0768W0214.wav", "txt": "也要看到我国与欧美国家的不同之处"} -{"key": "BAC009S0768W0215", "wav": "./aishell/wav/test/S0768/BAC009S0768W0215.wav", "txt": "我国地方政府性债务"} -{"key": "BAC009S0768W0216", "wav": "./aishell/wav/test/S0768/BAC009S0768W0216.wav", "txt": "特别是地方投投融资平台公司形成的债务"} -{"key": "BAC009S0768W0217", "wav": "./aishell/wav/test/S0768/BAC009S0768W0217.wav", "txt": "主要由于各各种基础设施的投资建设"} -{"key": "BAC009S0768W0218", "wav": 
"./aishell/wav/test/S0768/BAC009S0768W0218.wav", "txt": "当代人和后代人共同承担债务还本付息责任"} -{"key": "BAC009S0768W0219", "wav": "./aishell/wav/test/S0768/BAC009S0768W0219.wav", "txt": "可以更好地体现代际公平"} -{"key": "BAC009S0768W0220", "wav": "./aishell/wav/test/S0768/BAC009S0768W0220.wav", "txt": "克服当期建设资金不足的瓶颈制约"} -{"key": "BAC009S0768W0221", "wav": "./aishell/wav/test/S0768/BAC009S0768W0221.wav", "txt": "有利于加快完善基础设施和投资环境"} -{"key": "BAC009S0768W0222", "wav": "./aishell/wav/test/S0768/BAC009S0768W0222.wav", "txt": "是一种合理的基础设施投融资金建设行为"} -{"key": "BAC009S0768W0223", "wav": "./aishell/wav/test/S0768/BAC009S0768W0223.wav", "txt": "政府举债建设形成大量资金"} -{"key": "BAC009S0768W0224", "wav": "./aishell/wav/test/S0768/BAC009S0768W0224.wav", "txt": "相当部分资产具有长期的直接收益"} -{"key": "BAC009S0768W0225", "wav": "./aishell/wav/test/S0768/BAC009S0768W0225.wav", "txt": "一些没有直接收益的项目"} -{"key": "BAC009S0768W0226", "wav": "./aishell/wav/test/S0768/BAC009S0768W0226.wav", "txt": "也具有间接的经济效益或社会效益"} -{"key": "BAC009S0768W0227", "wav": "./aishell/wav/test/S0768/BAC009S0768W0227.wav", "txt": "对促进当地经济增长和政府财力的增长"} -{"key": "BAC009S0768W0228", "wav": "./aishell/wav/test/S0768/BAC009S0768W0228.wav", "txt": "不能简单地用寅吃卯粮来作价值判断"} -{"key": "BAC009S0768W0229", "wav": "./aishell/wav/test/S0768/BAC009S0768W0229.wav", "txt": "这并不意味着政府可以无节制地借债"} -{"key": "BAC009S0768W0230", "wav": "./aishell/wav/test/S0768/BAC009S0768W0230.wav", "txt": "关键是要把投资规模和债务规模"} -{"key": "BAC009S0768W0231", "wav": "./aishell/wav/test/S0768/BAC009S0768W0231.wav", "txt": "控制在合理的范围内"} -{"key": "BAC009S0768W0232", "wav": "./aishell/wav/test/S0768/BAC009S0768W0232.wav", "txt": "防止出现系统性的偿债风险"} -{"key": "BAC009S0768W0233", "wav": "./aishell/wav/test/S0768/BAC009S0768W0233.wav", "txt": "国务院高度重视防范地方政府债务风险"} -{"key": "BAC009S0768W0234", "wav": "./aishell/wav/test/S0768/BAC009S0768W0234.wav", "txt": "从二零零九年下半年就开始要求有关部门调研这一问题"} -{"key": "BAC009S0768W0235", "wav": "./aishell/wav/test/S0768/BAC009S0768W0235.wav", "txt": "国家审计署还专门组织力量"} -{"key": "BAC009S0768W0236", "wav": "./aishell/wav/test/S0768/BAC009S0768W0236.wav", "txt": "对全国各地的政府债务进行啦严格审计"} -{"key": "BAC009S0768W0237", "wav": "./aishell/wav/test/S0768/BAC009S0768W0237.wav", "txt": "审计署的审计结论表明"} -{"key": "BAC009S0768W0238", "wav": "./aishell/wav/test/S0768/BAC009S0768W0238.wav", "txt": "我国地方政府的累积债务相对于偿付能力来看"} -{"key": "BAC009S0768W0239", "wav": "./aishell/wav/test/S0768/BAC009S0768W0239.wav", "txt": "远低于发生债务危机的欧美国家"} -{"key": "BAC009S0768W0240", "wav": "./aishell/wav/test/S0768/BAC009S0768W0240.wav", "txt": "考虑到我国正处在经济快速增长期"} -{"key": "BAC009S0768W0241", "wav": "./aishell/wav/test/S0768/BAC009S0768W0241.wav", "txt": "政府财力增长也相当较快"} -{"key": "BAC009S0768W0242", "wav": "./aishell/wav/test/S0768/BAC009S0768W0242.wav", "txt": "政府还拥有较多的可变现资产"} -{"key": "BAC009S0768W0243", "wav": "./aishell/wav/test/S0768/BAC009S0768W0243.wav", "txt": "相对于目前的负债规模"} -{"key": "BAC009S0768W0244", "wav": "./aishell/wav/test/S0768/BAC009S0768W0244.wav", "txt": "政府总体上具有较强的偿债能力"} -{"key": "BAC009S0768W0245", "wav": "./aishell/wav/test/S0768/BAC009S0768W0245.wav", "txt": "采取积极有效的措施化解部分地区和领域的债务风险"} -{"key": "BAC009S0768W0246", "wav": "./aishell/wav/test/S0768/BAC009S0768W0246.wav", "txt": "在我国出现政府性债务违约的可能性是不大的"} -{"key": "BAC009S0768W0247", "wav": "./aishell/wav/test/S0768/BAC009S0768W0247.wav", "txt": "债卷市场城投债卷发行不畅"} -{"key": "BAC009S0768W0248", "wav": "./aishell/wav/test/S0768/BAC009S0768W0248.wav", "txt": "从城投债券发行监管部门的角度"} -{"key": "BAC009S0768W0249", "wav": "./aishell/wav/test/S0768/BAC009S0768W0249.wav", "txt": "您如何看待这一现象"} -{"key": "BAC009S0768W0250", "wav": 
"./aishell/wav/test/S0768/BAC009S0768W0250.wav", "txt": "徐林出于对地方政府债务风险的担忧"} -{"key": "BAC009S0768W0251", "wav": "./aishell/wav/test/S0768/BAC009S0768W0251.wav", "txt": "投资者采取措施防范风险是成熟的表现"} -{"key": "BAC009S0768W0252", "wav": "./aishell/wav/test/S0768/BAC009S0768W0252.wav", "txt": "但出于对我国地方政府债务风险的不合理判断"} -{"key": "BAC009S0768W0253", "wav": "./aishell/wav/test/S0768/BAC009S0768W0253.wav", "txt": "神雕的机身四周装有分布式有源相控阵雷达天线"} -{"key": "BAC009S0768W0254", "wav": "./aishell/wav/test/S0768/BAC009S0768W0254.wav", "txt": "可以提供三六零度无死角的早期预警"} -{"key": "BAC009S0768W0255", "wav": "./aishell/wav/test/S0768/BAC009S0768W0255.wav", "txt": "它的雷达可能采用了双波段设计"} -{"key": "BAC009S0768W0259", "wav": "./aishell/wav/test/S0768/BAC009S0768W0259.wav", "txt": "该机的雷达还具备合成孔径工作能力"} -{"key": "BAC009S0768W0260", "wav": "./aishell/wav/test/S0768/BAC009S0768W0260.wav", "txt": "可用于侦察缓慢移动的地面和海面目标"} -{"key": "BAC009S0768W0261", "wav": "./aishell/wav/test/S0768/BAC009S0768W0261.wav", "txt": "神雕还有一定的隐身特性"} -{"key": "BAC009S0768W0262", "wav": "./aishell/wav/test/S0768/BAC009S0768W0262.wav", "txt": "加上它凭借远程雷达与对方舰队保持远距离"} -{"key": "BAC009S0768W0263", "wav": "./aishell/wav/test/S0768/BAC009S0768W0263.wav", "txt": "如果神雕大量服役和部署"} -{"key": "BAC009S0768W0264", "wav": "./aishell/wav/test/S0768/BAC009S0768W0264.wav", "txt": "在战区上空形成有效韧的信息网络"} -{"key": "BAC009S0768W0265", "wav": "./aishell/wav/test/S0768/BAC009S0768W0265.wav", "txt": "那将会是中国海空军的战力倍增器之一"} -{"key": "BAC009S0768W0266", "wav": "./aishell/wav/test/S0768/BAC009S0768W0266.wav", "txt": "高空长航时战略无人机"} -{"key": "BAC009S0768W0267", "wav": "./aishell/wav/test/S0768/BAC009S0768W0267.wav", "txt": "全球鹰并不能独占鳌头"} -{"key": "BAC009S0768W0268", "wav": "./aishell/wav/test/S0768/BAC009S0768W0268.wav", "txt": "继二零一一年出现独特的连翼造型的翔龙无人机以后"} -{"key": "BAC009S0768W0269", "wav": "./aishell/wav/test/S0768/BAC009S0768W0269.wav", "txt": "又一款个性十足的双机身气动外形的大型无人机神雕"} -{"key": "BAC009S0768W0271", "wav": "./aishell/wav/test/S0768/BAC009S0768W0271.wav", "txt": "据新华社电美国国际贸易委员会二十一日作出终裁"} -{"key": "BAC009S0768W0272", "wav": "./aishell/wav/test/S0768/BAC009S0768W0272.wav", "txt": "从台湾地区进口的此类产品存在切销行为"} -{"key": "BAC009S0768W0273", "wav": "./aishell/wav/test/S0768/BAC009S0768W0273.wav", "txt": "美国国际贸易委员会称"} -{"key": "BAC009S0768W0274", "wav": "./aishell/wav/test/S0768/BAC009S0768W0274.wav", "txt": "在征收反倾销或反补贴税之前"} -{"key": "BAC009S0768W0275", "wav": "./aishell/wav/test/S0768/BAC009S0768W0275.wav", "txt": "美商务部与国际贸易委员会都需作出肯定性终裁"} -{"key": "BAC009S0768W0276", "wav": "./aishell/wav/test/S0768/BAC009S0768W0276.wav", "txt": "商务部裁定切销或补贴幅度"} -{"key": "BAC009S0768W0277", "wav": "./aishell/wav/test/S0768/BAC009S0768W0277.wav", "txt": "根据美国商务部去年十二月份终裁确定的幅度"} -{"key": "BAC009S0768W0278", "wav": "./aishell/wav/test/S0768/BAC009S0768W0278.wav", "txt": "针对中美光伏贸易纠纷"} -{"key": "BAC009S0768W0279", "wav": "./aishell/wav/test/S0768/BAC009S0768W0279.wav", "txt": "中国商务部已明确表示"} -{"key": "BAC009S0768W0280", "wav": "./aishell/wav/test/S0768/BAC009S0768W0280.wav", "txt": "再次对中国光伏产品发起双反调查并试图征收高额关税"} -{"key": "BAC009S0768W0281", "wav": "./aishell/wav/test/S0768/BAC009S0768W0281.wav", "txt": "中方对此表示强烈不满"} -{"key": "BAC009S0768W0282", "wav": "./aishell/wav/test/S0768/BAC009S0768W0282.wav", "txt": "美方对中国产品进行限制的做法"} -{"key": "BAC009S0768W0283", "wav": "./aishell/wav/test/S0768/BAC009S0768W0283.wav", "txt": "是对贸易救济措施的滥用"} -{"key": "BAC009S0768W0284", "wav": "./aishell/wav/test/S0768/BAC009S0768W0284.wav", "txt": "势必使用中美光伏贸易纠纷再度升级"} -{"key": "BAC009S0768W0285", "wav": "./aishell/wav/test/S0768/BAC009S0768W0285.wav", "txt": "美国智库学学者和太阳能行业协会也多次警告"} -{"key": 
"BAC009S0768W0286", "wav": "./aishell/wav/test/S0768/BAC009S0768W0286.wav", "txt": "许多美国太阳太阳能制造商依赖于全球光伏供应链"} -{"key": "BAC009S0768W0287", "wav": "./aishell/wav/test/S0768/BAC009S0768W0287.wav", "txt": "并减少太阳能产业相关就业岗位"} -{"key": "BAC009S0768W0288", "wav": "./aishell/wav/test/S0768/BAC009S0768W0288.wav", "txt": "美初裁中国产轮胎倾销"} -{"key": "BAC009S0768W0289", "wav": "./aishell/wav/test/S0768/BAC009S0768W0289.wav", "txt": "据新华社电美国商务部二十一日宣布初裁结果"} -{"key": "BAC009S0768W0290", "wav": "./aishell/wav/test/S0768/BAC009S0768W0290.wav", "txt": "认定从中国进口的乘用车和轻型卡车轮胎存在倾销行为"} -{"key": "BAC009S0768W0291", "wav": "./aishell/wav/test/S0768/BAC009S0768W0291.wav", "txt": "美商务部当天发表声明说"} -{"key": "BAC009S0768W0292", "wav": "./aishell/wav/test/S0768/BAC009S0768W0292.wav", "txt": "倾销幅度从百分之十七至百分之九十九"} -{"key": "BAC009S0768W0293", "wav": "./aishell/wav/test/S0768/BAC009S0768W0293.wav", "txt": "基于倾销幅度的初裁结果"} -{"key": "BAC009S0768W0294", "wav": "./aishell/wav/test/S0768/BAC009S0768W0294.wav", "txt": "就美国对中国产轮胎发起双坊调查"} -{"key": "BAC009S0768W0295", "wav": "./aishell/wav/test/S0768/BAC009S0768W0295.wav", "txt": "中国商务部曾表示强烈反对"} -{"key": "BAC009S0768W0296", "wav": "./aishell/wav/test/S0768/BAC009S0768W0296.wav", "txt": "认为美国此举违反世界贸易组织规则和美国国内法"} -{"key": "BAC009S0768W0297", "wav": "./aishell/wav/test/S0768/BAC009S0768W0297.wav", "txt": "希望美方吸取前车之鉴"} -{"key": "BAC009S0768W0298", "wav": "./aishell/wav/test/S0768/BAC009S0768W0298.wav", "txt": "避免破坏两国相关产业的贸易与合作"} -{"key": "BAC009S0768W0299", "wav": "./aishell/wav/test/S0768/BAC009S0768W0299.wav", "txt": "据新华社电美国国际贸易委员会二十一日作出终裁"} -{"key": "BAC009S0768W0300", "wav": "./aishell/wav/test/S0768/BAC009S0768W0300.wav", "txt": "从台湾地区进口的此类产品存在倾销行为"} -{"key": "BAC009S0768W0301", "wav": "./aishell/wav/test/S0768/BAC009S0768W0301.wav", "txt": "这意味着美国将对相关产品"} -{"key": "BAC009S0768W0303", "wav": "./aishell/wav/test/S0768/BAC009S0768W0303.wav", "txt": "成飞集成百二十一九十"} -{"key": "BAC009S0768W0305", "wav": "./aishell/wav/test/S0768/BAC009S0768W0305.wav", "txt": "公司上半年营业收入六点三四亿元"} -{"key": "BAC009S0768W0307", "wav": "./aishell/wav/test/S0768/BAC009S0768W0307.wav", "txt": "从而获取用户信息的案件"} -{"key": "BAC009S0768W0308", "wav": "./aishell/wav/test/S0768/BAC009S0768W0308.wav", "txt": "杨某等四人一同在深圳成立了安丰公司"} -{"key": "BAC009S0768W0309", "wav": "./aishell/wav/test/S0768/BAC009S0768W0309.wav", "txt": "公司主要从事计算机手机的软件开发业务"} -{"key": "BAC009S0768W0310", "wav": "./aishell/wav/test/S0768/BAC009S0768W0310.wav", "txt": "由于安丰公司的业务不景气"} -{"key": "BAC009S0768W0311", "wav": "./aishell/wav/test/S0768/BAC009S0768W0311.wav", "txt": "杨某等四人经过商议"} -{"key": "BAC009S0768W0312", "wav": "./aishell/wav/test/S0768/BAC009S0768W0312.wav", "txt": "决定由麦德公司的技术部门研发静默插件"} -{"key": "BAC009S0768W0313", "wav": "./aishell/wav/test/S0768/BAC009S0768W0313.wav", "txt": "使用户在刷机过程中"} -{"key": "BAC009S0768W0314", "wav": "./aishell/wav/test/S0768/BAC009S0768W0314.wav", "txt": "不知不觉地安装上公司开发的插件"} -{"key": "BAC009S0768W0315", "wav": "./aishell/wav/test/S0768/BAC009S0768W0315.wav", "txt": "而手机被安装上这一插件后"} -{"key": "BAC009S0768W0316", "wav": "./aishell/wav/test/S0768/BAC009S0768W0316.wav", "txt": "公司不仅可以向手机推送软件广告等商业性电子信息"} -{"key": "BAC009S0768W0317", "wav": "./aishell/wav/test/S0768/BAC009S0768W0317.wav", "txt": "安丰公司的广告网页是他们推送的重要内容"} -{"key": "BAC009S0768W0318", "wav": "./aishell/wav/test/S0768/BAC009S0768W0318.wav", "txt": "他们通过这个插件已获利广告收入二十馀万元"} -{"key": "BAC009S0768W0319", "wav": "./aishell/wav/test/S0768/BAC009S0768W0319.wav", "txt": "同案被捕的马某等四人是公司技术部门的员工"} -{"key": "BAC009S0768W0320", "wav": "./aishell/wav/test/S0768/BAC009S0768W0320.wav", "txt": 
"软件开发是领导的授意"} -{"key": "BAC009S0768W0321", "wav": "./aishell/wav/test/S0768/BAC009S0768W0321.wav", "txt": "自己只是执行公司的工作要求"} -{"key": "BAC009S0768W0322", "wav": "./aishell/wav/test/S0768/BAC009S0768W0322.wav", "txt": "三百六十软件识别出了麦德公司的插件"} -{"key": "BAC009S0768W0323", "wav": "./aishell/wav/test/S0768/BAC009S0768W0323.wav", "txt": "将其列为恶意软件用户称其为流氓软件"} -{"key": "BAC009S0768W0324", "wav": "./aishell/wav/test/S0768/BAC009S0768W0324.wav", "txt": "马某等人进一步完善了插件"} -{"key": "BAC009S0768W0325", "wav": "./aishell/wav/test/S0768/BAC009S0768W0325.wav", "txt": "再次利用同样的静默安装方式继续推广软件"} -{"key": "BAC009S0768W0326", "wav": "./aishell/wav/test/S0768/BAC009S0768W0326.wav", "txt": "二十馀万部手机遭殃"} -{"key": "BAC009S0768W0327", "wav": "./aishell/wav/test/S0768/BAC009S0768W0327.wav", "txt": "在被公安机关查获后"} -{"key": "BAC009S0768W0328", "wav": "./aishell/wav/test/S0768/BAC009S0768W0328.wav", "txt": "警方在麦德公司数据库中发现"} -{"key": "BAC009S0768W0329", "wav": "./aishell/wav/test/S0768/BAC009S0768W0329.wav", "txt": "获取到的通讯录近两千万条"} -{"key": "BAC009S0768W0330", "wav": "./aishell/wav/test/S0768/BAC009S0768W0330.wav", "txt": "判处有期徒刑三年六个月"} -{"key": "BAC009S0768W0331", "wav": "./aishell/wav/test/S0768/BAC009S0768W0331.wav", "txt": "其馀九人获刑一年五个月至三年不等"} -{"key": "BAC009S0768W0332", "wav": "./aishell/wav/test/S0768/BAC009S0768W0332.wav", "txt": "依据国家相关法律法规"} -{"key": "BAC009S0768W0333", "wav": "./aishell/wav/test/S0768/BAC009S0768W0333.wav", "txt": "杨某等人在明知插件功能的情况下"} -{"key": "BAC009S0768W0334", "wav": "./aishell/wav/test/S0768/BAC009S0768W0334.wav", "txt": "未经用户同意将该插件预置到呃用户手机中"} -{"key": "BAC009S0768W0335", "wav": "./aishell/wav/test/S0768/BAC009S0768W0335.wav", "txt": "非法获取用户身份认证信息"} -{"key": "BAC009S0768W0336", "wav": "./aishell/wav/test/S0768/BAC009S0768W0336.wav", "txt": "已经构成了对他人计算机信息系统的侵入控制"} -{"key": "BAC009S0768W0337", "wav": "./aishell/wav/test/S0768/BAC009S0768W0337.wav", "txt": "侵犯了公民的合法权益"} -{"key": "BAC009S0768W0338", "wav": "./aishell/wav/test/S0768/BAC009S0768W0338.wav", "txt": "强劲犀利的拳法与膝法破坏力惊人"} -{"key": "BAC009S0768W0339", "wav": "./aishell/wav/test/S0768/BAC009S0768W0339.wav", "txt": "二零一五年初在南京的笼斗中"} -{"key": "BAC009S0768W0340", "wav": "./aishell/wav/test/S0768/BAC009S0768W0340.wav", "txt": "徐永昊以雷霆万钧之势缔造了一场震撼的秒杀之作"} -{"key": "BAC009S0768W0341", "wav": "./aishell/wav/test/S0768/BAC009S0768W0341.wav", "txt": "迅即杀狠的站立技术令人惊叹不已"} -{"key": "BAC009S0768W0342", "wav": "./aishell/wav/test/S0768/BAC009S0768W0342.wav", "txt": "也是我喜欢的格斗方式"} -{"key": "BAC009S0768W0343", "wav": "./aishell/wav/test/S0768/BAC009S0768W0343.wav", "txt": "我都会对站立技术进行重点强化"} -{"key": "BAC009S0768W0344", "wav": "./aishell/wav/test/S0768/BAC009S0768W0344.wav", "txt": "让自己的攻击变得更快更狠"} -{"key": "BAC009S0768W0345", "wav": "./aishell/wav/test/S0768/BAC009S0768W0345.wav", "txt": "对于这场比赛的备战"} -{"key": "BAC009S0768W0346", "wav": "./aishell/wav/test/S0768/BAC009S0768W0346.wav", "txt": "我在重点强化站立技术的同时"} -{"key": "BAC009S0768W0347", "wav": "./aishell/wav/test/S0768/BAC009S0768W0347.wav", "txt": "也对地面技术和防摔技术上做了很多针对性的训练"} -{"key": "BAC009S0768W0348", "wav": "./aishell/wav/test/S0768/BAC009S0768W0348.wav", "txt": "对于综合能力的严苛要求"} -{"key": "BAC009S0768W0349", "wav": "./aishell/wav/test/S0768/BAC009S0768W0349.wav", "txt": "是综合格斗运动的一大特色"} -{"key": "BAC009S0768W0350", "wav": "./aishell/wav/test/S0768/BAC009S0768W0350.wav", "txt": "相较于其精湛凶猛的站立技术"} -{"key": "BAC009S0768W0351", "wav": "./aishell/wav/test/S0768/BAC009S0768W0351.wav", "txt": "徐永昊的地面技术无疑是其格斗体系中的一块短板"} -{"key": "BAC009S0768W0352", "wav": "./aishell/wav/test/S0768/BAC009S0768W0352.wav", "txt": "上一场同包尔江的比赛之后"} -{"key": 
"BAC009S0768W0353", "wav": "./aishell/wav/test/S0768/BAC009S0768W0353.wav", "txt": "一个强项跟弱项同样突出的拳手"} -{"key": "BAC009S0768W0354", "wav": "./aishell/wav/test/S0768/BAC009S0768W0354.wav", "txt": "是很难成为真正的王者"} -{"key": "BAC009S0768W0355", "wav": "./aishell/wav/test/S0768/BAC009S0768W0355.wav", "txt": "我必须要变的更加全面"} -{"key": "BAC009S0768W0356", "wav": "./aishell/wav/test/S0768/BAC009S0768W0356.wav", "txt": "此次昆罗决中俄对抗赛上"} -{"key": "BAC009S0768W0357", "wav": "./aishell/wav/test/S0768/BAC009S0768W0357.wav", "txt": "对于代表中国战队略先出阵的徐永昊来讲"} -{"key": "BAC009S0768W0358", "wav": "./aishell/wav/test/S0768/BAC009S0768W0358.wav", "txt": "这无疑又是一次严峻的考验"} -{"key": "BAC009S0768W0359", "wav": "./aishell/wav/test/S0768/BAC009S0768W0359.wav", "txt": "也是其对于自身技术全面性提高程度的一次检验"} -{"key": "BAC009S0768W0360", "wav": "./aishell/wav/test/S0768/BAC009S0768W0360.wav", "txt": "我这次的对手水平很高"} -{"key": "BAC009S0768W0361", "wav": "./aishell/wav/test/S0768/BAC009S0768W0361.wav", "txt": "拳法和摔跤能力很出色"} -{"key": "BAC009S0768W0362", "wav": "./aishell/wav/test/S0768/BAC009S0768W0362.wav", "txt": "而在谈及此次应敌的策略时"} -{"key": "BAC009S0768W0363", "wav": "./aishell/wav/test/S0768/BAC009S0768W0363.wav", "txt": "我不会改变自己擅长的风格"} -{"key": "BAC009S0768W0364", "wav": "./aishell/wav/test/S0768/BAC009S0768W0364.wav", "txt": "这次比赛我会用胜利证明自己的实力"} -{"key": "BAC009S0768W0365", "wav": "./aishell/wav/test/S0768/BAC009S0768W0365.wav", "txt": "二零一五年世界田径锦标赛即将在北京拉开序幕"} -{"key": "BAC009S0768W0366", "wav": "./aishell/wav/test/S0768/BAC009S0768W0366.wav", "txt": "近日德郭队公布了参加此次世锦赛的六六人大名单"} -{"key": "BAC009S0768W0367", "wav": "./aishell/wav/test/S0768/BAC009S0768W0367.wav", "txt": "上届莫斯科世锦赛上拿到了金牌的四位选手悉数出战"} -{"key": "BAC009S0768W0368", "wav": "./aishell/wav/test/S0768/BAC009S0768W0368.wav", "txt": "主教练对于这支以老带新的队伍也充满了自信"} -{"key": "BAC009S0768W0369", "wav": "./aishell/wav/test/S0768/BAC009S0768W0369.wav", "txt": "上届莫斯科世锦赛上拿到的金牌的四位选手悉数出战"} -{"key": "BAC009S0768W0370", "wav": "./aishell/wav/test/S0768/BAC009S0768W0370.wav", "txt": "包括前秋运动员维斯多尔"} -{"key": "BAC009S0768W0371", "wav": "./aishell/wav/test/S0768/BAC009S0768W0371.wav", "txt": "撑杆跳选手拉斐尔霍尔泽德佩"} -{"key": "BAC009S0768W0372", "wav": "./aishell/wav/test/S0768/BAC009S0768W0372.wav", "txt": "哈特灵今年饱受十字韧带伤势困扰"} -{"key": "BAC009S0768W0373", "wav": "./aishell/wav/test/S0768/BAC009S0768W0373.wav", "txt": "他是否接受外卡参赛要视情况而定"} -{"key": "BAC009S0768W0374", "wav": "./aishell/wav/test/S0768/BAC009S0768W0374.wav", "txt": "德国队此次以老带新"} -{"key": "BAC009S0768W0375", "wav": "./aishell/wav/test/S0768/BAC009S0768W0375.wav", "txt": "这也是他一年四记来第一次参加世锦赛"} -{"key": "BAC009S0768W0376", "wav": "./aishell/wav/test/S0768/BAC009S0768W0376.wav", "txt": "也有经验丰富的老队员"} -{"key": "BAC009S0768W0377", "wav": "./aishell/wav/test/S0768/BAC009S0768W0377.wav", "txt": "我相信每个人都会付出一切来为团队力争最好的成绩"} -{"key": "BAC009S0768W0378", "wav": "./aishell/wav/test/S0768/BAC009S0768W0378.wav", "txt": "附二零一五田径世锦赛德国队名单"} -{"key": "BAC009S0768W0379", "wav": "./aishell/wav/test/S0768/BAC009S0768W0379.wav", "txt": "一百米塞文基尼菲尔斯"} -{"key": "BAC009S0768W0380", "wav": "./aishell/wav/test/S0768/BAC009S0768W0380.wav", "txt": "二百米罗宾埃尔瓦"} -{"key": "BAC009S0768W0381", "wav": "./aishell/wav/test/S0768/BAC009S0768W0381.wav", "txt": "八百米罗宾斯切姆贝拉"} -{"key": "BAC009S0768W0382", "wav": "./aishell/wav/test/S0768/BAC009S0768W0382.wav", "txt": "五千米理查德灵格"} -{"key": "BAC009S0768W0383", "wav": "./aishell/wav/test/S0768/BAC009S0768W0383.wav", "txt": "一万米阿尔恩加比乌斯"} -{"key": "BAC009S0768W0384", "wav": "./aishell/wav/test/S0768/BAC009S0768W0384.wav", "txt": "一百一十米栏马特里亚斯布赫雷尔"} -{"key": "BAC009S0768W0385", "wav": 
"./aishell/wav/test/S0768/BAC009S0768W0385.wav", "txt": "格里格尔特拉贝尔"} -{"key": "BAC009S0768W0386", "wav": "./aishell/wav/test/S0768/BAC009S0768W0386.wav", "txt": "马特伍兹菲兹比亚尔科"} -{"key": "BAC009S0768W0387", "wav": "./aishell/wav/test/S0768/BAC009S0768W0387.wav", "txt": "撑杆跳拉斐尔霍尔泽德斯佩"} -{"key": "BAC009S0768W0388", "wav": "./aishell/wav/test/S0768/BAC009S0768W0388.wav", "txt": "托比亚斯斯切尔巴尔斯"} -{"key": "BAC009S0768W0389", "wav": "./aishell/wav/test/S0768/BAC009S0768W0389.wav", "txt": "跳远阿莱恩卡马拉"} -{"key": "BAC009S0768W0390", "wav": "./aishell/wav/test/S0768/BAC009S0768W0390.wav", "txt": "铅球达维斯多尔"} -{"key": "BAC009S0768W0391", "wav": "./aishell/wav/test/S0768/BAC009S0768W0391.wav", "txt": "铁饼克里斯托弗哈特灵"} -{"key": "BAC009S0768W0392", "wav": "./aishell/wav/test/S0768/BAC009S0768W0392.wav", "txt": "标枪拉尔斯哈曼恩"} -{"key": "BAC009S0768W0393", "wav": "./aishell/wav/test/S0768/BAC009S0768W0393.wav", "txt": "全能里科费雷姆斯"} -{"key": "BAC009S0768W0394", "wav": "./aishell/wav/test/S0768/BAC009S0768W0394.wav", "txt": "迈克尔斯齐莱德尔"} -{"key": "BAC009S0768W0395", "wav": "./aishell/wav/test/S0768/BAC009S0768W0395.wav", "txt": "二零千米竞走尼尔斯布莱姆巴号"} -{"key": "BAC009S0768W0396", "wav": "./aishell/wav/test/S0768/BAC009S0768W0396.wav", "txt": "五零千米竞走卡尔多赫曼"} -{"key": "BAC009S0768W0397", "wav": "./aishell/wav/test/S0768/BAC009S0768W0397.wav", "txt": "四乘一百米接力罗伯特哈特灵"} -{"key": "BAC009S0768W0398", "wav": "./aishell/wav/test/S0768/BAC009S0768W0398.wav", "txt": "卢卡斯亚库比泽克"} -{"key": "BAC009S0768W0399", "wav": "./aishell/wav/test/S0768/BAC009S0768W0399.wav", "txt": "亚历山大克塞诺科夫"} -{"key": "BAC009S0768W0400", "wav": "./aishell/wav/test/S0768/BAC009S0768W0400.wav", "txt": "雅莱克斯欧帕拉迪尼门格"} -{"key": "BAC009S0768W0401", "wav": "./aishell/wav/test/S0768/BAC009S0768W0401.wav", "txt": "一百米莱贝卡哈塞"} -{"key": "BAC009S0768W0402", "wav": "./aishell/wav/test/S0768/BAC009S0768W0402.wav", "txt": "吉娜卢克肯科姆普尔"} -{"key": "BAC009S0768W0403", "wav": "./aishell/wav/test/S0768/BAC009S0768W0403.wav", "txt": "八百米克里斯蒂娜哈灵"} -{"key": "BAC009S0768W0404", "wav": "./aishell/wav/test/S0768/BAC009S0768W0404.wav", "txt": "这部命运多旭的电影"} -{"key": "BAC009S0768W0405", "wav": "./aishell/wav/test/S0768/BAC009S0768W0405.wav", "txt": "原本计划在今年六月正式开机"} -{"key": "BAC009S0768W0406", "wav": "./aishell/wav/test/S0768/BAC009S0768W0406.wav", "txt": "可现在已经全部泡汤"} -{"key": "BAC009S0768W0407", "wav": "./aishell/wav/test/S0768/BAC009S0768W0407.wav", "txt": "科林之前已经积极的支持影片拍摄"} -{"key": "BAC009S0768W0408", "wav": "./aishell/wav/test/S0768/BAC009S0768W0408.wav", "txt": "圆圆的脸蛋非常的可爱"} -{"key": "BAC009S0768W0409", "wav": "./aishell/wav/test/S0768/BAC009S0768W0409.wav", "txt": "此照片萌翻众网友"} -{"key": "BAC009S0768W0410", "wav": "./aishell/wav/test/S0768/BAC009S0768W0410.wav", "txt": "纷纷留言点赞"} -{"key": "BAC009S0768W0411", "wav": "./aishell/wav/test/S0768/BAC009S0768W0411.wav", "txt": "称哈哈哈性感的不要不要的"} -{"key": "BAC009S0768W0412", "wav": "./aishell/wav/test/S0768/BAC009S0768W0412.wav", "txt": "自小卖得一脸好萌"} -{"key": "BAC009S0768W0413", "wav": "./aishell/wav/test/S0768/BAC009S0768W0413.wav", "txt": "搜狐娱乐讯据香港媒体报道"} -{"key": "BAC009S0768W0414", "wav": "./aishell/wav/test/S0768/BAC009S0768W0414.wav", "txt": "诞下很像天华的小宝贝"} -{"key": "BAC009S0768W0415", "wav": "./aishell/wav/test/S0768/BAC009S0768W0415.wav", "txt": "一向都是在圈子中人缘甚佳的谢天华"} -{"key": "BAC009S0768W0416", "wav": "./aishell/wav/test/S0768/BAC009S0768W0416.wav", "txt": "使得宝宝刚出生就有了一大班星星级干爹干娘"} -{"key": "BAC009S0768W0417", "wav": "./aishell/wav/test/S0768/BAC009S0768W0417.wav", "txt": "搜狐娱乐讯据香港媒体报道"} -{"key": "BAC009S0768W0418", "wav": 
"./aishell/wav/test/S0768/BAC009S0768W0418.wav", "txt": "艺人谢婷婷出席活动时"} -{"key": "BAC009S0768W0419", "wav": "./aishell/wav/test/S0768/BAC009S0768W0419.wav", "txt": "被问到有传其胞兄谢霆锋将与王菲结婚"} -{"key": "BAC009S0768W0420", "wav": "./aishell/wav/test/S0768/BAC009S0768W0420.wav", "txt": "她回应是么"} -{"key": "BAC009S0768W0421", "wav": "./aishell/wav/test/S0768/BAC009S0768W0421.wav", "txt": "没有人同我讲"} -{"key": "BAC009S0768W0422", "wav": "./aishell/wav/test/S0768/BAC009S0768W0422.wav", "txt": "好多传闻我都不会特别问他"} -{"key": "BAC009S0768W0423", "wav": "./aishell/wav/test/S0768/BAC009S0768W0423.wav", "txt": "是真的话他自己会同我讲"} -{"key": "BAC009S0768W0424", "wav": "./aishell/wav/test/S0768/BAC009S0768W0424.wav", "txt": "想不想他再次成家立室"} -{"key": "BAC009S0768W0425", "wav": "./aishell/wav/test/S0768/BAC009S0768W0425.wav", "txt": "他开心就好"} -{"key": "BAC009S0768W0426", "wav": "./aishell/wav/test/S0768/BAC009S0768W0426.wav", "txt": "不过要看他心情工作同家人相处同小朋友"} -{"key": "BAC009S0768W0427", "wav": "./aishell/wav/test/S0768/BAC009S0768W0427.wav", "txt": "各样都平衡得好处理得好"} -{"key": "BAC009S0768W0428", "wav": "./aishell/wav/test/S0768/BAC009S0768W0428.wav", "txt": "结婚都只是一张纸同戒指"} -{"key": "BAC009S0768W0430", "wav": "./aishell/wav/test/S0768/BAC009S0768W0430.wav", "txt": "搜狐娱乐讯据香港媒体报道"} -{"key": "BAC009S0768W0431", "wav": "./aishell/wav/test/S0768/BAC009S0768W0431.wav", "txt": "为了给自己的爱犬盖狗舍及休息场所"} -{"key": "BAC009S0768W0432", "wav": "./aishell/wav/test/S0768/BAC009S0768W0432.wav", "txt": "他指使他人虚开发票六万馀元用公款报销"} -{"key": "BAC009S0768W0433", "wav": "./aishell/wav/test/S0768/BAC009S0768W0433.wav", "txt": "贾某被市三中院终审判处有期徒刑两年八个月"} -{"key": "BAC009S0768W0434", "wav": "./aishell/wav/test/S0768/BAC009S0768W0434.wav", "txt": "村书记被村民驾车撞倒身亡肇事者已被刑拘"} -{"key": "BAC009S0768W0435", "wav": "./aishell/wav/test/S0768/BAC009S0768W0435.wav", "txt": "京华时报讯记者迟名常鑫前天中午近一一点半"} -{"key": "BAC009S0768W0436", "wav": "./aishell/wav/test/S0768/BAC009S0768W0436.wav", "txt": "大兴区礼贤镇紫各庄村"} -{"key": "BAC009S0768W0437", "wav": "./aishell/wav/test/S0768/BAC009S0768W0437.wav", "txt": "村书记乔俊然在家门前被一辆轿车撞倒后"} -{"key": "BAC009S0768W0438", "wav": "./aishell/wav/test/S0768/BAC009S0768W0438.wav", "txt": "肇事者为紫各庄村民乔某"} -{"key": "BAC009S0768W0439", "wav": "./aishell/wav/test/S0768/BAC009S0768W0439.wav", "txt": "大兴警方以涉嫌交通肇事罪将肇事者刑事拘留"} -{"key": "BAC009S0768W0440", "wav": "./aishell/wav/test/S0768/BAC009S0768W0440.wav", "txt": "案件正在进一步调查中"} -{"key": "BAC009S0768W0441", "wav": "./aishell/wav/test/S0768/BAC009S0768W0441.wav", "txt": "村书记骗补助被判一一年受审辩称不了解政策"} -{"key": "BAC009S0768W0442", "wav": "./aishell/wav/test/S0768/BAC009S0768W0442.wav", "txt": "新京报讯记者王巍利用村里遭受泥石流灾害后"} -{"key": "BAC009S0768W0443", "wav": "./aishell/wav/test/S0768/BAC009S0768W0443.wav", "txt": "政府出钱搬迁盖房的机会"} -{"key": "BAC009S0768W0444", "wav": "./aishell/wav/test/S0768/BAC009S0768W0444.wav", "txt": "延庆县永宁镇偏坡峪村原党支部书记钱某"} -{"key": "BAC009S0768W0445", "wav": "./aishell/wav/test/S0768/BAC009S0768W0445.wav", "txt": "将不应享受政府的两个女儿作为搬迁户上报"} -{"key": "BAC009S0768W0446", "wav": "./aishell/wav/test/S0768/BAC009S0768W0446.wav", "txt": "骗取搬迁补助资金用于支付搬迁安置房费用"} -{"key": "BAC009S0768W0447", "wav": "./aishell/wav/test/S0768/BAC009S0768W0447.wav", "txt": "延庆法院一审判决认为"} -{"key": "BAC009S0768W0448", "wav": "./aishell/wav/test/S0768/BAC009S0768W0448.wav", "txt": "钱某贪污一二馀万元拆迁款"} -{"key": "BAC009S0768W0449", "wav": "./aishell/wav/test/S0768/BAC009S0768W0449.wav", "txt": "判处有期徒刑一一年"} -{"key": "BAC009S0768W0450", "wav": "./aishell/wav/test/S0768/BAC009S0768W0450.wav", "txt": "村内常有蛇出没疑从养蛇村民中爬出"} -{"key": "BAC009S0768W0451", "wav": 
"./aishell/wav/test/S0768/BAC009S0768W0451.wav", "txt": "信息时报讯记者陈子玉近日"} -{"key": "BAC009S0768W0452", "wav": "./aishell/wav/test/S0768/BAC009S0768W0452.wav", "txt": "白云区钟落潭竹一村民白云区钟落潭竹一村的村民跟记者报料"} -{"key": "BAC009S0768W0453", "wav": "./aishell/wav/test/S0768/BAC009S0768W0453.wav", "txt": "说最近他们村里经常有蛇出没"} -{"key": "BAC009S0768W0454", "wav": "./aishell/wav/test/S0768/BAC009S0768W0454.wav", "txt": "甚至还会爬到村民家中"} -{"key": "BAC009S0768W0455", "wav": "./aishell/wav/test/S0768/BAC009S0768W0455.wav", "txt": "他们怀疑是有人在村里养蛇所致"} -{"key": "BAC009S0768W0456", "wav": "./aishell/wav/test/S0768/BAC009S0768W0456.wav", "txt": "蛇主刘先生表示以后将不在家里养蛇"} -{"key": "BAC009S0768W0457", "wav": "./aishell/wav/test/S0768/BAC009S0768W0457.wav", "txt": "村医研发神奇止痛药网销全全国获刑三年"} -{"key": "BAC009S0768W0458", "wav": "./aishell/wav/test/S0768/BAC009S0768W0458.wav", "txt": "村卫生室医师兼职黑b超记者暗访结束被跟踪"} -{"key": "BAC009S0768W0459", "wav": "./aishell/wav/test/S0768/BAC009S0768W0459.wav", "txt": "明着是大兴区黄村镇狼垡三村的医师"} -{"key": "BAC009S0768W0460", "wav": "./aishell/wav/test/S0768/BAC009S0768W0460.wav", "txt": "暗地里却发布小广告揽客"} -{"key": "BAC009S0768W0461", "wav": "./aishell/wav/test/S0768/BAC009S0768W0461.wav", "txt": "村妇为缓解丈夫病痛种罂丽当药用被判刑六个月"} -{"key": "BAC009S0768W0462", "wav": "./aishell/wav/test/S0768/BAC009S0768W0462.wav", "txt": "曲靖一村妇竟在自家菜地内非法种植罂丽一零四二株"} -{"key": "BAC009S0768W0463", "wav": "./aishell/wav/test/S0768/BAC009S0768W0463.wav", "txt": "用罂丽熬汤为丈夫止痛"} -{"key": "BAC009S0768W0464", "wav": "./aishell/wav/test/S0768/BAC009S0768W0464.wav", "txt": "该村妇因犯非法种植毒品原植物罪"} -{"key": "BAC009S0768W0465", "wav": "./aishell/wav/test/S0768/BAC009S0768W0465.wav", "txt": "被麒麟区法院判处有期徒刑六个月"} -{"key": "BAC009S0768W0466", "wav": "./aishell/wav/test/S0768/BAC009S0768W0466.wav", "txt": "并处罚金人民币一千元"} -{"key": "BAC009S0768W0467", "wav": "./aishell/wav/test/S0768/BAC009S0768W0467.wav", "txt": "村妇将一零万元现金埋地底四年多已腐烂成碎块"} -{"key": "BAC009S0768W0468", "wav": "./aishell/wav/test/S0768/BAC009S0768W0468.wav", "txt": "村委会主任因经济问题两次被免第三次当选惹争议"} -{"key": "BAC009S0768W0469", "wav": "./aishell/wav/test/S0768/BAC009S0768W0469.wav", "txt": "张绵跃当选村委会主任"} -{"key": "BAC009S0768W0470", "wav": "./aishell/wav/test/S0768/BAC009S0768W0470.wav", "txt": "村委会在农田搭起违法建筑每年收租金一四万元"} -{"key": "BAC009S0768W0471", "wav": "./aishell/wav/test/S0768/BAC009S0768W0471.wav", "txt": "奉化江口儒江村村委会却带头盖起了违法建筑"} -{"key": "BAC009S0768W0472", "wav": "./aishell/wav/test/S0768/BAC009S0768W0472.wav", "txt": "记者接到这样的报料"} -{"key": "BAC009S0768W0473", "wav": "./aishell/wav/test/S0768/BAC009S0768W0473.wav", "txt": "三改一拆可以说是一条红线"} -{"key": "BAC009S0768W0474", "wav": "./aishell/wav/test/S0768/BAC009S0768W0474.wav", "txt": "村委会竟然会顶风作案"} -{"key": "BAC009S0768W0475", "wav": "./aishell/wav/test/S0768/BAC009S0768W0475.wav", "txt": "记者和宁波市三三改一拆办工作人员前往现场"} -{"key": "BAC009S0768W0476", "wav": "./aishell/wav/test/S0768/BAC009S0768W0476.wav", "txt": "这听起来多少有些匪夷所思的违建竟然是真的"} -{"key": "BAC009S0768W0477", "wav": "./aishell/wav/test/S0768/BAC009S0768W0477.wav", "txt": "村官一顿工作餐吃二六个菜挂钩蹲点领导被诫勉谈话"} -{"key": "BAC009S0768W0478", "wav": "./aishell/wav/test/S0768/BAC009S0768W0478.wav", "txt": "一顿工作餐竟上二六个菜"} -{"key": "BAC009S0768W0479", "wav": "./aishell/wav/test/S0768/BAC009S0768W0479.wav", "txt": "且逢餐必有烟酒从园区领导到村组干部"} -{"key": "BAC009S0768W0480", "wav": "./aishell/wav/test/S0768/BAC009S0768W0480.wav", "txt": "在严查四风的高压态势下"} -{"key": "BAC009S0768W0481", "wav": "./aishell/wav/test/S0768/BAC009S0768W0481.wav", "txt": "以公务招待为名大肆公款吃喝"} -{"key": "BAC009S0768W0482", "wav": "./aishell/wav/test/S0768/BAC009S0768W0482.wav", "txt": "村官借四零零多户居民三亿一携款失联"} -{"key": 
"BAC009S0768W0483", "wav": "./aishell/wav/test/S0768/BAC009S0768W0483.wav", "txt": "村官接连顶风违纪其子领证为热闹摆酒六七桌"} -{"key": "BAC009S0768W0484", "wav": "./aishell/wav/test/S0768/BAC009S0768W0484.wav", "txt": "编者按为深入贯彻落实中央八项规定精神"} -{"key": "BAC009S0768W0485", "wav": "./aishell/wav/test/S0768/BAC009S0768W0485.wav", "txt": "按照中央纪委宣传部的统一部署"} -{"key": "BAC009S0768W0486", "wav": "./aishell/wav/test/S0768/BAC009S0768W0486.wav", "txt": "陆续派记者深入采访"} -{"key": "BAC009S0768W0487", "wav": "./aishell/wav/test/S0768/BAC009S0768W0487.wav", "txt": "进一步加大舆论监督力度"} -{"key": "BAC009S0768W0488", "wav": "./aishell/wav/test/S0768/BAC009S0768W0488.wav", "txt": "通报一个教育一批震灭一片"} -{"key": "BAC009S0768W0489", "wav": "./aishell/wav/test/S0768/BAC009S0768W0489.wav", "txt": "释放出中央执纪必严紧抓不放的强烈信号"} -{"key": "BAC009S0768W0490", "wav": "./aishell/wav/test/S0768/BAC009S0768W0490.wav", "txt": "广大领导干部要以引以为戒守住底线"} -{"key": "BAC009S0768W0491", "wav": "./aishell/wav/test/S0768/BAC009S0768W0491.wav", "txt": "坚决不在四风问题上犯错犯错误跌跟头"} -{"key": "BAC009S0768W0492", "wav": "./aishell/wav/test/S0768/BAC009S0768W0492.wav", "txt": "村官涉不雅视频被免职饭桌上摸女子胸部臀部等"} -{"key": "BAC009S0768W0493", "wav": "./aishell/wav/test/S0768/BAC009S0768W0493.wav", "txt": "村官私刻公章侵占二八万粮补派人殴打上访村民"} -{"key": "BAC009S0768W0494", "wav": "./aishell/wav/test/S0768/BAC009S0768W0494.wav", "txt": "党和人民不会管到我身上来"} -{"key": "BAC009S0768W0495", "wav": "./aishell/wav/test/S0768/BAC009S0768W0495.wav", "txt": "侵吞征地种粮补偿款"} -{"key": "BAC009S0769W0121", "wav": "./aishell/wav/test/S0769/BAC009S0769W0121.wav", "txt": "该地块即为通州新城核心地标彩虹之门用地"} -{"key": "BAC009S0769W0122", "wav": "./aishell/wav/test/S0769/BAC009S0769W0122.wav", "txt": "北京通州新城投资公司网站显示"} -{"key": "BAC009S0769W0123", "wav": "./aishell/wav/test/S0769/BAC009S0769W0123.wav", "txt": "彩虹之门建筑净高三十米"} -{"key": "BAC009S0769W0124", "wav": "./aishell/wav/test/S0769/BAC009S0769W0124.wav", "txt": "为双拱形非中心对称建筑"} -{"key": "BAC009S0769W0125", "wav": "./aishell/wav/test/S0769/BAC009S0769W0125.wav", "txt": "新京报讯记者张旭报道"} -{"key": "BAC009S0769W0126", "wav": "./aishell/wav/test/S0769/BAC009S0769W0126.wav", "txt": "北京去年土地出让落下大幕"} -{"key": "BAC009S0769W0127", "wav": "./aishell/wav/test/S0769/BAC009S0769W0127.wav", "txt": "在丰台樊家村一宗商业用地底价成交后"} -{"key": "BAC009S0769W0128", "wav": "./aishell/wav/test/S0769/BAC009S0769W0128.wav", "txt": "北京今年的土地出让金锁定在两千亿元"} -{"key": "BAC009S0769W0129", "wav": "./aishell/wav/test/S0769/BAC009S0769W0129.wav", "txt": "同比去年增长五成"} -{"key": "BAC009S0769W0130", "wav": "./aishell/wav/test/S0769/BAC009S0769W0130.wav", "txt": "市政府决定今年将全面加快棚户区改造步伐"} -{"key": "BAC009S0769W0131", "wav": "./aishell/wav/test/S0769/BAC009S0769W0131.wav", "txt": "确保完成六万户搬迁改造任务"} -{"key": "BAC009S0769W0132", "wav": "./aishell/wav/test/S0769/BAC009S0769W0132.wav", "txt": "推进上百个棚改项目全面启动实施"} -{"key": "BAC009S0769W0133", "wav": "./aishell/wav/test/S0769/BAC009S0769W0133.wav", "txt": "今年北京要建设筹集各类保障房十万套"} -{"key": "BAC009S0769W0134", "wav": "./aishell/wav/test/S0769/BAC009S0769W0134.wav", "txt": "各区县力争完成十五万套开工任务竣工八万套"} -{"key": "BAC009S0769W0135", "wav": "./aishell/wav/test/S0769/BAC009S0769W0135.wav", "txt": "开工建设公租房不低于三万套"} -{"key": "BAC009S0769W0136", "wav": "./aishell/wav/test/S0769/BAC009S0769W0136.wav", "txt": "为了确保保障房住宅的优良品质"} -{"key": "BAC009S0769W0137", "wav": "./aishell/wav/test/S0769/BAC009S0769W0137.wav", "txt": "北京将继续改进住宅产业化推进方式"} -{"key": "BAC009S0769W0138", "wav": "./aishell/wav/test/S0769/BAC009S0769W0138.wav", "txt": "推行标准化装配式装修"} -{"key": "BAC009S0769W0139", "wav": "./aishell/wav/test/S0769/BAC009S0769W0139.wav", "txt": "前年至今年期间"} -{"key": "BAC009S0769W0140", 
"wav": "./aishell/wav/test/S0769/BAC009S0769W0140.wav", "txt": "北京要筹集建设各类保障性住房一百万套"} -{"key": "BAC009S0769W0141", "wav": "./aishell/wav/test/S0769/BAC009S0769W0141.wav", "txt": "为改善中低收入家庭住房条件"} -{"key": "BAC009S0769W0142", "wav": "./aishell/wav/test/S0769/BAC009S0769W0142.wav", "txt": "今年北京除了建设保障性住房外"} -{"key": "BAC009S0769W0143", "wav": "./aishell/wav/test/S0769/BAC009S0769W0143.wav", "txt": "还加大棚户区的改造任务"} -{"key": "BAC009S0769W0144", "wav": "./aishell/wav/test/S0769/BAC009S0769W0144.wav", "txt": "各区县各单位要按照下达的任务指标"} -{"key": "BAC009S0769W0145", "wav": "./aishell/wav/test/S0769/BAC009S0769W0145.wav", "txt": "确保完成今年六万户棚户区改造任务"} -{"key": "BAC009S0769W0146", "wav": "./aishell/wav/test/S0769/BAC009S0769W0146.wav", "txt": "今年是十二五规划的收官之年"} -{"key": "BAC009S0769W0147", "wav": "./aishell/wav/test/S0769/BAC009S0769W0147.wav", "txt": "各区县各单位要加强协作配合"} -{"key": "BAC009S0769W0148", "wav": "./aishell/wav/test/S0769/BAC009S0769W0148.wav", "txt": "要重点加大政策支持"} -{"key": "BAC009S0769W0149", "wav": "./aishell/wav/test/S0769/BAC009S0769W0149.wav", "txt": "破解棚户区改造征收瓶颈问题"} -{"key": "BAC009S0769W0150", "wav": "./aishell/wav/test/S0769/BAC009S0769W0150.wav", "txt": "各相关部门要主动服务区县服务各参建单位"} -{"key": "BAC009S0769W0151", "wav": "./aishell/wav/test/S0769/BAC009S0769W0151.wav", "txt": "对于今后棚户区改造中遇到的问题"} -{"key": "BAC009S0769W0152", "wav": "./aishell/wav/test/S0769/BAC009S0769W0152.wav", "txt": "各项目标任务已分解至各区县"} -{"key": "BAC009S0769W0153", "wav": "./aishell/wav/test/S0769/BAC009S0769W0153.wav", "txt": "今年北京将继续加大集体土地建设公租房试点力度"} -{"key": "BAC009S0769W0154", "wav": "./aishell/wav/test/S0769/BAC009S0769W0154.wav", "txt": "加快公租房的配租进度"} -{"key": "BAC009S0769W0155", "wav": "./aishell/wav/test/S0769/BAC009S0769W0155.wav", "txt": "力争配租三万户以上"} -{"key": "BAC009S0769W0156", "wav": "./aishell/wav/test/S0769/BAC009S0769W0156.wav", "txt": "今年北京还将加大社会单位泵租力度"} -{"key": "BAC009S0769W0157", "wav": "./aishell/wav/test/S0769/BAC009S0769W0157.wav", "txt": "市政府决定今年将全面加快棚户区改造步伐"} -{"key": "BAC009S0769W0158", "wav": "./aishell/wav/test/S0769/BAC009S0769W0158.wav", "txt": "确保完成六万户搬迁改造任务"} -{"key": "BAC009S0769W0159", "wav": "./aishell/wav/test/S0769/BAC009S0769W0159.wav", "txt": "今年土地收入预计近四万亿元"} -{"key": "BAC009S0769W0160", "wav": "./aishell/wav/test/S0769/BAC009S0769W0160.wav", "txt": "今年国有土地使用权出让收入四千亿元"} -{"key": "BAC009S0769W0161", "wav": "./aishell/wav/test/S0769/BAC009S0769W0161.wav", "txt": "继前年和去年连续两年突破四万亿元后"} -{"key": "BAC009S0769W0162", "wav": "./aishell/wav/test/S0769/BAC009S0769W0162.wav", "txt": "今年土地收入再维持稳定"} -{"key": "BAC009S0769W0163", "wav": "./aishell/wav/test/S0769/BAC009S0769W0163.wav", "txt": "相关公司股票走势"} -{"key": "BAC009S0769W0164", "wav": "./aishell/wav/test/S0769/BAC009S0769W0164.wav", "txt": "房地产市场竞争加大"} -{"key": "BAC009S0769W0165", "wav": "./aishell/wav/test/S0769/BAC009S0769W0165.wav", "txt": "房企应走差异化路线"} -{"key": "BAC009S0769W0166", "wav": "./aishell/wav/test/S0769/BAC009S0769W0166.wav", "txt": "还有多家机构分析认为"} -{"key": "BAC009S0769W0167", "wav": "./aishell/wav/test/S0769/BAC009S0769W0167.wav", "txt": "政府对今年的土地出让金收入预期下降"} -{"key": "BAC009S0769W0168", "wav": "./aishell/wav/test/S0769/BAC009S0769W0168.wav", "txt": "这暗示房地产的库存大"} -{"key": "BAC009S0769W0169", "wav": "./aishell/wav/test/S0769/BAC009S0769W0169.wav", "txt": "这直接影响到房地产的买地情况"} -{"key": "BAC009S0769W0170", "wav": "./aishell/wav/test/S0769/BAC009S0769W0170.wav", "txt": "相应的房价涨跌"} -{"key": "BAC009S0769W0171", "wav": "./aishell/wav/test/S0769/BAC009S0769W0171.wav", "txt": "如今房地产市场已经供需相对平衡"} -{"key": "BAC009S0769W0172", "wav": 
"./aishell/wav/test/S0769/BAC009S0769W0172.wav", "txt": "甚至开始进入了供过于求的局面"} -{"key": "BAC009S0769W0173", "wav": "./aishell/wav/test/S0769/BAC009S0769W0173.wav", "txt": "但去年住宅土地成交建筑面积仅十二亿平米"} -{"key": "BAC009S0769W0174", "wav": "./aishell/wav/test/S0769/BAC009S0769W0174.wav", "txt": "远低于去年和前年平均的二十亿平米水平"} -{"key": "BAC009S0769W0175", "wav": "./aishell/wav/test/S0769/BAC009S0769W0175.wav", "txt": "除了开发商的买地行为减少"} -{"key": "BAC009S0769W0176", "wav": "./aishell/wav/test/S0769/BAC009S0769W0176.wav", "txt": "全国房地产库存正在堆积"} -{"key": "BAC009S0769W0177", "wav": "./aishell/wav/test/S0769/BAC009S0769W0177.wav", "txt": "而出让的住宅建筑面积总和至少为一百亿平米"} -{"key": "BAC009S0769W0178", "wav": "./aishell/wav/test/S0769/BAC009S0769W0178.wav", "txt": "约可供销售四年"} -{"key": "BAC009S0769W0179", "wav": "./aishell/wav/test/S0769/BAC009S0769W0179.wav", "txt": "开发商整体在手土地充足"} -{"key": "BAC009S0769W0180", "wav": "./aishell/wav/test/S0769/BAC009S0769W0180.wav", "txt": "瑞银分析师丁晓预测"} -{"key": "BAC009S0769W0181", "wav": "./aishell/wav/test/S0769/BAC009S0769W0181.wav", "txt": "预计明年全国土地市场仍难复苏"} -{"key": "BAC009S0769W0182", "wav": "./aishell/wav/test/S0769/BAC009S0769W0182.wav", "txt": "各路开发商一致看好一线城市房地产市场"} -{"key": "BAC009S0769W0183", "wav": "./aishell/wav/test/S0769/BAC009S0769W0183.wav", "txt": "从一月的一线城市的土地成交看"} -{"key": "BAC009S0769W0184", "wav": "./aishell/wav/test/S0769/BAC009S0769W0184.wav", "txt": "溢价率楼面价均处于高位"} -{"key": "BAC009S0769W0185", "wav": "./aishell/wav/test/S0769/BAC009S0769W0185.wav", "txt": "预计后市一二线城市拿地竞争将更加剧烈"} -{"key": "BAC009S0769W0186", "wav": "./aishell/wav/test/S0769/BAC009S0769W0186.wav", "txt": "中原地产首席市场分析师张大伟告诉南都记者"} -{"key": "BAC009S0769W0187", "wav": "./aishell/wav/test/S0769/BAC009S0769W0187.wav", "txt": "并进而对城投债券进行唱空或做空"} -{"key": "BAC009S0769W0188", "wav": "./aishell/wav/test/S0769/BAC009S0769W0188.wav", "txt": "最近企业债券特别是城投债券的发行难度加大"} -{"key": "BAC009S0769W0189", "wav": "./aishell/wav/test/S0769/BAC009S0769W0189.wav", "txt": "发行利率也有较大幅度上升"} -{"key": "BAC009S0769W0190", "wav": "./aishell/wav/test/S0769/BAC009S0769W0190.wav", "txt": "人民银行多次提高存款准备金率和存贷款基准利率"} -{"key": "BAC009S0769W0191", "wav": "./aishell/wav/test/S0769/BAC009S0769W0191.wav", "txt": "不仅是城投债券发行利率"} -{"key": "BAC009S0769W0192", "wav": "./aishell/wav/test/S0769/BAC009S0769W0192.wav", "txt": "债券市场所有品种发行利率整体上都表现出向上的走向"} -{"key": "BAC009S0769W0193", "wav": "./aishell/wav/test/S0769/BAC009S0769W0193.wav", "txt": "导致城投债券发行产生较高的风险溢价"} -{"key": "BAC009S0769W0194", "wav": "./aishell/wav/test/S0769/BAC009S0769W0194.wav", "txt": "城投债券收益率上升"} -{"key": "BAC009S0769W0195", "wav": "./aishell/wav/test/S0769/BAC009S0769W0195.wav", "txt": "对债券投资人来说不是坏事"} -{"key": "BAC009S0769W0196", "wav": "./aishell/wav/test/S0769/BAC009S0769W0196.wav", "txt": "有利于提升城投债券的资产配置价值"} -{"key": "BAC009S0769W0197", "wav": "./aishell/wav/test/S0769/BAC009S0769W0197.wav", "txt": "则需要在发债时机和发债规模上进行合理的把握"} -{"key": "BAC009S0769W0198", "wav": "./aishell/wav/test/S0769/BAC009S0769W0198.wav", "txt": "我个人不赞成这一判断"} -{"key": "BAC009S0769W0199", "wav": "./aishell/wav/test/S0769/BAC009S0769W0199.wav", "txt": "债券发行人是优质的"} -{"key": "BAC009S0769W0200", "wav": "./aishell/wav/test/S0769/BAC009S0769W0200.wav", "txt": "还本付息也是正常的"} -{"key": "BAC009S0769W0201", "wav": "./aishell/wav/test/S0769/BAC009S0769W0201.wav", "txt": "投资者对城投债券风险表现出的恐慌"} -{"key": "BAC009S0769W0202", "wav": "./aishell/wav/test/S0769/BAC009S0769W0202.wav", "txt": "加强城投债监管完善制度建设"} -{"key": "BAC009S0769W0203", "wav": "./aishell/wav/test/S0769/BAC009S0769W0203.wav", "txt": "有的媒体甚至用井喷来描述"} -{"key": "BAC009S0769W0204", "wav": 
"./aishell/wav/test/S0769/BAC009S0769W0204.wav", "txt": "您如何看待城投债券这几年的发展和作用"} -{"key": "BAC009S0769W0205", "wav": "./aishell/wav/test/S0769/BAC009S0769W0205.wav", "txt": "这几年城投债券发行数量的确有所增加"} -{"key": "BAC009S0769W0206", "wav": "./aishell/wav/test/S0769/BAC009S0769W0206.wav", "txt": "地方投融资平台公司通过发行债券进行融资"} -{"key": "BAC009S0769W0207", "wav": "./aishell/wav/test/S0769/BAC009S0769W0207.wav", "txt": "符合提高直接融资比重的要求"} -{"key": "BAC009S0769W0208", "wav": "./aishell/wav/test/S0769/BAC009S0769W0208.wav", "txt": "城投债券也适应了发行人和投资人的需要"} -{"key": "BAC009S0769W0209", "wav": "./aishell/wav/test/S0769/BAC009S0769W0209.wav", "txt": "这是这几年城投债券发行规模不断扩大的主要原因"} -{"key": "BAC009S0769W0210", "wav": "./aishell/wav/test/S0769/BAC009S0769W0210.wav", "txt": "我委核准发行的企业债券累计为七千亿元"} -{"key": "BAC009S0769W0211", "wav": "./aishell/wav/test/S0769/BAC009S0769W0211.wav", "txt": "其中城投债券共发行七千亿元"} -{"key": "BAC009S0769W0212", "wav": "./aishell/wav/test/S0769/BAC009S0769W0212.wav", "txt": "占比只有百分之七"} -{"key": "BAC009S0769W0213", "wav": "./aishell/wav/test/S0769/BAC009S0769W0213.wav", "txt": "城投债券的发行有比较严格的条件"} -{"key": "BAC009S0769W0214", "wav": "./aishell/wav/test/S0769/BAC009S0769W0214.wav", "txt": "从已发行的城投债券用途看"} -{"key": "BAC009S0769W0215", "wav": "./aishell/wav/test/S0769/BAC009S0769W0215.wav", "txt": "保障房建设和棚户区改造"} -{"key": "BAC009S0769W0216", "wav": "./aishell/wav/test/S0769/BAC009S0769W0216.wav", "txt": "城市文化和体育设施"} -{"key": "BAC009S0769W0217", "wav": "./aishell/wav/test/S0769/BAC009S0769W0217.wav", "txt": "地震灾后重建等领域"} -{"key": "BAC009S0769W0218", "wav": "./aishell/wav/test/S0769/BAC009S0769W0218.wav", "txt": "都起到了积极的作用"} -{"key": "BAC009S0769W0219", "wav": "./aishell/wav/test/S0769/BAC009S0769W0219.wav", "txt": "随着我国资本市场的进一步发展"} -{"key": "BAC009S0769W0220", "wav": "./aishell/wav/test/S0769/BAC009S0769W0220.wav", "txt": "城投债券作为中国债券市场的准市政债"} -{"key": "BAC009S0769W0221", "wav": "./aishell/wav/test/S0769/BAC009S0769W0221.wav", "txt": "发行规模还会稳步扩大"} -{"key": "BAC009S0769W0222", "wav": "./aishell/wav/test/S0769/BAC009S0769W0222.wav", "txt": "中国证券报面对市场对城投债券风险的担忧"} -{"key": "BAC009S0769W0223", "wav": "./aishell/wav/test/S0769/BAC009S0769W0223.wav", "txt": "是如何更好地防范城投债券可能出现的风险的"} -{"key": "BAC009S0769W0224", "wav": "./aishell/wav/test/S0769/BAC009S0769W0224.wav", "txt": "虽然已发行的城投债券的还本付息都是正常的"} -{"key": "BAC009S0769W0225", "wav": "./aishell/wav/test/S0769/BAC009S0769W0225.wav", "txt": "城投债作为一个信用产品"} -{"key": "BAC009S0769W0226", "wav": "./aishell/wav/test/S0769/BAC009S0769W0226.wav", "txt": "不可能是完全无风险的"} -{"key": "BAC009S0769W0227", "wav": "./aishell/wav/test/S0769/BAC009S0769W0227.wav", "txt": "我看了以后很受震动"} -{"key": "BAC009S0769W0228", "wav": "./aishell/wav/test/S0769/BAC009S0769W0228.wav", "txt": "虽然报道内容并没有具体的城投债券还本付息违约案"} -{"key": "BAC009S0769W0229", "wav": "./aishell/wav/test/S0769/BAC009S0769W0229.wav", "txt": "但却提醒了我们要更加关注城投债券可能出现的风险"} -{"key": "BAC009S0769W0230", "wav": "./aishell/wav/test/S0769/BAC009S0769W0230.wav", "txt": "并采取措施切实保护债券投资人的合法权益"} -{"key": "BAC009S0769W0231", "wav": "./aishell/wav/test/S0769/BAC009S0769W0231.wav", "txt": "作为城投债券发行监管部门"} -{"key": "BAC009S0769W0232", "wav": "./aishell/wav/test/S0769/BAC009S0769W0232.wav", "txt": "我们对城投债券发行人的审核一直是比较严格的"} -{"key": "BAC009S0769W0233", "wav": "./aishell/wav/test/S0769/BAC009S0769W0233.wav", "txt": "地方投融资平台公司申请发行债券"} -{"key": "BAC009S0769W0234", "wav": "./aishell/wav/test/S0769/BAC009S0769W0234.wav", "txt": "必须符合一些基本的条件企业必须连续三年盈利"} -{"key": "BAC009S0769W0235", "wav": "./aishell/wav/test/S0769/BAC009S0769W0235.wav", "txt": "所投项目必须经过合规性审查"} -{"key": "BAC009S0769W0236", 
"wav": "./aishell/wav/test/S0769/BAC009S0769W0236.wav", "txt": "我们还控制了投融资平台公司发债的范围"} -{"key": "BAC009S0769W0237", "wav": "./aishell/wav/test/S0769/BAC009S0769W0237.wav", "txt": "才能申请发行城投债券"} -{"key": "BAC009S0769W0238", "wav": "./aishell/wav/test/S0769/BAC009S0769W0238.wav", "txt": "就不得再通过发行城投债券新增政府性债务"} -{"key": "BAC009S0769W0239", "wav": "./aishell/wav/test/S0769/BAC009S0769W0239.wav", "txt": "正是有了这样一些严格的规定"} -{"key": "BAC009S0769W0240", "wav": "./aishell/wav/test/S0769/BAC009S0769W0240.wav", "txt": "使得很多投融资平台公司"} -{"key": "BAC009S0769W0241", "wav": "./aishell/wav/test/S0769/BAC009S0769W0241.wav", "txt": "难以满足发行城投债券的资格和条件"} -{"key": "BAC009S0769W0242", "wav": "./aishell/wav/test/S0769/BAC009S0769W0242.wav", "txt": "这在相当程度上控制了城投债劵的发行规模"} -{"key": "BAC009S0769W0243", "wav": "./aishell/wav/test/S0769/BAC009S0769W0243.wav", "txt": "也降低了城投债劵的风险"} -{"key": "BAC009S0769W0244", "wav": "./aishell/wav/test/S0769/BAC009S0769W0244.wav", "txt": "为了控制地方政府本届发债下届还钱的道德风险"} -{"key": "BAC009S0769W0245", "wav": "./aishell/wav/test/S0769/BAC009S0769W0245.wav", "txt": "我们还安排了专门的偿债均摊机制"} -{"key": "BAC009S0769W0246", "wav": "./aishell/wav/test/S0769/BAC009S0769W0246.wav", "txt": "也就是将债劵还本压力在债劵存续期内进行合理分摊"} -{"key": "BAC009S0769W0247", "wav": "./aishell/wav/test/S0769/BAC009S0769W0247.wav", "txt": "避免在最后一年累积过大的还本压力和风险"} -{"key": "BAC009S0769W0248", "wav": "./aishell/wav/test/S0769/BAC009S0769W0248.wav", "txt": "有媒体报道了云投集团等发债企业转移核心资产"} -{"key": "BAC009S0769W0249", "wav": "./aishell/wav/test/S0769/BAC009S0769W0249.wav", "txt": "损害债劵持有人利益的事件"} -{"key": "BAC009S0769W0250", "wav": "./aishell/wav/test/S0769/BAC009S0769W0250.wav", "txt": "并对债券市场形成了不小的冲击"} -{"key": "BAC009S0769W0251", "wav": "./aishell/wav/test/S0769/BAC009S0769W0251.wav", "txt": "你们如何考虑防止这类事件再次发生"} -{"key": "BAC009S0769W0252", "wav": "./aishell/wav/test/S0769/BAC009S0769W0252.wav", "txt": "更好地保护债券投资人的利益"} -{"key": "BAC009S0769W0253", "wav": "./aishell/wav/test/S0769/BAC009S0769W0253.wav", "txt": "据新华社电有病当然要吃药"} -{"key": "BAC009S0769W0254", "wav": "./aishell/wav/test/S0769/BAC009S0769W0254.wav", "txt": "但吃下去的药能否真正作用到病灶就很难说了"} -{"key": "BAC009S0769W0255", "wav": "./aishell/wav/test/S0769/BAC009S0769W0255.wav", "txt": "通过它能够实现药物的精准投送"} -{"key": "BAC009S0769W0256", "wav": "./aishell/wav/test/S0769/BAC009S0769W0256.wav", "txt": "他们开发出一种只有二十微米长的机器人"} -{"key": "BAC009S0769W0257", "wav": "./aishell/wav/test/S0769/BAC009S0769W0257.wav", "txt": "这个机器人由高分子材料制成"} -{"key": "BAC009S0769W0258", "wav": "./aishell/wav/test/S0769/BAC009S0769W0258.wav", "txt": "当它进入动物胃部时"} -{"key": "BAC009S0769W0259", "wav": "./aishell/wav/test/S0769/BAC009S0769W0259.wav", "txt": "锌就会与胃酸发生反应"} -{"key": "BAC009S0769W0260", "wav": "./aishell/wav/test/S0769/BAC009S0769W0260.wav", "txt": "从而推动机器人在胃部前行"} -{"key": "BAC009S0769W0261", "wav": "./aishell/wav/test/S0769/BAC009S0769W0261.wav", "txt": "这种技术很适合用来治疗胃溃疡等胃部疾病"} -{"key": "BAC009S0769W0262", "wav": "./aishell/wav/test/S0769/BAC009S0769W0262.wav", "txt": "高效精准投送药物不仅可降低用药量"} -{"key": "BAC009S0769W0263", "wav": "./aishell/wav/test/S0769/BAC009S0769W0263.wav", "txt": "这项技术离临床应用还有一段距离"} -{"key": "BAC009S0769W0264", "wav": "./aishell/wav/test/S0769/BAC009S0769W0264.wav", "txt": "据新华社电有病当然要吃药"} -{"key": "BAC009S0769W0265", "wav": "./aishell/wav/test/S0769/BAC009S0769W0265.wav", "txt": "但吃下去的药能否真正作用到病灶就很难说了"} -{"key": "BAC009S0769W0266", "wav": "./aishell/wav/test/S0769/BAC009S0769W0266.wav", "txt": "美国政府部门当地时间周四警示称"} -{"key": "BAC009S0769W0267", "wav": "./aishell/wav/test/S0769/BAC009S0769W0267.wav", "txt": "苹果设备的用户应当注意"} -{"key": 
"BAC009S0769W0269", "wav": "./aishell/wav/test/S0769/BAC009S0769W0269.wav", "txt": "不要在弹出窗口点击安装打开应用时"} -{"key": "BAC009S0769W0271", "wav": "./aishell/wav/test/S0769/BAC009S0769W0271.wav", "txt": "苹果公司也在第一时间发布官方声明"} -{"key": "BAC009S0769W0273", "wav": "./aishell/wav/test/S0769/BAC009S0769W0273.wav", "txt": "还没有任何一个用户真正遭受过此攻击"} -{"key": "BAC009S0769W0274", "wav": "./aishell/wav/test/S0769/BAC009S0769W0274.wav", "txt": "我们鼓励用户只从可信任的渠道"} -{"key": "BAC009S0769W0276", "wav": "./aishell/wav/test/S0769/BAC009S0769W0276.wav", "txt": "并注意下载过程中的任何警告"} -{"key": "BAC009S0769W0277", "wav": "./aishell/wav/test/S0769/BAC009S0769W0277.wav", "txt": "企业用户在安装定制应用程序时"} -{"key": "BAC009S0769W0278", "wav": "./aishell/wav/test/S0769/BAC009S0769W0278.wav", "txt": "须从他们公司的安全网站上进行下载并安装"} -{"key": "BAC009S0769W0279", "wav": "./aishell/wav/test/S0769/BAC009S0769W0279.wav", "txt": "美国政府部门当地时间周四警示称"} -{"key": "BAC009S0769W0280", "wav": "./aishell/wav/test/S0769/BAC009S0769W0280.wav", "txt": "苹果设备的用户应当注意"} -{"key": "BAC009S0769W0283", "wav": "./aishell/wav/test/S0769/BAC009S0769W0283.wav", "txt": "据新华社电印度官员透露"} -{"key": "BAC009S0769W0284", "wav": "./aishell/wav/test/S0769/BAC009S0769W0284.wav", "txt": "美国将向印度转让两项军事技术"} -{"key": "BAC009S0769W0285", "wav": "./aishell/wav/test/S0769/BAC009S0769W0285.wav", "txt": "其中包括美国大鸦无人机今后将由印度工厂制造"} -{"key": "BAC009S0769W0286", "wav": "./aishell/wav/test/S0769/BAC009S0769W0286.wav", "txt": "印度斯坦时报二十四日援引消息人士的话报道"} -{"key": "BAC009S0769W0287", "wav": "./aishell/wav/test/S0769/BAC009S0769W0287.wav", "txt": "大鸦无人机由美国航空环境公司研制"} -{"key": "BAC009S0769W0288", "wav": "./aishell/wav/test/S0769/BAC009S0769W0288.wav", "txt": "由士兵直接用手投掷起飞"} -{"key": "BAC009S0769W0289", "wav": "./aishell/wav/test/S0769/BAC009S0769W0289.wav", "txt": "二零零三年以来在阿富汗得到了广泛应用"} -{"key": "BAC009S0769W0290", "wav": "./aishell/wav/test/S0769/BAC009S0769W0290.wav", "txt": "预计从二零一五年下半年开始"} -{"key": "BAC009S0769W0291", "wav": "./aishell/wav/test/S0769/BAC009S0769W0291.wav", "txt": "美国将不再生产大鸦无人机"} -{"key": "BAC009S0769W0292", "wav": "./aishell/wav/test/S0769/BAC009S0769W0292.wav", "txt": "改由设在印度本加卢鲁的一家美印合资公司生产"} -{"key": "BAC009S0769W0293", "wav": "./aishell/wav/test/S0769/BAC009S0769W0293.wav", "txt": "一名印度高级官员透露"} -{"key": "BAC009S0769W0294", "wav": "./aishell/wav/test/S0769/BAC009S0769W0294.wav", "txt": "眼下已有七个国家打算购买大鸦无人机"} -{"key": "BAC009S0769W0295", "wav": "./aishell/wav/test/S0769/BAC009S0769W0295.wav", "txt": "预计订单总额为三十亿美元"} -{"key": "BAC009S0769W0296", "wav": "./aishell/wav/test/S0769/BAC009S0769W0296.wav", "txt": "美国航空环境公司停止生产大鸦无人机后"} -{"key": "BAC009S0769W0297", "wav": "./aishell/wav/test/S0769/BAC009S0769W0297.wav", "txt": "印方工厂将继续完成剩馀订单"} -{"key": "BAC009S0769W0298", "wav": "./aishell/wav/test/S0769/BAC009S0769W0298.wav", "txt": "此外还将与美方联手研制一款升级版大鸦无人机"} -{"key": "BAC009S0769W0299", "wav": "./aishell/wav/test/S0769/BAC009S0769W0299.wav", "txt": "该技术可用于识别隐藏于伪装下的目标"} -{"key": "BAC009S0769W0300", "wav": "./aishell/wav/test/S0769/BAC009S0769W0300.wav", "txt": "从而把运输机转化为更为复杂的远程侦察机"} -{"key": "BAC009S0769W0301", "wav": "./aishell/wav/test/S0769/BAC009S0769W0301.wav", "txt": "美国外交消息人士透露"} -{"key": "BAC009S0769W0302", "wav": "./aishell/wav/test/S0769/BAC009S0769W0302.wav", "txt": "肯德尔定于二月二十三日访问印度"} -{"key": "BAC009S0769W0303", "wav": "./aishell/wav/test/S0769/BAC009S0769W0303.wav", "txt": "且达到情节特别严重程度"} -{"key": "BAC009S0769W0304", "wav": "./aishell/wav/test/S0769/BAC009S0769W0304.wav", "txt": "故依法裁定驳回上诉"} -{"key": "BAC009S0769W0306", "wav": "./aishell/wav/test/S0769/BAC009S0769W0306.wav", "txt": "从而获取用户信息的案件"} -{"key": 
"BAC009S0769W0308", "wav": "./aishell/wav/test/S0769/BAC009S0769W0308.wav", "txt": "虽然工信部很快就删除了后半句话"} -{"key": "BAC009S0769W0309", "wav": "./aishell/wav/test/S0769/BAC009S0769W0309.wav", "txt": "但还是引发业内广泛关注"} -{"key": "BAC009S0769W0310", "wav": "./aishell/wav/test/S0769/BAC009S0769W0310.wav", "txt": "这种宣传方式的目的是什么"} -{"key": "BAC009S0769W0311", "wav": "./aishell/wav/test/S0769/BAC009S0769W0311.wav", "txt": "截至中国经营报记者发稿前"} -{"key": "BAC009S0769W0312", "wav": "./aishell/wav/test/S0769/BAC009S0769W0312.wav", "txt": "浪潮官方尚未给出回应"} -{"key": "BAC009S0769W0313", "wav": "./aishell/wav/test/S0769/BAC009S0769W0313.wav", "txt": "旗下拥有浪潮信息浪潮软件浪潮国际三家上市公司"} -{"key": "BAC009S0769W0314", "wav": "./aishell/wav/test/S0769/BAC009S0769W0314.wav", "txt": "尽管政府对国产品牌有一定扶持"} -{"key": "BAC009S0769W0315", "wav": "./aishell/wav/test/S0769/BAC009S0769W0315.wav", "txt": "浪潮的发展也有可圈可可点之处"} -{"key": "BAC009S0769W0318", "wav": "./aishell/wav/test/S0769/BAC009S0769W0318.wav", "txt": "浪潮信息的研发支出约四亿元"} -{"key": "BAC009S0769W0319", "wav": "./aishell/wav/test/S0769/BAC009S0769W0319.wav", "txt": "占营业收入的比例是五点百分之四十七"} -{"key": "BAC009S0769W0320", "wav": "./aishell/wav/test/S0769/BAC009S0769W0320.wav", "txt": "较上年同期增长八十四点百分之三十九"} -{"key": "BAC009S0769W0321", "wav": "./aishell/wav/test/S0769/BAC009S0769W0321.wav", "txt": "研发支出主要用于服务器产品的研究开发和升级换代"} -{"key": "BAC009S0769W0322", "wav": "./aishell/wav/test/S0769/BAC009S0769W0322.wav", "txt": "研发投入是一个刚性指标"} -{"key": "BAC009S0769W0323", "wav": "./aishell/wav/test/S0769/BAC009S0769W0323.wav", "txt": "与技术的更新换代速度还是有相关性"} -{"key": "BAC009S0769W0324", "wav": "./aishell/wav/test/S0769/BAC009S0769W0324.wav", "txt": "国内几个服务器品牌的盘子还比较小"} -{"key": "BAC009S0769W0325", "wav": "./aishell/wav/test/S0769/BAC009S0769W0325.wav", "txt": "他们的硬件技术研发等力量"} -{"key": "BAC009S0769W0326", "wav": "./aishell/wav/test/S0769/BAC009S0769W0326.wav", "txt": "经验积累不足也是一个大问题"} -{"key": "BAC009S0769W0327", "wav": "./aishell/wav/test/S0769/BAC009S0769W0327.wav", "txt": "国产服务器即使是自主设计"} -{"key": "BAC009S0769W0329", "wav": "./aishell/wav/test/S0769/BAC009S0769W0329.wav", "txt": "核心架构也基本照抄国外厂商"} -{"key": "BAC009S0769W0330", "wav": "./aishell/wav/test/S0769/BAC009S0769W0330.wav", "txt": "在中低端市场或占有相应份额"} -{"key": "BAC009S0769W0331", "wav": "./aishell/wav/test/S0769/BAC009S0769W0331.wav", "txt": "但高端市场仍然难以企及"} -{"key": "BAC009S0769W0332", "wav": "./aishell/wav/test/S0769/BAC009S0769W0332.wav", "txt": "一位股份制银行科技部负责人如此讲述"} -{"key": "BAC009S0769W0334", "wav": "./aishell/wav/test/S0769/BAC009S0769W0334.wav", "txt": "国内厂商在高端核心技术上普遍存有差距"} -{"key": "BAC009S0769W0336", "wav": "./aishell/wav/test/S0769/BAC009S0769W0336.wav", "txt": "浪潮高管在接受媒体采访时表示"} -{"key": "BAC009S0769W0337", "wav": "./aishell/wav/test/S0769/BAC009S0769W0337.wav", "txt": "浪潮将通过产品渠道服务价格的全方位发力"} -{"key": "BAC009S0769W0338", "wav": "./aishell/wav/test/S0769/BAC009S0769W0338.wav", "txt": "一百米栏辛迪罗勒德尔"} -{"key": "BAC009S0769W0339", "wav": "./aishell/wav/test/S0769/BAC009S0769W0339.wav", "txt": "三千米障碍吉萨费里欣塔斯卡鲁塞"} -{"key": "BAC009S0769W0340", "wav": "./aishell/wav/test/S0769/BAC009S0769W0340.wav", "txt": "跳高玛丽劳伦斯荣格菲利斯"} -{"key": "BAC009S0769W0341", "wav": "./aishell/wav/test/S0769/BAC009S0769W0341.wav", "txt": "撑杆跳丽萨莱兹奇"} -{"key": "BAC009S0769W0342", "wav": "./aishell/wav/test/S0769/BAC009S0769W0342.wav", "txt": "跳远莱纳马尔库斯"} -{"key": "BAC009S0769W0343", "wav": "./aishell/wav/test/S0769/BAC009S0769W0343.wav", "txt": "三级跳克里斯丁吉尔奇"} -{"key": "BAC009S0769W0344", "wav": "./aishell/wav/test/S0769/BAC009S0769W0344.wav", "txt": "铅球克里斯蒂娜斯齐万兹"} -{"key": "BAC009S0769W0345", "wav": 
"./aishell/wav/test/S0769/BAC009S0769W0345.wav", "txt": "铁饼沙尼斯克拉夫特"} -{"key": "BAC009S0769W0346", "wav": "./aishell/wav/test/S0769/BAC009S0769W0346.wav", "txt": "链球贝蒂海德尔"} -{"key": "BAC009S0769W0347", "wav": "./aishell/wav/test/S0769/BAC009S0769W0347.wav", "txt": "标枪克里斯丁胡宋"} -{"key": "BAC009S0769W0348", "wav": "./aishell/wav/test/S0769/BAC009S0769W0348.wav", "txt": "克里斯蒂娜奥伯福尔"} -{"key": "BAC009S0769W0349", "wav": "./aishell/wav/test/S0769/BAC009S0769W0349.wav", "txt": "全能詹妮弗奥赛尔"} -{"key": "BAC009S0769W0350", "wav": "./aishell/wav/test/S0769/BAC009S0769W0350.wav", "txt": "四乘一百米接力亚历山大布尔格哈德特"} -{"key": "BAC009S0769W0351", "wav": "./aishell/wav/test/S0769/BAC009S0769W0351.wav", "txt": "安娜莱纳法拉塞"} -{"key": "BAC009S0769W0352", "wav": "./aishell/wav/test/S0769/BAC009S0769W0352.wav", "txt": "吉娜卢克肯科姆普尔"} -{"key": "BAC009S0769W0353", "wav": "./aishell/wav/test/S0769/BAC009S0769W0353.wav", "txt": "孙杨因心脏不适退出一千五百米自由泳决赛"} -{"key": "BAC009S0769W0354", "wav": "./aishell/wav/test/S0769/BAC009S0769W0354.wav", "txt": "无疑是刚刚结束的喀山世锦赛最大的遗憾"} -{"key": "BAC009S0769W0355", "wav": "./aishell/wav/test/S0769/BAC009S0769W0355.wav", "txt": "孙杨在一千五百米自由泳上的实力不容置疑"} -{"key": "BAC009S0769W0356", "wav": "./aishell/wav/test/S0769/BAC009S0769W0356.wav", "txt": "而这一次击败他的不是对手"} -{"key": "BAC009S0769W0357", "wav": "./aishell/wav/test/S0769/BAC009S0769W0357.wav", "txt": "孙杨的心脏不适早就不是秘密"} -{"key": "BAC009S0769W0358", "wav": "./aishell/wav/test/S0769/BAC009S0769W0358.wav", "txt": "是孙杨在二零一四年因治疗心脏不适"} -{"key": "BAC009S0769W0359", "wav": "./aishell/wav/test/S0769/BAC009S0769W0359.wav", "txt": "误服曲美他嗪导致兴奋剂检测呈阳性遭禁赛"} -{"key": "BAC009S0769W0360", "wav": "./aishell/wav/test/S0769/BAC009S0769W0360.wav", "txt": "正是治疗他心悸不适症状的"} -{"key": "BAC009S0769W0361", "wav": "./aishell/wav/test/S0769/BAC009S0769W0361.wav", "txt": "也第一次被媒体关注"} -{"key": "BAC009S0769W0362", "wav": "./aishell/wav/test/S0769/BAC009S0769W0362.wav", "txt": "记者从浙江省游泳协会了解到"} -{"key": "BAC009S0769W0363", "wav": "./aishell/wav/test/S0769/BAC009S0769W0363.wav", "txt": "孙杨就出现过心脏问题"} -{"key": "BAC009S0769W0364", "wav": "./aishell/wav/test/S0769/BAC009S0769W0364.wav", "txt": "孙杨因感冒后出现了胸闷心悸不适等症状"} -{"key": "BAC009S0769W0365", "wav": "./aishell/wav/test/S0769/BAC009S0769W0365.wav", "txt": "专家会诊之后认为孙杨存在心肌缺血情况"} -{"key": "BAC009S0769W0366", "wav": "./aishell/wav/test/S0769/BAC009S0769W0366.wav", "txt": "与感冒病毒感染损伤心肌有关"} -{"key": "BAC009S0769W0367", "wav": "./aishell/wav/test/S0769/BAC009S0769W0367.wav", "txt": "予服用处方药以治疗心肌缺血保护心肌"} -{"key": "BAC009S0769W0368", "wav": "./aishell/wav/test/S0769/BAC009S0769W0368.wav", "txt": "孙杨的心肌损伤是在感冒后引发的"} -{"key": "BAC009S0769W0369", "wav": "./aishell/wav/test/S0769/BAC009S0769W0369.wav", "txt": "心肌同位素扫描显示局部灌注差"} -{"key": "BAC009S0769W0370", "wav": "./aishell/wav/test/S0769/BAC009S0769W0370.wav", "txt": "达到保护心脏的作用"} -{"key": "BAC009S0769W0371", "wav": "./aishell/wav/test/S0769/BAC009S0769W0371.wav", "txt": "是去年备战亚运会选拔赛期间"} -{"key": "BAC009S0769W0372", "wav": "./aishell/wav/test/S0769/BAC009S0769W0372.wav", "txt": "直到二零一四年四月才解禁复出"} -{"key": "BAC009S0769W0373", "wav": "./aishell/wav/test/S0769/BAC009S0769W0373.wav", "txt": "尽管期间孙杨的训练并没有中断"} -{"key": "BAC009S0769W0374", "wav": "./aishell/wav/test/S0769/BAC009S0769W0374.wav", "txt": "但训练量几乎和正常时不可同日而语"} -{"key": "BAC009S0769W0375", "wav": "./aishell/wav/test/S0769/BAC009S0769W0375.wav", "txt": "为了备战亚运会选拔赛"} -{"key": "BAC009S0769W0376", "wav": "./aishell/wav/test/S0769/BAC009S0769W0376.wav", "txt": "在世锦赛决赛检录前突感不适"} -{"key": "BAC009S0769W0377", "wav": "./aishell/wav/test/S0769/BAC009S0769W0377.wav", "txt": 
"也是孙杨整个比赛期间疲劳所致"} -{"key": "BAC009S0769W0378", "wav": "./aishell/wav/test/S0769/BAC009S0769W0378.wav", "txt": "从四百米预赛到最后的一千五百米预赛"} -{"key": "BAC009S0769W0379", "wav": "./aishell/wav/test/S0769/BAC009S0769W0379.wav", "txt": "二百米的高强度无氧到一千五百米的有氧"} -{"key": "BAC009S0769W0380", "wav": "./aishell/wav/test/S0769/BAC009S0769W0380.wav", "txt": "师姐罗雪娟也忍不住落泪"} -{"key": "BAC009S0769W0381", "wav": "./aishell/wav/test/S0769/BAC009S0769W0381.wav", "txt": "回忆起自己从前训练时因心脏不适被抢救的事"} -{"key": "BAC009S0769W0382", "wav": "./aishell/wav/test/S0769/BAC009S0769W0382.wav", "txt": "更大的战场还在里约"} -{"key": "BAC009S0769W0383", "wav": "./aishell/wav/test/S0769/BAC009S0769W0383.wav", "txt": "华西都市报记者陈甘露"} -{"key": "BAC009S0769W0384", "wav": "./aishell/wav/test/S0769/BAC009S0769W0384.wav", "txt": "二零零八年北京奥运会时"} -{"key": "BAC009S0769W0385", "wav": "./aishell/wav/test/S0769/BAC009S0769W0385.wav", "txt": "曾经在鸟巢服务的志愿者们"} -{"key": "BAC009S0769W0386", "wav": "./aishell/wav/test/S0769/BAC009S0769W0386.wav", "txt": "顶级田径赛事再次落户鸟巢"} -{"key": "BAC009S0769W0387", "wav": "./aishell/wav/test/S0769/BAC009S0769W0387.wav", "txt": "如今为这次赛事服务的志愿者们更为年轻"} -{"key": "BAC009S0769W0388", "wav": "./aishell/wav/test/S0769/BAC009S0769W0388.wav", "txt": "他们几乎都是九零后"} -{"key": "BAC009S0769W0389", "wav": "./aishell/wav/test/S0769/BAC009S0769W0389.wav", "txt": "这批志愿者也被称为新鸟巢一代"} -{"key": "BAC009S0769W0390", "wav": "./aishell/wav/test/S0769/BAC009S0769W0390.wav", "txt": "而他们已经为这次田径世锦赛做好了准备"} -{"key": "BAC009S0769W0391", "wav": "./aishell/wav/test/S0769/BAC009S0769W0391.wav", "txt": "并要为国内外运动员献上一张张北京最美的名片"} -{"key": "BAC009S0769W0392", "wav": "./aishell/wav/test/S0769/BAC009S0769W0392.wav", "txt": "在每次大型赛事中志愿者都是必不可少的一部分"} -{"key": "BAC009S0769W0393", "wav": "./aishell/wav/test/S0769/BAC009S0769W0393.wav", "txt": "他们也是历届大赛的一个亮点"} -{"key": "BAC009S0769W0394", "wav": "./aishell/wav/test/S0769/BAC009S0769W0394.wav", "txt": "总共有二千七百六十人来为这项大赛志愿服务"} -{"key": "BAC009S0769W0395", "wav": "./aishell/wav/test/S0769/BAC009S0769W0395.wav", "txt": "他们最大的特点就是九零后占主角"} -{"key": "BAC009S0769W0396", "wav": "./aishell/wav/test/S0769/BAC009S0769W0396.wav", "txt": "比例超过百分之九十四的志愿者是九零后"} -{"key": "BAC009S0769W0397", "wav": "./aishell/wav/test/S0769/BAC009S0769W0397.wav", "txt": "在今年世锦赛的志愿者中"} -{"key": "BAC009S0769W0398", "wav": "./aishell/wav/test/S0769/BAC009S0769W0398.wav", "txt": "有的人还会八国语言"} -{"key": "BAC009S0769W0399", "wav": "./aishell/wav/test/S0769/BAC009S0769W0399.wav", "txt": "志愿者除了要具备流利的英语交流能力外"} -{"key": "BAC009S0769W0400", "wav": "./aishell/wav/test/S0769/BAC009S0769W0400.wav", "txt": "还要求具备大型赛会或日常从事社会志愿服务的经验"} -{"key": "BAC009S0769W0401", "wav": "./aishell/wav/test/S0769/BAC009S0769W0401.wav", "txt": "北京青年报记者昨日在鸟巢采访了一些志愿者"} -{"key": "BAC009S0769W0402", "wav": "./aishell/wav/test/S0769/BAC009S0769W0402.wav", "txt": "发现他们中间真有不少是志愿达人"} -{"key": "BAC009S0769W0403", "wav": "./aishell/wav/test/S0769/BAC009S0769W0403.wav", "txt": "例如在竞赛部赛后控制中心的陈田希"} -{"key": "BAC009S0769W0404", "wav": "./aishell/wav/test/S0769/BAC009S0769W0404.wav", "txt": "也等待了很长时间"} -{"key": "BAC009S0769W0405", "wav": "./aishell/wav/test/S0769/BAC009S0769W0405.wav", "txt": "但最终却因为出品公司相对论影业申请破产"} -{"key": "BAC009S0769W0406", "wav": "./aishell/wav/test/S0769/BAC009S0769W0406.wav", "txt": "而不得不离开这个项目"} -{"key": "BAC009S0769W0407", "wav": "./aishell/wav/test/S0769/BAC009S0769W0407.wav", "txt": "乌鸦在没有其他公司愿意接手的情况下"} -{"key": "BAC009S0769W0408", "wav": "./aishell/wav/test/S0769/BAC009S0769W0408.wav", "txt": "谈到前日爸爸谢贤在宣传活动上出手打曾江"} -{"key": "BAC009S0769W0409", "wav": "./aishell/wav/test/S0769/BAC009S0769W0409.wav", 
"txt": "婷婷指收到消息时正在拍摄广告"} -{"key": "BAC009S0769W0410", "wav": "./aishell/wav/test/S0769/BAC009S0769W0410.wav", "txt": "亦未联络到爸爸了解他不是一个常打架的人"} -{"key": "BAC009S0769W0411", "wav": "./aishell/wav/test/S0769/BAC009S0769W0411.wav", "txt": "他是一个大人"} -{"key": "BAC009S0769W0412", "wav": "./aishell/wav/test/S0769/BAC009S0769W0412.wav", "txt": "他一定有他的原因"} -{"key": "BAC009S0769W0413", "wav": "./aishell/wav/test/S0769/BAC009S0769W0413.wav", "txt": "又指自己未试过受爸爸体罚"} -{"key": "BAC009S0769W0415", "wav": "./aishell/wav/test/S0769/BAC009S0769W0415.wav", "txt": "婷婷就坦言靠传媒得知"} -{"key": "BAC009S0769W0416", "wav": "./aishell/wav/test/S0769/BAC009S0769W0416.wav", "txt": "但会给哥哥谢霆锋传短信了解情况"} -{"key": "BAC009S0769W0417", "wav": "./aishell/wav/test/S0769/BAC009S0769W0417.wav", "txt": "中新网六月二十四日电六月二十三日"} -{"key": "BAC009S0769W0418", "wav": "./aishell/wav/test/S0769/BAC009S0769W0418.wav", "txt": "谢霆锋妹妹谢婷婷在微博晒出与父亲合影"} -{"key": "BAC009S0769W0419", "wav": "./aishell/wav/test/S0769/BAC009S0769W0419.wav", "txt": "谢婷婷将头挨着父亲的头"} -{"key": "BAC009S0769W0420", "wav": "./aishell/wav/test/S0769/BAC009S0769W0420.wav", "txt": "二人一脸笑容"} -{"key": "BAC009S0769W0421", "wav": "./aishell/wav/test/S0769/BAC009S0769W0421.wav", "txt": "搜狐娱乐讯据香港媒体报道"} -{"key": "BAC009S0769W0422", "wav": "./aishell/wav/test/S0769/BAC009S0769W0422.wav", "txt": "艺人谢婷婷从小就成为媒体焦点"} -{"key": "BAC009S0769W0423", "wav": "./aishell/wav/test/S0769/BAC009S0769W0423.wav", "txt": "而有鬼妹仔性格的婷婷不时以性感打扮亮相"} -{"key": "BAC009S0769W0424", "wav": "./aishell/wav/test/S0769/BAC009S0769W0424.wav", "txt": "她去游泳解暑"} -{"key": "BAC009S0769W0425", "wav": "./aishell/wav/test/S0769/BAC009S0769W0425.wav", "txt": "还在网上分享身穿比基尼泳装照"} -{"key": "BAC009S0769W0426", "wav": "./aishell/wav/test/S0769/BAC009S0769W0426.wav", "txt": "这种天气很适合搞池边派对"} -{"key": "BAC009S0769W0427", "wav": "./aishell/wav/test/S0769/BAC009S0769W0427.wav", "txt": "中新网五月二十一日报道据香港明报消息"} -{"key": "BAC009S0769W0428", "wav": "./aishell/wav/test/S0769/BAC009S0769W0428.wav", "txt": "谢婷婷为服装拍摄时装宣传照"} -{"key": "BAC009S0769W0429", "wav": "./aishell/wav/test/S0769/BAC009S0769W0429.wav", "txt": "她透露现在父母哥哥谢霆锋都各忙各的"} -{"key": "BAC009S0769W0430", "wav": "./aishell/wav/test/S0769/BAC009S0769W0430.wav", "txt": "一家人很难有机会团聚"} -{"key": "BAC009S0769W0431", "wav": "./aishell/wav/test/S0769/BAC009S0769W0431.wav", "txt": "施王祥被陆丰市纪委立案调查"} -{"key": "BAC009S0769W0432", "wav": "./aishell/wav/test/S0769/BAC009S0769W0432.wav", "txt": "二零一三一二二六"} -{"key": "BAC009S0769W0433", "wav": "./aishell/wav/test/S0769/BAC009S0769W0433.wav", "txt": "二零一四三七"} -{"key": "BAC009S0769W0434", "wav": "./aishell/wav/test/S0769/BAC009S0769W0434.wav", "txt": "陆丰市纪委决定给予施王祥开除党籍处分"} -{"key": "BAC009S0769W0435", "wav": "./aishell/wav/test/S0769/BAC009S0769W0435.wav", "txt": "南粤清风网通报该案详情"} -{"key": "BAC009S0769W0436", "wav": "./aishell/wav/test/S0769/BAC009S0769W0436.wav", "txt": "村官遭判刑处罚证据涉嫌造假公检法自查迟迟无果"} -{"key": "BAC009S0769W0437", "wav": "./aishell/wav/test/S0769/BAC009S0769W0437.wav", "txt": "山西省临汾市尧都区刘村镇刘南村一零名村干部"} -{"key": "BAC009S0769W0438", "wav": "./aishell/wav/test/S0769/BAC009S0769W0438.wav", "txt": "因决定取消刁天恩的土地承包合同移栽地上树苗"} -{"key": "BAC009S0769W0439", "wav": "./aishell/wav/test/S0769/BAC009S0769W0439.wav", "txt": "被法院以故意毁坏财物罪判刑或处罚"} -{"key": "BAC009S0769W0440", "wav": "./aishell/wav/test/S0769/BAC009S0769W0440.wav", "txt": "村小老师自掏腰包八零零零元为贫困生设奖学金"} -{"key": "BAC009S0769W0441", "wav": "./aishell/wav/test/S0769/BAC009S0769W0441.wav", "txt": "薛孝文在学生家中家访"} -{"key": "BAC009S0769W0442", "wav": "./aishell/wav/test/S0769/BAC009S0769W0442.wav", "txt": "从金堂县城驱车一个半小时至土桥镇的大禹村"} -{"key": 
"BAC009S0769W0443", "wav": "./aishell/wav/test/S0769/BAC009S0769W0443.wav", "txt": "就到了薛孝文任教的学校金堂县平桥学校"} -{"key": "BAC009S0769W0444", "wav": "./aishell/wav/test/S0769/BAC009S0769W0444.wav", "txt": "乡间公路也就四米宽"} -{"key": "BAC009S0769W0445", "wav": "./aishell/wav/test/S0769/BAC009S0769W0445.wav", "txt": "薛孝文还在给学生上课"} -{"key": "BAC009S0769W0446", "wav": "./aishell/wav/test/S0769/BAC009S0769W0446.wav", "txt": "在年轻时也有着跳龙门的梦"} -{"key": "BAC009S0769W0447", "wav": "./aishell/wav/test/S0769/BAC009S0769W0447.wav", "txt": "他辗转三所乡村学校"} -{"key": "BAC009S0769W0448", "wav": "./aishell/wav/test/S0769/BAC009S0769W0448.wav", "txt": "村干部大闹天宫孙大圣口碑爆棚"} -{"key": "BAC009S0769W0449", "wav": "./aishell/wav/test/S0769/BAC009S0769W0449.wav", "txt": "村干部强揽工程遭拒绝雇百名老人阻挠施工"} -{"key": "BAC009S0769W0450", "wav": "./aishell/wav/test/S0769/BAC009S0769W0450.wav", "txt": "犯罪嫌疑人刘德怀等六人被刑拘"} -{"key": "BAC009S0769W0451", "wav": "./aishell/wav/test/S0769/BAC009S0769W0451.wav", "txt": "村干部靠打架成名被抓后喊我是市人大代表"} -{"key": "BAC009S0769W0452", "wav": "./aishell/wav/test/S0769/BAC009S0769W0452.wav", "txt": "和平花苑现已更名为龙和华府"} -{"key": "BAC009S0769W0453", "wav": "./aishell/wav/test/S0769/BAC009S0769W0453.wav", "txt": "村庄晴天降奇冰十几斤重来历不明"} -{"key": "BAC009S0769W0454", "wav": "./aishell/wav/test/S0769/BAC009S0769W0454.wav", "txt": "天上掉下一块重约十几斤的冰块"} -{"key": "BAC009S0769W0455", "wav": "./aishell/wav/test/S0769/BAC009S0769W0455.wav", "txt": "虽然事情过去三天了"} -{"key": "BAC009S0769W0456", "wav": "./aishell/wav/test/S0769/BAC009S0769W0456.wav", "txt": "但嵩县德亭镇大王沟村村民们仍感到好奇"} -{"key": "BAC009S0769W0457", "wav": "./aishell/wav/test/S0769/BAC009S0769W0457.wav", "txt": "一零月一零日临近中午"} -{"key": "BAC009S0769W0458", "wav": "./aishell/wav/test/S0769/BAC009S0769W0458.wav", "txt": "砸到了村民的菜地里"} -{"key": "BAC009S0769W0459", "wav": "./aishell/wav/test/S0769/BAC009S0769W0459.wav", "txt": "还把地面砸了个大坑"} -{"key": "BAC009S0769W0460", "wav": "./aishell/wav/test/S0769/BAC009S0769W0460.wav", "txt": "附近村民闻讯纷纷赶来瞧个新鲜"} -{"key": "BAC009S0769W0461", "wav": "./aishell/wav/test/S0769/BAC009S0769W0461.wav", "txt": "捡拾一些冰块回家冰冻保存"} -{"key": "BAC009S0769W0462", "wav": "./aishell/wav/test/S0769/BAC009S0769W0462.wav", "txt": "专家排除了冰雹和飞机上落冰的两种可能"} -{"key": "BAC009S0769W0463", "wav": "./aishell/wav/test/S0769/BAC009S0769W0463.wav", "txt": "这块天降奇冰究竟是何物"} -{"key": "BAC009S0769W0464", "wav": "./aishell/wav/test/S0769/BAC009S0769W0464.wav", "txt": "村庄现两名村支书假支书无名有实村内掌权"} -{"key": "BAC009S0769W0465", "wav": "./aishell/wav/test/S0769/BAC009S0769W0465.wav", "txt": "村庄遭人倾倒数百吨化工废料附近植物全空死"} -{"key": "BAC009S0769W0466", "wav": "./aishell/wav/test/S0769/BAC009S0769W0466.wav", "txt": "非法倾倒数百吨化工废料"} -{"key": "BAC009S0769W0467", "wav": "./aishell/wav/test/S0769/BAC009S0769W0467.wav", "txt": "村民生活因此发生巨变井水变味田地减产前日"} -{"key": "BAC009S0769W0468", "wav": "./aishell/wav/test/S0769/BAC009S0769W0468.wav", "txt": "该村村民黎胜明向楚天快报求助"} -{"key": "BAC009S0769W0469", "wav": "./aishell/wav/test/S0769/BAC009S0769W0469.wav", "txt": "希望相关部门能处理此事"} -{"key": "BAC009S0769W0470", "wav": "./aishell/wav/test/S0769/BAC009S0769W0470.wav", "txt": "村支书一周只上二小时班村民称反映会遭报复"} -{"key": "BAC009S0769W0471", "wav": "./aishell/wav/test/S0769/BAC009S0769W0471.wav", "txt": "村支书上班时间带彩娱乐神秘人曝光视频证据"} -{"key": "BAC009S0769W0472", "wav": "./aishell/wav/test/S0769/BAC009S0769W0472.wav", "txt": "视频中正在带彩娱乐的灰衣男"} -{"key": "BAC009S0769W0473", "wav": "./aishell/wav/test/S0769/BAC009S0769W0473.wav", "txt": "被警方确认为新农村党支部书记毛家文"} -{"key": "BAC009S0769W0474", "wav": "./aishell/wav/test/S0769/BAC009S0769W0474.wav", "txt": "村支书为考公务员改小一零岁一四岁时三月内生两子"} -{"key": 
"BAC009S0769W0475", "wav": "./aishell/wav/test/S0769/BAC009S0769W0475.wav", "txt": "淅川县上集镇一名村支书被指将年龄改小一零岁"} -{"key": "BAC009S0769W0476", "wav": "./aishell/wav/test/S0769/BAC009S0769W0476.wav", "txt": "图为时上集镇派出所"} -{"key": "BAC009S0769W0477", "wav": "./aishell/wav/test/S0769/BAC009S0769W0477.wav", "txt": "三个月内连生两个儿子"} -{"key": "BAC009S0769W0478", "wav": "./aishell/wav/test/S0769/BAC009S0769W0478.wav", "txt": "村支书将两女儿家七口人列为搬迁户骗领搬迁款"} -{"key": "BAC009S0769W0479", "wav": "./aishell/wav/test/S0769/BAC009S0769W0479.wav", "txt": "村支书违法占地建加油站多部门介入处罚仍未拆"} -{"key": "BAC009S0769W0480", "wav": "./aishell/wav/test/S0769/BAC009S0769W0480.wav", "txt": "浙江在线零九月二一日讯浙江日报记者季建荣近日"} -{"key": "BAC009S0769W0481", "wav": "./aishell/wav/test/S0769/BAC009S0769W0481.wav", "txt": "村民多次向温岭市有关部门投诉反映"} -{"key": "BAC009S0769W0482", "wav": "./aishell/wav/test/S0769/BAC009S0769W0482.wav", "txt": "但问题至今没有解决"} -{"key": "BAC009S0769W0483", "wav": "./aishell/wav/test/S0769/BAC009S0769W0483.wav", "txt": "村支书违规建小产权房花钱买通所有关系"} -{"key": "BAC009S0769W0484", "wav": "./aishell/wav/test/S0769/BAC009S0769W0484.wav", "txt": "都说下属有困难找领导"} -{"key": "BAC009S0769W0485", "wav": "./aishell/wav/test/S0769/BAC009S0769W0485.wav", "txt": "灵璧县韦集镇韦集村原村支书石某"} -{"key": "BAC009S0769W0486", "wav": "./aishell/wav/test/S0769/BAC009S0769W0486.wav", "txt": "就花钱请领导为他撑腰"} -{"key": "BAC009S0769W0487", "wav": "./aishell/wav/test/S0769/BAC009S0769W0487.wav", "txt": "村支书遭集体举报买鼠药欲投毒报复村民"} -{"key": "BAC009S0769W0488", "wav": "./aishell/wav/test/S0769/BAC009S0769W0488.wav", "txt": "本报一零月五日讯国庆长假"} -{"key": "BAC009S0769W0489", "wav": "./aishell/wav/test/S0769/BAC009S0769W0489.wav", "txt": "省纪委要求新闻媒体主动参与到纠四风监督工作中"} -{"key": "BAC009S0769W0490", "wav": "./aishell/wav/test/S0769/BAC009S0769W0490.wav", "txt": "强化媒体根据群众举报开展调查采访和舆论监督"} -{"key": "BAC009S0769W0491", "wav": "./aishell/wav/test/S0769/BAC009S0769W0491.wav", "txt": "发生了一起村民举报村支书贪腐"} -{"key": "BAC009S0769W0492", "wav": "./aishell/wav/test/S0769/BAC009S0769W0492.wav", "txt": "村支书以在全村井水中投毒以报复村民的离奇事件"} -{"key": "BAC009S0769W0493", "wav": "./aishell/wav/test/S0769/BAC009S0769W0493.wav", "txt": "村支书醉驾撞伤孕妇刑满释放后仍当人大代表"} -{"key": "BAC009S0769W0494", "wav": "./aishell/wav/test/S0769/BAC009S0769W0494.wav", "txt": "华江瑶族乡十四届人大代表会第五次会议会务材料上"} -{"key": "BAC009S0769W0495", "wav": "./aishell/wav/test/S0769/BAC009S0769W0495.wav", "txt": "于二零一四年四月一九日晚"} -{"key": "BAC009S0770W0121", "wav": "./aishell/wav/test/S0770/BAC009S0770W0121.wav", "txt": "住宅土地出让金及成交面积均大幅下降"} -{"key": "BAC009S0770W0122", "wav": "./aishell/wav/test/S0770/BAC009S0770W0122.wav", "txt": "开发商进驻一二线城市"} -{"key": "BAC009S0770W0123", "wav": "./aishell/wav/test/S0770/BAC009S0770W0123.wav", "txt": "抛售三线城市"} -{"key": "BAC009S0770W0124", "wav": "./aishell/wav/test/S0770/BAC009S0770W0124.wav", "txt": "遇到毛利率低的问题"} -{"key": "BAC009S0770W0125", "wav": "./aishell/wav/test/S0770/BAC009S0770W0125.wav", "txt": "发现土地成本占比持续提升"} -{"key": "BAC009S0770W0126", "wav": "./aishell/wav/test/S0770/BAC009S0770W0126.wav", "txt": "目前全国该指标"} -{"key": "BAC009S0770W0127", "wav": "./aishell/wav/test/S0770/BAC009S0770W0127.wav", "txt": "一线城市超过三成"} -{"key": "BAC009S0770W0128", "wav": "./aishell/wav/test/S0770/BAC009S0770W0128.wav", "txt": "三线及以下为一成"} -{"key": "BAC009S0770W0129", "wav": "./aishell/wav/test/S0770/BAC009S0770W0129.wav", "txt": "一二线城市用地紧张"} -{"key": "BAC009S0770W0130", "wav": "./aishell/wav/test/S0770/BAC009S0770W0130.wav", "txt": "房地产商需要解决毛利率低的问题"} -{"key": "BAC009S0770W0131", "wav": "./aishell/wav/test/S0770/BAC009S0770W0131.wav", "txt": "中指院广州公司总经理张化学向南都记者表示"} -{"key": 
"BAC009S0770W0132", "wav": "./aishell/wav/test/S0770/BAC009S0770W0132.wav", "txt": "三线城市库存积压又逼倒房地产商在一线城市抢地"} -{"key": "BAC009S0770W0133", "wav": "./aishell/wav/test/S0770/BAC009S0770W0133.wav", "txt": "建议房企不要一味强调做大"} -{"key": "BAC009S0770W0134", "wav": "./aishell/wav/test/S0770/BAC009S0770W0134.wav", "txt": "可以重点关注如何做强"} -{"key": "BAC009S0770W0135", "wav": "./aishell/wav/test/S0770/BAC009S0770W0135.wav", "txt": "在自身优势领域发力"} -{"key": "BAC009S0770W0136", "wav": "./aishell/wav/test/S0770/BAC009S0770W0136.wav", "txt": "发现无论是千亿巨头地产商"} -{"key": "BAC009S0770W0137", "wav": "./aishell/wav/test/S0770/BAC009S0770W0137.wav", "txt": "多数在积极剑指一线城市"} -{"key": "BAC009S0770W0138", "wav": "./aishell/wav/test/S0770/BAC009S0770W0138.wav", "txt": "从今年房企的买地情况来看"} -{"key": "BAC009S0770W0139", "wav": "./aishell/wav/test/S0770/BAC009S0770W0139.wav", "txt": "今年万科拿下九宗地块"} -{"key": "BAC009S0770W0140", "wav": "./aishell/wav/test/S0770/BAC009S0770W0140.wav", "txt": "包括五个一二线城市"} -{"key": "BAC009S0770W0141", "wav": "./aishell/wav/test/S0770/BAC009S0770W0141.wav", "txt": "保利地产开始进军成都珠海"} -{"key": "BAC009S0770W0142", "wav": "./aishell/wav/test/S0770/BAC009S0770W0142.wav", "txt": "中海地产作为国企龙头"} -{"key": "BAC009S0770W0143", "wav": "./aishell/wav/test/S0770/BAC009S0770W0143.wav", "txt": "也在厦门拿下几宗商住用地和济南几宗居住用地"} -{"key": "BAC009S0770W0144", "wav": "./aishell/wav/test/S0770/BAC009S0770W0144.wav", "txt": "在房企扎堆一二线城市时"} -{"key": "BAC009S0770W0145", "wav": "./aishell/wav/test/S0770/BAC009S0770W0145.wav", "txt": "更致命的是中小房企在融资方面的短板"} -{"key": "BAC009S0770W0146", "wav": "./aishell/wav/test/S0770/BAC009S0770W0146.wav", "txt": "相比千亿房企的借贷利率"} -{"key": "BAC009S0770W0147", "wav": "./aishell/wav/test/S0770/BAC009S0770W0147.wav", "txt": "中小房企要面临高达两位数利率"} -{"key": "BAC009S0770W0148", "wav": "./aishell/wav/test/S0770/BAC009S0770W0148.wav", "txt": "中国市场空间多样化"} -{"key": "BAC009S0770W0149", "wav": "./aishell/wav/test/S0770/BAC009S0770W0149.wav", "txt": "房企除了像千亿地产一样做大"} -{"key": "BAC009S0770W0150", "wav": "./aishell/wav/test/S0770/BAC009S0770W0150.wav", "txt": "在某一方面找到自己的企业竞争力"} -{"key": "BAC009S0770W0151", "wav": "./aishell/wav/test/S0770/BAC009S0770W0151.wav", "txt": "行业的玩家门槛越来越高"} -{"key": "BAC009S0770W0152", "wav": "./aishell/wav/test/S0770/BAC009S0770W0152.wav", "txt": "主动退出和寻求并购的中小开发商增多"} -{"key": "BAC009S0770W0153", "wav": "./aishell/wav/test/S0770/BAC009S0770W0153.wav", "txt": "大开发商有机会借此提高行业集中程度"} -{"key": "BAC009S0770W0154", "wav": "./aishell/wav/test/S0770/BAC009S0770W0154.wav", "txt": "张大伟向南都记者分析"} -{"key": "BAC009S0770W0155", "wav": "./aishell/wav/test/S0770/BAC009S0770W0155.wav", "txt": "在三线城市库存高攀销售停滞的情况下"} -{"key": "BAC009S0770W0156", "wav": "./aishell/wav/test/S0770/BAC009S0770W0156.wav", "txt": "没有雄厚的资金良好的业绩以及成熟的融资平台"} -{"key": "BAC009S0770W0157", "wav": "./aishell/wav/test/S0770/BAC009S0770W0157.wav", "txt": "似乎难以在一二线城市站稳"} -{"key": "BAC009S0770W0158", "wav": "./aishell/wav/test/S0770/BAC009S0770W0158.wav", "txt": "房地产业将在明年有所洗牌"} -{"key": "BAC009S0770W0159", "wav": "./aishell/wav/test/S0770/BAC009S0770W0159.wav", "txt": "点击进入股友会参与讨论"} -{"key": "BAC009S0770W0160", "wav": "./aishell/wav/test/S0770/BAC009S0770W0160.wav", "txt": "今年国有土地出让权收入四千亿元"} -{"key": "BAC009S0770W0161", "wav": "./aishell/wav/test/S0770/BAC009S0770W0161.wav", "txt": "今年房地产市场地域分化将加剧"} -{"key": "BAC009S0770W0162", "wav": "./aishell/wav/test/S0770/BAC009S0770W0162.wav", "txt": "政策放松和高库存背景下"} -{"key": "BAC009S0770W0163", "wav": "./aishell/wav/test/S0770/BAC009S0770W0163.wav", "txt": "开发商均面临不均衡的复苏前景"} -{"key": "BAC009S0770W0164", "wav": 
"./aishell/wav/test/S0770/BAC009S0770W0164.wav", "txt": "今年中国房地产开发商仍将面临供应过剩"} -{"key": "BAC009S0770W0165", "wav": "./aishell/wav/test/S0770/BAC009S0770W0165.wav", "txt": "房价不太可能强劲反弹"} -{"key": "BAC009S0770W0166", "wav": "./aishell/wav/test/S0770/BAC009S0770W0166.wav", "txt": "房地产在不同城市之间的复苏也将存在分化"} -{"key": "BAC009S0770W0167", "wav": "./aishell/wav/test/S0770/BAC009S0770W0167.wav", "txt": "一线城市或将复苏率先复苏"} -{"key": "BAC009S0770W0168", "wav": "./aishell/wav/test/S0770/BAC009S0770W0168.wav", "txt": "三四线城市可能在继续因高库存而承压"} -{"key": "BAC009S0770W0169", "wav": "./aishell/wav/test/S0770/BAC009S0770W0169.wav", "txt": "中国房地产的市场价格和销量将继续调整"} -{"key": "BAC009S0770W0170", "wav": "./aishell/wav/test/S0770/BAC009S0770W0170.wav", "txt": "但下半年的销售可能会回升"} -{"key": "BAC009S0770W0171", "wav": "./aishell/wav/test/S0770/BAC009S0770W0171.wav", "txt": "开发商只需选择继续降价"} -{"key": "BAC009S0770W0172", "wav": "./aishell/wav/test/S0770/BAC009S0770W0172.wav", "txt": "尤其是在三四线城市"} -{"key": "BAC009S0770W0173", "wav": "./aishell/wav/test/S0770/BAC009S0770W0173.wav", "txt": "中国经济增速放缓的背景下"} -{"key": "BAC009S0770W0174", "wav": "./aishell/wav/test/S0770/BAC009S0770W0174.wav", "txt": "预期政府将继续放松政策"} -{"key": "BAC009S0770W0175", "wav": "./aishell/wav/test/S0770/BAC009S0770W0175.wav", "txt": "而政府放松限购按揭和内地融资政策"} -{"key": "BAC009S0770W0176", "wav": "./aishell/wav/test/S0770/BAC009S0770W0176.wav", "txt": "房地产需求可能会上升"} -{"key": "BAC009S0770W0177", "wav": "./aishell/wav/test/S0770/BAC009S0770W0177.wav", "txt": "这将有助于开发商明年维持销量"} -{"key": "BAC009S0770W0178", "wav": "./aishell/wav/test/S0770/BAC009S0770W0178.wav", "txt": "政府放松政策对房地产销售的正面影响可能会提升"} -{"key": "BAC009S0770W0179", "wav": "./aishell/wav/test/S0770/BAC009S0770W0179.wav", "txt": "标普信用分析师孔磊说道"} -{"key": "BAC009S0770W0180", "wav": "./aishell/wav/test/S0770/BAC009S0770W0180.wav", "txt": "关于明年的房价走势"} -{"key": "BAC009S0770W0181", "wav": "./aishell/wav/test/S0770/BAC009S0770W0181.wav", "txt": "标普在基准情景假设下的预期是"} -{"key": "BAC009S0770W0182", "wav": "./aishell/wav/test/S0770/BAC009S0770W0182.wav", "txt": "明年平均售价将维持不变"} -{"key": "BAC009S0770W0183", "wav": "./aishell/wav/test/S0770/BAC009S0770W0183.wav", "txt": "销售额则将维持不变"} -{"key": "BAC009S0770W0184", "wav": "./aishell/wav/test/S0770/BAC009S0770W0184.wav", "txt": "房地产价格调整还未完全结束"} -{"key": "BAC009S0770W0185", "wav": "./aishell/wav/test/S0770/BAC009S0770W0185.wav", "txt": "未来一年内中国房地产价格不太可能强劲反弹"} -{"key": "BAC009S0770W0186", "wav": "./aishell/wav/test/S0770/BAC009S0770W0186.wav", "txt": "虽然过去一年一些获评级开发商的信用状况变差"} -{"key": "BAC009S0770W0187", "wav": "./aishell/wav/test/S0770/BAC009S0770W0187.wav", "txt": "徐林发债企业在债劵存续期内进行资产转移"} -{"key": "BAC009S0770W0188", "wav": "./aishell/wav/test/S0770/BAC009S0770W0188.wav", "txt": "极可能对债劵持有人利益构成不利影响"} -{"key": "BAC009S0770W0189", "wav": "./aishell/wav/test/S0770/BAC009S0770W0189.wav", "txt": "直接涉及到债劵持有人的利益保护问题"} -{"key": "BAC009S0770W0190", "wav": "./aishell/wav/test/S0770/BAC009S0770W0190.wav", "txt": "我们立即与云投集团进行了沟通"} -{"key": "BAC009S0770W0191", "wav": "./aishell/wav/test/S0770/BAC009S0770W0191.wav", "txt": "并严格按照合规程序进行"} -{"key": "BAC009S0770W0192", "wav": "./aishell/wav/test/S0770/BAC009S0770W0192.wav", "txt": "我委也注意到在企业债劵存续期内"} -{"key": "BAC009S0770W0193", "wav": "./aishell/wav/test/S0770/BAC009S0770W0193.wav", "txt": "需要对发行人资产重组等重大事宜加强监管"} -{"key": "BAC009S0770W0194", "wav": "./aishell/wav/test/S0770/BAC009S0770W0194.wav", "txt": "在制度上对债券人的合法权益进行保护"} -{"key": "BAC009S0770W0195", "wav": "./aishell/wav/test/S0770/BAC009S0770W0195.wav", "txt": "建立地方政府债务管理体系"} -{"key": "BAC009S0770W0196", "wav": 
"./aishell/wav/test/S0770/BAC009S0770W0196.wav", "txt": "从您刚才的介绍中我们了解到"} -{"key": "BAC009S0770W0197", "wav": "./aishell/wav/test/S0770/BAC009S0770W0197.wav", "txt": "城投债劵对公司城市基础设施和市政的建设"} -{"key": "BAC009S0770W0198", "wav": "./aishell/wav/test/S0770/BAC009S0770W0198.wav", "txt": "起到了非常积极的作用"} -{"key": "BAC009S0770W0199", "wav": "./aishell/wav/test/S0770/BAC009S0770W0199.wav", "txt": "对丰富债劵市场品种也具有积极意义"} -{"key": "BAC009S0770W0200", "wav": "./aishell/wav/test/S0770/BAC009S0770W0200.wav", "txt": "结合地方政府债务管理制度的完善"} -{"key": "BAC009S0770W0201", "wav": "./aishell/wav/test/S0770/BAC009S0770W0201.wav", "txt": "下一步我国的城投债劵还需要做哪些完善"} -{"key": "BAC009S0770W0202", "wav": "./aishell/wav/test/S0770/BAC009S0770W0202.wav", "txt": "这个问题涉及到一系列的制度完善"} -{"key": "BAC009S0770W0203", "wav": "./aishell/wav/test/S0770/BAC009S0770W0203.wav", "txt": "是一个比较复杂的问题"} -{"key": "BAC009S0770W0204", "wav": "./aishell/wav/test/S0770/BAC009S0770W0204.wav", "txt": "我个人是这么认识的"} -{"key": "BAC009S0770W0205", "wav": "./aishell/wav/test/S0770/BAC009S0770W0205.wav", "txt": "我国还处于城市化快速发展期"} -{"key": "BAC009S0770W0206", "wav": "./aishell/wav/test/S0770/BAC009S0770W0206.wav", "txt": "需要为各地的城市建设提供规范的融资渠道"} -{"key": "BAC009S0770W0207", "wav": "./aishell/wav/test/S0770/BAC009S0770W0207.wav", "txt": "农业与非农产业之间劳动生产率的差距也很大"} -{"key": "BAC009S0770W0208", "wav": "./aishell/wav/test/S0770/BAC009S0770W0208.wav", "txt": "这决定了我国城市化动力十分强劲"} -{"key": "BAC009S0770W0209", "wav": "./aishell/wav/test/S0770/BAC009S0770W0209.wav", "txt": "城市化进程远未结束"} -{"key": "BAC009S0770W0210", "wav": "./aishell/wav/test/S0770/BAC009S0770W0210.wav", "txt": "城市化快速发展期的重要特征就是基础设施投资需求量大"} -{"key": "BAC009S0770W0211", "wav": "./aishell/wav/test/S0770/BAC009S0770W0211.wav", "txt": "这是我国所处的发展阶段决定的"} -{"key": "BAC009S0770W0212", "wav": "./aishell/wav/test/S0770/BAC009S0770W0212.wav", "txt": "政府通过债务融资从事基础建设"} -{"key": "BAC009S0770W0213", "wav": "./aishell/wav/test/S0770/BAC009S0770W0213.wav", "txt": "我们应该建设可控的规范化的地方政府融资机制"} -{"key": "BAC009S0770W0214", "wav": "./aishell/wav/test/S0770/BAC009S0770W0214.wav", "txt": "为各地的基础建设设提供有制度保障的融资渠道"} -{"key": "BAC009S0770W0215", "wav": "./aishell/wav/test/S0770/BAC009S0770W0215.wav", "txt": "城投债劵作为准市政债劵仍将是有效的融资工具"} -{"key": "BAC009S0770W0216", "wav": "./aishell/wav/test/S0770/BAC009S0770W0216.wav", "txt": "但是还需要进一步改进"} -{"key": "BAC009S0770W0217", "wav": "./aishell/wav/test/S0770/BAC009S0770W0217.wav", "txt": "在政府投融资体制改革过程中"} -{"key": "BAC009S0770W0218", "wav": "./aishell/wav/test/S0770/BAC009S0770W0218.wav", "txt": "从事当地基础建设"} -{"key": "BAC009S0770W0219", "wav": "./aishell/wav/test/S0770/BAC009S0770W0219.wav", "txt": "相当于过去体制而言是更加市场化的"} -{"key": "BAC009S0770W0220", "wav": "./aishell/wav/test/S0770/BAC009S0770W0220.wav", "txt": "城投债劵作为融资平台公司最透明的直接融资工具"} -{"key": "BAC009S0770W0221", "wav": "./aishell/wav/test/S0770/BAC009S0770W0221.wav", "txt": "仍然存在并具有发展空间"} -{"key": "BAC009S0770W0222", "wav": "./aishell/wav/test/S0770/BAC009S0770W0222.wav", "txt": "由于目前城投债劵的发行需要符合企业的债劵发行的条件"} -{"key": "BAC009S0770W0223", "wav": "./aishell/wav/test/S0770/BAC009S0770W0223.wav", "txt": "这使得我国城投债劵的发行利率相对偏高"} -{"key": "BAC009S0770W0224", "wav": "./aishell/wav/test/S0770/BAC009S0770W0224.wav", "txt": "城投债劵的发行期限和利率"} -{"key": "BAC009S0770W0225", "wav": "./aishell/wav/test/S0770/BAC009S0770W0225.wav", "txt": "未来应该在制度上进一步完善"} -{"key": "BAC009S0770W0226", "wav": "./aishell/wav/test/S0770/BAC009S0770W0226.wav", "txt": "使得城投公司能够发行真正意义上的长期市政债劵"} -{"key": "BAC009S0770W0227", "wav": "./aishell/wav/test/S0770/BAC009S0770W0227.wav", "txt": "要尽快建立我国的地方政府债务管理体系"} -{"key": 
"BAC009S0770W0228", "wav": "./aishell/wav/test/S0770/BAC009S0770W0228.wav", "txt": "对于如何建立规范的地方政府融资渠道"} -{"key": "BAC009S0770W0229", "wav": "./aishell/wav/test/S0770/BAC009S0770W0229.wav", "txt": "加强地方政府债务管理和风险防控"} -{"key": "BAC009S0770W0230", "wav": "./aishell/wav/test/S0770/BAC009S0770W0230.wav", "txt": "一些专家学者提出了许多好的建议"} -{"key": "BAC009S0770W0231", "wav": "./aishell/wav/test/S0770/BAC009S0770W0231.wav", "txt": "如建立规范透明的地方政府融资渠道"} -{"key": "BAC009S0770W0232", "wav": "./aishell/wav/test/S0770/BAC009S0770W0232.wav", "txt": "并对地方政府债务进行监控和风险防范等"} -{"key": "BAC009S0770W0233", "wav": "./aishell/wav/test/S0770/BAC009S0770W0233.wav", "txt": "由于我国还没有建立统一的地方政府债务风险管理制度"} -{"key": "BAC009S0770W0234", "wav": "./aishell/wav/test/S0770/BAC009S0770W0234.wav", "txt": "设定政府性债务风险控制指标和标准"} -{"key": "BAC009S0770W0235", "wav": "./aishell/wav/test/S0770/BAC009S0770W0235.wav", "txt": "并对政府性债务进行馀额管理"} -{"key": "BAC009S0770W0236", "wav": "./aishell/wav/test/S0770/BAC009S0770W0236.wav", "txt": "使用地方政府的债务融资规模控制在安全范围内"} -{"key": "BAC009S0770W0237", "wav": "./aishell/wav/test/S0770/BAC009S0770W0237.wav", "txt": "远低于发生债务危机的欧美国家"} -{"key": "BAC009S0770W0238", "wav": "./aishell/wav/test/S0770/BAC009S0770W0238.wav", "txt": "债劵发行人是优质的"} -{"key": "BAC009S0770W0239", "wav": "./aishell/wav/test/S0770/BAC009S0770W0239.wav", "txt": "还本付息也是正常的"} -{"key": "BAC009S0770W0240", "wav": "./aishell/wav/test/S0770/BAC009S0770W0240.wav", "txt": "应该建立风险可控的规范化地方政府融资机制"} -{"key": "BAC009S0770W0241", "wav": "./aishell/wav/test/S0770/BAC009S0770W0241.wav", "txt": "为各地的基础建设提供有力的保障的融资渠道"} -{"key": "BAC009S0770W0242", "wav": "./aishell/wav/test/S0770/BAC009S0770W0242.wav", "txt": "责任编辑廖一宁"} -{"key": "BAC009S0770W0243", "wav": "./aishell/wav/test/S0770/BAC009S0770W0243.wav", "txt": "该政策将于二零一二年施行"} -{"key": "BAC009S0770W0244", "wav": "./aishell/wav/test/S0770/BAC009S0770W0244.wav", "txt": "要继续深化天然气价格改革"} -{"key": "BAC009S0770W0245", "wav": "./aishell/wav/test/S0770/BAC009S0770W0245.wav", "txt": "加快理顺天然气价格与可代替能源的比价关系"} -{"key": "BAC009S0770W0246", "wav": "./aishell/wav/test/S0770/BAC009S0770W0246.wav", "txt": "引导天然气合理消费"} -{"key": "BAC009S0770W0247", "wav": "./aishell/wav/test/S0770/BAC009S0770W0247.wav", "txt": "提高天然气利用率支持天然气贸易机制创新"} -{"key": "BAC009S0770W0248", "wav": "./aishell/wav/test/S0770/BAC009S0770W0248.wav", "txt": "天然气用户为优先允许限制类和禁止类"} -{"key": "BAC009S0770W0249", "wav": "./aishell/wav/test/S0770/BAC009S0770W0249.wav", "txt": "限制类主要是指天然化工"} -{"key": "BAC009S0770W0250", "wav": "./aishell/wav/test/S0770/BAC009S0770W0250.wav", "txt": "各地要按照天然气利用优先顺序加强需求侧管理"} -{"key": "BAC009S0770W0251", "wav": "./aishell/wav/test/S0770/BAC009S0770W0251.wav", "txt": "鼓励优先类支持允许类天然气利用项目发展"} -{"key": "BAC009S0770W0252", "wav": "./aishell/wav/test/S0770/BAC009S0770W0252.wav", "txt": "对限制类项目的核准和审核要从严把握"} -{"key": "BAC009S0770W0253", "wav": "./aishell/wav/test/S0770/BAC009S0770W0253.wav", "txt": "商议向印度转移更多军事技术的事宜"} -{"key": "BAC009S0770W0254", "wav": "./aishell/wav/test/S0770/BAC009S0770W0254.wav", "txt": "据新华社电印度官员透露"} -{"key": "BAC009S0770W0255", "wav": "./aishell/wav/test/S0770/BAC009S0770W0255.wav", "txt": "美国将向印度转让两项军事技术"} -{"key": "BAC009S0770W0256", "wav": "./aishell/wav/test/S0770/BAC009S0770W0256.wav", "txt": "其中包括美国大鸦无人机今后将由印度工厂制造"} -{"key": "BAC009S0770W0257", "wav": "./aishell/wav/test/S0770/BAC009S0770W0257.wav", "txt": "印度斯坦时报对二十四日援引消息人士的话报道"} -{"key": "BAC009S0770W0258", "wav": "./aishell/wav/test/S0770/BAC009S0770W0258.wav", "txt": "二零一五年最适宜供职的公司仍在科技领域"} -{"key": "BAC009S0770W0259", "wav": "./aishell/wav/test/S0770/BAC009S0770W0259.wav", "txt": 
"该网站根据雇员的反馈"} -{"key": "BAC009S0770W0260", "wav": "./aishell/wav/test/S0770/BAC009S0770W0260.wav", "txt": "给出了前五十名的公司排名"} -{"key": "BAC009S0770W0261", "wav": "./aishell/wav/test/S0770/BAC009S0770W0261.wav", "txt": "排名前十的科技公司"} -{"key": "BAC009S0770W0263", "wav": "./aishell/wav/test/S0770/BAC009S0770W0263.wav", "txt": "不仅在科技公司领域排名第一"} -{"key": "BAC009S0770W0264", "wav": "./aishell/wav/test/S0770/BAC009S0770W0264.wav", "txt": "而且在整个榜单也位居首位"} -{"key": "BAC009S0770W0265", "wav": "./aishell/wav/test/S0770/BAC009S0770W0265.wav", "txt": "谷歌不仅会以优厚薪酬招募顶尖人才"} -{"key": "BAC009S0770W0267", "wav": "./aishell/wav/test/S0770/BAC009S0770W0267.wav", "txt": "该应用交付网络在整个榜单中位居第四"} -{"key": "BAC009S0770W0268", "wav": "./aishell/wav/test/S0770/BAC009S0770W0268.wav", "txt": "在科技领域排名第二"} -{"key": "BAC009S0770W0270", "wav": "./aishell/wav/test/S0770/BAC009S0770W0270.wav", "txt": "这家社交网络巨头对待员工也是相当慷慨"} -{"key": "BAC009S0770W0271", "wav": "./aishell/wav/test/S0770/BAC009S0770W0271.wav", "txt": "谷歌的福利待遇他家基本都有"} -{"key": "BAC009S0770W0272", "wav": "./aishell/wav/test/S0770/BAC009S0770W0272.wav", "txt": "之前刚刚提出为女性员工提供冷冻卵子费用"} -{"key": "BAC009S0770W0274", "wav": "./aishell/wav/test/S0770/BAC009S0770W0274.wav", "txt": "去年高通被评为最佳实习科技公司"} -{"key": "BAC009S0770W0276", "wav": "./aishell/wav/test/S0770/BAC009S0770W0276.wav", "txt": "对于苹果公司来说这是很关键的一年"} -{"key": "BAC009S0770W0279", "wav": "./aishell/wav/test/S0770/BAC009S0770W0279.wav", "txt": "都是该公司的强心剂"} -{"key": "BAC009S0770W0280", "wav": "./aishell/wav/test/S0770/BAC009S0770W0280.wav", "txt": "雇员们也在很大程度上受到了鼓舞"} -{"key": "BAC009S0770W0282", "wav": "./aishell/wav/test/S0770/BAC009S0770W0282.wav", "txt": "作为全球最大的职业社交网站"} -{"key": "BAC009S0770W0283", "wav": "./aishell/wav/test/S0770/BAC009S0770W0283.wav", "txt": "领英在榜单上的成绩也是相当不错的"} -{"key": "BAC009S0770W0284", "wav": "./aishell/wav/test/S0770/BAC009S0770W0284.wav", "txt": "提供免费房地产估价服务的网站"} -{"key": "BAC009S0770W0285", "wav": "./aishell/wav/test/S0770/BAC009S0770W0285.wav", "txt": "在美国一上线就造成大轰动"} -{"key": "BAC009S0770W0290", "wav": "./aishell/wav/test/S0770/BAC009S0770W0290.wav", "txt": "且把服务范围特别局限在医疗健康领域"} -{"key": "BAC009S0770W0291", "wav": "./aishell/wav/test/S0770/BAC009S0770W0291.wav", "txt": "搜狐消息外媒消息"} -{"key": "BAC009S0770W0292", "wav": "./aishell/wav/test/S0770/BAC009S0770W0292.wav", "txt": "二零一五年最适宜供职的公司仍在科技领域"} -{"key": "BAC009S0770W0293", "wav": "./aishell/wav/test/S0770/BAC009S0770W0293.wav", "txt": "该网站根据雇员的反馈"} -{"key": "BAC009S0770W0297", "wav": "./aishell/wav/test/S0770/BAC009S0770W0297.wav", "txt": "排名从第十三位上升至第十一位"} -{"key": "BAC009S0770W0298", "wav": "./aishell/wav/test/S0770/BAC009S0770W0298.wav", "txt": "高通二零一四年所获专利也增长了百分之二十三"} -{"key": "BAC009S0770W0299", "wav": "./aishell/wav/test/S0770/BAC009S0770W0299.wav", "txt": "排名从第九升至第七"} -{"key": "BAC009S0770W0300", "wav": "./aishell/wav/test/S0770/BAC009S0770W0300.wav", "txt": "以上大多数专利都与计算软件及相关技术有关"} -{"key": "BAC009S0770W0304", "wav": "./aishell/wav/test/S0770/BAC009S0770W0304.wav", "txt": "加速推进中国服务器市场份额的第一目标"} -{"key": "BAC009S0770W0305", "wav": "./aishell/wav/test/S0770/BAC009S0770W0305.wav", "txt": "这是浪潮借助政策东风来做的营销手段"} -{"key": "BAC009S0770W0306", "wav": "./aishell/wav/test/S0770/BAC009S0770W0306.wav", "txt": "对于企业提高股价促成业务"} -{"key": "BAC009S0770W0307", "wav": "./aishell/wav/test/S0770/BAC009S0770W0307.wav", "txt": "某个银行的系统采购"} -{"key": "BAC009S0770W0308", "wav": "./aishell/wav/test/S0770/BAC009S0770W0308.wav", "txt": "在确保系统顺利运行的情况下"} -{"key": "BAC009S0770W0309", "wav": "./aishell/wav/test/S0770/BAC009S0770W0309.wav", "txt": "大家可能因为国家政策扶持国产品牌的大势"} -{"key": 
"BAC009S0770W0310", "wav": "./aishell/wav/test/S0770/BAC009S0770W0310.wav", "txt": "而选择国产的服务器"} -{"key": "BAC009S0770W0311", "wav": "./aishell/wav/test/S0770/BAC009S0770W0311.wav", "txt": "就更加愿意长期持有他们的股票"} -{"key": "BAC009S0770W0312", "wav": "./aishell/wav/test/S0770/BAC009S0770W0312.wav", "txt": "核心技术待突破自棱镜门事件之后"} -{"key": "BAC009S0770W0313", "wav": "./aishell/wav/test/S0770/BAC009S0770W0313.wav", "txt": "国家信息安全的问题被推到了风口浪尖"} -{"key": "BAC009S0770W0314", "wav": "./aishell/wav/test/S0770/BAC009S0770W0314.wav", "txt": "而体现在服务器产业上"} -{"key": "BAC009S0770W0315", "wav": "./aishell/wav/test/S0770/BAC009S0770W0315.wav", "txt": "由于中国政府的大力扶持"} -{"key": "BAC009S0770W0316", "wav": "./aishell/wav/test/S0770/BAC009S0770W0316.wav", "txt": "国产服务器厂商迎来利好"} -{"key": "BAC009S0770W0317", "wav": "./aishell/wav/test/S0770/BAC009S0770W0317.wav", "txt": "在国内四大厂商浪潮华为联想曙光中"} -{"key": "BAC009S0770W0318", "wav": "./aishell/wav/test/S0770/BAC009S0770W0318.wav", "txt": "浪潮的特点在于定制化策略"} -{"key": "BAC009S0770W0319", "wav": "./aishell/wav/test/S0770/BAC009S0770W0319.wav", "txt": "与互联网企业深度合作"} -{"key": "BAC009S0770W0320", "wav": "./aishell/wav/test/S0770/BAC009S0770W0320.wav", "txt": "而这种策略带来的结果是市场份额的快速提升"} -{"key": "BAC009S0770W0322", "wav": "./aishell/wav/test/S0770/BAC009S0770W0322.wav", "txt": "至于像整机柜这类深度定制化的细分市场"} -{"key": "BAC009S0770W0323", "wav": "./aishell/wav/test/S0770/BAC009S0770W0323.wav", "txt": "百分之百为国产品牌"} -{"key": "BAC009S0770W0324", "wav": "./aishell/wav/test/S0770/BAC009S0770W0324.wav", "txt": "其中浪潮达到了百分之六十的市场占有率"} -{"key": "BAC009S0770W0325", "wav": "./aishell/wav/test/S0770/BAC009S0770W0325.wav", "txt": "近年来随着国内互联网企业的快速发展"} -{"key": "BAC009S0770W0326", "wav": "./aishell/wav/test/S0770/BAC009S0770W0326.wav", "txt": "宽带和服务器的采购量也水涨船高"} -{"key": "BAC009S0770W0327", "wav": "./aishell/wav/test/S0770/BAC009S0770W0327.wav", "txt": "由于各家之间竞争激烈"} -{"key": "BAC009S0770W0328", "wav": "./aishell/wav/test/S0770/BAC009S0770W0328.wav", "txt": "往往在采购过程中尽量压低报价"} -{"key": "BAC009S0770W0329", "wav": "./aishell/wav/test/S0770/BAC009S0770W0329.wav", "txt": "再加上互联网企业对服务器技术性可能等要求很高"} -{"key": "BAC009S0770W0330", "wav": "./aishell/wav/test/S0770/BAC009S0770W0330.wav", "txt": "很多服务器厂商进入做一两年"} -{"key": "BAC009S0770W0331", "wav": "./aishell/wav/test/S0770/BAC009S0770W0331.wav", "txt": "而浪潮从二零一零坚持做到现在"} -{"key": "BAC009S0770W0332", "wav": "./aishell/wav/test/S0770/BAC009S0770W0332.wav", "txt": "业内对其做法的解读是先凭着低价杀入市场"} -{"key": "BAC009S0770W0333", "wav": "./aishell/wav/test/S0770/BAC009S0770W0333.wav", "txt": "以品质和服务黏住用户"} -{"key": "BAC009S0770W0334", "wav": "./aishell/wav/test/S0770/BAC009S0770W0334.wav", "txt": "虽然面对赔钱赚吆喝的质疑"} -{"key": "BAC009S0770W0335", "wav": "./aishell/wav/test/S0770/BAC009S0770W0335.wav", "txt": "浪潮与海关总署启动战略合作助推智慧海关搜狐科技"} -{"key": "BAC009S0770W0336", "wav": "./aishell/wav/test/S0770/BAC009S0770W0336.wav", "txt": "浪潮集团与海关总署启动战略合作"} -{"key": "BAC009S0770W0337", "wav": "./aishell/wav/test/S0770/BAC009S0770W0337.wav", "txt": "合作范围遍及全国各直属海关及隶属海关"} -{"key": "BAC009S0770W0338", "wav": "./aishell/wav/test/S0770/BAC009S0770W0338.wav", "txt": "对于我而言现在已经成为了一种习惯与本能"} -{"key": "BAC009S0770W0339", "wav": "./aishell/wav/test/S0770/BAC009S0770W0339.wav", "txt": "有着较为丰富志愿服务经历的九零后吴雯的话"} -{"key": "BAC009S0770W0340", "wav": "./aishell/wav/test/S0770/BAC009S0770W0340.wav", "txt": "只是本次田径世锦志愿者这个大群体的一个缩影"} -{"key": "BAC009S0770W0341", "wav": "./aishell/wav/test/S0770/BAC009S0770W0341.wav", "txt": "他们有理由相信九零后同样可以做好"} -{"key": "BAC009S0770W0342", "wav": "./aishell/wav/test/S0770/BAC009S0770W0342.wav", "txt": "我希望能通过这次田径世锦赛"} -{"key": 
"BAC009S0770W0343", "wav": "./aishell/wav/test/S0770/BAC009S0770W0343.wav", "txt": "以及未来几年更多志愿经历"} -{"key": "BAC009S0770W0344", "wav": "./aishell/wav/test/S0770/BAC009S0770W0344.wav", "txt": "来为二零二二年的冬奥会积累经验"} -{"key": "BAC009S0770W0345", "wav": "./aishell/wav/test/S0770/BAC009S0770W0345.wav", "txt": "到时将会成为冬奥会志愿者的主力"} -{"key": "BAC009S0770W0346", "wav": "./aishell/wav/test/S0770/BAC009S0770W0346.wav", "txt": "张锦麟将为自己称为鸟巢新一代志愿者"} -{"key": "BAC009S0770W0347", "wav": "./aishell/wav/test/S0770/BAC009S0770W0347.wav", "txt": "他在为此时刻准备着"} -{"key": "BAC009S0770W0348", "wav": "./aishell/wav/test/S0770/BAC009S0770W0348.wav", "txt": "本报记者宋翔王薇"} -{"key": "BAC009S0770W0349", "wav": "./aishell/wav/test/S0770/BAC009S0770W0349.wav", "txt": "著名双人滑运动员庞清和董健虽未正式宣布退役"} -{"key": "BAC009S0770W0350", "wav": "./aishell/wav/test/S0770/BAC009S0770W0350.wav", "txt": "但现在的生活已经进入了准退役状态"} -{"key": "BAC009S0770W0351", "wav": "./aishell/wav/test/S0770/BAC009S0770W0351.wav", "txt": "两人把更多的精力放到了花滑运动的推广上"} -{"key": "BAC009S0770W0352", "wav": "./aishell/wav/test/S0770/BAC009S0770W0352.wav", "txt": "他俩组建了工作团队"} -{"key": "BAC009S0770W0353", "wav": "./aishell/wav/test/S0770/BAC009S0770W0353.wav", "txt": "过上了比运动员复杂得多的生活"} -{"key": "BAC009S0770W0355", "wav": "./aishell/wav/test/S0770/BAC009S0770W0355.wav", "txt": "九月初顺利通过了考试"} -{"key": "BAC009S0770W0356", "wav": "./aishell/wav/test/S0770/BAC009S0770W0356.wav", "txt": "佟健已经完成了第一个学模块的学习"} -{"key": "BAC009S0770W0357", "wav": "./aishell/wav/test/S0770/BAC009S0770W0357.wav", "txt": "常年的专业训练给身体带来了各种伤病"} -{"key": "BAC009S0770W0358", "wav": "./aishell/wav/test/S0770/BAC009S0770W0358.wav", "txt": "二零一四年索契冬奥会上"} -{"key": "BAC009S0770W0359", "wav": "./aishell/wav/test/S0770/BAC009S0770W0359.wav", "txt": "早到了退役年龄的庞清和佟健克服了伤病困难"} -{"key": "BAC009S0770W0360", "wav": "./aishell/wav/test/S0770/BAC009S0770W0360.wav", "txt": "但这对老将却以追梦无悔的精神"} -{"key": "BAC009S0770W0361", "wav": "./aishell/wav/test/S0770/BAC009S0770W0361.wav", "txt": "赢得了同行媒体和观众的敬意"} -{"key": "BAC009S0770W0362", "wav": "./aishell/wav/test/S0770/BAC009S0770W0362.wav", "txt": "庞清和佟健没有马上退役"} -{"key": "BAC009S0770W0363", "wav": "./aishell/wav/test/S0770/BAC009S0770W0363.wav", "txt": "而是坚持参加了今年三月的世界花滑锦标赛"} -{"key": "BAC009S0770W0364", "wav": "./aishell/wav/test/S0770/BAC009S0770W0364.wav", "txt": "一方面是他们从事花样滑冰二零多年"} -{"key": "BAC009S0770W0365", "wav": "./aishell/wav/test/S0770/BAC009S0770W0365.wav", "txt": "与这项运动结下深厚感情"} -{"key": "BAC009S0770W0366", "wav": "./aishell/wav/test/S0770/BAC009S0770W0366.wav", "txt": "始终对那块冰面恋恋不舍"} -{"key": "BAC009S0770W0367", "wav": "./aishell/wav/test/S0770/BAC009S0770W0367.wav", "txt": "也是中国双人滑在申雪赵宏退役后"} -{"key": "BAC009S0770W0368", "wav": "./aishell/wav/test/S0770/BAC009S0770W0368.wav", "txt": "庞清和佟健仍肩负着扛起中国双人滑大旗的重任"} -{"key": "BAC009S0770W0369", "wav": "./aishell/wav/test/S0770/BAC009S0770W0369.wav", "txt": "这让他们的退役迟迟没有提上日程"} -{"key": "BAC009S0770W0370", "wav": "./aishell/wav/test/S0770/BAC009S0770W0370.wav", "txt": "中国双人滑项目的后续发展应当有了较为清晰的前景"} -{"key": "BAC009S0770W0371", "wav": "./aishell/wav/test/S0770/BAC009S0770W0371.wav", "txt": "庞清和佟健终于可以放心地考虑退役的事情了"} -{"key": "BAC009S0770W0372", "wav": "./aishell/wav/test/S0770/BAC009S0770W0372.wav", "txt": "受大学生的提议启发"} -{"key": "BAC009S0770W0373", "wav": "./aishell/wav/test/S0770/BAC009S0770W0373.wav", "txt": "该公众号已经举办了两期公益活动"} -{"key": "BAC009S0770W0374", "wav": "./aishell/wav/test/S0770/BAC009S0770W0374.wav", "txt": "佟健又将国内部分优秀的单人滑和冰舞运动员集合起来"} -{"key": "BAC009S0770W0375", "wav": "./aishell/wav/test/S0770/BAC009S0770W0375.wav", "txt": "组建了花滑表演团队"} -{"key": 
"BAC009S0770W0376", "wav": "./aishell/wav/test/S0770/BAC009S0770W0376.wav", "txt": "与商业性冰场达成合作协议"} -{"key": "BAC009S0770W0377", "wav": "./aishell/wav/test/S0770/BAC009S0770W0377.wav", "txt": "以表演的方式推广花样滑冰"} -{"key": "BAC009S0770W0378", "wav": "./aishell/wav/test/S0770/BAC009S0770W0378.wav", "txt": "现成的选择就在面前"} -{"key": "BAC009S0770W0379", "wav": "./aishell/wav/test/S0770/BAC009S0770W0379.wav", "txt": "或进入体育行政机关"} -{"key": "BAC009S0770W0380", "wav": "./aishell/wav/test/S0770/BAC009S0770W0380.wav", "txt": "这些出路也是中国运动员比较常见的退役选择"} -{"key": "BAC009S0770W0381", "wav": "./aishell/wav/test/S0770/BAC009S0770W0381.wav", "txt": "但庞清和佟健并不愿意随遇而安地安排自己的后半生"} -{"key": "BAC009S0770W0382", "wav": "./aishell/wav/test/S0770/BAC009S0770W0382.wav", "txt": "自己和庞清曾在赛场上努力地追求优秀更优秀"} -{"key": "BAC009S0770W0383", "wav": "./aishell/wav/test/S0770/BAC009S0770W0383.wav", "txt": "他们对退役后的人生同样也有追求"} -{"key": "BAC009S0770W0384", "wav": "./aishell/wav/test/S0770/BAC009S0770W0384.wav", "txt": "佟健给自己定下了要做就做到最好"} -{"key": "BAC009S0770W0385", "wav": "./aishell/wav/test/S0770/BAC009S0770W0385.wav", "txt": "和绝不会是短期行为的基调"} -{"key": "BAC009S0770W0386", "wav": "./aishell/wav/test/S0770/BAC009S0770W0386.wav", "txt": "对于工作中遇到的管理经验和能力欠缺问题"} -{"key": "BAC009S0770W0387", "wav": "./aishell/wav/test/S0770/BAC009S0770W0387.wav", "txt": "佟健的解决办法就只能努力提高自己"} -{"key": "BAC009S0770W0388", "wav": "./aishell/wav/test/S0770/BAC009S0770W0388.wav", "txt": "佟健报考了北大光华管理学院"} -{"key": "BAC009S0770W0389", "wav": "./aishell/wav/test/S0770/BAC009S0770W0389.wav", "txt": "在九月初参加考试时"} -{"key": "BAC009S0770W0390", "wav": "./aishell/wav/test/S0770/BAC009S0770W0390.wav", "txt": "佟健做好了考不上的思想准备"} -{"key": "BAC009S0770W0391", "wav": "./aishell/wav/test/S0770/BAC009S0770W0391.wav", "txt": "佟健因此顺利通过了入学考试"} -{"key": "BAC009S0770W0392", "wav": "./aishell/wav/test/S0770/BAC009S0770W0392.wav", "txt": "佟健是同班同学里唯一运动员出身的"} -{"key": "BAC009S0770W0393", "wav": "./aishell/wav/test/S0770/BAC009S0770W0393.wav", "txt": "记者查阅相关资料发现"} -{"key": "BAC009S0770W0395", "wav": "./aishell/wav/test/S0770/BAC009S0770W0395.wav", "txt": "来自体育圈的并不多见"} -{"key": "BAC009S0770W0396", "wav": "./aishell/wav/test/S0770/BAC009S0770W0396.wav", "txt": "只有姚明和刘国梁等少数几个人"} -{"key": "BAC009S0770W0397", "wav": "./aishell/wav/test/S0770/BAC009S0770W0397.wav", "txt": "佟健希望自己能真的学到管理知识"} -{"key": "BAC009S0770W0398", "wav": "./aishell/wav/test/S0770/BAC009S0770W0398.wav", "txt": "管理知识肯定都是用的上的"} -{"key": "BAC009S0770W0399", "wav": "./aishell/wav/test/S0770/BAC009S0770W0399.wav", "txt": "至于中国花滑运动的推广"} -{"key": "BAC009S0770W0400", "wav": "./aishell/wav/test/S0770/BAC009S0770W0400.wav", "txt": "佟健更希望能有实实在在的发展"} -{"key": "BAC009S0770W0401", "wav": "./aishell/wav/test/S0770/BAC009S0770W0401.wav", "txt": "这同样需要有效的办法和手段"} -{"key": "BAC009S0770W0402", "wav": "./aishell/wav/test/S0770/BAC009S0770W0402.wav", "txt": "借着北京将要举办二零二二年冬奥会的东风"} -{"key": "BAC009S0770W0403", "wav": "./aishell/wav/test/S0770/BAC009S0770W0403.wav", "txt": "冰雪运动在中国势必会有一次发展高潮"} -{"key": "BAC009S0770W0404", "wav": "./aishell/wav/test/S0770/BAC009S0770W0404.wav", "txt": "很可能被雪藏下去"} -{"key": "BAC009S0770W0405", "wav": "./aishell/wav/test/S0770/BAC009S0770W0405.wav", "txt": "搜狐娱乐赛文耷子备受关注重拍版乌鸦"} -{"key": "BAC009S0770W0406", "wav": "./aishell/wav/test/S0770/BAC009S0770W0406.wav", "txt": "在原定男主角卢克伊万斯退出剧组之后"} -{"key": "BAC009S0770W0407", "wav": "./aishell/wav/test/S0770/BAC009S0770W0407.wav", "txt": "将双双加盟该片"} -{"key": "BAC009S0770W0408", "wav": "./aishell/wav/test/S0770/BAC009S0770W0408.wav", "txt": "搜狐娱乐据香港媒体报道"} -{"key": "BAC009S0770W0409", "wav": 
"./aishell/wav/test/S0770/BAC009S0770W0409.wav", "txt": "谢婷婷九月七日三十三岁生日"} -{"key": "BAC009S0770W0410", "wav": "./aishell/wav/test/S0770/BAC009S0770W0410.wav", "txt": "网友纷纷留言祝谢婷婷生日快乐"} -{"key": "BAC009S0770W0411", "wav": "./aishell/wav/test/S0770/BAC009S0770W0411.wav", "txt": "还拉赞姑还是那么漂亮"} -{"key": "BAC009S0770W0412", "wav": "./aishell/wav/test/S0770/BAC009S0770W0412.wav", "txt": "搜狐娱乐讯北京时间八月十二日消息"} -{"key": "BAC009S0770W0413", "wav": "./aishell/wav/test/S0770/BAC009S0770W0413.wav", "txt": "据香港媒体报道"} -{"key": "BAC009S0770W0414", "wav": "./aishell/wav/test/S0770/BAC009S0770W0414.wav", "txt": "谢贤昨天庆祝七十九岁生日"} -{"key": "BAC009S0770W0415", "wav": "./aishell/wav/test/S0770/BAC009S0770W0415.wav", "txt": "相约家人到谢霆锋家中上演十二道锋味私房菜"} -{"key": "BAC009S0770W0416", "wav": "./aishell/wav/test/S0770/BAC009S0770W0416.wav", "txt": "由于谢霆锋亲为家人做大厨"} -{"key": "BAC009S0770W0417", "wav": "./aishell/wav/test/S0770/BAC009S0770W0417.wav", "txt": "同场更有两个神秘嘉宾"} -{"key": "BAC009S0770W0419", "wav": "./aishell/wav/test/S0770/BAC009S0770W0419.wav", "txt": "搜狐娱乐讯据香港媒体报道"} -{"key": "BAC009S0770W0420", "wav": "./aishell/wav/test/S0770/BAC009S0770W0420.wav", "txt": "谢贤怒打曾江"} -{"key": "BAC009S0770W0421", "wav": "./aishell/wav/test/S0770/BAC009S0770W0421.wav", "txt": "究竟是演戏还是积怨已深"} -{"key": "BAC009S0770W0422", "wav": "./aishell/wav/test/S0770/BAC009S0770W0422.wav", "txt": "只有他们才知道"} -{"key": "BAC009S0770W0423", "wav": "./aishell/wav/test/S0770/BAC009S0770W0423.wav", "txt": "有不少幕后花絮片花"} -{"key": "BAC009S0770W0424", "wav": "./aishell/wav/test/S0770/BAC009S0770W0424.wav", "txt": "节目推出至今收视很高"} -{"key": "BAC009S0770W0425", "wav": "./aishell/wav/test/S0770/BAC009S0770W0425.wav", "txt": "下星期更进入结局周"} -{"key": "BAC009S0770W0426", "wav": "./aishell/wav/test/S0770/BAC009S0770W0426.wav", "txt": "曾江谢贤四哥及胡枫修哥大谈往日情时"} -{"key": "BAC009S0770W0427", "wav": "./aishell/wav/test/S0770/BAC009S0770W0427.wav", "txt": "曾江当时说我和谢贤相识多年"} -{"key": "BAC009S0770W0428", "wav": "./aishell/wav/test/S0770/BAC009S0770W0428.wav", "txt": "也没有发生什么冲突"} -{"key": "BAC009S0770W0429", "wav": "./aishell/wav/test/S0770/BAC009S0770W0429.wav", "txt": "不好的事情发生"} -{"key": "BAC009S0770W0430", "wav": "./aishell/wav/test/S0770/BAC009S0770W0430.wav", "txt": "怎料无心说话却一语成谶"} -{"key": "BAC009S0770W0431", "wav": "./aishell/wav/test/S0770/BAC009S0770W0431.wav", "txt": "经兴安县人大常委会许可"} -{"key": "BAC009S0770W0432", "wav": "./aishell/wav/test/S0770/BAC009S0770W0432.wav", "txt": "杨爱明被兴安警方刑事拘留"} -{"key": "BAC009S0770W0433", "wav": "./aishell/wav/test/S0770/BAC009S0770W0433.wav", "txt": "二零一四九二"} -{"key": "BAC009S0770W0434", "wav": "./aishell/wav/test/S0770/BAC009S0770W0434.wav", "txt": "兴安县法院判杨爱明拘役四个月"} -{"key": "BAC009S0770W0435", "wav": "./aishell/wav/test/S0770/BAC009S0770W0435.wav", "txt": "杨爱明却参加了兴安县第十五届人大五次会议"} -{"key": "BAC009S0770W0436", "wav": "./aishell/wav/test/S0770/BAC009S0770W0436.wav", "txt": "村支书骗拆迁款一二万获刑一一年"} -{"key": "BAC009S0770W0437", "wav": "./aishell/wav/test/S0770/BAC009S0770W0437.wav", "txt": "骗取搬迁补偿金一二二万元"} -{"key": "BAC009S0770W0438", "wav": "./aishell/wav/test/S0770/BAC009S0770W0438.wav", "txt": "北京晨报记者昨天获悉"} -{"key": "BAC009S0770W0439", "wav": "./aishell/wav/test/S0770/BAC009S0770W0439.wav", "txt": "延庆法院一审以贪污罪判处钱某有期徒刑一一年"} -{"key": "BAC009S0770W0440", "wav": "./aishell/wav/test/S0770/BAC009S0770W0440.wav", "txt": "村支书村民被政府工作人员土埋系邻里纠纷"} -{"key": "BAC009S0770W0441", "wav": "./aishell/wav/test/S0770/BAC009S0770W0441.wav", "txt": "河南省新乡市封丘县留光镇政府东五零零米左右"} -{"key": "BAC009S0770W0442", "wav": "./aishell/wav/test/S0770/BAC009S0770W0442.wav", "txt": "当地村民孙秋英在自家门口因是否垫路与邻居产生争执"} 
-{"key": "BAC009S0770W0443", "wav": "./aishell/wav/test/S0770/BAC009S0770W0443.wav", "txt": "遭到镇政府工作人员用土掩埋"} -{"key": "BAC009S0770W0444", "wav": "./aishell/wav/test/S0770/BAC009S0770W0444.wav", "txt": "肇事方为镇政府安全保卫人员"} -{"key": "BAC009S0770W0445", "wav": "./aishell/wav/test/S0770/BAC009S0770W0445.wav", "txt": "所开拉土车辆是镇政府扣押车辆"} -{"key": "BAC009S0770W0446", "wav": "./aishell/wav/test/S0770/BAC009S0770W0446.wav", "txt": "村支委办公室猥亵女童被刑拘的孩子奶奶在隔壁开会"} -{"key": "BAC009S0770W0447", "wav": "./aishell/wav/test/S0770/BAC009S0770W0447.wav", "txt": "海峡都市报大白天"} -{"key": "BAC009S0770W0448", "wav": "./aishell/wav/test/S0770/BAC009S0770W0448.wav", "txt": "在村委会办公楼书记办公室"} -{"key": "BAC009S0770W0449", "wav": "./aishell/wav/test/S0770/BAC009S0770W0449.wav", "txt": "五一岁的村支委猥亵一名一零岁的留守儿童隔壁"} -{"key": "BAC009S0770W0450", "wav": "./aishell/wav/test/S0770/BAC009S0770W0450.wav", "txt": "孩子的奶奶正在参加村里的道路环境综合治理工作会议"} -{"key": "BAC009S0770W0451", "wav": "./aishell/wav/test/S0770/BAC009S0770W0451.wav", "txt": "这事发生在福建省漳州市诏安县林头村"} -{"key": "BAC009S0770W0452", "wav": "./aishell/wav/test/S0770/BAC009S0770W0452.wav", "txt": "该村支委李某因涉嫌猥亵儿童被警方传唤"} -{"key": "BAC009S0770W0453", "wav": "./aishell/wav/test/S0770/BAC009S0770W0453.wav", "txt": "村民一零年在沙洲植树造林已成林却被指种错地方"} -{"key": "BAC009S0770W0454", "wav": "./aishell/wav/test/S0770/BAC009S0770W0454.wav", "txt": "两个村子之间的长江江面上"} -{"key": "BAC009S0770W0455", "wav": "./aishell/wav/test/S0770/BAC009S0770W0455.wav", "txt": "有一块面积近五零零零面积的沙洲"} -{"key": "BAC009S0770W0456", "wav": "./aishell/wav/test/S0770/BAC009S0770W0456.wav", "txt": "沙洲几乎年年被淹"} -{"key": "BAC009S0770W0457", "wav": "./aishell/wav/test/S0770/BAC009S0770W0457.wav", "txt": "村民二六零棵梨树被连根推倒在地里住房被夷为平地"} -{"key": "BAC009S0770W0458", "wav": "./aishell/wav/test/S0770/BAC009S0770W0458.wav", "txt": "华商报讯记者张林实习生邓泽惠一夜之间"} -{"key": "BAC009S0770W0459", "wav": "./aishell/wav/test/S0770/BAC009S0770W0459.wav", "txt": "村民地里二六零馀棵正在挂果的梨树被连根推倒"} -{"key": "BAC009S0770W0460", "wav": "./aishell/wav/test/S0770/BAC009S0770W0460.wav", "txt": "地头边的一间平房也被夷为平地"} -{"key": "BAC009S0770W0461", "wav": "./aishell/wav/test/S0770/BAC009S0770W0461.wav", "txt": "至今未找到肇事者"} -{"key": "BAC009S0770W0462", "wav": "./aishell/wav/test/S0770/BAC009S0770W0462.wav", "txt": "村民不满地讨说法要求楼盘开发商停工被拘留"} -{"key": "BAC009S0770W0463", "wav": "./aishell/wav/test/S0770/BAC009S0770W0463.wav", "txt": "去年一二月四日在村民多次上访无果的情况下"} -{"key": "BAC009S0770W0464", "wav": "./aishell/wav/test/S0770/BAC009S0770W0464.wav", "txt": "大家到施工现场的临时大门外"} -{"key": "BAC009S0770W0465", "wav": "./aishell/wav/test/S0770/BAC009S0770W0465.wav", "txt": "尽管检察管最后以事事实不清"} -{"key": "BAC009S0770W0466", "wav": "./aishell/wav/test/S0770/BAC009S0770W0466.wav", "txt": "但在张关押了三七天后"} -{"key": "BAC009S0770W0467", "wav": "./aishell/wav/test/S0770/BAC009S0770W0467.wav", "txt": "公安局仍采取了取保候审的手段"} -{"key": "BAC009S0770W0468", "wav": "./aishell/wav/test/S0770/BAC009S0770W0468.wav", "txt": "没有发生任何肢体冲突"} -{"key": "BAC009S0770W0469", "wav": "./aishell/wav/test/S0770/BAC009S0770W0469.wav", "txt": "更没有扰乱社会秩序"} -{"key": "BAC009S0770W0470", "wav": "./aishell/wav/test/S0770/BAC009S0770W0470.wav", "txt": "村民不满行政批复诉市区政府区长庭应诉"} -{"key": "BAC009S0770W0471", "wav": "./aishell/wav/test/S0770/BAC009S0770W0471.wav", "txt": "门头沟雁翅镇村民李冬梅因不服行政批复"} -{"key": "BAC009S0770W0472", "wav": "./aishell/wav/test/S0770/BAC009S0770W0472.wav", "txt": "将市区两级政府告上法庭"} -{"key": "BAC009S0770W0473", "wav": "./aishell/wav/test/S0770/BAC009S0770W0473.wav", "txt": "门头沟区长张贵林出庭应诉"} -{"key": "BAC009S0770W0474", "wav": "./aishell/wav/test/S0770/BAC009S0770W0474.wav", "txt": 
"门头沟雁翅镇村民李冬梅向市政府提起了行政复议"} -{"key": "BAC009S0770W0475", "wav": "./aishell/wav/test/S0770/BAC009S0770W0475.wav", "txt": "复议维持了区政府的认定结论"} -{"key": "BAC009S0770W0476", "wav": "./aishell/wav/test/S0770/BAC009S0770W0476.wav", "txt": "村民为多拿补偿在拆迁前突击装修全用劣质建材"} -{"key": "BAC009S0770W0477", "wav": "./aishell/wav/test/S0770/BAC009S0770W0477.wav", "txt": "村里随处可见装潢小广告"} -{"key": "BAC009S0770W0478", "wav": "./aishell/wav/test/S0770/BAC009S0770W0478.wav", "txt": "村民为救坠井男童身亡被拉出时呈托举姿势"} -{"key": "BAC009S0770W0479", "wav": "./aishell/wav/test/S0770/BAC009S0770W0479.wav", "txt": "为了一名坠入废井的男童"} -{"key": "BAC009S0770W0480", "wav": "./aishell/wav/test/S0770/BAC009S0770W0480.wav", "txt": "邳州几名村民先后下井救人"} -{"key": "BAC009S0770W0481", "wav": "./aishell/wav/test/S0770/BAC009S0770W0481.wav", "txt": "第一个下井救人的大叔却再也没能爬上来"} -{"key": "BAC009S0770W0482", "wav": "./aishell/wav/test/S0770/BAC009S0770W0482.wav", "txt": "他的双手还保持着托举的姿势"} -{"key": "BAC009S0770W0483", "wav": "./aishell/wav/test/S0770/BAC009S0770W0483.wav", "txt": "他的义举感动了四里八乡"} -{"key": "BAC009S0770W0484", "wav": "./aishell/wav/test/S0770/BAC009S0770W0484.wav", "txt": "七月一三日的葬礼上"} -{"key": "BAC009S0770W0485", "wav": "./aishell/wav/test/S0770/BAC009S0770W0485.wav", "txt": "数百名乡邻自发赶来送他一程"} -{"key": "BAC009S0770W0486", "wav": "./aishell/wav/test/S0770/BAC009S0770W0486.wav", "txt": "实习生郭杨雪通讯员耿万志现代快报记者李伟豪"} -{"key": "BAC009S0770W0487", "wav": "./aishell/wav/test/S0770/BAC009S0770W0487.wav", "txt": "村民为解决问题给领导建庙官员其诉求不合规"} -{"key": "BAC009S0770W0488", "wav": "./aishell/wav/test/S0770/BAC009S0770W0488.wav", "txt": "其在村西旁花费万元建起一名叫清明堂的家庙"} -{"key": "BAC009S0770W0489", "wav": "./aishell/wav/test/S0770/BAC009S0770W0489.wav", "txt": "每天烧香敬拜办事处主任"} -{"key": "BAC009S0770W0490", "wav": "./aishell/wav/test/S0770/BAC009S0770W0490.wav", "txt": "该事件引发社会关注"} -{"key": "BAC009S0770W0491", "wav": "./aishell/wav/test/S0770/BAC009S0770W0491.wav", "txt": "以上两村民所要求的内容不符合相关规定"} -{"key": "BAC009S0770W0492", "wav": "./aishell/wav/test/S0770/BAC009S0770W0492.wav", "txt": "村民为阻止儿子与女友相见编造偷小孩谎言"} -{"key": "BAC009S0770W0493", "wav": "./aishell/wav/test/S0770/BAC009S0770W0493.wav", "txt": "涉嫌编造谣言非法拘禁被刑拘"} -{"key": "BAC009S0770W0494", "wav": "./aishell/wav/test/S0770/BAC009S0770W0494.wav", "txt": "村民举报县城干部建十馀栋别墅纪检委部门介入调查"} -{"key": "BAC009S0770W0495", "wav": "./aishell/wav/test/S0770/BAC009S0770W0495.wav", "txt": "村小组干部未经过小组集集体讨论"} -{"key": "BAC009S0901W0121", "wav": "./aishell/wav/test/S0901/BAC009S0901W0121.wav", "txt": "作为一线城市的北京"} -{"key": "BAC009S0901W0122", "wav": "./aishell/wav/test/S0901/BAC009S0901W0122.wav", "txt": "其市管国管住房公积金政策也均进行调整"} -{"key": "BAC009S0901W0123", "wav": "./aishell/wav/test/S0901/BAC009S0901W0123.wav", "txt": "公积金贷款最高额度由七万元提升至十万元"} -{"key": "BAC009S0901W0124", "wav": "./aishell/wav/test/S0901/BAC009S0901W0124.wav", "txt": "公积金政策调整方式各异对楼市影响几何"} -{"key": "BAC009S0901W0125", "wav": "./aishell/wav/test/S0901/BAC009S0901W0125.wav", "txt": "盘活各地公积金资源"} -{"key": "BAC009S0901W0126", "wav": "./aishell/wav/test/S0901/BAC009S0901W0126.wav", "txt": "以北京提高公积金贷款最高额度为例"} -{"key": "BAC009S0901W0127", "wav": "./aishell/wav/test/S0901/BAC009S0901W0127.wav", "txt": "据伟嘉安捷数据统计显示"} -{"key": "BAC009S0901W0128", "wav": "./aishell/wav/test/S0901/BAC009S0901W0128.wav", "txt": "该政策在七月份实施一周后"} -{"key": "BAC009S0901W0129", "wav": "./aishell/wav/test/S0901/BAC009S0901W0129.wav", "txt": "公积金贷款额度的提高"} -{"key": "BAC009S0901W0130", "wav": "./aishell/wav/test/S0901/BAC009S0901W0130.wav", "txt": "将使更多购房者具备买房支付能力"} -{"key": "BAC009S0901W0131", "wav": "./aishell/wav/test/S0901/BAC009S0901W0131.wav", "txt": 
"中原地产首席分析师张大伟认为"} -{"key": "BAC009S0901W0132", "wav": "./aishell/wav/test/S0901/BAC009S0901W0132.wav", "txt": "放宽提取住房公积金支付房租条件则对楼市影响甚微"} -{"key": "BAC009S0901W0133", "wav": "./aishell/wav/test/S0901/BAC009S0901W0133.wav", "txt": "对楼市也有较大影响"} -{"key": "BAC009S0901W0134", "wav": "./aishell/wav/test/S0901/BAC009S0901W0134.wav", "txt": "利用公积金可以减少租赁者负担"} -{"key": "BAC009S0901W0135", "wav": "./aishell/wav/test/S0901/BAC009S0901W0135.wav", "txt": "使其缓冲过度到买房阶段"} -{"key": "BAC009S0901W0136", "wav": "./aishell/wav/test/S0901/BAC009S0901W0136.wav", "txt": "对楼市消化库存起到正面作用"} -{"key": "BAC009S0901W0137", "wav": "./aishell/wav/test/S0901/BAC009S0901W0137.wav", "txt": "中新网房产频道每每"} -{"key": "BAC009S0901W0138", "wav": "./aishell/wav/test/S0901/BAC009S0901W0138.wav", "txt": "要求各地放宽公积金贷款条件后"} -{"key": "BAC009S0901W0139", "wav": "./aishell/wav/test/S0901/BAC009S0901W0139.wav", "txt": "美丽北京大型绿色公益品牌项目"} -{"key": "BAC009S0901W0140", "wav": "./aishell/wav/test/S0901/BAC009S0901W0140.wav", "txt": "住建部等三部委再次联合发"} -{"key": "BAC009S0901W0141", "wav": "./aishell/wav/test/S0901/BAC009S0901W0141.wav", "txt": "美丽北京大型绿色公益品牌项目"} -{"key": "BAC009S0901W0142", "wav": "./aishell/wav/test/S0901/BAC009S0901W0142.wav", "txt": "随着广州住房公积金贷款政策的调整实施"} -{"key": "BAC009S0901W0143", "wav": "./aishell/wav/test/S0901/BAC009S0901W0143.wav", "txt": "公积金贷款最高额度也不同程度上调"} -{"key": "BAC009S0901W0144", "wav": "./aishell/wav/test/S0901/BAC009S0901W0144.wav", "txt": "住房公积金贷款因其利率较低的优势"} -{"key": "BAC009S0901W0145", "wav": "./aishell/wav/test/S0901/BAC009S0901W0145.wav", "txt": "一直以来广受购房者青睐"} -{"key": "BAC009S0901W0146", "wav": "./aishell/wav/test/S0901/BAC009S0901W0146.wav", "txt": "本轮住房公积金房贷政策调整"} -{"key": "BAC009S0901W0147", "wav": "./aishell/wav/test/S0901/BAC009S0901W0147.wav", "txt": "进一步加速了消费者的入市节奏"} -{"key": "BAC009S0901W0148", "wav": "./aishell/wav/test/S0901/BAC009S0901W0148.wav", "txt": "广州调整住房公积金个人住房贷款政策"} -{"key": "BAC009S0901W0149", "wav": "./aishell/wav/test/S0901/BAC009S0901W0149.wav", "txt": "同时对申请公积金贷款的缴纳时限调整为五个月"} -{"key": "BAC009S0901W0150", "wav": "./aishell/wav/test/S0901/BAC009S0901W0150.wav", "txt": "据广州日报昨天报道"} -{"key": "BAC009S0901W0151", "wav": "./aishell/wav/test/S0901/BAC009S0901W0151.wav", "txt": "公积金贷款首付比例降低的消息令购房者喜出望外"} -{"key": "BAC009S0901W0152", "wav": "./aishell/wav/test/S0901/BAC009S0901W0152.wav", "txt": "其中刚需买家入市积极性明显提高"} -{"key": "BAC009S0901W0153", "wav": "./aishell/wav/test/S0901/BAC009S0901W0153.wav", "txt": "据伟嘉安捷提供的数据显示"} -{"key": "BAC009S0901W0154", "wav": "./aishell/wav/test/S0901/BAC009S0901W0154.wav", "txt": "北京公积金贷款首付比例松绑一周后"} -{"key": "BAC009S0901W0155", "wav": "./aishell/wav/test/S0901/BAC009S0901W0155.wav", "txt": "公积金贷款及组合贷咨询量明显上涨"} -{"key": "BAC009S0901W0156", "wav": "./aishell/wav/test/S0901/BAC009S0901W0156.wav", "txt": "尤其组合贷的咨询量较上月月初一周上涨百分之五左右"} -{"key": "BAC009S0901W0157", "wav": "./aishell/wav/test/S0901/BAC009S0901W0157.wav", "txt": "上海深圳等主要城市也在公积金新政推动下"} -{"key": "BAC009S0901W0158", "wav": "./aishell/wav/test/S0901/BAC009S0901W0158.wav", "txt": "呈现购房者积极入市的行情"} -{"key": "BAC009S0901W0159", "wav": "./aishell/wav/test/S0901/BAC009S0901W0159.wav", "txt": "全国已有超百个城市发布了不同力度的公积金松绑政策"} -{"key": "BAC009S0901W0160", "wav": "./aishell/wav/test/S0901/BAC009S0901W0160.wav", "txt": "加之降息降准等政策组合拳"} -{"key": "BAC009S0901W0161", "wav": "./aishell/wav/test/S0901/BAC009S0901W0161.wav", "txt": "呈现出量价齐涨的局面"} -{"key": "BAC009S0901W0162", "wav": "./aishell/wav/test/S0901/BAC009S0901W0162.wav", "txt": "据中国指数研究院最新数据显示"} -{"key": "BAC009S0901W0163", "wav": "./aishell/wav/test/S0901/BAC009S0901W0163.wav", "txt": 
"深圳环比上涨百分之七"} -{"key": "BAC009S0901W0164", "wav": "./aishell/wav/test/S0901/BAC009S0901W0164.wav", "txt": "涨幅据十大城市之首"} -{"key": "BAC009S0901W0165", "wav": "./aishell/wav/test/S0901/BAC009S0901W0165.wav", "txt": "五月份多地楼市的成交量明显上涨"} -{"key": "BAC009S0901W0166", "wav": "./aishell/wav/test/S0901/BAC009S0901W0166.wav", "txt": "是房地产当前发展格局下的一个必然"} -{"key": "BAC009S0901W0167", "wav": "./aishell/wav/test/S0901/BAC009S0901W0167.wav", "txt": "唯独这样才能盘活公积金资源"} -{"key": "BAC009S0901W0168", "wav": "./aishell/wav/test/S0901/BAC009S0901W0168.wav", "txt": "促使更多购房者积极入市"} -{"key": "BAC009S0901W0169", "wav": "./aishell/wav/test/S0901/BAC009S0901W0169.wav", "txt": "伴随着各地中住房公积金新政的落地实施"} -{"key": "BAC009S0901W0170", "wav": "./aishell/wav/test/S0901/BAC009S0901W0170.wav", "txt": "楼市进展仍需进一步观望"} -{"key": "BAC009S0901W0171", "wav": "./aishell/wav/test/S0901/BAC009S0901W0171.wav", "txt": "购房者受惠于政策利好的同时"} -{"key": "BAC009S0901W0172", "wav": "./aishell/wav/test/S0901/BAC009S0901W0172.wav", "txt": "公积金在申请放贷流程上并未提速"} -{"key": "BAC009S0901W0173", "wav": "./aishell/wav/test/S0901/BAC009S0901W0173.wav", "txt": "相反相关环节上审批更加严格"} -{"key": "BAC009S0901W0174", "wav": "./aishell/wav/test/S0901/BAC009S0901W0174.wav", "txt": "从目前上海住房公积金的具体政策看"} -{"key": "BAC009S0901W0175", "wav": "./aishell/wav/test/S0901/BAC009S0901W0175.wav", "txt": "购房的扶持力度在加大"} -{"key": "BAC009S0901W0176", "wav": "./aishell/wav/test/S0901/BAC009S0901W0176.wav", "txt": "但主要还是体现在贷款成本的降低"} -{"key": "BAC009S0901W0177", "wav": "./aishell/wav/test/S0901/BAC009S0901W0177.wav", "txt": "而申请公积金贷款方面还是需要走严格的流程"} -{"key": "BAC009S0901W0178", "wav": "./aishell/wav/test/S0901/BAC009S0901W0178.wav", "txt": "公积金提取一直是目前试图突破的内容"} -{"key": "BAC009S0901W0179", "wav": "./aishell/wav/test/S0901/BAC009S0901W0179.wav", "txt": "但目前还未出现大面积提取行为"} -{"key": "BAC009S0901W0180", "wav": "./aishell/wav/test/S0901/BAC009S0901W0180.wav", "txt": "来自广州日报的报道称"} -{"key": "BAC009S0901W0181", "wav": "./aishell/wav/test/S0901/BAC009S0901W0181.wav", "txt": "从申请到最后的拨放款"} -{"key": "BAC009S0901W0182", "wav": "./aishell/wav/test/S0901/BAC009S0901W0182.wav", "txt": "部分客户甚至等两个多月"} -{"key": "BAC009S0901W0183", "wav": "./aishell/wav/test/S0901/BAC009S0901W0183.wav", "txt": "如果申请公积金贷款或公积金贷款与商业贷款的组合贷"} -{"key": "BAC009S0901W0184", "wav": "./aishell/wav/test/S0901/BAC009S0901W0184.wav", "txt": "伟嘉安捷对中新网房产频道表示"} -{"key": "BAC009S0901W0185", "wav": "./aishell/wav/test/S0901/BAC009S0901W0185.wav", "txt": "现在公积金贷款办理需要一个月左右的时间"} -{"key": "BAC009S0901W0186", "wav": "./aishell/wav/test/S0901/BAC009S0901W0186.wav", "txt": "而申请办理组合贷款的手续则更为复杂"} -{"key": "BAC009S0901W0187", "wav": "./aishell/wav/test/S0901/BAC009S0901W0187.wav", "txt": "农业现代化水平显着提升"} -{"key": "BAC009S0901W0188", "wav": "./aishell/wav/test/S0901/BAC009S0901W0188.wav", "txt": "发展现代农业的条件更加有利"} -{"key": "BAC009S0901W0189", "wav": "./aishell/wav/test/S0901/BAC009S0901W0189.wav", "txt": "加快发展现代农业机遇遇得"} -{"key": "BAC009S0901W0190", "wav": "./aishell/wav/test/S0901/BAC009S0901W0190.wav", "txt": "一是工业化城镇化的引领推动作用将更加明显"} -{"key": "BAC009S0901W0191", "wav": "./aishell/wav/test/S0901/BAC009S0901W0191.wav", "txt": "信息化水平不断提高"} -{"key": "BAC009S0901W0192", "wav": "./aishell/wav/test/S0901/BAC009S0901W0192.wav", "txt": "农村劳动力大量转移"} -{"key": "BAC009S0901W0193", "wav": "./aishell/wav/test/S0901/BAC009S0901W0193.wav", "txt": "以及扩大内需战略的实施"} -{"key": "BAC009S0901W0194", "wav": "./aishell/wav/test/S0901/BAC009S0901W0194.wav", "txt": "二是政策支持将更加强化"} -{"key": "BAC009S0901W0195", "wav": "./aishell/wav/test/S0901/BAC009S0901W0195.wav", "txt": "随着我国综合国力和财政实力不断增强"} -{"key": 
"BAC009S0901W0196", "wav": "./aishell/wav/test/S0901/BAC009S0901W0196.wav", "txt": "强农惠农富农政策力度将进一步加大"} -{"key": "BAC009S0901W0197", "wav": "./aishell/wav/test/S0901/BAC009S0901W0197.wav", "txt": "支持现代农业发展的物质基础更加牢固"} -{"key": "BAC009S0901W0198", "wav": "./aishell/wav/test/S0901/BAC009S0901W0198.wav", "txt": "三是科技支撑将更加有力"} -{"key": "BAC009S0901W0199", "wav": "./aishell/wav/test/S0901/BAC009S0901W0199.wav", "txt": "科技创新孕育新突破"} -{"key": "BAC009S0901W0200", "wav": "./aishell/wav/test/S0901/BAC009S0901W0200.wav", "txt": "全球绿色经济低碳技术正在兴起"} -{"key": "BAC009S0901W0201", "wav": "./aishell/wav/test/S0901/BAC009S0901W0201.wav", "txt": "现代农业发展的动力更加强劲"} -{"key": "BAC009S0901W0202", "wav": "./aishell/wav/test/S0901/BAC009S0901W0202.wav", "txt": "四是外部环境将更加优化"} -{"key": "BAC009S0901W0203", "wav": "./aishell/wav/test/S0901/BAC009S0901W0203.wav", "txt": "形成合力推进现代农业发展的新局面"} -{"key": "BAC009S0901W0204", "wav": "./aishell/wav/test/S0901/BAC009S0901W0204.wav", "txt": "广大农民的积极性创造性将得到进一步激发和释放"} -{"key": "BAC009S0901W0205", "wav": "./aishell/wav/test/S0901/BAC009S0901W0205.wav", "txt": "发展现代农业的要求更加迫切"} -{"key": "BAC009S0901W0206", "wav": "./aishell/wav/test/S0901/BAC009S0901W0206.wav", "txt": "在工业化城镇化快速推进时期"} -{"key": "BAC009S0901W0207", "wav": "./aishell/wav/test/S0901/BAC009S0901W0207.wav", "txt": "农业面临着容易被忽视或削弱的风险"} -{"key": "BAC009S0901W0208", "wav": "./aishell/wav/test/S0901/BAC009S0901W0208.wav", "txt": "我国工业化城镇化快速发展"} -{"key": "BAC009S0901W0209", "wav": "./aishell/wav/test/S0901/BAC009S0901W0209.wav", "txt": "但农业现代化明显滞后"} -{"key": "BAC009S0901W0210", "wav": "./aishell/wav/test/S0901/BAC009S0901W0210.wav", "txt": "面临着一系列严峻挑战"} -{"key": "BAC009S0901W0211", "wav": "./aishell/wav/test/S0901/BAC009S0901W0211.wav", "txt": "科技创新和推广新应用能力不强"} -{"key": "BAC009S0901W0212", "wav": "./aishell/wav/test/S0901/BAC009S0901W0212.wav", "txt": "农业社会化服务体系不健全"} -{"key": "BAC009S0901W0213", "wav": "./aishell/wav/test/S0901/BAC009S0901W0213.wav", "txt": "国际农产品市场投机炒作及传导影响加深"} -{"key": "BAC009S0901W0214", "wav": "./aishell/wav/test/S0901/BAC009S0901W0214.wav", "txt": "我国现代农业发展面临更多的外部不确定性"} -{"key": "BAC009S0901W0215", "wav": "./aishell/wav/test/S0901/BAC009S0901W0215.wav", "txt": "必须珍惜抓住用好难得的历史机遇"} -{"key": "BAC009S0901W0216", "wav": "./aishell/wav/test/S0901/BAC009S0901W0216.wav", "txt": "坚持用现代物质条件装备农业"} -{"key": "BAC009S0901W0217", "wav": "./aishell/wav/test/S0901/BAC009S0901W0217.wav", "txt": "努力探索出一条具有中国特色的农业现代化道路"} -{"key": "BAC009S0901W0218", "wav": "./aishell/wav/test/S0901/BAC009S0901W0218.wav", "txt": "指导思想基本原则与发展目标"} -{"key": "BAC009S0901W0219", "wav": "./aishell/wav/test/S0901/BAC009S0901W0219.wav", "txt": "以邓小平理论和三个代表重要思想为指导"} -{"key": "BAC009S0901W0220", "wav": "./aishell/wav/test/S0901/BAC009S0901W0220.wav", "txt": "深入贯彻落实科学发展观"} -{"key": "BAC009S0901W0221", "wav": "./aishell/wav/test/S0901/BAC009S0901W0221.wav", "txt": "坚持走中国特色农业现代化道路"} -{"key": "BAC009S0901W0222", "wav": "./aishell/wav/test/S0901/BAC009S0901W0222.wav", "txt": "以转变农业发展方式为主线"} -{"key": "BAC009S0901W0223", "wav": "./aishell/wav/test/S0901/BAC009S0901W0223.wav", "txt": "着力强化政策科技设施装备人才和体制支撑"} -{"key": "BAC009S0901W0224", "wav": "./aishell/wav/test/S0901/BAC009S0901W0224.wav", "txt": "着力完善现代农业产业体系"} -{"key": "BAC009S0901W0225", "wav": "./aishell/wav/test/S0901/BAC009S0901W0225.wav", "txt": "提高农业现代化水平农民生活水平和新农村建设水平"} -{"key": "BAC009S0901W0226", "wav": "./aishell/wav/test/S0901/BAC009S0901W0226.wav", "txt": "坚持确保国家粮食安全"} -{"key": "BAC009S0901W0227", "wav": "./aishell/wav/test/S0901/BAC009S0901W0227.wav", "txt": "坚持立足国内实现粮食基本自给的方针"} -{"key": 
"BAC009S0901W0228", "wav": "./aishell/wav/test/S0901/BAC009S0901W0228.wav", "txt": "实行最严格的耕地保护和节约用地制度"} -{"key": "BAC009S0901W0229", "wav": "./aishell/wav/test/S0901/BAC009S0901W0229.wav", "txt": "加强农业基础设施建设"} -{"key": "BAC009S0901W0230", "wav": "./aishell/wav/test/S0901/BAC009S0901W0230.wav", "txt": "着力提高粮食综合生产能力"} -{"key": "BAC009S0901W0231", "wav": "./aishell/wav/test/S0901/BAC009S0901W0231.wav", "txt": "坚持和完善农村基本经营制度"} -{"key": "BAC009S0901W0232", "wav": "./aishell/wav/test/S0901/BAC009S0901W0232.wav", "txt": "在保持农村土地承包关系稳定并长久不变的前提下"} -{"key": "BAC009S0901W0233", "wav": "./aishell/wav/test/S0901/BAC009S0901W0233.wav", "txt": "推进农业经营体系体制创新"} -{"key": "BAC009S0901W0234", "wav": "./aishell/wav/test/S0901/BAC009S0901W0234.wav", "txt": "坚持科教兴农和人才强农"} -{"key": "BAC009S0901W0235", "wav": "./aishell/wav/test/S0901/BAC009S0901W0235.wav", "txt": "加快农业科技自主创新和农业农村人才培养"} -{"key": "BAC009S0901W0236", "wav": "./aishell/wav/test/S0901/BAC009S0901W0236.wav", "txt": "加快农业科技成果转化与推广应用"} -{"key": "BAC009S0901W0237", "wav": "./aishell/wav/test/S0901/BAC009S0901W0237.wav", "txt": "提高农业物质技术水装备水平"} -{"key": "BAC009S0901W0238", "wav": "./aishell/wav/test/S0901/BAC009S0901W0238.wav", "txt": "坚持政府支持农民主体社会参与"} -{"key": "BAC009S0901W0239", "wav": "./aishell/wav/test/S0901/BAC009S0901W0239.wav", "txt": "加大强农惠农富农力度"} -{"key": "BAC009S0901W0240", "wav": "./aishell/wav/test/S0901/BAC009S0901W0240.wav", "txt": "充分发挥农民的主体作用和首创精神"} -{"key": "BAC009S0901W0241", "wav": "./aishell/wav/test/S0901/BAC009S0901W0241.wav", "txt": "引导和鼓励社会资本投入农业"} -{"key": "BAC009S0901W0242", "wav": "./aishell/wav/test/S0901/BAC009S0901W0242.wav", "txt": "合力推进现代农业发展"} -{"key": "BAC009S0901W0243", "wav": "./aishell/wav/test/S0901/BAC009S0901W0243.wav", "txt": "坚持分类指导重点突破梯次推进"} -{"key": "BAC009S0901W0244", "wav": "./aishell/wav/test/S0901/BAC009S0901W0244.wav", "txt": "进一步优化农业生产力布局"} -{"key": "BAC009S0901W0245", "wav": "./aishell/wav/test/S0901/BAC009S0901W0245.wav", "txt": "因地制宜地采取有选择差别化扶持政策"} -{"key": "BAC009S0901W0246", "wav": "./aishell/wav/test/S0901/BAC009S0901W0246.wav", "txt": "支持主要农产品优势产区建设"} -{"key": "BAC009S0901W0247", "wav": "./aishell/wav/test/S0901/BAC009S0901W0247.wav", "txt": "鼓励有条件地区率先实现农业现代化"} -{"key": "BAC009S0901W0248", "wav": "./aishell/wav/test/S0901/BAC009S0901W0248.wav", "txt": "推动其他地区加快发展"} -{"key": "BAC009S0901W0249", "wav": "./aishell/wav/test/S0901/BAC009S0901W0249.wav", "txt": "全面提高农业现代化水平"} -{"key": "BAC009S0901W0250", "wav": "./aishell/wav/test/S0901/BAC009S0901W0250.wav", "txt": "现代农业建设取得明显进展"} -{"key": "BAC009S0901W0251", "wav": "./aishell/wav/test/S0901/BAC009S0901W0251.wav", "txt": "粮食等主要农产品供给得到有效保障"} -{"key": "BAC009S0901W0252", "wav": "./aishell/wav/test/S0901/BAC009S0901W0252.wav", "txt": "物质装备水平明显提高"} -{"key": "BAC009S0901W0253", "wav": "./aishell/wav/test/S0901/BAC009S0901W0253.wav", "txt": "并没有提供什么帮助"} -{"key": "BAC009S0901W0254", "wav": "./aishell/wav/test/S0901/BAC009S0901W0254.wav", "txt": "由于关于乔布斯的电话即将上演了"} -{"key": "BAC009S0901W0255", "wav": "./aishell/wav/test/S0901/BAC009S0901W0255.wav", "txt": "想了解苹果最初的事"} -{"key": "BAC009S0901W0256", "wav": "./aishell/wav/test/S0901/BAC009S0901W0256.wav", "txt": "乔布斯在最初产品开发过程中"} -{"key": "BAC009S0901W0257", "wav": "./aishell/wav/test/S0901/BAC009S0901W0257.wav", "txt": "到底发挥了什么作用"} -{"key": "BAC009S0901W0259", "wav": "./aishell/wav/test/S0901/BAC009S0901W0259.wav", "txt": "乔布斯几乎没发挥什么作用"} -{"key": "BAC009S0901W0261", "wav": "./aishell/wav/test/S0901/BAC009S0901W0261.wav", "txt": "而这都是我自己的努力"} -{"key": "BAC009S0901W0262", "wav": 
"./aishell/wav/test/S0901/BAC009S0901W0262.wav", "txt": "乔布斯在它出现之前都不知道它的存在"} -{"key": "BAC009S0901W0263", "wav": "./aishell/wav/test/S0901/BAC009S0901W0263.wav", "txt": "不过这话他在去年就说过"} -{"key": "BAC009S0901W0264", "wav": "./aishell/wav/test/S0901/BAC009S0901W0264.wav", "txt": "其中一个回答就说过"} -{"key": "BAC009S0901W0265", "wav": "./aishell/wav/test/S0901/BAC009S0901W0265.wav", "txt": "乔布斯不是一名工程师"} -{"key": "BAC009S0901W0266", "wav": "./aishell/wav/test/S0901/BAC009S0901W0266.wav", "txt": "他从来没有写过代码"} -{"key": "BAC009S0901W0267", "wav": "./aishell/wav/test/S0901/BAC009S0901W0267.wav", "txt": "也没有参与过任何产品的原始设计"} -{"key": "BAC009S0901W0268", "wav": "./aishell/wav/test/S0901/BAC009S0901W0268.wav", "txt": "乔帮主并没有他说的那么不堪"} -{"key": "BAC009S0901W0269", "wav": "./aishell/wav/test/S0901/BAC009S0901W0269.wav", "txt": "沃兹尼亚克自己也说"} -{"key": "BAC009S0901W0270", "wav": "./aishell/wav/test/S0901/BAC009S0901W0270.wav", "txt": "乔布斯想成为重要人物"} -{"key": "BAC009S0901W0271", "wav": "./aishell/wav/test/S0901/BAC009S0901W0271.wav", "txt": "而这种人通常是商业人士"} -{"key": "BAC009S0901W0272", "wav": "./aishell/wav/test/S0901/BAC009S0901W0272.wav", "txt": "他是一个杰出的商人"} -{"key": "BAC009S0901W0273", "wav": "./aishell/wav/test/S0901/BAC009S0901W0273.wav", "txt": "一个公司不能缺少两种人"} -{"key": "BAC009S0901W0274", "wav": "./aishell/wav/test/S0901/BAC009S0901W0274.wav", "txt": "公司的成功缺一不可"} -{"key": "BAC009S0901W0275", "wav": "./aishell/wav/test/S0901/BAC009S0901W0275.wav", "txt": "而沃兹尼亚克似乎乐于承担驱魅的角色"} -{"key": "BAC009S0901W0276", "wav": "./aishell/wav/test/S0901/BAC009S0901W0276.wav", "txt": "车库没有发挥过太大作用"} -{"key": "BAC009S0901W0277", "wav": "./aishell/wav/test/S0901/BAC009S0901W0277.wav", "txt": "除了有时候让他们觉得那里像家"} -{"key": "BAC009S0901W0278", "wav": "./aishell/wav/test/S0901/BAC009S0901W0278.wav", "txt": "车库虽然最能够代表初期创业"} -{"key": "BAC009S0901W0279", "wav": "./aishell/wav/test/S0901/BAC009S0901W0279.wav", "txt": "但是在那没做任何设计工作"} -{"key": "BAC009S0901W0280", "wav": "./aishell/wav/test/S0901/BAC009S0901W0280.wav", "txt": "他还吐槽过乔布斯电影中的桥段"} -{"key": "BAC009S0901W0281", "wav": "./aishell/wav/test/S0901/BAC009S0901W0281.wav", "txt": "他从未对产品被偷发表过任何评论"} -{"key": "BAC009S0901W0282", "wav": "./aishell/wav/test/S0901/BAC009S0901W0282.wav", "txt": "并不像乔布斯那样激动"} -{"key": "BAC009S0901W0283", "wav": "./aishell/wav/test/S0901/BAC009S0901W0283.wav", "txt": "我们外人是无法知道真相的"} -{"key": "BAC009S0901W0284", "wav": "./aishell/wav/test/S0901/BAC009S0901W0284.wav", "txt": "原创张驰乔布斯逝世已久"} -{"key": "BAC009S0901W0285", "wav": "./aishell/wav/test/S0901/BAC009S0901W0285.wav", "txt": "而苹果的另一位联合创始人沃兹尼亚克还活跃在科技圈"} -{"key": "BAC009S0901W0286", "wav": "./aishell/wav/test/S0901/BAC009S0901W0286.wav", "txt": "而且以喜欢点评各家公司着称"} -{"key": "BAC009S0901W0287", "wav": "./aishell/wav/test/S0901/BAC009S0901W0287.wav", "txt": "乔帮主在首批苹果产品的开发中"} -{"key": "BAC009S0901W0288", "wav": "./aishell/wav/test/S0901/BAC009S0901W0288.wav", "txt": "苹果股价下跌百分之五分析师出现重大分歧搜狐科技"} -{"key": "BAC009S0901W0289", "wav": "./aishell/wav/test/S0901/BAC009S0901W0289.wav", "txt": "本报记者纪佳鹏北京报道北京时间八月十二日"} -{"key": "BAC009S0901W0290", "wav": "./aishell/wav/test/S0901/BAC009S0901W0290.wav", "txt": "作为科技股领头羊的苹果股份当天下挫百分之二"} -{"key": "BAC009S0901W0291", "wav": "./aishell/wav/test/S0901/BAC009S0901W0291.wav", "txt": "人民币的贬值很可能会增加苹果设备进口的费用"} -{"key": "BAC009S0901W0292", "wav": "./aishell/wav/test/S0901/BAC009S0901W0292.wav", "txt": "这也是影响股价的一大因素"} -{"key": "BAC009S0901W0293", "wav": "./aishell/wav/test/S0901/BAC009S0901W0293.wav", "txt": "苹果股价的这轮连续下跌"} -{"key": "BAC009S0901W0294", "wav": "./aishell/wav/test/S0901/BAC009S0901W0294.wav", 
"txt": "从今年的七月二十一日便开始了"} -{"key": "BAC009S0901W0295", "wav": "./aishell/wav/test/S0901/BAC009S0901W0295.wav", "txt": "苹果股价已下挫了百分之七十九"} -{"key": "BAC009S0901W0296", "wav": "./aishell/wav/test/S0901/BAC009S0901W0296.wav", "txt": "不少报道与评论表示"} -{"key": "BAC009S0901W0299", "wav": "./aishell/wav/test/S0901/BAC009S0901W0299.wav", "txt": "也过分依赖于大中华地区"} -{"key": "BAC009S0901W0300", "wav": "./aishell/wav/test/S0901/BAC009S0901W0300.wav", "txt": "甚至是负增长而其中"} -{"key": "BAC009S0901W0301", "wav": "./aishell/wav/test/S0901/BAC009S0901W0301.wav", "txt": "根据近期公布的苹果第三财季业业绩"} -{"key": "BAC009S0901W0302", "wav": "./aishell/wav/test/S0901/BAC009S0901W0302.wav", "txt": "该季度苹果大中华区营收为一百三十二点三亿美元"} -{"key": "BAC009S0901W0303", "wav": "./aishell/wav/test/S0901/BAC009S0901W0303.wav", "txt": "为中国的智能制造产业做出贡献"} -{"key": "BAC009S0901W0304", "wav": "./aishell/wav/test/S0901/BAC009S0901W0304.wav", "txt": "由于该项目尚处于保密期"} -{"key": "BAC009S0901W0305", "wav": "./aishell/wav/test/S0901/BAC009S0901W0305.wav", "txt": "赵伟国并未透露更多内容"} -{"key": "BAC009S0901W0306", "wav": "./aishell/wav/test/S0901/BAC009S0901W0306.wav", "txt": "沈阳机床董事长关锡友认为"} -{"key": "BAC009S0901W0307", "wav": "./aishell/wav/test/S0901/BAC009S0901W0307.wav", "txt": "中国企业与世界企业同在同一起跑线上"} -{"key": "BAC009S0901W0308", "wav": "./aishell/wav/test/S0901/BAC009S0901W0308.wav", "txt": "中国的中高端嵌入式芯片全部从德国日本进口"} -{"key": "BAC009S0901W0309", "wav": "./aishell/wav/test/S0901/BAC009S0901W0309.wav", "txt": "德国制造业最核心的技术就是嵌入式系统"} -{"key": "BAC009S0901W0310", "wav": "./aishell/wav/test/S0901/BAC009S0901W0310.wav", "txt": "在体积能耗上存在一定的不足"} -{"key": "BAC009S0901W0311", "wav": "./aishell/wav/test/S0901/BAC009S0901W0311.wav", "txt": "紫光与沈阳机床可以在此布局"} -{"key": "BAC009S0901W0312", "wav": "./aishell/wav/test/S0901/BAC009S0901W0312.wav", "txt": "三十一九二零一五"} -{"key": "BAC009S0901W0313", "wav": "./aishell/wav/test/S0901/BAC009S0901W0313.wav", "txt": "紫光集团系清华控股旗下最主要的资产"} -{"key": "BAC009S0901W0314", "wav": "./aishell/wav/test/S0901/BAC009S0901W0314.wav", "txt": "二零一三年二零一四年"} -{"key": "BAC009S0901W0315", "wav": "./aishell/wav/test/S0901/BAC009S0901W0315.wav", "txt": "并一举成为中国最大全球第三大通讯芯片设计公司"} -{"key": "BAC009S0901W0316", "wav": "./aishell/wav/test/S0901/BAC009S0901W0316.wav", "txt": "紫光集团还计划布局物联网网络设备芯片"} -{"key": "BAC009S0901W0317", "wav": "./aishell/wav/test/S0901/BAC009S0901W0317.wav", "txt": "二零一五年紫光集团预计收入约四百亿元"} -{"key": "BAC009S0901W0318", "wav": "./aishell/wav/test/S0901/BAC009S0901W0318.wav", "txt": "资产规模将达到六十五亿八百亿元"} -{"key": "BAC009S0901W0319", "wav": "./aishell/wav/test/S0901/BAC009S0901W0319.wav", "txt": "中国机床龙头企业沈阳机床在北京举行战略发布会"} -{"key": "BAC009S0901W0320", "wav": "./aishell/wav/test/S0901/BAC009S0901W0320.wav", "txt": "紫光股份云计算股收涨停搜狐科技"} -{"key": "BAC009S0901W0321", "wav": "./aishell/wav/test/S0901/BAC009S0901W0321.wav", "txt": "大盘股仍是毫无作为"} -{"key": "BAC009S0901W0322", "wav": "./aishell/wav/test/S0901/BAC009S0901W0322.wav", "txt": "题材股继续扮演黑马角色"} -{"key": "BAC009S0901W0323", "wav": "./aishell/wav/test/S0901/BAC009S0901W0323.wav", "txt": "紫光股份在公告扩展云计算市场后"} -{"key": "BAC009S0901W0324", "wav": "./aishell/wav/test/S0901/BAC009S0901W0324.wav", "txt": "盘中有二千六百八十六万元资金净流入"} -{"key": "BAC009S0901W0325", "wav": "./aishell/wav/test/S0901/BAC009S0901W0325.wav", "txt": "主营信息电子和环保"} -{"key": "BAC009S0901W0326", "wav": "./aishell/wav/test/S0901/BAC009S0901W0326.wav", "txt": "公司昨日发布公告称"} -{"key": "BAC009S0901W0327", "wav": "./aishell/wav/test/S0901/BAC009S0901W0327.wav", "txt": "各方本着互惠互利优势互补合作共赢的原则"} -{"key": "BAC009S0901W0328", "wav": "./aishell/wav/test/S0901/BAC009S0901W0328.wav", "txt": 
"通过搭建具有领先技术水平的混合云解决方案平台"} -{"key": "BAC009S0901W0329", "wav": "./aishell/wav/test/S0901/BAC009S0901W0329.wav", "txt": "共同拓展国内云计算市场"} -{"key": "BAC009S0901W0330", "wav": "./aishell/wav/test/S0901/BAC009S0901W0330.wav", "txt": "紫光股份将与世纪互联共同出资组建合资公司"} -{"key": "BAC009S0901W0332", "wav": "./aishell/wav/test/S0901/BAC009S0901W0332.wav", "txt": "搭建混合云解决方案平台"} -{"key": "BAC009S0901W0333", "wav": "./aishell/wav/test/S0901/BAC009S0901W0333.wav", "txt": "满足政府和企业级客户云计算下的定制化需求"} -{"key": "BAC009S0901W0334", "wav": "./aishell/wav/test/S0901/BAC009S0901W0334.wav", "txt": "推动公司云服务战略的实施"} -{"key": "BAC009S0901W0335", "wav": "./aishell/wav/test/S0901/BAC009S0901W0335.wav", "txt": "紫光股份拟定增募资二百二十五亿元"} -{"key": "BAC009S0901W0336", "wav": "./aishell/wav/test/S0901/BAC009S0901W0336.wav", "txt": "公司继续推进云服务战略"} -{"key": "BAC009S0901W0337", "wav": "./aishell/wav/test/S0901/BAC009S0901W0337.wav", "txt": "紫光集团和员工持股计划参与非公开增发"} -{"key": "BAC009S0901W0338", "wav": "./aishell/wav/test/S0901/BAC009S0901W0338.wav", "txt": "医生此次将对惠若琪的心脏进行微创手术"} -{"key": "BAC009S0901W0339", "wav": "./aishell/wav/test/S0901/BAC009S0901W0339.wav", "txt": "彻底解决目前存在的隐患"} -{"key": "BAC009S0901W0340", "wav": "./aishell/wav/test/S0901/BAC009S0901W0340.wav", "txt": "惠若琪将在微创手术后回到南京调养"} -{"key": "BAC009S0901W0341", "wav": "./aishell/wav/test/S0901/BAC009S0901W0341.wav", "txt": "张蓉芳主持排管中心"} -{"key": "BAC009S0901W0342", "wav": "./aishell/wav/test/S0901/BAC009S0901W0342.wav", "txt": "成就了中国女排五连冠伟业"} -{"key": "BAC009S0901W0343", "wav": "./aishell/wav/test/S0901/BAC009S0901W0343.wav", "txt": "北京时间九月十七日"} -{"key": "BAC009S0901W0344", "wav": "./aishell/wav/test/S0901/BAC009S0901W0344.wav", "txt": "已经确定本赛季不会参加任何的比赛"} -{"key": "BAC009S0901W0345", "wav": "./aishell/wav/test/S0901/BAC009S0901W0345.wav", "txt": "明年春天普鲁申科将再次进行手术"} -{"key": "BAC009S0901W0346", "wav": "./aishell/wav/test/S0901/BAC009S0901W0346.wav", "txt": "作为有史以来天赋最高的花样滑冰运动员之一"} -{"key": "BAC009S0901W0347", "wav": "./aishell/wav/test/S0901/BAC009S0901W0347.wav", "txt": "普鲁申科的职业生涯却堪称多灾多难"} -{"key": "BAC009S0901W0348", "wav": "./aishell/wav/test/S0901/BAC009S0901W0348.wav", "txt": "他屡次受到伤病的困扰"} -{"key": "BAC009S0901W0349", "wav": "./aishell/wav/test/S0901/BAC009S0901W0349.wav", "txt": "背伤更是常年阻碍着他的发挥"} -{"key": "BAC009S0901W0350", "wav": "./aishell/wav/test/S0901/BAC009S0901W0350.wav", "txt": "去年的索契冬奥会上"} -{"key": "BAC009S0901W0351", "wav": "./aishell/wav/test/S0901/BAC009S0901W0351.wav", "txt": "赛后有媒体发布了他几乎扭曲的背部肌肉的照片"} -{"key": "BAC009S0901W0352", "wav": "./aishell/wav/test/S0901/BAC009S0901W0352.wav", "txt": "照片中看到普鲁申科的背部肌肉伤痕累累"} -{"key": "BAC009S0901W0353", "wav": "./aishell/wav/test/S0901/BAC009S0901W0353.wav", "txt": "全都是手术缝合的痕迹"} -{"key": "BAC009S0901W0354", "wav": "./aishell/wav/test/S0901/BAC009S0901W0354.wav", "txt": "他不仅动过多次肌肉手术"} -{"key": "BAC009S0901W0355", "wav": "./aishell/wav/test/S0901/BAC009S0901W0355.wav", "txt": "连身上的痛觉神经都进行了更换"} -{"key": "BAC009S0901W0356", "wav": "./aishell/wav/test/S0901/BAC009S0901W0356.wav", "txt": "普鲁申科出人意料地宣布复出"} -{"key": "BAC009S0901W0357", "wav": "./aishell/wav/test/S0901/BAC009S0901W0357.wav", "txt": "表示愿意再征战一个冬奥会周期"} -{"key": "BAC009S0901W0358", "wav": "./aishell/wav/test/S0901/BAC009S0901W0358.wav", "txt": "但就在外界期待着冰王子的卷土重来时"} -{"key": "BAC009S0901W0359", "wav": "./aishell/wav/test/S0901/BAC009S0901W0359.wav", "txt": "本赛季的各项赛事参赛名单上却都没有见到他的身影"} -{"key": "BAC009S0901W0360", "wav": "./aishell/wav/test/S0901/BAC009S0901W0360.wav", "txt": "据外媒最新的爆料显示"} -{"key": "BAC009S0901W0361", "wav": "./aishell/wav/test/S0901/BAC009S0901W0361.wav", "txt": 
"普鲁申科被诊断患上了一种新的脊椎疾病"} -{"key": "BAC009S0901W0362", "wav": "./aishell/wav/test/S0901/BAC009S0901W0362.wav", "txt": "这也让他必须在明年春天进行一次小手术来加以治疗"} -{"key": "BAC009S0901W0363", "wav": "./aishell/wav/test/S0901/BAC009S0901W0363.wav", "txt": "普鲁申科将错过整个二零一五二零一六季一六赛季的比赛"} -{"key": "BAC009S0901W0364", "wav": "./aishell/wav/test/S0901/BAC009S0901W0364.wav", "txt": "普鲁申科丝毫没有隐退的打算"} -{"key": "BAC009S0901W0365", "wav": "./aishell/wav/test/S0901/BAC009S0901W0365.wav", "txt": "他还在积极地为二零一八年韩国平昌冬奥会进行着准备"} -{"key": "BAC009S0901W0367", "wav": "./aishell/wav/test/S0901/BAC009S0901W0367.wav", "txt": "因为卷入兴奋剂丑闻"} -{"key": "BAC009S0901W0368", "wav": "./aishell/wav/test/S0901/BAC009S0901W0368.wav", "txt": "朴泰桓无法加入海外先进的训练团队进行训练"} -{"key": "BAC009S0901W0369", "wav": "./aishell/wav/test/S0901/BAC009S0901W0369.wav", "txt": "转投到昔日恩师卢民相任教练的游泳俱乐部训练"} -{"key": "BAC009S0901W0370", "wav": "./aishell/wav/test/S0901/BAC009S0901W0370.wav", "txt": "但遭到了韩国国内舆论的非议"} -{"key": "BAC009S0901W0372", "wav": "./aishell/wav/test/S0901/BAC009S0901W0372.wav", "txt": "到今年十二月为止将在东京的法政大学进行训练"} -{"key": "BAC009S0901W0373", "wav": "./aishell/wav/test/S0901/BAC009S0901W0373.wav", "txt": "备战明年的里约奥运会"} -{"key": "BAC009S0901W0374", "wav": "./aishell/wav/test/S0901/BAC009S0901W0374.wav", "txt": "但法政大学很快公开辟谣"} -{"key": "BAC009S0901W0375", "wav": "./aishell/wav/test/S0901/BAC009S0901W0375.wav", "txt": "韩国媒体报道称朴泰桓确实人在日本"} -{"key": "BAC009S0901W0376", "wav": "./aishell/wav/test/S0901/BAC009S0901W0376.wav", "txt": "他状告首尔某美容医院的官司将在十一月迎来终审"} -{"key": "BAC009S0901W0377", "wav": "./aishell/wav/test/S0901/BAC009S0901W0377.wav", "txt": "判决结果成为他能否参加里约奥运的变数"} -{"key": "BAC009S0901W0378", "wav": "./aishell/wav/test/S0901/BAC009S0901W0378.wav", "txt": "据韩国体育首尔的最新消息"} -{"key": "BAC009S0901W0379", "wav": "./aishell/wav/test/S0901/BAC009S0901W0379.wav", "txt": "二十一日抵达日本的朴泰桓目前确实在东京"} -{"key": "BAC009S0901W0380", "wav": "./aishell/wav/test/S0901/BAC009S0901W0380.wav", "txt": "计划在那里进行三个月的封闭训练"} -{"key": "BAC009S0901W0381", "wav": "./aishell/wav/test/S0901/BAC009S0901W0381.wav", "txt": "备战明年的里约奥运"} -{"key": "BAC009S0901W0382", "wav": "./aishell/wav/test/S0901/BAC009S0901W0382.wav", "txt": "但他的具体行踪成为谜团"} -{"key": "BAC009S0901W0383", "wav": "./aishell/wav/test/S0901/BAC009S0901W0383.wav", "txt": "能否在里约奥运东山再起"} -{"key": "BAC009S0901W0384", "wav": "./aishell/wav/test/S0901/BAC009S0901W0384.wav", "txt": "不仅要看他的竞技状态恢复程度"} -{"key": "BAC009S0901W0385", "wav": "./aishell/wav/test/S0901/BAC009S0901W0385.wav", "txt": "首先要跨过大韩体育会这一关"} -{"key": "BAC009S0901W0386", "wav": "./aishell/wav/test/S0901/BAC009S0901W0386.wav", "txt": "朴泰桓的禁期禁赛期将在明年三月期满"} -{"key": "BAC009S0901W0387", "wav": "./aishell/wav/test/S0901/BAC009S0901W0387.wav", "txt": "因为服用禁药被停赛的选手在禁赛期满起的三年内"} -{"key": "BAC009S0901W0388", "wav": "./aishell/wav/test/S0901/BAC009S0901W0388.wav", "txt": "都无法代表韩国参加国际比赛"} -{"key": "BAC009S0901W0389", "wav": "./aishell/wav/test/S0901/BAC009S0901W0389.wav", "txt": "义不容辞地想拯救运动生涯在绝境中的朴泰桓"} -{"key": "BAC009S0901W0390", "wav": "./aishell/wav/test/S0901/BAC009S0901W0390.wav", "txt": "废除这个第五条第六项"} -{"key": "BAC009S0901W0391", "wav": "./aishell/wav/test/S0901/BAC009S0901W0391.wav", "txt": "为他参加里约奥运扫清最后的障碍"} -{"key": "BAC009S0901W0392", "wav": "./aishell/wav/test/S0901/BAC009S0901W0392.wav", "txt": "体育首尔的报道分析"} -{"key": "BAC009S0901W0393", "wav": "./aishell/wav/test/S0901/BAC009S0901W0393.wav", "txt": "大韩体育会这一计划的顺利实施"} -{"key": "BAC009S0901W0394", "wav": "./aishell/wav/test/S0901/BAC009S0901W0394.wav", "txt": "最终判决结果将在十一月出炉"} -{"key": "BAC009S0901W0395", "wav": 
"./aishell/wav/test/S0901/BAC009S0901W0395.wav", "txt": "如果该医院罪名被判成立的话"} -{"key": "BAC009S0901W0396", "wav": "./aishell/wav/test/S0901/BAC009S0901W0396.wav", "txt": "这样一来可以获得韩国舆论的同情和理解"} -{"key": "BAC009S0901W0397", "wav": "./aishell/wav/test/S0901/BAC009S0901W0397.wav", "txt": "民众自然会支持大韩体育会给他一个人修改规则"} -{"key": "BAC009S0901W0398", "wav": "./aishell/wav/test/S0901/BAC009S0901W0398.wav", "txt": "如果美容院的医疗过失罪名不成立"} -{"key": "BAC009S0901W0399", "wav": "./aishell/wav/test/S0901/BAC009S0901W0399.wav", "txt": "朴泰桓会面临更加严峻的舆论环境"} -{"key": "BAC009S0901W0400", "wav": "./aishell/wav/test/S0901/BAC009S0901W0400.wav", "txt": "这场官司的前五次公判"} -{"key": "BAC009S0901W0401", "wav": "./aishell/wav/test/S0901/BAC009S0901W0401.wav", "txt": "朴泰桓和美容院都互不相让"} -{"key": "BAC009S0901W0402", "wav": "./aishell/wav/test/S0901/BAC009S0901W0402.wav", "txt": "一度让不少粉丝心碎不已"} -{"key": "BAC009S0901W0403", "wav": "./aishell/wav/test/S0901/BAC009S0901W0403.wav", "txt": "退役之后的高桥大辅并未远离公众视线"} -{"key": "BAC009S0901W0404", "wav": "./aishell/wav/test/S0901/BAC009S0901W0404.wav", "txt": "瓦尔兹将精心演绎这个著名角色"} -{"key": "BAC009S0901W0405", "wav": "./aishell/wav/test/S0901/BAC009S0901W0405.wav", "txt": "其首脑恩斯特布鲁菲尔是邦德的最终敌人"} -{"key": "BAC009S0901W0406", "wav": "./aishell/wav/test/S0901/BAC009S0901W0406.wav", "txt": "这个角色拥有一只白色的波斯猫作为自己的宠物"} -{"key": "BAC009S0901W0407", "wav": "./aishell/wav/test/S0901/BAC009S0901W0407.wav", "txt": "值得一提的是"} -{"key": "BAC009S0901W0408", "wav": "./aishell/wav/test/S0901/BAC009S0901W0408.wav", "txt": "搞怪调皮吐舌卖萌娱乐频道"} -{"key": "BAC009S0901W0409", "wav": "./aishell/wav/test/S0901/BAC009S0901W0409.wav", "txt": "搜狐娱乐讯八月九日晚"} -{"key": "BAC009S0901W0410", "wav": "./aishell/wav/test/S0901/BAC009S0901W0410.wav", "txt": "陈冠希在微博晒出一段小视频"} -{"key": "BAC009S0901W0411", "wav": "./aishell/wav/test/S0901/BAC009S0901W0411.wav", "txt": "陈冠希开始一直把镜头对着帽子上的皮卡丘"} -{"key": "BAC009S0901W0412", "wav": "./aishell/wav/test/S0901/BAC009S0901W0412.wav", "txt": "后来突然冒出头来"} -{"key": "BAC009S0901W0413", "wav": "./aishell/wav/test/S0901/BAC009S0901W0413.wav", "txt": "对着镜头吐舌卖萌"} -{"key": "BAC009S0901W0414", "wav": "./aishell/wav/test/S0901/BAC009S0901W0414.wav", "txt": "搜狐娱乐讯九月二日凌晨"} -{"key": "BAC009S0901W0415", "wav": "./aishell/wav/test/S0901/BAC009S0901W0415.wav", "txt": "陈冠希在微博晒出一张自拍照"} -{"key": "BAC009S0901W0416", "wav": "./aishell/wav/test/S0901/BAC009S0901W0416.wav", "txt": "陈冠希穿休闲短袖配宽松裤子"} -{"key": "BAC009S0901W0417", "wav": "./aishell/wav/test/S0901/BAC009S0901W0417.wav", "txt": "网友纷纷留言越来越像潮流教父了"} -{"key": "BAC009S0901W0418", "wav": "./aishell/wav/test/S0901/BAC009S0901W0418.wav", "txt": "这裙子娇艳"} -{"key": "BAC009S0901W0419", "wav": "./aishell/wav/test/S0901/BAC009S0901W0419.wav", "txt": "帅出新高度"} -{"key": "BAC009S0901W0420", "wav": "./aishell/wav/test/S0901/BAC009S0901W0420.wav", "txt": "搜狐娱乐讯据台湾媒体报道"} -{"key": "BAC009S0901W0421", "wav": "./aishell/wav/test/S0901/BAC009S0901W0421.wav", "txt": "多次想复合却无下文"} -{"key": "BAC009S0901W0422", "wav": "./aishell/wav/test/S0901/BAC009S0901W0422.wav", "txt": "感情事备受关注"} -{"key": "BAC009S0901W0423", "wav": "./aishell/wav/test/S0901/BAC009S0901W0423.wav", "txt": "前天他在脸书晒出自拍照"} -{"key": "BAC009S0901W0424", "wav": "./aishell/wav/test/S0901/BAC009S0901W0424.wav", "txt": "满脸黑斑与大眼袋"} -{"key": "BAC009S0901W0425", "wav": "./aishell/wav/test/S0901/BAC009S0901W0425.wav", "txt": "老残样再度乍现"} -{"key": "BAC009S0901W0426", "wav": "./aishell/wav/test/S0901/BAC009S0901W0426.wav", "txt": "搜狐娱乐讯据台湾媒体报道"} -{"key": "BAC009S0901W0427", "wav": "./aishell/wav/test/S0901/BAC009S0901W0427.wav", "txt": "事后解释是生活观不同才分开"} -{"key": 
"BAC009S0901W0428", "wav": "./aishell/wav/test/S0901/BAC009S0901W0428.wav", "txt": "但隔年三月却又分享一张女方坐他大腿的照片"} -{"key": "BAC009S0901W0429", "wav": "./aishell/wav/test/S0901/BAC009S0901W0429.wav", "txt": "一度让外界以为两人复合"} -{"key": "BAC009S0901W0430", "wav": "./aishell/wav/test/S0901/BAC009S0901W0430.wav", "txt": "但现在又有别的女孩坐上他的大腿"} -{"key": "BAC009S0901W0431", "wav": "./aishell/wav/test/S0901/BAC009S0901W0431.wav", "txt": "校长邱勇上任后首次参加学生毕业典礼并演讲"} -{"key": "BAC009S0901W0432", "wav": "./aishell/wav/test/S0901/BAC009S0901W0432.wav", "txt": "追求使命需要有强大的定力昨日上午"} -{"key": "BAC009S0901W0433", "wav": "./aishell/wav/test/S0901/BAC009S0901W0433.wav", "txt": "他叮嘱五千馀名毕业生"} -{"key": "BAC009S0901W0434", "wav": "./aishell/wav/test/S0901/BAC009S0901W0434.wav", "txt": "要有清晰的目标人文情怀和做到执着坚守"} -{"key": "BAC009S0901W0435", "wav": "./aishell/wav/test/S0901/BAC009S0901W0435.wav", "txt": "清华法学院教授司法改革应限制两长权力"} -{"key": "BAC009S0901W0436", "wav": "./aishell/wav/test/S0901/BAC009S0901W0436.wav", "txt": "本报讯记者汪红日前"} -{"key": "BAC009S0901W0437", "wav": "./aishell/wav/test/S0901/BAC009S0901W0437.wav", "txt": "对允许其亲自过问的案件提出严格限定标准"} -{"key": "BAC009S0901W0438", "wav": "./aishell/wav/test/S0901/BAC009S0901W0438.wav", "txt": "清华辟谣保安迫降无人机为人为诋毁"} -{"key": "BAC009S0901W0439", "wav": "./aishell/wav/test/S0901/BAC009S0901W0439.wav", "txt": "该事件引发广泛关注"} -{"key": "BAC009S0901W0440", "wav": "./aishell/wav/test/S0901/BAC009S0901W0440.wav", "txt": "清华大学通过调取监控录线发现"} -{"key": "BAC009S0901W0441", "wav": "./aishell/wav/test/S0901/BAC009S0901W0441.wav", "txt": "该保安为附近大厦保安"} -{"key": "BAC009S0901W0442", "wav": "./aishell/wav/test/S0901/BAC009S0901W0442.wav", "txt": "目前该保安承认有人花二百元雇他进行拍照"} -{"key": "BAC009S0901W0443", "wav": "./aishell/wav/test/S0901/BAC009S0901W0443.wav", "txt": "称当时几位学生模样的人让他帮忙配合拍照用来宣传"} -{"key": "BAC009S0901W0444", "wav": "./aishell/wav/test/S0901/BAC009S0901W0444.wav", "txt": "抓着男生的动作为摆拍"} -{"key": "BAC009S0901W0445", "wav": "./aishell/wav/test/S0901/BAC009S0901W0445.wav", "txt": "摔毁无人机一事为杜撰"} -{"key": "BAC009S0901W0446", "wav": "./aishell/wav/test/S0901/BAC009S0901W0446.wav", "txt": "保安得知自己被骗后表示我真的很恨他们"} -{"key": "BAC009S0901W0447", "wav": "./aishell/wav/test/S0901/BAC009S0901W0447.wav", "txt": "记者联系发微博男子"} -{"key": "BAC009S0901W0448", "wav": "./aishell/wav/test/S0901/BAC009S0901W0448.wav", "txt": "他表示我没想到弄这么大"} -{"key": "BAC009S0901W0449", "wav": "./aishell/wav/test/S0901/BAC009S0901W0449.wav", "txt": "目前该男子已将微博内容全部删除"} -{"key": "BAC009S0901W0450", "wav": "./aishell/wav/test/S0901/BAC009S0901W0450.wav", "txt": "清华附小昨迎百年校庆校长诠释成志教育理念"} -{"key": "BAC009S0901W0451", "wav": "./aishell/wav/test/S0901/BAC009S0901W0451.wav", "txt": "清华附小校长窦桂海诠释成志教育理念"} -{"key": "BAC009S0901W0452", "wav": "./aishell/wav/test/S0901/BAC009S0901W0452.wav", "txt": "清晨飘来辣眼白雾济南八名村民中毒入院"} -{"key": "BAC009S0901W0453", "wav": "./aishell/wav/test/S0901/BAC009S0901W0453.wav", "txt": "赵女士的公公躺在病床上"} -{"key": "BAC009S0901W0454", "wav": "./aishell/wav/test/S0901/BAC009S0901W0454.wav", "txt": "目前神志已恢复清醒"} -{"key": "BAC009S0901W0455", "wav": "./aishell/wav/test/S0901/BAC009S0901W0455.wav", "txt": "记者李焜染摄十三日早晨"} -{"key": "BAC009S0901W0456", "wav": "./aishell/wav/test/S0901/BAC009S0901W0456.wav", "txt": "历城区港沟镇神武村飘来多股白色不明气体"} -{"key": "BAC009S0901W0457", "wav": "./aishell/wav/test/S0901/BAC009S0901W0457.wav", "txt": "八位村民先后出现中毒症状"} -{"key": "BAC009S0901W0458", "wav": "./aishell/wav/test/S0901/BAC009S0901W0458.wav", "txt": "目前八人均已脱离生命危险"} -{"key": "BAC009S0901W0459", "wav": "./aishell/wav/test/S0901/BAC009S0901W0459.wav", "txt": "神秘气体成分及来源正在进一步核实"} -{"key": 
"BAC009S0901W0460", "wav": "./aishell/wav/test/S0901/BAC009S0901W0460.wav", "txt": "清洁工开宝马上下班真实身份为在逃诈骗犯"} -{"key": "BAC009S0901W0461", "wav": "./aishell/wav/test/S0901/BAC009S0901W0461.wav", "txt": "彭某下班后准备开车离开"} -{"key": "BAC009S0901W0462", "wav": "./aishell/wav/test/S0901/BAC009S0901W0462.wav", "txt": "清洁工开宝马上班被称励志故事经查系逃犯"} -{"key": "BAC009S0901W0463", "wav": "./aishell/wav/test/S0901/BAC009S0901W0463.wav", "txt": "十四日开宝马来上班重庆晨报记者罗伟雷罗伟雷键摄"} -{"key": "BAC009S0901W0464", "wav": "./aishell/wav/test/S0901/BAC009S0901W0464.wav", "txt": "清洁工被电梯咬断腿曾反映这样擦电源危险"} -{"key": "BAC009S0901W0465", "wav": "./aishell/wav/test/S0901/BAC009S0901W0465.wav", "txt": "制图黄欣晨报记者佟继萍王亦菲实习生张诗欢"} -{"key": "BAC009S0901W0466", "wav": "./aishell/wav/test/S0901/BAC009S0901W0466.wav", "txt": "网络时代信息的存在有了新方式"} -{"key": "BAC009S0901W0467", "wav": "./aishell/wav/test/S0901/BAC009S0901W0467.wav", "txt": "云盘就是一种直接把信息存在网络空间里的存储工具"} -{"key": "BAC009S0901W0468", "wav": "./aishell/wav/test/S0901/BAC009S0901W0468.wav", "txt": "和传统硬盘不同的是"} -{"key": "BAC009S0901W0469", "wav": "./aishell/wav/test/S0901/BAC009S0901W0469.wav", "txt": "用户不需要把它带在身上"} -{"key": "BAC009S0901W0470", "wav": "./aishell/wav/test/S0901/BAC009S0901W0470.wav", "txt": "只需要一个账户名和密码"} -{"key": "BAC009S0901W0471", "wav": "./aishell/wav/test/S0901/BAC009S0901W0471.wav", "txt": "就可以在网络环境下"} -{"key": "BAC009S0901W0472", "wav": "./aishell/wav/test/S0901/BAC009S0901W0472.wav", "txt": "上传读取和下载里面的信息"} -{"key": "BAC009S0901W0473", "wav": "./aishell/wav/test/S0901/BAC009S0901W0473.wav", "txt": "本来云盘的出现方便了人们的生活和工作"} -{"key": "BAC009S0901W0474", "wav": "./aishell/wav/test/S0901/BAC009S0901W0474.wav", "txt": "把云盘变成了一个淫秽色情信息的隐蔽聚散地"} -{"key": "BAC009S0901W0475", "wav": "./aishell/wav/test/S0901/BAC009S0901W0475.wav", "txt": "清风正在吹散互联网雾霾"} -{"key": "BAC009S0901W0476", "wav": "./aishell/wav/test/S0901/BAC009S0901W0476.wav", "txt": "四年前的一幕仍没从夏英俊的记忆中抹去"} -{"key": "BAC009S0901W0477", "wav": "./aishell/wav/test/S0901/BAC009S0901W0477.wav", "txt": "渐冻男孩驾驶电动轮椅上班医生曾诊断活不过十八岁"} -{"key": "BAC009S0901W0478", "wav": "./aishell/wav/test/S0901/BAC009S0901W0478.wav", "txt": "蔡兴桥在妈妈的帮助下靠墙练习站立"} -{"key": "BAC009S0901W0479", "wav": "./aishell/wav/test/S0901/BAC009S0901W0479.wav", "txt": "渔民南海捞出外国间谍潜航器搜集情报或已传回"} -{"key": "BAC009S0901W0480", "wav": "./aishell/wav/test/S0901/BAC009S0901W0480.wav", "txt": "在许多人眼里这都是小说和电影里才会出现的情节"} -{"key": "BAC009S0901W0481", "wav": "./aishell/wav/test/S0901/BAC009S0901W0481.wav", "txt": "可实际上维护国家边海防安全保护国家利益不受侵犯"} -{"key": "BAC009S0901W0482", "wav": "./aishell/wav/test/S0901/BAC009S0901W0482.wav", "txt": "这样的斗争和考验有时就发生在我们身边"} -{"key": "BAC009S0901W0483", "wav": "./aishell/wav/test/S0901/BAC009S0901W0483.wav", "txt": "南海的渔民在捕鱼的时候就曾捞出过一个奇怪的东西"} -{"key": "BAC009S0901W0484", "wav": "./aishell/wav/test/S0901/BAC009S0901W0484.wav", "txt": "由此引出一起重大安全案件"} -{"key": "BAC009S0901W0485", "wav": "./aishell/wav/test/S0901/BAC009S0901W0485.wav", "txt": "渔民在南海打捞起可疑电子装置确系无人潜航器"} -{"key": "BAC009S0901W0486", "wav": "./aishell/wav/test/S0901/BAC009S0901W0486.wav", "txt": "经国家安全部门会同有关技术权威部门鉴定"} -{"key": "BAC009S0901W0487", "wav": "./aishell/wav/test/S0901/BAC009S0901W0487.wav", "txt": "它既能搜集我国重要海域内各类环境数据"} -{"key": "BAC009S0901W0488", "wav": "./aishell/wav/test/S0901/BAC009S0901W0488.wav", "txt": "又能探测获取我海军舰队活动动向"} -{"key": "BAC009S0901W0489", "wav": "./aishell/wav/test/S0901/BAC009S0901W0489.wav", "txt": "实现近距离侦查和情报收集任务"} -{"key": "BAC009S0901W0490", "wav": "./aishell/wav/test/S0901/BAC009S0901W0490.wav", "txt": "渔民投诉遭离奇执法被派出所讨价还价式罚款"} -{"key": "BAC009S0901W0491", "wav": 
"./aishell/wav/test/S0901/BAC009S0901W0491.wav", "txt": "海南临高籍多位渔民向中新网记者反应称"} -{"key": "BAC009S0901W0492", "wav": "./aishell/wav/test/S0901/BAC009S0901W0492.wav", "txt": "二十二日在文昌市清澜港边防派出所执法检查时"} -{"key": "BAC009S0901W0493", "wav": "./aishell/wav/test/S0901/BAC009S0901W0493.wav", "txt": "渔民缴纳罚款后在摁手印时"} -{"key": "BAC009S0901W0494", "wav": "./aishell/wav/test/S0901/BAC009S0901W0494.wav", "txt": "被民警用针扎破手指"} -{"key": "BAC009S0901W0495", "wav": "./aishell/wav/test/S0901/BAC009S0901W0495.wav", "txt": "让他们很担心会不会相互传染疾病"} -{"key": "BAC009S0902W0121", "wav": "./aishell/wav/test/S0902/BAC009S0902W0121.wav", "txt": "所以审批加上放款的时间最快也要在七个半月左右"} -{"key": "BAC009S0902W0122", "wav": "./aishell/wav/test/S0902/BAC009S0902W0122.wav", "txt": "作为取之于民用之于民的住房公积金"} -{"key": "BAC009S0902W0123", "wav": "./aishell/wav/test/S0902/BAC009S0902W0123.wav", "txt": "缴存者还可以在租房装修离退休时提取"} -{"key": "BAC009S0902W0124", "wav": "./aishell/wav/test/S0902/BAC009S0902W0124.wav", "txt": "因此操作环节的快捷性与便捷性非常重要"} -{"key": "BAC009S0902W0125", "wav": "./aishell/wav/test/S0902/BAC009S0902W0125.wav", "txt": "后续要加大公积金贷款的便利性"} -{"key": "BAC009S0902W0126", "wav": "./aishell/wav/test/S0902/BAC009S0902W0126.wav", "txt": "鼓励购房者积极缴纳公积金"} -{"key": "BAC009S0902W0127", "wav": "./aishell/wav/test/S0902/BAC009S0902W0127.wav", "txt": "进而选择此类方式购房"} -{"key": "BAC009S0902W0128", "wav": "./aishell/wav/test/S0902/BAC009S0902W0128.wav", "txt": "另外要处理公积金异地使用的问题"} -{"key": "BAC009S0902W0129", "wav": "./aishell/wav/test/S0902/BAC009S0902W0129.wav", "txt": "这对于目前一线城市来说很紧要"} -{"key": "BAC009S0902W0130", "wav": "./aishell/wav/test/S0902/BAC009S0902W0130.wav", "txt": "很多人受限购政策的影响"} -{"key": "BAC009S0902W0131", "wav": "./aishell/wav/test/S0902/BAC009S0902W0131.wav", "txt": "难以在周边城市用公积金购房"} -{"key": "BAC009S0902W0132", "wav": "./aishell/wav/test/S0902/BAC009S0902W0132.wav", "txt": "导致公积金资源闲置的问题出现"} -{"key": "BAC009S0902W0133", "wav": "./aishell/wav/test/S0902/BAC009S0902W0133.wav", "txt": "美丽北京大型绿色公益品牌项目"} -{"key": "BAC009S0902W0134", "wav": "./aishell/wav/test/S0902/BAC009S0902W0134.wav", "txt": "随着广州住房公积金贷款政策的调整实施"} -{"key": "BAC009S0902W0135", "wav": "./aishell/wav/test/S0902/BAC009S0902W0135.wav", "txt": "政策内容主要涉及购房"} -{"key": "BAC009S0902W0136", "wav": "./aishell/wav/test/S0902/BAC009S0902W0136.wav", "txt": "随着广州住房公积金贷款政策的调整实施"} -{"key": "BAC009S0902W0137", "wav": "./aishell/wav/test/S0902/BAC009S0902W0137.wav", "txt": "公积金贷款最高额度亦不同程度上调"} -{"key": "BAC009S0902W0138", "wav": "./aishell/wav/test/S0902/BAC009S0902W0138.wav", "txt": "住房公积金贷款因其利率较低的优势"} -{"key": "BAC009S0902W0139", "wav": "./aishell/wav/test/S0902/BAC009S0902W0139.wav", "txt": "一直以来广受购房者青睐"} -{"key": "BAC009S0902W0140", "wav": "./aishell/wav/test/S0902/BAC009S0902W0140.wav", "txt": "本轮本轮住房公积金房贷政策调整"} -{"key": "BAC009S0902W0141", "wav": "./aishell/wav/test/S0902/BAC009S0902W0141.wav", "txt": "进一步加速了消费者的入市节奏"} -{"key": "BAC009S0902W0142", "wav": "./aishell/wav/test/S0902/BAC009S0902W0142.wav", "txt": "广州调整住房公积金个人住房贷款政策"} -{"key": "BAC009S0902W0143", "wav": "./aishell/wav/test/S0902/BAC009S0902W0143.wav", "txt": "同时对申请公积金贷款的缴纳时限调整为七个月"} -{"key": "BAC009S0902W0144", "wav": "./aishell/wav/test/S0902/BAC009S0902W0144.wav", "txt": "据广州日报昨天报道"} -{"key": "BAC009S0902W0145", "wav": "./aishell/wav/test/S0902/BAC009S0902W0145.wav", "txt": "公积金贷款首付比例降低的消息令购房者喜出望外"} -{"key": "BAC009S0902W0146", "wav": "./aishell/wav/test/S0902/BAC009S0902W0146.wav", "txt": "其中刚需要买入市积极性明显提高"} -{"key": "BAC009S0902W0147", "wav": "./aishell/wav/test/S0902/BAC009S0902W0147.wav", "txt": "据伟嘉安捷提供的数据显示"} -{"key": "BAC009S0902W0148", "wav": 
"./aishell/wav/test/S0902/BAC009S0902W0148.wav", "txt": "北京公积金贷款首付比例松绑一周后"} -{"key": "BAC009S0902W0149", "wav": "./aishell/wav/test/S0902/BAC009S0902W0149.wav", "txt": "公积金贷款及组合贷咨询量明显上涨"} -{"key": "BAC009S0902W0150", "wav": "./aishell/wav/test/S0902/BAC009S0902W0150.wav", "txt": "尤其组合贷的咨询量较上月月初一上涨百分之七左右"} -{"key": "BAC009S0902W0151", "wav": "./aishell/wav/test/S0902/BAC009S0902W0151.wav", "txt": "上海深圳等主要城市也在公积金新政推动下"} -{"key": "BAC009S0902W0152", "wav": "./aishell/wav/test/S0902/BAC009S0902W0152.wav", "txt": "呈现购房者积入市的行情"} -{"key": "BAC009S0902W0153", "wav": "./aishell/wav/test/S0902/BAC009S0902W0153.wav", "txt": "全国已有超百个城市发布了不同力度的公积金松绑政策"} -{"key": "BAC009S0902W0154", "wav": "./aishell/wav/test/S0902/BAC009S0902W0154.wav", "txt": "加之降息降准等政策组合拳"} -{"key": "BAC009S0902W0155", "wav": "./aishell/wav/test/S0902/BAC009S0902W0155.wav", "txt": "呈现出量价齐涨的局面"} -{"key": "BAC009S0902W0156", "wav": "./aishell/wav/test/S0902/BAC009S0902W0156.wav", "txt": "据中国指数研究院最新数据显示"} -{"key": "BAC009S0902W0157", "wav": "./aishell/wav/test/S0902/BAC009S0902W0157.wav", "txt": "深圳环比上上涨百分之七"} -{"key": "BAC009S0902W0158", "wav": "./aishell/wav/test/S0902/BAC009S0902W0158.wav", "txt": "涨幅据十大城市之首"} -{"key": "BAC009S0902W0159", "wav": "./aishell/wav/test/S0902/BAC009S0902W0159.wav", "txt": "五月份多地楼市的成交量明显上涨"} -{"key": "BAC009S0902W0160", "wav": "./aishell/wav/test/S0902/BAC009S0902W0160.wav", "txt": "是房地产当前发局格局下的一个必然"} -{"key": "BAC009S0902W0161", "wav": "./aishell/wav/test/S0902/BAC009S0902W0161.wav", "txt": "唯独这样才能盘活公积金资源"} -{"key": "BAC009S0902W0162", "wav": "./aishell/wav/test/S0902/BAC009S0902W0162.wav", "txt": "促使更多购房者积极入市"} -{"key": "BAC009S0902W0163", "wav": "./aishell/wav/test/S0902/BAC009S0902W0163.wav", "txt": "伴随着各地住房公积金新政的落地实施"} -{"key": "BAC009S0902W0164", "wav": "./aishell/wav/test/S0902/BAC009S0902W0164.wav", "txt": "楼市进展仍需进一步观望"} -{"key": "BAC009S0902W0165", "wav": "./aishell/wav/test/S0902/BAC009S0902W0165.wav", "txt": "购房者受惠于政策利好的同时"} -{"key": "BAC009S0902W0166", "wav": "./aishell/wav/test/S0902/BAC009S0902W0166.wav", "txt": "公积金在申请放贷流程上并未提速"} -{"key": "BAC009S0902W0167", "wav": "./aishell/wav/test/S0902/BAC009S0902W0167.wav", "txt": "相反相关环节上审批更加严格"} -{"key": "BAC009S0902W0168", "wav": "./aishell/wav/test/S0902/BAC009S0902W0168.wav", "txt": "从目前上海住房公积金的具体政策看"} -{"key": "BAC009S0902W0169", "wav": "./aishell/wav/test/S0902/BAC009S0902W0169.wav", "txt": "购房的扶持力度在加大"} -{"key": "BAC009S0902W0170", "wav": "./aishell/wav/test/S0902/BAC009S0902W0170.wav", "txt": "但主要还是体现在贷款成本的降低"} -{"key": "BAC009S0902W0171", "wav": "./aishell/wav/test/S0902/BAC009S0902W0171.wav", "txt": "而申请公积金贷款方面还是需要走严格的流程"} -{"key": "BAC009S0902W0172", "wav": "./aishell/wav/test/S0902/BAC009S0902W0172.wav", "txt": "公积金提取一直是目前试图突破的内容"} -{"key": "BAC009S0902W0173", "wav": "./aishell/wav/test/S0902/BAC009S0902W0173.wav", "txt": "但目前还未出现大面积提取行为"} -{"key": "BAC009S0902W0174", "wav": "./aishell/wav/test/S0902/BAC009S0902W0174.wav", "txt": "来自广州日报的报道称"} -{"key": "BAC009S0902W0175", "wav": "./aishell/wav/test/S0902/BAC009S0902W0175.wav", "txt": "从申请到最后的放款"} -{"key": "BAC009S0902W0176", "wav": "./aishell/wav/test/S0902/BAC009S0902W0176.wav", "txt": "部分客户甚至等两个多月"} -{"key": "BAC009S0902W0177", "wav": "./aishell/wav/test/S0902/BAC009S0902W0177.wav", "txt": "如果申请公积金贷款及公积金贷款与商业贷款的组合贷"} -{"key": "BAC009S0902W0178", "wav": "./aishell/wav/test/S0902/BAC009S0902W0178.wav", "txt": "伟嘉安捷对中新网房产频道表示"} -{"key": "BAC009S0902W0179", "wav": "./aishell/wav/test/S0902/BAC009S0902W0179.wav", "txt": "现在公积金贷款办理需要一个月左右的时间"} -{"key": "BAC009S0902W0180", "wav": 
"./aishell/wav/test/S0902/BAC009S0902W0180.wav", "txt": "而申请办理组合贷款的手续则更为复杂"} -{"key": "BAC009S0902W0181", "wav": "./aishell/wav/test/S0902/BAC009S0902W0181.wav", "txt": "所以审批加上放款的时间最快也要在五个半月左右"} -{"key": "BAC009S0902W0182", "wav": "./aishell/wav/test/S0902/BAC009S0902W0182.wav", "txt": "作为取之于民用之于民的住房公积金"} -{"key": "BAC009S0902W0183", "wav": "./aishell/wav/test/S0902/BAC009S0902W0183.wav", "txt": "缴存者还可以在租房装修离退休时提取"} -{"key": "BAC009S0902W0184", "wav": "./aishell/wav/test/S0902/BAC009S0902W0184.wav", "txt": "因此操作环节的快捷性与便捷性非常重要"} -{"key": "BAC009S0902W0185", "wav": "./aishell/wav/test/S0902/BAC009S0902W0185.wav", "txt": "后续要加大公积金贷款的便利性"} -{"key": "BAC009S0902W0186", "wav": "./aishell/wav/test/S0902/BAC009S0902W0186.wav", "txt": "鼓励购房者积极缴纳公积金"} -{"key": "BAC009S0902W0187", "wav": "./aishell/wav/test/S0902/BAC009S0902W0187.wav", "txt": "科技支撑能力显着增强"} -{"key": "BAC009S0902W0188", "wav": "./aishell/wav/test/S0902/BAC009S0902W0188.wav", "txt": "生产经营方式不断优化"} -{"key": "BAC009S0902W0189", "wav": "./aishell/wav/test/S0902/BAC009S0902W0189.wav", "txt": "农业产业体系更趋完善"} -{"key": "BAC009S0902W0190", "wav": "./aishell/wav/test/S0902/BAC009S0902W0190.wav", "txt": "土地产出率劳动生产率资源利用率显着提高"} -{"key": "BAC009S0902W0191", "wav": "./aishell/wav/test/S0902/BAC009S0902W0191.wav", "txt": "现代农业建设取得突破性进展"} -{"key": "BAC009S0902W0192", "wav": "./aishell/wav/test/S0902/BAC009S0902W0192.wav", "txt": "主要农产品优势区基本实行农业现代化"} -{"key": "BAC009S0902W0193", "wav": "./aishell/wav/test/S0902/BAC009S0902W0193.wav", "txt": "现代农业发展主要指标类别"} -{"key": "BAC009S0902W0194", "wav": "./aishell/wav/test/S0902/BAC009S0902W0194.wav", "txt": "粮食综合生产能力五亿吨"} -{"key": "BAC009S0902W0195", "wav": "./aishell/wav/test/S0902/BAC009S0902W0195.wav", "txt": "粮食播种面积五亿亩棉花总产量七万吨"} -{"key": "BAC009S0902W0196", "wav": "./aishell/wav/test/S0902/BAC009S0902W0196.wav", "txt": "油料总产量七万吨"} -{"key": "BAC009S0902W0197", "wav": "./aishell/wav/test/S0902/BAC009S0902W0197.wav", "txt": "肉类总产量五万吨"} -{"key": "BAC009S0902W0198", "wav": "./aishell/wav/test/S0902/BAC009S0902W0198.wav", "txt": "奶类总产量七万吨水产品总产量七万吨"} -{"key": "BAC009S0902W0199", "wav": "./aishell/wav/test/S0902/BAC009S0902W0199.wav", "txt": "农产品质量安全例行监测总体合格率百分之五十"} -{"key": "BAC009S0902W0200", "wav": "./aishell/wav/test/S0902/BAC009S0902W0200.wav", "txt": "畜牧业产值占农业总产值比重百分之"} -{"key": "BAC009S0902W0201", "wav": "./aishell/wav/test/S0902/BAC009S0902W0201.wav", "txt": "渔业产值占农业总产值比重百分之"} -{"key": "BAC009S0902W0202", "wav": "./aishell/wav/test/S0902/BAC009S0902W0202.wav", "txt": "农产品加工业产值与农业总产值"} -{"key": "BAC009S0902W0203", "wav": "./aishell/wav/test/S0902/BAC009S0902W0203.wav", "txt": "丰富和解调仲裁诉等维权内容和方式"} -{"key": "BAC009S0902W0204", "wav": "./aishell/wav/test/S0902/BAC009S0902W0204.wav", "txt": "新增农田有效灌溉面积万亩"} -{"key": "BAC009S0902W0205", "wav": "./aishell/wav/test/S0902/BAC009S0902W0205.wav", "txt": "耕种收综合机械化水平百分之五"} -{"key": "BAC009S0902W0206", "wav": "./aishell/wav/test/S0902/BAC009S0902W0206.wav", "txt": "丰富和解调解仲裁诉诉讼等"} -{"key": "BAC009S0902W0207", "wav": "./aishell/wav/test/S0902/BAC009S0902W0207.wav", "txt": "科技科技进步贡献率百分之七"} -{"key": "BAC009S0902W0208", "wav": "./aishell/wav/test/S0902/BAC009S0902W0208.wav", "txt": "农村实用人才总量万人"} -{"key": "BAC009S0902W0209", "wav": "./aishell/wav/test/S0902/BAC009S0902W0209.wav", "txt": "农业产业化组织带动农户数量亿户"} -{"key": "BAC009S0902W0210", "wav": "./aishell/wav/test/S0902/BAC009S0902W0210.wav", "txt": "团结就是力量"} -{"key": "BAC009S0902W0211", "wav": "./aishell/wav/test/S0902/BAC009S0902W0211.wav", "txt": "适宜农户沼气普及率百分之五"} -{"key": "BAC009S0902W0212", "wav": 
"./aishell/wav/test/S0902/BAC009S0902W0212.wav", "txt": "农作物秸秆综合利用率百分之五"} -{"key": "BAC009S0902W0213", "wav": "./aishell/wav/test/S0902/BAC009S0902W0213.wav", "txt": "薛之谦的歌儿很棒"} -{"key": "BAC009S0902W0214", "wav": "./aishell/wav/test/S0902/BAC009S0902W0214.wav", "txt": "农林牧渔业增长值年均增长率百分之五"} -{"key": "BAC009S0902W0215", "wav": "./aishell/wav/test/S0902/BAC009S0902W0215.wav", "txt": "增长速度按可比价格计算"} -{"key": "BAC009S0902W0216", "wav": "./aishell/wav/test/S0902/BAC009S0902W0216.wav", "txt": "从加快转变农业发展的方式关键环节入手"} -{"key": "BAC009S0902W0217", "wav": "./aishell/wav/test/S0902/BAC009S0902W0217.wav", "txt": "完善现代农业产业体系"} -{"key": "BAC009S0902W0218", "wav": "./aishell/wav/test/S0902/BAC009S0902W0218.wav", "txt": "稳定发展粮食和棉油糖生产"} -{"key": "BAC009S0902W0219", "wav": "./aishell/wav/test/S0902/BAC009S0902W0219.wav", "txt": "实施全国增长千亿斤粮食生产能力规划"} -{"key": "BAC009S0902W0220", "wav": "./aishell/wav/test/S0902/BAC009S0902W0220.wav", "txt": "积极推进南方稻区单改双"} -{"key": "BAC009S0902W0221", "wav": "./aishell/wav/test/S0902/BAC009S0902W0221.wav", "txt": "扩大东北优势区粳稻种植面积"} -{"key": "BAC009S0902W0222", "wav": "./aishell/wav/test/S0902/BAC009S0902W0222.wav", "txt": "稳步推进江淮等粳高稻生产适宜区糟改粳"} -{"key": "BAC009S0902W0223", "wav": "./aishell/wav/test/S0902/BAC009S0902W0223.wav", "txt": "稳定增加玉米播种面积"} -{"key": "BAC009S0902W0224", "wav": "./aishell/wav/test/S0902/BAC009S0902W0224.wav", "txt": "积极恢复和稳定大豆种植面积"} -{"key": "BAC009S0902W0225", "wav": "./aishell/wav/test/S0902/BAC009S0902W0225.wav", "txt": "积极开发和选育马铃薯优质专用高产品种"} -{"key": "BAC009S0902W0226", "wav": "./aishell/wav/test/S0902/BAC009S0902W0226.wav", "txt": "提高脱毒种薯供给能力"} -{"key": "BAC009S0902W0227", "wav": "./aishell/wav/test/S0902/BAC009S0902W0227.wav", "txt": "继续加强优质棉花生产基地建设"} -{"key": "BAC009S0902W0228", "wav": "./aishell/wav/test/S0902/BAC009S0902W0228.wav", "txt": "多油并举稳定食用植物油自给率"} -{"key": "BAC009S0902W0229", "wav": "./aishell/wav/test/S0902/BAC009S0902W0229.wav", "txt": "基本满足国内棉花消费需求"} -{"key": "BAC009S0902W0230", "wav": "./aishell/wav/test/S0902/BAC009S0902W0230.wav", "txt": "积极发展菜篮子产品生产"} -{"key": "BAC009S0902W0231", "wav": "./aishell/wav/test/S0902/BAC009S0902W0231.wav", "txt": "加强蔬菜水果肉蛋奶水产品等产品优势产区建设"} -{"key": "BAC009S0902W0232", "wav": "./aishell/wav/test/S0902/BAC009S0902W0232.wav", "txt": "扩大大中城市郊区菜篮子产品生产基地规模"} -{"key": "BAC009S0902W0233", "wav": "./aishell/wav/test/S0902/BAC009S0902W0233.wav", "txt": "推动苹果柑橘等优势园艺产品生产"} -{"key": "BAC009S0902W0234", "wav": "./aishell/wav/test/S0902/BAC009S0902W0234.wav", "txt": "稳定发展生猪和蛋禽"} -{"key": "BAC009S0902W0235", "wav": "./aishell/wav/test/S0902/BAC009S0902W0235.wav", "txt": "大力发展农产品加工和流通业"} -{"key": "BAC009S0902W0236", "wav": "./aishell/wav/test/S0902/BAC009S0902W0236.wav", "txt": "加强主要农产品优势产区加工基地建设"} -{"key": "BAC009S0902W0237", "wav": "./aishell/wav/test/S0902/BAC009S0902W0237.wav", "txt": "引导农产品加工业向种养业优势区域和城市郊区集中"} -{"key": "BAC009S0902W0238", "wav": "./aishell/wav/test/S0902/BAC009S0902W0238.wav", "txt": "启动实施农产品加工提升工程"} -{"key": "BAC009S0902W0239", "wav": "./aishell/wav/test/S0902/BAC009S0902W0239.wav", "txt": "提高生产流通组织化程度"} -{"key": "BAC009S0902W0240", "wav": "./aishell/wav/test/S0902/BAC009S0902W0240.wav", "txt": "培育一批产值过百亿元的大型加工和流通企业集团"} -{"key": "BAC009S0902W0241", "wav": "./aishell/wav/test/S0902/BAC009S0902W0241.wav", "txt": "强化流通基础设施建设和产销信息引导"} -{"key": "BAC009S0902W0242", "wav": "./aishell/wav/test/S0902/BAC009S0902W0242.wav", "txt": "升级改造农产品批发市场"} -{"key": "BAC009S0902W0243", "wav": "./aishell/wav/test/S0902/BAC009S0902W0243.wav", "txt": "支持优势产区现代化鲜活农产品批发市场建设"} -{"key": "BAC009S0902W0244", "wav": 
"./aishell/wav/test/S0902/BAC009S0902W0244.wav", "txt": "大力发展冷链体系和生鲜农产品配送"} -{"key": "BAC009S0902W0245", "wav": "./aishell/wav/test/S0902/BAC009S0902W0245.wav", "txt": "推进订单生产和农超对接"} -{"key": "BAC009S0902W0246", "wav": "./aishell/wav/test/S0902/BAC009S0902W0246.wav", "txt": "落实鲜活农产品运输绿化通道政策"} -{"key": "BAC009S0902W0247", "wav": "./aishell/wav/test/S0902/BAC009S0902W0247.wav", "txt": "降低农产品流通成本"} -{"key": "BAC009S0902W0248", "wav": "./aishell/wav/test/S0902/BAC009S0902W0248.wav", "txt": "规范和完善农产品期货市场"} -{"key": "BAC009S0902W0249", "wav": "./aishell/wav/test/S0902/BAC009S0902W0249.wav", "txt": "强化农业科技和人才支撑"} -{"key": "BAC009S0902W0250", "wav": "./aishell/wav/test/S0902/BAC009S0902W0250.wav", "txt": "增强农业科技自主创新能力"} -{"key": "BAC009S0902W0251", "wav": "./aishell/wav/test/S0902/BAC009S0902W0251.wav", "txt": "明确农业科技的公共性基础社会性地位"} -{"key": "BAC009S0902W0252", "wav": "./aishell/wav/test/S0902/BAC009S0902W0252.wav", "txt": "加强基础性前沿性公益性重大农业科学技术研究"} -{"key": "BAC009S0902W0253", "wav": "./aishell/wav/test/S0902/BAC009S0902W0253.wav", "txt": "比去年同期的六十二点三十亿美元大幅增长百分之十二"} -{"key": "BAC009S0902W0254", "wav": "./aishell/wav/test/S0902/BAC009S0902W0254.wav", "txt": "系涨幅最为明显的地区"} -{"key": "BAC009S0902W0255", "wav": "./aishell/wav/test/S0902/BAC009S0902W0255.wav", "txt": "占总营收的二十六点百分之六十点六十七"} -{"key": "BAC009S0902W0257", "wav": "./aishell/wav/test/S0902/BAC009S0902W0257.wav", "txt": "苹果的股价有一定的波动规律"} -{"key": "BAC009S0902W0258", "wav": "./aishell/wav/test/S0902/BAC009S0902W0258.wav", "txt": "即是在新品发布前的一个多季度的时间内"} -{"key": "BAC009S0902W0259", "wav": "./aishell/wav/test/S0902/BAC009S0902W0259.wav", "txt": "因为在新品发布之前"} -{"key": "BAC009S0902W0260", "wav": "./aishell/wav/test/S0902/BAC009S0902W0260.wav", "txt": "由于许多用户都持币待购"} -{"key": "BAC009S0902W0261", "wav": "./aishell/wav/test/S0902/BAC009S0902W0261.wav", "txt": "因此需求会暂时被抑制住"} -{"key": "BAC009S0902W0262", "wav": "./aishell/wav/test/S0902/BAC009S0902W0262.wav", "txt": "销量都会有一定的影响"} -{"key": "BAC009S0902W0263", "wav": "./aishell/wav/test/S0902/BAC009S0902W0263.wav", "txt": "孙永杰对二十一世纪报道记者表示"} -{"key": "BAC009S0902W0264", "wav": "./aishell/wav/test/S0902/BAC009S0902W0264.wav", "txt": "苹果的股价会随着销量相反"} -{"key": "BAC009S0902W0266", "wav": "./aishell/wav/test/S0902/BAC009S0902W0266.wav", "txt": "缺乏新的业务增长点"} -{"key": "BAC009S0902W0267", "wav": "./aishell/wav/test/S0902/BAC009S0902W0267.wav", "txt": "苹果在二零一四年营收为二百二十二亿美元"} -{"key": "BAC009S0902W0269", "wav": "./aishell/wav/test/S0902/BAC009S0902W0269.wav", "txt": "就手机领域的发展趋势"} -{"key": "BAC009S0902W0270", "wav": "./aishell/wav/test/S0902/BAC009S0902W0270.wav", "txt": "苹果高端市场已经确立了一个独一无二的地位"} -{"key": "BAC009S0902W0271", "wav": "./aishell/wav/test/S0902/BAC009S0902W0271.wav", "txt": "以前在高端智能手机市场领域"} -{"key": "BAC009S0902W0272", "wav": "./aishell/wav/test/S0902/BAC009S0902W0272.wav", "txt": "苹果有两个竞争对手"} -{"key": "BAC009S0902W0274", "wav": "./aishell/wav/test/S0902/BAC009S0902W0274.wav", "txt": "今年股价已经累计下跌了百分之六十而三星的情况也不佳"} -{"key": "BAC009S0902W0275", "wav": "./aishell/wav/test/S0902/BAC009S0902W0275.wav", "txt": "在三星第二季度财报中"} -{"key": "BAC009S0902W0277", "wav": "./aishell/wav/test/S0902/BAC009S0902W0277.wav", "txt": "降至二十六点零六万亿韩元"} -{"key": "BAC009S0902W0278", "wav": "./aishell/wav/test/S0902/BAC009S0902W0278.wav", "txt": "其中手机的销售额下降了七点百分之三"} -{"key": "BAC009S0902W0279", "wav": "./aishell/wav/test/S0902/BAC009S0902W0279.wav", "txt": "至二十五点五万亿韩元"} -{"key": "BAC009S0902W0280", "wav": "./aishell/wav/test/S0902/BAC009S0902W0280.wav", "txt": "在安卓手机的总体交付量中"} -{"key": "BAC009S0902W0281", "wav": 
"./aishell/wav/test/S0902/BAC009S0902W0281.wav", "txt": "价格高于六百美元的高端手机占比为百分之一"} -{"key": "BAC009S0902W0282", "wav": "./aishell/wav/test/S0902/BAC009S0902W0282.wav", "txt": "价格高于六百美元的高端手机"} -{"key": "BAC009S0902W0283", "wav": "./aishell/wav/test/S0902/BAC009S0902W0283.wav", "txt": "在安卓出货量中的占比减少到了百分之六"} -{"key": "BAC009S0902W0285", "wav": "./aishell/wav/test/S0902/BAC009S0902W0285.wav", "txt": "价格高于六百美元的占比从百分之七十增加到了百分之八十"} -{"key": "BAC009S0902W0287", "wav": "./aishell/wav/test/S0902/BAC009S0902W0287.wav", "txt": "苹果在高端市场击溃了对手"} -{"key": "BAC009S0902W0288", "wav": "./aishell/wav/test/S0902/BAC009S0902W0288.wav", "txt": "而这对于未来苹果保持高利润和利润率至关重要"} -{"key": "BAC009S0902W0289", "wav": "./aishell/wav/test/S0902/BAC009S0902W0289.wav", "txt": "这对苹果是一个利好"} -{"key": "BAC009S0902W0290", "wav": "./aishell/wav/test/S0902/BAC009S0902W0290.wav", "txt": "意味着只要用户要选择高端手机"} -{"key": "BAC009S0902W0291", "wav": "./aishell/wav/test/S0902/BAC009S0902W0291.wav", "txt": "在类似印度之类的新兴市场"} -{"key": "BAC009S0902W0292", "wav": "./aishell/wav/test/S0902/BAC009S0902W0292.wav", "txt": "因此从全球的角度来看"} -{"key": "BAC009S0902W0293", "wav": "./aishell/wav/test/S0902/BAC009S0902W0293.wav", "txt": "智能手机仍然有增长和爆发的空间"} -{"key": "BAC009S0902W0295", "wav": "./aishell/wav/test/S0902/BAC009S0902W0295.wav", "txt": "但是作为一个仍在成长没有其他对手的市场来讲"} -{"key": "BAC009S0902W0296", "wav": "./aishell/wav/test/S0902/BAC009S0902W0296.wav", "txt": "苹果已然可以单点突破"} -{"key": "BAC009S0902W0297", "wav": "./aishell/wav/test/S0902/BAC009S0902W0297.wav", "txt": "至于新的业务增长点"} -{"key": "BAC009S0902W0298", "wav": "./aishell/wav/test/S0902/BAC009S0902W0298.wav", "txt": "但是瑞士联合银行分析师估计"} -{"key": "BAC009S0902W0299", "wav": "./aishell/wav/test/S0902/BAC009S0902W0299.wav", "txt": "较最初的预期减少了一半左右"} -{"key": "BAC009S0902W0302", "wav": "./aishell/wav/test/S0902/BAC009S0902W0302.wav", "txt": "已经占有了全球智能手表市场的百分之五"} -{"key": "BAC009S0902W0303", "wav": "./aishell/wav/test/S0902/BAC009S0902W0303.wav", "txt": "云计算和大数据时代"} -{"key": "BAC009S0902W0305", "wav": "./aishell/wav/test/S0902/BAC009S0902W0305.wav", "txt": "紫光股份曾经出现一连波连续十六个一字涨停的狂飙行市"} -{"key": "BAC009S0902W0306", "wav": "./aishell/wav/test/S0902/BAC009S0902W0306.wav", "txt": "近日的走势也强于大盘"} -{"key": "BAC009S0902W0307", "wav": "./aishell/wav/test/S0902/BAC009S0902W0307.wav", "txt": "两个机构专用席位列于买一和卖二的位置"} -{"key": "BAC009S0902W0308", "wav": "./aishell/wav/test/S0902/BAC009S0902W0308.wav", "txt": "买卖前五名共计净出于该股六十二点九三万元"} -{"key": "BAC009S0902W0309", "wav": "./aishell/wav/test/S0902/BAC009S0902W0309.wav", "txt": "大盘股仍是毫无作为"} -{"key": "BAC009S0902W0310", "wav": "./aishell/wav/test/S0902/BAC009S0902W0310.wav", "txt": "题材股继续扮演黑马角色"} -{"key": "BAC009S0902W0311", "wav": "./aishell/wav/test/S0902/BAC009S0902W0311.wav", "txt": "紫光股份千九十三八在公布拓展云计算市场后"} -{"key": "BAC009S0902W0312", "wav": "./aishell/wav/test/S0902/BAC009S0902W0312.wav", "txt": "盘中有两千六百八十六万元资金净流入"} -{"key": "BAC009S0902W0313", "wav": "./aishell/wav/test/S0902/BAC009S0902W0313.wav", "txt": "给孩子买儿童电话手表有必要吗"} -{"key": "BAC009S0902W0314", "wav": "./aishell/wav/test/S0902/BAC009S0902W0314.wav", "txt": "消费者在听销售人员介绍小天才手表"} -{"key": "BAC009S0902W0315", "wav": "./aishell/wav/test/S0902/BAC009S0902W0315.wav", "txt": "消费者在听销售人员介绍小天才手表"} -{"key": "BAC009S0902W0316", "wav": "./aishell/wav/test/S0902/BAC009S0902W0316.wav", "txt": "消费者在听销售人员介绍小天才电话手表"} -{"key": "BAC009S0902W0317", "wav": "./aishell/wav/test/S0902/BAC009S0902W0317.wav", "txt": "很多家长都在给孩子购置各种学习用品"} -{"key": "BAC009S0902W0318", "wav": "./aishell/wav/test/S0902/BAC009S0902W0318.wav", "txt": "除了传统的书包文具以及辅导书外"} -{"key": "BAC009S0902W0319", 
"wav": "./aishell/wav/test/S0902/BAC009S0902W0319.wav", "txt": "这个儿童电话手表以其强大的定位通话微聊等功能"} -{"key": "BAC009S0902W0320", "wav": "./aishell/wav/test/S0902/BAC009S0902W0320.wav", "txt": "深受家长和儿童欢迎"} -{"key": "BAC009S0902W0321", "wav": "./aishell/wav/test/S0902/BAC009S0902W0321.wav", "txt": "很多孩子都以拥有一款电话手表为豪"} -{"key": "BAC009S0902W0322", "wav": "./aishell/wav/test/S0902/BAC009S0902W0322.wav", "txt": "而不少品牌的电话手表量销售量更是突破百万大关"} -{"key": "BAC009S0902W0323", "wav": "./aishell/wav/test/S0902/BAC009S0902W0323.wav", "txt": "电话手表对儿童健康安全是否有危险"} -{"key": "BAC009S0902W0324", "wav": "./aishell/wav/test/S0902/BAC009S0902W0324.wav", "txt": "老师是否允许孩子戴手表上学"} -{"key": "BAC009S0902W0325", "wav": "./aishell/wav/test/S0902/BAC009S0902W0325.wav", "txt": "电话手表应该如何选购"} -{"key": "BAC009S0902W0326", "wav": "./aishell/wav/test/S0902/BAC009S0902W0326.wav", "txt": "笔者进行了深度的了解"} -{"key": "BAC009S0902W0327", "wav": "./aishell/wav/test/S0902/BAC009S0902W0327.wav", "txt": "儿童电话手表到底有多火"} -{"key": "BAC009S0902W0328", "wav": "./aishell/wav/test/S0902/BAC009S0902W0328.wav", "txt": "年龄或大或小的孩子"} -{"key": "BAC009S0902W0330", "wav": "./aishell/wav/test/S0902/BAC009S0902W0330.wav", "txt": "都会目不转睛的盯着"} -{"key": "BAC009S0902W0331", "wav": "./aishell/wav/test/S0902/BAC009S0902W0331.wav", "txt": "或者跟着广告哼起歌曲来"} -{"key": "BAC009S0902W0332", "wav": "./aishell/wav/test/S0902/BAC009S0902W0332.wav", "txt": "随着产品快速进入家长和孩童的视野"} -{"key": "BAC009S0902W0333", "wav": "./aishell/wav/test/S0902/BAC009S0902W0333.wav", "txt": "每天的销量让你感受到儿童电话手表的火爆"} -{"key": "BAC009S0902W0334", "wav": "./aishell/wav/test/S0902/BAC009S0902W0334.wav", "txt": "对于如此火爆的市市场需求"} -{"key": "BAC009S0902W0335", "wav": "./aishell/wav/test/S0902/BAC009S0902W0335.wav", "txt": "来自广西的苏女士说家长对孩子安全的关心"} -{"key": "BAC009S0902W0336", "wav": "./aishell/wav/test/S0902/BAC009S0902W0336.wav", "txt": "是电话手表今年大受欢迎的主要原因"} -{"key": "BAC009S0902W0337", "wav": "./aishell/wav/test/S0902/BAC009S0902W0337.wav", "txt": "在电话手表出现之前"} -{"key": "BAC009S0902W0338", "wav": "./aishell/wav/test/S0902/BAC009S0902W0338.wav", "txt": "据悉他正在积极进修表演准备进入演艺圈"} -{"key": "BAC009S0902W0339", "wav": "./aishell/wav/test/S0902/BAC009S0902W0339.wav", "txt": "近日日本媒体曝出惊人消息"} -{"key": "BAC009S0902W0340", "wav": "./aishell/wav/test/S0902/BAC009S0902W0340.wav", "txt": "称高桥大辅可能在一段时间里出柜"} -{"key": "BAC009S0902W0341", "wav": "./aishell/wav/test/S0902/BAC009S0902W0341.wav", "txt": "公开自己的同性恋者身份"} -{"key": "BAC009S0902W0342", "wav": "./aishell/wav/test/S0902/BAC009S0902W0342.wav", "txt": "恐怕又要传来不少女粉丝心碎的声音了"} -{"key": "BAC009S0902W0343", "wav": "./aishell/wav/test/S0902/BAC009S0902W0343.wav", "txt": "高桥大辅堪称日本花样滑冰男单领域的领军人物"} -{"key": "BAC009S0902W0344", "wav": "./aishell/wav/test/S0902/BAC009S0902W0344.wav", "txt": "在他的职业生涯里曾在二零一零年拿到世锦赛金牌"} -{"key": "BAC009S0902W0345", "wav": "./aishell/wav/test/S0902/BAC009S0902W0345.wav", "txt": "温哥华冬奥会拿到铜牌"} -{"key": "BAC009S0902W0346", "wav": "./aishell/wav/test/S0902/BAC009S0902W0346.wav", "txt": "一二年总决赛拿到金牌"} -{"key": "BAC009S0902W0347", "wav": "./aishell/wav/test/S0902/BAC009S0902W0347.wav", "txt": "还曾经两次拿到了四大洲锦标赛的男单冠军"} -{"key": "BAC009S0902W0348", "wav": "./aishell/wav/test/S0902/BAC009S0902W0348.wav", "txt": "表示未来会进入演艺圈发展"} -{"key": "BAC009S0902W0349", "wav": "./aishell/wav/test/S0902/BAC009S0902W0349.wav", "txt": "颜值颇高的他今年四月远赴美国纽约"} -{"key": "BAC009S0902W0350", "wav": "./aishell/wav/test/S0902/BAC009S0902W0350.wav", "txt": "高桥大辅丝毫不加掩饰"} -{"key": "BAC009S0902W0351", "wav": "./aishell/wav/test/S0902/BAC009S0902W0351.wav", "txt": "他经常在社交网站公开美食等照片"} -{"key": "BAC009S0902W0352", "wav": 
"./aishell/wav/test/S0902/BAC009S0902W0352.wav", "txt": "看起来在美国过得很开心的样子"} -{"key": "BAC009S0902W0353", "wav": "./aishell/wav/test/S0902/BAC009S0902W0353.wav", "txt": "过去一直背负着日本花滑界的重压"} -{"key": "BAC009S0902W0354", "wav": "./aishell/wav/test/S0902/BAC009S0902W0354.wav", "txt": "终于得到了释放的样子"} -{"key": "BAC009S0902W0355", "wav": "./aishell/wav/test/S0902/BAC009S0902W0355.wav", "txt": "他每周二三天来学校"} -{"key": "BAC009S0902W0356", "wav": "./aishell/wav/test/S0902/BAC009S0902W0356.wav", "txt": "还有记者爆料说居住在纽约的日本人透露"} -{"key": "BAC009S0902W0357", "wav": "./aishell/wav/test/S0902/BAC009S0902W0357.wav", "txt": "高桥在当地过着奢华享乐的生活"} -{"key": "BAC009S0902W0358", "wav": "./aishell/wav/test/S0902/BAC009S0902W0358.wav", "txt": "如果真的想学习的话"} -{"key": "BAC009S0902W0359", "wav": "./aishell/wav/test/S0902/BAC009S0902W0359.wav", "txt": "就不会刻意选择位于纽约闹市区的这所大学"} -{"key": "BAC009S0902W0360", "wav": "./aishell/wav/test/S0902/BAC009S0902W0360.wav", "txt": "图片中他们一行人面对镜头尽显搞怪天赋"} -{"key": "BAC009S0902W0361", "wav": "./aishell/wav/test/S0902/BAC009S0902W0361.wav", "txt": "高桥大辅则是噘着嘴做出索吻的动作"} -{"key": "BAC009S0902W0362", "wav": "./aishell/wav/test/S0902/BAC009S0902W0362.wav", "txt": "外界认为这是一种另有深意的暗示"} -{"key": "BAC009S0902W0363", "wav": "./aishell/wav/test/S0902/BAC009S0902W0363.wav", "txt": "而对于他的好友小林尊"} -{"key": "BAC009S0902W0364", "wav": "./aishell/wav/test/S0902/BAC009S0902W0364.wav", "txt": "被认为日本体育界的相关人士称"} -{"key": "BAC009S0902W0365", "wav": "./aishell/wav/test/S0902/BAC009S0902W0365.wav", "txt": "但多年来关于他的形婚"} -{"key": "BAC009S0902W0366", "wav": "./aishell/wav/test/S0902/BAC009S0902W0366.wav", "txt": "实际上是同性恋者的传闻一直未停过"} -{"key": "BAC009S0902W0367", "wav": "./aishell/wav/test/S0902/BAC009S0902W0367.wav", "txt": "和澳洲鱼雷索普一样"} -{"key": "BAC009S0902W0368", "wav": "./aishell/wav/test/S0902/BAC009S0902W0368.wav", "txt": "高桥大辅因为其比赛风格的妖娆多变"} -{"key": "BAC009S0902W0369", "wav": "./aishell/wav/test/S0902/BAC009S0902W0369.wav", "txt": "多年来围绕其性取向的争论一直没有停息"} -{"key": "BAC009S0902W0370", "wav": "./aishell/wav/test/S0902/BAC009S0902W0370.wav", "txt": "退役前高桥大辅曾与花滑女神浅田真央传出恋情"} -{"key": "BAC009S0902W0371", "wav": "./aishell/wav/test/S0902/BAC009S0902W0371.wav", "txt": "身为上司而且已婚有儿女的桥本被指责涉嫌性侵"} -{"key": "BAC009S0902W0372", "wav": "./aishell/wav/test/S0902/BAC009S0902W0372.wav", "txt": "不过两位当事人双双否认性侵的说法"} -{"key": "BAC009S0902W0373", "wav": "./aishell/wav/test/S0902/BAC009S0902W0373.wav", "txt": "如今和小林尊出双入对"} -{"key": "BAC009S0902W0374", "wav": "./aishell/wav/test/S0902/BAC009S0902W0374.wav", "txt": "高调参加同性恋者的年度盛事"} -{"key": "BAC009S0902W0375", "wav": "./aishell/wav/test/S0902/BAC009S0902W0375.wav", "txt": "有可靠消息称高桥很可能在近期正式宣布出柜"} -{"key": "BAC009S0902W0376", "wav": "./aishell/wav/test/S0902/BAC009S0902W0376.wav", "txt": "此消息一出迅速引发外界强烈关注"} -{"key": "BAC009S0902W0377", "wav": "./aishell/wav/test/S0902/BAC009S0902W0377.wav", "txt": "日本网友也是众说纷纭一点儿也不吃惊"} -{"key": "BAC009S0902W0378", "wav": "./aishell/wav/test/S0902/BAC009S0902W0378.wav", "txt": "看他在冰场上搔首弄姿地表现"} -{"key": "BAC009S0902W0379", "wav": "./aishell/wav/test/S0902/BAC009S0902W0379.wav", "txt": "高桥大辅应该是他的新欢"} -{"key": "BAC009S0902W0380", "wav": "./aishell/wav/test/S0902/BAC009S0902W0380.wav", "txt": "难怪他能接受年过半百的桥本的索吻"} -{"key": "BAC009S0902W0381", "wav": "./aishell/wav/test/S0902/BAC009S0902W0381.wav", "txt": "许多为高桥痴迷多年的女粉丝肯定深受打击"} -{"key": "BAC009S0902W0382", "wav": "./aishell/wav/test/S0902/BAC009S0902W0382.wav", "txt": "作为日本的花滑王子"} -{"key": "BAC009S0902W0383", "wav": "./aishell/wav/test/S0902/BAC009S0902W0383.wav", "txt": "这么多年一直要压抑自己的性取向"} -{"key": "BAC009S0902W0384", "wav": 
"./aishell/wav/test/S0902/BAC009S0902W0384.wav", "txt": "挺不容易的支持他追属属于自己的真正幸福"} -{"key": "BAC009S0902W0385", "wav": "./aishell/wav/test/S0902/BAC009S0902W0385.wav", "txt": "据美联社十日报道"} -{"key": "BAC009S0902W0386", "wav": "./aishell/wav/test/S0902/BAC009S0902W0386.wav", "txt": "一些参赛选手赛后感到胃部不适"} -{"key": "BAC009S0902W0387", "wav": "./aishell/wav/test/S0902/BAC009S0902W0387.wav", "txt": "而队医怀疑这或许与比赛地水污染有关"} -{"key": "BAC009S0902W0388", "wav": "./aishell/wav/test/S0902/BAC009S0902W0388.wav", "txt": "美国队官员不排除他们的队员因食物或饮水而生病"} -{"key": "BAC009S0902W0389", "wav": "./aishell/wav/test/S0902/BAC009S0902W0389.wav", "txt": "近来有关里约水污染问题备受关注"} -{"key": "BAC009S0902W0390", "wav": "./aishell/wav/test/S0902/BAC009S0902W0390.wav", "txt": "美联社公布的一项独立水质检测显示"} -{"key": "BAC009S0902W0391", "wav": "./aishell/wav/test/S0902/BAC009S0902W0391.wav", "txt": "在奥运会赛艇和铁人三项公开水域等比赛地"} -{"key": "BAC009S0902W0392", "wav": "./aishell/wav/test/S0902/BAC009S0902W0392.wav", "txt": "也存在高危病毒危险"} -{"key": "BAC009S0902W0393", "wav": "./aishell/wav/test/S0902/BAC009S0902W0393.wav", "txt": "该湖区也将是明年奥运会赛艇比赛地"} -{"key": "BAC009S0902W0394", "wav": "./aishell/wav/test/S0902/BAC009S0902W0394.wav", "txt": "比污染严重的瓜内巴拉湾相比"} -{"key": "BAC009S0902W0395", "wav": "./aishell/wav/test/S0902/BAC009S0902W0395.wav", "txt": "赛艇比赛所在湖区的水污染问题近年来得到改善"} -{"key": "BAC009S0902W0396", "wav": "./aishell/wav/test/S0902/BAC009S0902W0396.wav", "txt": "但是上周公布的水质检测显示"} -{"key": "BAC009S0902W0397", "wav": "./aishell/wav/test/S0902/BAC009S0902W0397.wav", "txt": "湖区水污染仍旧十分严重"} -{"key": "BAC009S0902W0398", "wav": "./aishell/wav/test/S0902/BAC009S0902W0398.wav", "txt": "在本次赛艇测试赛期间"} -{"key": "BAC009S0902W0399", "wav": "./aishell/wav/test/S0902/BAC009S0902W0399.wav", "txt": "一些参赛选手也向新华社记者表示"} -{"key": "BAC009S0902W0400", "wav": "./aishell/wav/test/S0902/BAC009S0902W0400.wav", "txt": "比赛地的湖水比较浑浊"} -{"key": "BAC009S0902W0401", "wav": "./aishell/wav/test/S0902/BAC009S0902W0401.wav", "txt": "但还是担心水质问题"} -{"key": "BAC009S0902W0402", "wav": "./aishell/wav/test/S0902/BAC009S0902W0402.wav", "txt": "来自中国的赛艇选手崔帅豪说"} -{"key": "BAC009S0902W0403", "wav": "./aishell/wav/test/S0902/BAC009S0902W0403.wav", "txt": "比赛地水不是太干净"} -{"key": "BAC009S0902W0404", "wav": "./aishell/wav/test/S0902/BAC009S0902W0404.wav", "txt": "他自己还将出任影片的男主角"} -{"key": "BAC009S0902W0405", "wav": "./aishell/wav/test/S0902/BAC009S0902W0405.wav", "txt": "忙碌成本可想而知"} -{"key": "BAC009S0902W0406", "wav": "./aishell/wav/test/S0902/BAC009S0902W0406.wav", "txt": "外媒发布了更令人兴奋的消息"} -{"key": "BAC009S0902W0407", "wav": "./aishell/wav/test/S0902/BAC009S0902W0407.wav", "txt": "将在本届美国电影学会影展中进行秘密放映"} -{"key": "BAC009S0902W0408", "wav": "./aishell/wav/test/S0902/BAC009S0902W0408.wav", "txt": "对方是二十五岁的人妻名模泰舒培"} -{"key": "BAC009S0902W0409", "wav": "./aishell/wav/test/S0902/BAC009S0902W0409.wav", "txt": "搜狐娱乐讯七月十五日"} -{"key": "BAC009S0902W0410", "wav": "./aishell/wav/test/S0902/BAC009S0902W0410.wav", "txt": "陈冠希前女友嫩模黄榕在香港书展出席写真宣传活动"} -{"key": "BAC009S0902W0411", "wav": "./aishell/wav/test/S0902/BAC009S0902W0411.wav", "txt": "身穿白色抹胸的她大秀性感好身材"} -{"key": "BAC009S0902W0412", "wav": "./aishell/wav/test/S0902/BAC009S0902W0412.wav", "txt": "谈及前男友陈冠希近日被指外貌衰老了不少"} -{"key": "BAC009S0902W0413", "wav": "./aishell/wav/test/S0902/BAC009S0902W0413.wav", "txt": "黄榕坦言可能他做了太多运动"} -{"key": "BAC009S0902W0414", "wav": "./aishell/wav/test/S0902/BAC009S0902W0414.wav", "txt": "搜狐娱乐讯日前"} -{"key": "BAC009S0902W0415", "wav": "./aishell/wav/test/S0902/BAC009S0902W0415.wav", "txt": "众星云集上海出席某商家的开业活动"} -{"key": "BAC009S0902W0416", "wav": 
"./aishell/wav/test/S0902/BAC009S0902W0416.wav", "txt": "由潮男陈冠希打头阵"} -{"key": "BAC009S0902W0417", "wav": "./aishell/wav/test/S0902/BAC009S0902W0417.wav", "txt": "更云集了罗中旭前任"} -{"key": "BAC009S0902W0418", "wav": "./aishell/wav/test/S0902/BAC009S0902W0418.wav", "txt": "黄宗泽绯闻女友等女星"} -{"key": "BAC009S0902W0419", "wav": "./aishell/wav/test/S0902/BAC009S0902W0419.wav", "txt": "现场气氛火爆"} -{"key": "BAC009S0902W0420", "wav": "./aishell/wav/test/S0902/BAC009S0902W0420.wav", "txt": "粉丝们一度失控"} -{"key": "BAC009S0902W0421", "wav": "./aishell/wav/test/S0902/BAC009S0902W0421.wav", "txt": "陈冠希坦言认为陈奕迅是k歌之王"} -{"key": "BAC009S0902W0422", "wav": "./aishell/wav/test/S0902/BAC009S0902W0422.wav", "txt": "但由于风格不同"} -{"key": "BAC009S0902W0423", "wav": "./aishell/wav/test/S0902/BAC009S0902W0423.wav", "txt": "新专辑音乐方面还是坚持做自己"} -{"key": "BAC009S0902W0424", "wav": "./aishell/wav/test/S0902/BAC009S0902W0424.wav", "txt": "搜狐娱乐讯九月五日"} -{"key": "BAC009S0902W0425", "wav": "./aishell/wav/test/S0902/BAC009S0902W0425.wav", "txt": "一怒之下把大叔身份证扔在地上"} -{"key": "BAC009S0902W0426", "wav": "./aishell/wav/test/S0902/BAC009S0902W0426.wav", "txt": "二人发生姓肢体冲突"} -{"key": "BAC009S0902W0427", "wav": "./aishell/wav/test/S0902/BAC009S0902W0427.wav", "txt": "此视频曝光后"} -{"key": "BAC009S0902W0428", "wav": "./aishell/wav/test/S0902/BAC009S0902W0428.wav", "txt": "网友纷纷力挺陈冠希"} -{"key": "BAC009S0902W0429", "wav": "./aishell/wav/test/S0902/BAC009S0902W0429.wav", "txt": "温兆伦许飞欧弟等明星也通过微博表示支持力挺"} -{"key": "BAC009S0902W0430", "wav": "./aishell/wav/test/S0902/BAC009S0902W0430.wav", "txt": "搜狐娱乐讯北京时间六月二十四日消息"} -{"key": "BAC009S0902W0431", "wav": "./aishell/wav/test/S0902/BAC009S0902W0431.wav", "txt": "渔船凶案嫌疑借发动机声将同船同事依次杀害"} -{"key": "BAC009S0902W0432", "wav": "./aishell/wav/test/S0902/BAC009S0902W0432.wav", "txt": "渔船海上爆炸沉没四名渔民漂流三天获救"} -{"key": "BAC009S0902W0433", "wav": "./aishell/wav/test/S0902/BAC009S0902W0433.wav", "txt": "昨天上午七点五零分"} -{"key": "BAC009S0902W0434", "wav": "./aishell/wav/test/S0902/BAC009S0902W0434.wav", "txt": "目前正在根据海事部门的要求开往盐城大分港"} -{"key": "BAC009S0902W0435", "wav": "./aishell/wav/test/S0902/BAC009S0902W0435.wav", "txt": "准备将获救的四人送上岸边医院救治"} -{"key": "BAC009S0902W0436", "wav": "./aishell/wav/test/S0902/BAC009S0902W0436.wav", "txt": "渔船海上被撞翻仅一人逃生同伴求救却无能为力"} -{"key": "BAC009S0902W0437", "wav": "./aishell/wav/test/S0902/BAC009S0902W0437.wav", "txt": "出事的渔船被拖到韩榆石桥海边"} -{"key": "BAC009S0902W0438", "wav": "./aishell/wav/test/S0902/BAC009S0902W0438.wav", "txt": "渔船被其他船撞翻六人死海事部门悬赏五万寻肇事者"} -{"key": "BAC009S0902W0439", "wav": "./aishell/wav/test/S0902/BAC009S0902W0439.wav", "txt": "快报讯通讯员李欢乐记者王晓宇八月二六日"} -{"key": "BAC009S0902W0440", "wav": "./aishell/wav/test/S0902/BAC009S0902W0440.wav", "txt": "船上八名船员六人不幸遇难"} -{"key": "BAC009S0902W0441", "wav": "./aishell/wav/test/S0902/BAC009S0902W0441.wav", "txt": "只有一名船员得以逃生"} -{"key": "BAC009S0902W0442", "wav": "./aishell/wav/test/S0902/BAC009S0902W0442.wav", "txt": "渝武高速武胜段发生追尾事故已造成六死九伤"} -{"key": "BAC009S0902W0443", "wav": "./aishell/wav/test/S0902/BAC009S0902W0443.wav", "txt": "记者从广安消防部门获悉"} -{"key": "BAC009S0902W0444", "wav": "./aishell/wav/test/S0902/BAC009S0902W0444.wav", "txt": "大客车的车头和车身损毁严重"} -{"key": "BAC009S0902W0445", "wav": "./aishell/wav/test/S0902/BAC009S0902W0445.wav", "txt": "车辆载有数十名乘客"} -{"key": "BAC009S0902W0446", "wav": "./aishell/wav/test/S0902/BAC009S0902W0446.wav", "txt": "截至九点四零分消防人员撤离时"} -{"key": "BAC009S0902W0447", "wav": "./aishell/wav/test/S0902/BAC009S0902W0447.wav", "txt": "已造成六人死亡九人受伤"} -{"key": "BAC009S0902W0448", "wav": "./aishell/wav/test/S0902/BAC009S0902W0448.wav", "txt": 
"目前记者正赶往武胜县人民医院"} -{"key": "BAC009S0902W0449", "wav": "./aishell/wav/test/S0902/BAC009S0902W0449.wav", "txt": "渝蓉高速四川段计划明年通车被称最拖沓高速"} -{"key": "BAC009S0902W0450", "wav": "./aishell/wav/test/S0902/BAC009S0902W0450.wav", "txt": "渝蓉高速四川段因烂尾被称为最拖沓高速"} -{"key": "BAC009S0902W0451", "wav": "./aishell/wav/test/S0902/BAC009S0902W0451.wav", "txt": "渝蓉高速四川段资金断裂烂尾已修了六年"} -{"key": "BAC009S0902W0452", "wav": "./aishell/wav/test/S0902/BAC009S0902W0452.wav", "txt": "渣土车右转弯骑车男童被卷入车轮下不幸身亡"} -{"key": "BAC009S0902W0453", "wav": "./aishell/wav/test/S0902/BAC009S0902W0453.wav", "txt": "肇事车及损伤严重的自行车报料人供图"} -{"key": "BAC009S0902W0454", "wav": "./aishell/wav/test/S0902/BAC009S0902W0454.wav", "txt": "渣土车挂倒电动车致一死一伤肇事车主逃逸"} -{"key": "BAC009S0902W0455", "wav": "./aishell/wav/test/S0902/BAC009S0902W0455.wav", "txt": "蚌飞市发生一起惨剧"} -{"key": "BAC009S0902W0456", "wav": "./aishell/wav/test/S0902/BAC009S0902W0456.wav", "txt": "一对男女骑电动车在通过一个十字路口时"} -{"key": "BAC009S0902W0457", "wav": "./aishell/wav/test/S0902/BAC009S0902W0457.wav", "txt": "被同方向行驶的一辆渣土车挂倒"} -{"key": "BAC009S0902W0458", "wav": "./aishell/wav/test/S0902/BAC009S0902W0458.wav", "txt": "骑电动车男子当场死亡"} -{"key": "BAC009S0902W0459", "wav": "./aishell/wav/test/S0902/BAC009S0902W0459.wav", "txt": "但渣土车司机肇事后不仅没有下车救援"} -{"key": "BAC009S0902W0460", "wav": "./aishell/wav/test/S0902/BAC009S0902W0460.wav", "txt": "目前当地警方正在追查这名司机"} -{"key": "BAC009S0902W0461", "wav": "./aishell/wav/test/S0902/BAC009S0902W0461.wav", "txt": "渣土车撞进路边民房女子抱小孩幸运逃生"} -{"key": "BAC009S0902W0462", "wav": "./aishell/wav/test/S0902/BAC009S0902W0462.wav", "txt": "山水湾小区斜对面的一处工地旁"} -{"key": "BAC009S0902W0463", "wav": "./aishell/wav/test/S0902/BAC009S0902W0463.wav", "txt": "肇事的大卡车车头仍然卡在工房内"} -{"key": "BAC009S0902W0464", "wav": "./aishell/wav/test/S0902/BAC009S0902W0464.wav", "txt": "图记者陈斌潇湘晨报长沙讯一零月一一日下午"} -{"key": "BAC009S0902W0465", "wav": "./aishell/wav/test/S0902/BAC009S0902W0465.wav", "txt": "长沙县湘龙西路一处十字路口"} -{"key": "BAC009S0902W0466", "wav": "./aishell/wav/test/S0902/BAC009S0902W0466.wav", "txt": "一辆红色的卡车和一辆黄色的渣土车发生碰撞"} -{"key": "BAC009S0902W0467", "wav": "./aishell/wav/test/S0902/BAC009S0902W0467.wav", "txt": "黄色渣土车一头撞进了路边的工房"} -{"key": "BAC009S0902W0468", "wav": "./aishell/wav/test/S0902/BAC009S0902W0468.wav", "txt": "被撞废的奔驰昨日二二时左右"} -{"key": "BAC009S0902W0469", "wav": "./aishell/wav/test/S0902/BAC009S0902W0469.wav", "txt": "省城政务区习友路与怀宁路交叉口"} -{"key": "BAC009S0902W0470", "wav": "./aishell/wav/test/S0902/BAC009S0902W0470.wav", "txt": "一辆渣土车突然冲向逆向车道"} -{"key": "BAC009S0902W0471", "wav": "./aishell/wav/test/S0902/BAC009S0902W0471.wav", "txt": "连续撞了五辆小轿车最终才停了下来"} -{"key": "BAC009S0902W0472", "wav": "./aishell/wav/test/S0902/BAC009S0902W0472.wav", "txt": "其中一辆奔驰轿车被撞出近一零米远"} -{"key": "BAC009S0902W0473", "wav": "./aishell/wav/test/S0902/BAC009S0902W0473.wav", "txt": "渤海一渔船沉没船上一六人落水一二人失踪"} -{"key": "BAC009S0902W0474", "wav": "./aishell/wav/test/S0902/BAC009S0902W0474.wav", "txt": "唐山乐亭一船队在渤海与一山东渔船发生纠纷"} -{"key": "BAC009S0902W0475", "wav": "./aishell/wav/test/S0902/BAC009S0902W0475.wav", "txt": "导致唐山一渔船沉没"} -{"key": "BAC009S0902W0476", "wav": "./aishell/wav/test/S0902/BAC009S0902W0476.wav", "txt": "但因海上风大浪急影响救援"} -{"key": "BAC009S0902W0477", "wav": "./aishell/wav/test/S0902/BAC009S0902W0477.wav", "txt": "目前仍未发现失踪船员"} -{"key": "BAC009S0902W0478", "wav": "./aishell/wav/test/S0902/BAC009S0902W0478.wav", "txt": "渤海垃圾成堆变死海"} -{"key": "BAC009S0902W0479", "wav": "./aishell/wav/test/S0902/BAC009S0902W0479.wav", "txt": "与韩国西海相连的中国渤海湾由于垃圾堆积"} -{"key": "BAC009S0902W0480", "wav": 
"./aishell/wav/test/S0902/BAC009S0902W0480.wav", "txt": "有人忧虑渤海湾的污染会直接影响到韩国西部海域"} -{"key": "BAC009S0902W0481", "wav": "./aishell/wav/test/S0902/BAC009S0902W0481.wav", "txt": "渤海失事河北籍渔船已致四人遇难仍有八人失踪"} -{"key": "BAC009S0902W0482", "wav": "./aishell/wav/test/S0902/BAC009S0902W0482.wav", "txt": "又在船仓内发现四名船员遗体"} -{"key": "BAC009S0902W0483", "wav": "./aishell/wav/test/S0902/BAC009S0902W0483.wav", "txt": "目前仍有八名失踪人员下落不明"} -{"key": "BAC009S0902W0484", "wav": "./aishell/wav/test/S0902/BAC009S0902W0484.wav", "txt": "渤海湾溢油事故赔偿案宣判康菲公司被判赔一六八万"} -{"key": "BAC009S0902W0485", "wav": "./aishell/wav/test/S0902/BAC009S0902W0485.wav", "txt": "温岭倒塌厂房系违章建筑涉事负责人已被控制"} -{"key": "BAC009S0902W0486", "wav": "./aishell/wav/test/S0902/BAC009S0902W0486.wav", "txt": "据新华社电七月四日一六时许"} -{"key": "BAC009S0902W0487", "wav": "./aishell/wav/test/S0902/BAC009S0902W0487.wav", "txt": "浙江温岭市大溪镇发生鞋厂厂房倒塌事故"} -{"key": "BAC009S0902W0488", "wav": "./aishell/wav/test/S0902/BAC009S0902W0488.wav", "txt": "共造成一四人死亡三三人受伤"} -{"key": "BAC009S0902W0489", "wav": "./aishell/wav/test/S0902/BAC009S0902W0489.wav", "txt": "事故厂房系违章建筑"} -{"key": "BAC009S0902W0490", "wav": "./aishell/wav/test/S0902/BAC009S0902W0490.wav", "txt": "此前已被列入拆除范围"} -{"key": "BAC009S0902W0491", "wav": "./aishell/wav/test/S0902/BAC009S0902W0491.wav", "txt": "涉事两企业负责人均已被控制"} -{"key": "BAC009S0902W0492", "wav": "./aishell/wav/test/S0902/BAC009S0902W0492.wav", "txt": "温岭医院助理殴打女病人五年后提拔为副院长"} -{"key": "BAC009S0902W0493", "wav": "./aishell/wav/test/S0902/BAC009S0902W0493.wav", "txt": "法晚深度即时记者杜雯雯实习生张明明近日"} -{"key": "BAC009S0902W0494", "wav": "./aishell/wav/test/S0902/BAC009S0902W0494.wav", "txt": "此关于滕灵方此后晋升为副院长一事"} -{"key": "BAC009S0902W0495", "wav": "./aishell/wav/test/S0902/BAC009S0902W0495.wav", "txt": "该医院党委书记杨幼萍向晚报记者表示"} -{"key": "BAC009S0903W0121", "wav": "./aishell/wav/test/S0903/BAC009S0903W0121.wav", "txt": "进而选择此类方式购房"} -{"key": "BAC009S0903W0122", "wav": "./aishell/wav/test/S0903/BAC009S0903W0122.wav", "txt": "另外要处理公积金异地使用的问题"} -{"key": "BAC009S0903W0123", "wav": "./aishell/wav/test/S0903/BAC009S0903W0123.wav", "txt": "这对于目前一线城市来说很紧要"} -{"key": "BAC009S0903W0124", "wav": "./aishell/wav/test/S0903/BAC009S0903W0124.wav", "txt": "很多人受限购政策的影响"} -{"key": "BAC009S0903W0125", "wav": "./aishell/wav/test/S0903/BAC009S0903W0125.wav", "txt": "难以在周边城市用公积金购房"} -{"key": "BAC009S0903W0126", "wav": "./aishell/wav/test/S0903/BAC009S0903W0126.wav", "txt": "导致公积金资源闲置的问题出现"} -{"key": "BAC009S0903W0127", "wav": "./aishell/wav/test/S0903/BAC009S0903W0127.wav", "txt": "中新网房产频道"} -{"key": "BAC009S0903W0128", "wav": "./aishell/wav/test/S0903/BAC009S0903W0128.wav", "txt": "随着广州住房公积金贷款政策的调整实施"} -{"key": "BAC009S0903W0129", "wav": "./aishell/wav/test/S0903/BAC009S0903W0129.wav", "txt": "北上广深四个一线城市已经全部放开公积金房贷业"} -{"key": "BAC009S0903W0130", "wav": "./aishell/wav/test/S0903/BAC009S0903W0130.wav", "txt": "公积金新政加速楼市库存消化至搜狐财经"} -{"key": "BAC009S0903W0131", "wav": "./aishell/wav/test/S0903/BAC009S0903W0131.wav", "txt": "住建部等三部委联合发文"} -{"key": "BAC009S0903W0132", "wav": "./aishell/wav/test/S0903/BAC009S0903W0132.wav", "txt": "再次降低公积金贷款的门槛"} -{"key": "BAC009S0903W0133", "wav": "./aishell/wav/test/S0903/BAC009S0903W0133.wav", "txt": "还清首套房公积金贷款"} -{"key": "BAC009S0903W0134", "wav": "./aishell/wav/test/S0903/BAC009S0903W0134.wav", "txt": "再次申请公积金贷款购买第二套房的"} -{"key": "BAC009S0903W0135", "wav": "./aishell/wav/test/S0903/BAC009S0903W0135.wav", "txt": "该政策延续了去年新政以来"} -{"key": "BAC009S0903W0136", "wav": "./aishell/wav/test/S0903/BAC009S0903W0136.wav", "txt": "也延续了公积金担当扶持楼市主力军的政策选择"} -{"key": "BAC009S0903W0137", 
"wav": "./aishell/wav/test/S0903/BAC009S0903W0137.wav", "txt": "从去年三部委发文"} -{"key": "BAC009S0903W0138", "wav": "./aishell/wav/test/S0903/BAC009S0903W0138.wav", "txt": "公积金对楼市的扶持力度不断加大"} -{"key": "BAC009S0903W0139", "wav": "./aishell/wav/test/S0903/BAC009S0903W0139.wav", "txt": "相继有一百多个城市出台了公积金新政"} -{"key": "BAC009S0903W0140", "wav": "./aishell/wav/test/S0903/BAC009S0903W0140.wav", "txt": "公积金贷款利率也数次下调"} -{"key": "BAC009S0903W0141", "wav": "./aishell/wav/test/S0903/BAC009S0903W0141.wav", "txt": "二套还清十首套比例降至五成"} -{"key": "BAC009S0903W0142", "wav": "./aishell/wav/test/S0903/BAC009S0903W0142.wav", "txt": "与新政相比"} -{"key": "BAC009S0903W0143", "wav": "./aishell/wav/test/S0903/BAC009S0903W0143.wav", "txt": "目前公积金政策已经与去年等同了"} -{"key": "BAC009S0903W0144", "wav": "./aishell/wav/test/S0903/BAC009S0903W0144.wav", "txt": "此次公积金政策大力度调整"} -{"key": "BAC009S0903W0145", "wav": "./aishell/wav/test/S0903/BAC009S0903W0145.wav", "txt": "主要目的是通过激励改善型住房需求"} -{"key": "BAC009S0903W0146", "wav": "./aishell/wav/test/S0903/BAC009S0903W0146.wav", "txt": "实现三四线城市去库存"} -{"key": "BAC009S0903W0147", "wav": "./aishell/wav/test/S0903/BAC009S0903W0147.wav", "txt": "尽管全国商品房销售面积持续回升"} -{"key": "BAC009S0903W0148", "wav": "./aishell/wav/test/S0903/BAC009S0903W0148.wav", "txt": "但库存压力却难以缓减"} -{"key": "BAC009S0903W0149", "wav": "./aishell/wav/test/S0903/BAC009S0903W0149.wav", "txt": "全国商品房待售面积比七月末增加了五百万平方米"} -{"key": "BAC009S0903W0150", "wav": "./aishell/wav/test/S0903/BAC009S0903W0150.wav", "txt": "比去年底增加了七万平方米"} -{"key": "BAC009S0903W0151", "wav": "./aishell/wav/test/S0903/BAC009S0903W0151.wav", "txt": "库存逆势攀升的根本原因在于供求错配"} -{"key": "BAC009S0903W0152", "wav": "./aishell/wav/test/S0903/BAC009S0903W0152.wav", "txt": "推动全国成交面积止跌反弹"} -{"key": "BAC009S0903W0153", "wav": "./aishell/wav/test/S0903/BAC009S0903W0153.wav", "txt": "但供应和库存却主要集中在七个三四线城市"} -{"key": "BAC009S0903W0154", "wav": "./aishell/wav/test/S0903/BAC009S0903W0154.wav", "txt": "且待售库存单套面积较大"} -{"key": "BAC009S0903W0155", "wav": "./aishell/wav/test/S0903/BAC009S0903W0155.wav", "txt": "无论是降低二套房公积金首付比例"} -{"key": "BAC009S0903W0156", "wav": "./aishell/wav/test/S0903/BAC009S0903W0156.wav", "txt": "还是不再区分普通和非普通住房"} -{"key": "BAC009S0903W0157", "wav": "./aishell/wav/test/S0903/BAC009S0903W0157.wav", "txt": "都意在有针对性地加大三四线城市楼市库存消化力度"} -{"key": "BAC009S0903W0158", "wav": "./aishell/wav/test/S0903/BAC009S0903W0158.wav", "txt": "只有楼市库存真正消化了"} -{"key": "BAC009S0903W0159", "wav": "./aishell/wav/test/S0903/BAC009S0903W0159.wav", "txt": "才能提振开发商拿地和开工的积极性"} -{"key": "BAC009S0903W0160", "wav": "./aishell/wav/test/S0903/BAC009S0903W0160.wav", "txt": "在公积金利率已降至历史低位"} -{"key": "BAC009S0903W0161", "wav": "./aishell/wav/test/S0903/BAC009S0903W0161.wav", "txt": "站在金九银十即将来临的起点上"} -{"key": "BAC009S0903W0162", "wav": "./aishell/wav/test/S0903/BAC009S0903W0162.wav", "txt": "再次降低公积金贷款首付比例"} -{"key": "BAC009S0903W0163", "wav": "./aishell/wav/test/S0903/BAC009S0903W0163.wav", "txt": "目的也是为了夯实楼市回升的基础"} -{"key": "BAC009S0903W0164", "wav": "./aishell/wav/test/S0903/BAC009S0903W0164.wav", "txt": "尽管去年新政以来"} -{"key": "BAC009S0903W0165", "wav": "./aishell/wav/test/S0903/BAC009S0903W0165.wav", "txt": "楼市持续三个季度回升"} -{"key": "BAC009S0903W0166", "wav": "./aishell/wav/test/S0903/BAC009S0903W0166.wav", "txt": "回升势头有转弱的迹象"} -{"key": "BAC009S0903W0167", "wav": "./aishell/wav/test/S0903/BAC009S0903W0167.wav", "txt": "首先是重点城市楼市成交回落趋势明显"} -{"key": "BAC009S0903W0168", "wav": "./aishell/wav/test/S0903/BAC009S0903W0168.wav", "txt": "领头羊一线城市分别下降百分之一和百分之七"} -{"key": "BAC009S0903W0169", "wav": 
"./aishell/wav/test/S0903/BAC009S0903W0169.wav", "txt": "而重点城市的供应也在七月份下滑了百分之七"} -{"key": "BAC009S0903W0170", "wav": "./aishell/wav/test/S0903/BAC009S0903W0170.wav", "txt": "五月份更是增加了七百万平方米"} -{"key": "BAC009S0903W0171", "wav": "./aishell/wav/test/S0903/BAC009S0903W0171.wav", "txt": "银行房贷额度开始紧张"} -{"key": "BAC009S0903W0172", "wav": "./aishell/wav/test/S0903/BAC009S0903W0172.wav", "txt": "首套房贷利润优惠也开始减少"} -{"key": "BAC009S0903W0173", "wav": "./aishell/wav/test/S0903/BAC009S0903W0173.wav", "txt": "近期人民币贬值叠加资本外流预期"} -{"key": "BAC009S0903W0174", "wav": "./aishell/wav/test/S0903/BAC009S0903W0174.wav", "txt": "资金面紧张对楼市的影响开始显现"} -{"key": "BAC009S0903W0175", "wav": "./aishell/wav/test/S0903/BAC009S0903W0175.wav", "txt": "市场对金九银十的预期也开始谨慎起来"} -{"key": "BAC009S0903W0176", "wav": "./aishell/wav/test/S0903/BAC009S0903W0176.wav", "txt": "除了去库存和夯实楼市回升基础外"} -{"key": "BAC009S0903W0177", "wav": "./aishell/wav/test/S0903/BAC009S0903W0177.wav", "txt": "此次公积金政策调整"} -{"key": "BAC009S0903W0178", "wav": "./aishell/wav/test/S0903/BAC009S0903W0178.wav", "txt": "也在于全面落实分类调控因城施策"} -{"key": "BAC009S0903W0179", "wav": "./aishell/wav/test/S0903/BAC009S0903W0179.wav", "txt": "纠偏政策一刀切的负面影响"} -{"key": "BAC009S0903W0180", "wav": "./aishell/wav/test/S0903/BAC009S0903W0180.wav", "txt": "去年新政以来"} -{"key": "BAC009S0903W0181", "wav": "./aishell/wav/test/S0903/BAC009S0903W0181.wav", "txt": "松绑二套房贷认定标准降低二套房贷首付比例"} -{"key": "BAC009S0903W0182", "wav": "./aishell/wav/test/S0903/BAC009S0903W0182.wav", "txt": "以及营业税免征期"} -{"key": "BAC009S0903W0183", "wav": "./aishell/wav/test/S0903/BAC009S0903W0183.wav", "txt": "第一次在公积金上提出差别对待"} -{"key": "BAC009S0903W0184", "wav": "./aishell/wav/test/S0903/BAC009S0903W0184.wav", "txt": "包括上海广州厦门南京在内的重点城市"} -{"key": "BAC009S0903W0185", "wav": "./aishell/wav/test/S0903/BAC009S0903W0185.wav", "txt": "以及前几次公积金新政的实施"} -{"key": "BAC009S0903W0186", "wav": "./aishell/wav/test/S0903/BAC009S0903W0186.wav", "txt": "公积金可贷额度受到严重冲击"} -{"key": "BAC009S0903W0187", "wav": "./aishell/wav/test/S0903/BAC009S0903W0187.wav", "txt": "着力解决一批影响现代农业发展全局的重大科技问题"} -{"key": "BAC009S0903W0188", "wav": "./aishell/wav/test/S0903/BAC009S0903W0188.wav", "txt": "加快农业技术引进消化吸收再创新步伐"} -{"key": "BAC009S0903W0189", "wav": "./aishell/wav/test/S0903/BAC009S0903W0189.wav", "txt": "加强农业科技领域国际合作"} -{"key": "BAC009S0903W0190", "wav": "./aishell/wav/test/S0903/BAC009S0903W0190.wav", "txt": "调整优化农业科研布局"} -{"key": "BAC009S0903W0191", "wav": "./aishell/wav/test/S0903/BAC009S0903W0191.wav", "txt": "加强农业科研基地和重点实验室建设"} -{"key": "BAC009S0903W0192", "wav": "./aishell/wav/test/S0903/BAC009S0903W0192.wav", "txt": "完善农业科技创新体系和现代农业产业技术体系"} -{"key": "BAC009S0903W0193", "wav": "./aishell/wav/test/S0903/BAC009S0903W0193.wav", "txt": "启动实施农业科技创新能力建设工程"} -{"key": "BAC009S0903W0194", "wav": "./aishell/wav/test/S0903/BAC009S0903W0194.wav", "txt": "组建一批产业技术创新战略联盟和国家农业科技园区"} -{"key": "BAC009S0903W0195", "wav": "./aishell/wav/test/S0903/BAC009S0903W0195.wav", "txt": "完善农业科技评价机制"} -{"key": "BAC009S0903W0196", "wav": "./aishell/wav/test/S0903/BAC009S0903W0196.wav", "txt": "激发农业科技创新活力"} -{"key": "BAC009S0903W0197", "wav": "./aishell/wav/test/S0903/BAC009S0903W0197.wav", "txt": "大力发展现代农作物种业"} -{"key": "BAC009S0903W0198", "wav": "./aishell/wav/test/S0903/BAC009S0903W0198.wav", "txt": "实施好转基因生物新品种培育重大专项"} -{"key": "BAC009S0903W0199", "wav": "./aishell/wav/test/S0903/BAC009S0903W0199.wav", "txt": "加快发展生物育种战略性新兴产业"} -{"key": "BAC009S0903W0200", "wav": "./aishell/wav/test/S0903/BAC009S0903W0200.wav", "txt": "加快农业新品种新技术转化应用"} -{"key": "BAC009S0903W0201", "wav": 
"./aishell/wav/test/S0903/BAC009S0903W0201.wav", "txt": "加强小麦一喷三防喷施叶面肥"} -{"key": "BAC009S0903W0202", "wav": "./aishell/wav/test/S0903/BAC009S0903W0202.wav", "txt": "加快牲畜水产遗传改良进程"} -{"key": "BAC009S0903W0203", "wav": "./aishell/wav/test/S0903/BAC009S0903W0203.wav", "txt": "创新农业技术推广机制"} -{"key": "BAC009S0903W0204", "wav": "./aishell/wav/test/S0903/BAC009S0903W0204.wav", "txt": "大规模开展高产创建"} -{"key": "BAC009S0903W0205", "wav": "./aishell/wav/test/S0903/BAC009S0903W0205.wav", "txt": "在有条件地区实行整乡整县场推进"} -{"key": "BAC009S0903W0206", "wav": "./aishell/wav/test/S0903/BAC009S0903W0206.wav", "txt": "力争实现优势产区和主要品种全复盖"} -{"key": "BAC009S0903W0207", "wav": "./aishell/wav/test/S0903/BAC009S0903W0207.wav", "txt": "壮大农业农村人才队伍"} -{"key": "BAC009S0903W0208", "wav": "./aishell/wav/test/S0903/BAC009S0903W0208.wav", "txt": "以实施现代农业人才支撑计划为抓手"} -{"key": "BAC009S0903W0209", "wav": "./aishell/wav/test/S0903/BAC009S0903W0209.wav", "txt": "加大农村劳动力培训阳光工程实施力度"} -{"key": "BAC009S0903W0210", "wav": "./aishell/wav/test/S0903/BAC009S0903W0210.wav", "txt": "大力发展农业职业培养"} -{"key": "BAC009S0903W0211", "wav": "./aishell/wav/test/S0903/BAC009S0903W0211.wav", "txt": "加快技能型人才培养"} -{"key": "BAC009S0903W0212", "wav": "./aishell/wav/test/S0903/BAC009S0903W0212.wav", "txt": "支持高校毕业生和各类优秀人才投身现代农业建设"} -{"key": "BAC009S0903W0213", "wav": "./aishell/wav/test/S0903/BAC009S0903W0213.wav", "txt": "鼓励外出务工农农民带技术带资金回乡创业"} -{"key": "BAC009S0903W0214", "wav": "./aishell/wav/test/S0903/BAC009S0903W0214.wav", "txt": "改善农业基础设备和装备条件"} -{"key": "BAC009S0903W0215", "wav": "./aishell/wav/test/S0903/BAC009S0903W0215.wav", "txt": "大规模开展高标准农田建设"} -{"key": "BAC009S0903W0216", "wav": "./aishell/wav/test/S0903/BAC009S0903W0216.wav", "txt": "按照统筹规划分工协作集中投入连片推进的思想"} -{"key": "BAC009S0903W0217", "wav": "./aishell/wav/test/S0903/BAC009S0903W0217.wav", "txt": "大规模改造中低产田"} -{"key": "BAC009S0903W0218", "wav": "./aishell/wav/test/S0903/BAC009S0903W0218.wav", "txt": "建设旱涝保收高标准农田"} -{"key": "BAC009S0903W0219", "wav": "./aishell/wav/test/S0903/BAC009S0903W0219.wav", "txt": "加快大中型灌区排灌泵站配套改造"} -{"key": "BAC009S0903W0220", "wav": "./aishell/wav/test/S0903/BAC009S0903W0220.wav", "txt": "大力开展小型农田水利建设"} -{"key": "BAC009S0903W0221", "wav": "./aishell/wav/test/S0903/BAC009S0903W0221.wav", "txt": "增加农田有效灌溉面积"} -{"key": "BAC009S0903W0222", "wav": "./aishell/wav/test/S0903/BAC009S0903W0222.wav", "txt": "加强新增千亿斤粮食生产能力规划的田间工程建设"} -{"key": "BAC009S0903W0223", "wav": "./aishell/wav/test/S0903/BAC009S0903W0223.wav", "txt": "完善机耕道农田防护林等设施"} -{"key": "BAC009S0903W0224", "wav": "./aishell/wav/test/S0903/BAC009S0903W0224.wav", "txt": "推广土壤有机质提升测土配方施肥等培肥地力技术"} -{"key": "BAC009S0903W0225", "wav": "./aishell/wav/test/S0903/BAC009S0903W0225.wav", "txt": "完善高标准农田建后管护支持政策和制度"} -{"key": "BAC009S0903W0226", "wav": "./aishell/wav/test/S0903/BAC009S0903W0226.wav", "txt": "延长各类设施使用年限"} -{"key": "BAC009S0903W0227", "wav": "./aishell/wav/test/S0903/BAC009S0903W0227.wav", "txt": "确保农田综合生产能力长期持续稳定提升"} -{"key": "BAC009S0903W0228", "wav": "./aishell/wav/test/S0903/BAC009S0903W0228.wav", "txt": "改善养殖业生产条件"} -{"key": "BAC009S0903W0229", "wav": "./aishell/wav/test/S0903/BAC009S0903W0229.wav", "txt": "加快实施生禽良种工程"} -{"key": "BAC009S0903W0230", "wav": "./aishell/wav/test/S0903/BAC009S0903W0230.wav", "txt": "支持生禽规模化养殖场小区开展标准化改造和建设"} -{"key": "BAC009S0903W0231", "wav": "./aishell/wav/test/S0903/BAC009S0903W0231.wav", "txt": "加快草原围栏棚圈和牧区水利建设"} -{"key": "BAC009S0903W0232", "wav": "./aishell/wav/test/S0903/BAC009S0903W0232.wav", "txt": "配套发展节水高效灌溉词草基地"} -{"key": "BAC009S0903W0233", "wav": 
"./aishell/wav/test/S0903/BAC009S0903W0233.wav", "txt": "健全水产良良种体系"} -{"key": "BAC009S0903W0234", "wav": "./aishell/wav/test/S0903/BAC009S0903W0234.wav", "txt": "开展池塘标准化改造"} -{"key": "BAC009S0903W0235", "wav": "./aishell/wav/test/S0903/BAC009S0903W0235.wav", "txt": "建设水产健康养殖示范场"} -{"key": "BAC009S0903W0236", "wav": "./aishell/wav/test/S0903/BAC009S0903W0236.wav", "txt": "加强渔港和渔政执法能力建设"} -{"key": "BAC009S0903W0237", "wav": "./aishell/wav/test/S0903/BAC009S0903W0237.wav", "txt": "全面落实农机具购置补贴各项管理制度和规定"} -{"key": "BAC009S0903W0238", "wav": "./aishell/wav/test/S0903/BAC009S0903W0238.wav", "txt": "加快推进水稻栽插收获和玉米收获机械化"} -{"key": "BAC009S0903W0239", "wav": "./aishell/wav/test/S0903/BAC009S0903W0239.wav", "txt": "重点突破棉花油菜甘蔗收获机械化瓶颈"} -{"key": "BAC009S0903W0240", "wav": "./aishell/wav/test/S0903/BAC009S0903W0240.wav", "txt": "大力发展高效植保机器"} -{"key": "BAC009S0903W0241", "wav": "./aishell/wav/test/S0903/BAC009S0903W0241.wav", "txt": "积极推进养殖业园艺业农产品初加工机械化"} -{"key": "BAC009S0903W0242", "wav": "./aishell/wav/test/S0903/BAC009S0903W0242.wav", "txt": "加快实施保护性耕作工程"} -{"key": "BAC009S0903W0243", "wav": "./aishell/wav/test/S0903/BAC009S0903W0243.wav", "txt": "提高大型农机具和农药化肥农膜等农资生产水平"} -{"key": "BAC009S0903W0244", "wav": "./aishell/wav/test/S0903/BAC009S0903W0244.wav", "txt": "加强农业防灾减灾能力建设"} -{"key": "BAC009S0903W0245", "wav": "./aishell/wav/test/S0903/BAC009S0903W0245.wav", "txt": "提高防汛抗旱减灾能力"} -{"key": "BAC009S0903W0246", "wav": "./aishell/wav/test/S0903/BAC009S0903W0246.wav", "txt": "加强种子饲草料等急救灾物资储备调运条件建设"} -{"key": "BAC009S0903W0247", "wav": "./aishell/wav/test/S0903/BAC009S0903W0247.wav", "txt": "推广相应的生产技术和防灾减灾措施大力推进农业标准化"} -{"key": "BAC009S0903W0248", "wav": "./aishell/wav/test/S0903/BAC009S0903W0248.wav", "txt": "以农兽药残留标准为重点"} -{"key": "BAC009S0903W0249", "wav": "./aishell/wav/test/S0903/BAC009S0903W0249.wav", "txt": "加快健全农业标准体系"} -{"key": "BAC009S0903W0250", "wav": "./aishell/wav/test/S0903/BAC009S0903W0250.wav", "txt": "以园艺产品生产品水产品等为重点"} -{"key": "BAC009S0903W0251", "wav": "./aishell/wav/test/S0903/BAC009S0903W0251.wav", "txt": "推行统一的标准操作规程和技术规范"} -{"key": "BAC009S0903W0252", "wav": "./aishell/wav/test/S0903/BAC009S0903W0252.wav", "txt": "加强国家级农业标准化整建制推进示范县场建设"} -{"key": "BAC009S0903W0253", "wav": "./aishell/wav/test/S0903/BAC009S0903W0253.wav", "txt": "市场占有率为百分之五"} -{"key": "BAC009S0903W0254", "wav": "./aishell/wav/test/S0903/BAC009S0903W0254.wav", "txt": "二零一四年三星期累计销售超过一百二十万块智能手表"} -{"key": "BAC009S0903W0255", "wav": "./aishell/wav/test/S0903/BAC009S0903W0255.wav", "txt": "这个数据不及苹果的一个季度"} -{"key": "BAC009S0903W0256", "wav": "./aishell/wav/test/S0903/BAC009S0903W0256.wav", "txt": "因此不能表示苹果没有新的业务增长点"} -{"key": "BAC009S0903W0257", "wav": "./aishell/wav/test/S0903/BAC009S0903W0257.wav", "txt": "本报记者纪佳鹏北京报道北京时间八月十二日"} -{"key": "BAC009S0903W0258", "wav": "./aishell/wav/test/S0903/BAC009S0903W0258.wav", "txt": "作为科技股领头羊的苹果股价当天下挫百分之二"} -{"key": "BAC009S0903W0259", "wav": "./aishell/wav/test/S0903/BAC009S0903W0259.wav", "txt": "十二月二日路透社报道"} -{"key": "BAC009S0903W0260", "wav": "./aishell/wav/test/S0903/BAC009S0903W0260.wav", "txt": "苹果股票每分钟交易量已超过六百七十万股"} -{"key": "BAC009S0903W0261", "wav": "./aishell/wav/test/S0903/BAC009S0903W0261.wav", "txt": "这种巨大且异乎寻常的抛售量"} -{"key": "BAC009S0903W0262", "wav": "./aishell/wav/test/S0903/BAC009S0903W0262.wav", "txt": "瞬间将苹果估价拉低了至少百分之六"} -{"key": "BAC009S0903W0263", "wav": "./aishell/wav/test/S0903/BAC009S0903W0263.wav", "txt": "使其市值分秒间蒸发近四百亿美元"} -{"key": "BAC009S0903W0264", "wav": "./aishell/wav/test/S0903/BAC009S0903W0264.wav", "txt": "成为苹果近三个月以来股价下跌最严重的一次"} -{"key": 
"BAC009S0903W0265", "wav": "./aishell/wav/test/S0903/BAC009S0903W0265.wav", "txt": "苹果股价一度每分钟跌幅已破百分之三"} -{"key": "BAC009S0903W0266", "wav": "./aishell/wav/test/S0903/BAC009S0903W0266.wav", "txt": "每股报价报收于一百一十一点二七美元"} -{"key": "BAC009S0903W0267", "wav": "./aishell/wav/test/S0903/BAC009S0903W0267.wav", "txt": "报收于每股一百一十五点四五美元"} -{"key": "BAC009S0903W0268", "wav": "./aishell/wav/test/S0903/BAC009S0903W0268.wav", "txt": "对于造成此次异常闪崩的原因目前尚未公布"} -{"key": "BAC009S0903W0269", "wav": "./aishell/wav/test/S0903/BAC009S0903W0269.wav", "txt": "此举或与摩根士丹利下调苹果股票持股比例有关"} -{"key": "BAC009S0903W0270", "wav": "./aishell/wav/test/S0903/BAC009S0903W0270.wav", "txt": "同时将苹果持股比例由百分之四下调至百分之三"} -{"key": "BAC009S0903W0271", "wav": "./aishell/wav/test/S0903/BAC009S0903W0271.wav", "txt": "并建议客户减少对该股票在投资组合中的占比"} -{"key": "BAC009S0903W0272", "wav": "./aishell/wav/test/S0903/BAC009S0903W0272.wav", "txt": "高频交易也与此次闪崩事件逃脱不了干系"} -{"key": "BAC009S0903W0273", "wav": "./aishell/wav/test/S0903/BAC009S0903W0273.wav", "txt": "高频交易一直饱受诟病"} -{"key": "BAC009S0903W0274", "wav": "./aishell/wav/test/S0903/BAC009S0903W0274.wav", "txt": "美国股市九点五十起"} -{"key": "BAC009S0903W0275", "wav": "./aishell/wav/test/S0903/BAC009S0903W0275.wav", "txt": "超过三百馀种不同类别股票均出现不正常股价波动"} -{"key": "BAC009S0903W0276", "wav": "./aishell/wav/test/S0903/BAC009S0903W0276.wav", "txt": "当出现此类价格变化时"} -{"key": "BAC009S0903W0277", "wav": "./aishell/wav/test/S0903/BAC009S0903W0277.wav", "txt": "通常只是算法交易造成的影响"} -{"key": "BAC009S0903W0278", "wav": "./aishell/wav/test/S0903/BAC009S0903W0278.wav", "txt": "也就是所说的流动性蒸发事实上"} -{"key": "BAC009S0903W0279", "wav": "./aishell/wav/test/S0903/BAC009S0903W0279.wav", "txt": "流动性从未得到足够的重视"} -{"key": "BAC009S0903W0280", "wav": "./aishell/wav/test/S0903/BAC009S0903W0280.wav", "txt": "我们当下的股市在流动性方面也表现得支离破碎"} -{"key": "BAC009S0903W0281", "wav": "./aishell/wav/test/S0903/BAC009S0903W0281.wav", "txt": "苹果领头的股价闪崩原因可能比想象中的更为复杂"} -{"key": "BAC009S0903W0282", "wav": "./aishell/wav/test/S0903/BAC009S0903W0282.wav", "txt": "现在就下结论将原因推给高频交易"} -{"key": "BAC009S0903W0283", "wav": "./aishell/wav/test/S0903/BAC009S0903W0283.wav", "txt": "这种做法很容易误导客服"} -{"key": "BAC009S0903W0284", "wav": "./aishell/wav/test/S0903/BAC009S0903W0284.wav", "txt": "阿里巴巴当日股价下跌一点百分之四"} -{"key": "BAC009S0903W0285", "wav": "./aishell/wav/test/S0903/BAC009S0903W0285.wav", "txt": "谷歌十点五八分股价也出现一点百分之七十九的最大跌幅"} -{"key": "BAC009S0903W0286", "wav": "./aishell/wav/test/S0903/BAC009S0903W0286.wav", "txt": "苹果股价闪崩只是正常股票套利的表现"} -{"key": "BAC009S0903W0287", "wav": "./aishell/wav/test/S0903/BAC009S0903W0287.wav", "txt": "苹果股价相较十月份低点已经上涨约百分之二十五"} -{"key": "BAC009S0903W0288", "wav": "./aishell/wav/test/S0903/BAC009S0903W0288.wav", "txt": "纳斯达克在此期间只涨了百分之十"} -{"key": "BAC009S0903W0289", "wav": "./aishell/wav/test/S0903/BAC009S0903W0289.wav", "txt": "选择套现或也是情理之中"} -{"key": "BAC009S0903W0290", "wav": "./aishell/wav/test/S0903/BAC009S0903W0290.wav", "txt": "每股下滑三点八八美元报收于一点一十五点零五美元"} -{"key": "BAC009S0903W0292", "wav": "./aishell/wav/test/S0903/BAC009S0903W0292.wav", "txt": "苹果股票每分钟交易量已超过六十七万股"} -{"key": "BAC009S0903W0293", "wav": "./aishell/wav/test/S0903/BAC009S0903W0293.wav", "txt": "这不仅创下苹果公司自二零一四年"} -{"key": "BAC009S0903W0294", "wav": "./aishell/wav/test/S0903/BAC009S0903W0294.wav", "txt": "苹果背后那行字应该在每个中国人心里搜狐科技"} -{"key": "BAC009S0903W0295", "wav": "./aishell/wav/test/S0903/BAC009S0903W0295.wav", "txt": "翻译过来就是加利福尼亚苹果公司设计"} -{"key": "BAC009S0903W0296", "wav": "./aishell/wav/test/S0903/BAC009S0903W0296.wav", "txt": "按说这只是一个客观表述"} -{"key": "BAC009S0903W0297", "wav": 
"./aishell/wav/test/S0903/BAC009S0903W0297.wav", "txt": "对于谋求转型发展怀揣创新型国家梦想的中国来说"} -{"key": "BAC009S0903W0298", "wav": "./aishell/wav/test/S0903/BAC009S0903W0298.wav", "txt": "这行字值得我们深思"} -{"key": "BAC009S0903W0299", "wav": "./aishell/wav/test/S0903/BAC009S0903W0299.wav", "txt": "众多跨国品牌在中国都有生产基地"} -{"key": "BAC009S0903W0300", "wav": "./aishell/wav/test/S0903/BAC009S0903W0300.wav", "txt": "像苹果这样在产品上强调在本国设计的很少"} -{"key": "BAC009S0903W0301", "wav": "./aishell/wav/test/S0903/BAC009S0903W0301.wav", "txt": "这样的做法当然是企业行为"} -{"key": "BAC009S0903W0302", "wav": "./aishell/wav/test/S0903/BAC009S0903W0302.wav", "txt": "这行字对于我们来说"} -{"key": "BAC009S0903W0303", "wav": "./aishell/wav/test/S0903/BAC009S0903W0303.wav", "txt": "很多家长都考虑给孩子配置具有定位功能的智能手机"} -{"key": "BAC009S0903W0304", "wav": "./aishell/wav/test/S0903/BAC009S0903W0304.wav", "txt": "智能手机特定的上网和游戏功能"} -{"key": "BAC009S0903W0305", "wav": "./aishell/wav/test/S0903/BAC009S0903W0305.wav", "txt": "注定了它强烈的娱乐性"} -{"key": "BAC009S0903W0306", "wav": "./aishell/wav/test/S0903/BAC009S0903W0306.wav", "txt": "给孩子配置智能手机"} -{"key": "BAC009S0903W0307", "wav": "./aishell/wav/test/S0903/BAC009S0903W0307.wav", "txt": "担心会直接影响孩子正常的学习"} -{"key": "BAC009S0903W0308", "wav": "./aishell/wav/test/S0903/BAC009S0903W0308.wav", "txt": "儿童电话手表除了通话定位等功能外"} -{"key": "BAC009S0903W0309", "wav": "./aishell/wav/test/S0903/BAC009S0903W0309.wav", "txt": "还针对性的设置了上课禁用等功能"} -{"key": "BAC009S0903W0310", "wav": "./aishell/wav/test/S0903/BAC009S0903W0310.wav", "txt": "孩子带到学校既不会让孩子分心"} -{"key": "BAC009S0903W0311", "wav": "./aishell/wav/test/S0903/BAC009S0903W0311.wav", "txt": "又可以让家长了解孩子的位置"} -{"key": "BAC009S0903W0312", "wav": "./aishell/wav/test/S0903/BAC009S0903W0312.wav", "txt": "是很多父母迫切需要的"} -{"key": "BAC009S0903W0313", "wav": "./aishell/wav/test/S0903/BAC009S0903W0313.wav", "txt": "对于小天才电话手表上课禁用功能"} -{"key": "BAC009S0903W0314", "wav": "./aishell/wav/test/S0903/BAC009S0903W0314.wav", "txt": "相关人员表示为了方便和孩子保持联系"} -{"key": "BAC009S0903W0315", "wav": "./aishell/wav/test/S0903/BAC009S0903W0315.wav", "txt": "之前很多家长会给孩子买手机"} -{"key": "BAC009S0903W0316", "wav": "./aishell/wav/test/S0903/BAC009S0903W0316.wav", "txt": "影响学习虽说功能手机可以阻止孩子玩游戏"} -{"key": "BAC009S0903W0317", "wav": "./aishell/wav/test/S0903/BAC009S0903W0317.wav", "txt": "儿童电话手表正好解决了这两个问题"} -{"key": "BAC009S0903W0318", "wav": "./aishell/wav/test/S0903/BAC009S0903W0318.wav", "txt": "家长随时和孩子保持联系"} -{"key": "BAC009S0903W0319", "wav": "./aishell/wav/test/S0903/BAC009S0903W0319.wav", "txt": "我就给自己的孩子也买了一个呢"} -{"key": "BAC009S0903W0320", "wav": "./aishell/wav/test/S0903/BAC009S0903W0320.wav", "txt": "失孤等影片的上映"} -{"key": "BAC009S0903W0321", "wav": "./aishell/wav/test/S0903/BAC009S0903W0321.wav", "txt": "也将儿童人身安全的话题推向了妙论的风口浪尖"} -{"key": "BAC009S0903W0322", "wav": "./aishell/wav/test/S0903/BAC009S0903W0322.wav", "txt": "儿童电话手表的诞生"} -{"key": "BAC009S0903W0323", "wav": "./aishell/wav/test/S0903/BAC009S0903W0323.wav", "txt": "为孩子多了一份强有力的保障"} -{"key": "BAC009S0903W0324", "wav": "./aishell/wav/test/S0903/BAC009S0903W0324.wav", "txt": "电话手表正是瞄准了这一需求"} -{"key": "BAC009S0903W0325", "wav": "./aishell/wav/test/S0903/BAC009S0903W0325.wav", "txt": "加上随身携带的便捷性和流畅的操作体验"} -{"key": "BAC009S0903W0326", "wav": "./aishell/wav/test/S0903/BAC009S0903W0326.wav", "txt": "在手机平板电脑之外"} -{"key": "BAC009S0903W0327", "wav": "./aishell/wav/test/S0903/BAC009S0903W0327.wav", "txt": "开扩了一个新的市场"} -{"key": "BAC009S0903W0328", "wav": "./aishell/wav/test/S0903/BAC009S0903W0328.wav", "txt": "现在三百六十腾讯等大公司都涉足了这一领域"} -{"key": "BAC009S0903W0329", "wav": 
"./aishell/wav/test/S0903/BAC009S0903W0329.wav", "txt": "自今年六月电话手表行业兴起起来"} -{"key": "BAC009S0903W0330", "wav": "./aishell/wav/test/S0903/BAC009S0903W0330.wav", "txt": "整体行业出货量应该不断突破"} -{"key": "BAC009S0903W0331", "wav": "./aishell/wav/test/S0903/BAC009S0903W0331.wav", "txt": "并将成为新兴的销售热点"} -{"key": "BAC009S0903W0332", "wav": "./aishell/wav/test/S0903/BAC009S0903W0332.wav", "txt": "科技创新带动了电话手表行业"} -{"key": "BAC009S0903W0333", "wav": "./aishell/wav/test/S0903/BAC009S0903W0333.wav", "txt": "其实儿童电话手表的火"} -{"key": "BAC009S0903W0334", "wav": "./aishell/wav/test/S0903/BAC009S0903W0334.wav", "txt": "是火在行业的科技创新"} -{"key": "BAC009S0903W0335", "wav": "./aishell/wav/test/S0903/BAC009S0903W0335.wav", "txt": "随着国家在科技创新方面的投入和关注度的增加"} -{"key": "BAC009S0903W0336", "wav": "./aishell/wav/test/S0903/BAC009S0903W0336.wav", "txt": "新兴行业对于创新的热情也不断增加"} -{"key": "BAC009S0903W0337", "wav": "./aishell/wav/test/S0903/BAC009S0903W0337.wav", "txt": "我们小天才电话手表就是不断创新的成果"} -{"key": "BAC009S0903W0338", "wav": "./aishell/wav/test/S0903/BAC009S0903W0338.wav", "txt": "意大利选手弗菜戈也说我们在来里约之前"} -{"key": "BAC009S0903W0339", "wav": "./aishell/wav/test/S0903/BAC009S0903W0339.wav", "txt": "看到了有关这里水污染的报道"} -{"key": "BAC009S0903W0340", "wav": "./aishell/wav/test/S0903/BAC009S0903W0340.wav", "txt": "对这里的水质比较关心"} -{"key": "BAC009S0903W0341", "wav": "./aishell/wav/test/S0903/BAC009S0903W0341.wav", "txt": "这个湖虽然没有漂浮的垃圾"} -{"key": "BAC009S0903W0342", "wav": "./aishell/wav/test/S0903/BAC009S0903W0342.wav", "txt": "但湖水很脏也很浑浊"} -{"key": "BAC009S0903W0343", "wav": "./aishell/wav/test/S0903/BAC009S0903W0343.wav", "txt": "里约奥组委此前表示"} -{"key": "BAC009S0903W0344", "wav": "./aishell/wav/test/S0903/BAC009S0903W0344.wav", "txt": "运动员的健康是他们关注的头等大事"} -{"key": "BAC009S0903W0345", "wav": "./aishell/wav/test/S0903/BAC009S0903W0345.wav", "txt": "无论帆船赛艇还是公开水域"} -{"key": "BAC009S0903W0346", "wav": "./aishell/wav/test/S0903/BAC009S0903W0346.wav", "txt": "在奥运期间水质都可以保证运动员的健康"} -{"key": "BAC009S0903W0347", "wav": "./aishell/wav/test/S0903/BAC009S0903W0347.wav", "txt": "二零一五年九月十二日星期六十一点"} -{"key": "BAC009S0903W0348", "wav": "./aishell/wav/test/S0903/BAC009S0903W0348.wav", "txt": "开幕式举行了庄严的入场仪式"} -{"key": "BAC009S0903W0349", "wav": "./aishell/wav/test/S0903/BAC009S0903W0349.wav", "txt": "裁判员队伍和参赛代表队依次入场亮相"} -{"key": "BAC009S0903W0350", "wav": "./aishell/wav/test/S0903/BAC009S0903W0350.wav", "txt": "裁判员代表和运动员代表进行了宣誓"} -{"key": "BAC009S0903W0351", "wav": "./aishell/wav/test/S0903/BAC009S0903W0351.wav", "txt": "曾春蕾和刘晓彤向各参赛队赠送了签名排球"} -{"key": "BAC009S0903W0352", "wav": "./aishell/wav/test/S0903/BAC009S0903W0352.wav", "txt": "北京市体育局副局长孙学才宣布比赛开幕"} -{"key": "BAC009S0903W0353", "wav": "./aishell/wav/test/S0903/BAC009S0903W0353.wav", "txt": "响应北京市振兴三大球战略的号召"} -{"key": "BAC009S0903W0354", "wav": "./aishell/wav/test/S0903/BAC009S0903W0354.wav", "txt": "促进北京排球事业发展"} -{"key": "BAC009S0903W0355", "wav": "./aishell/wav/test/S0903/BAC009S0903W0355.wav", "txt": "丰富北京市业馀排球群体活动"} -{"key": "BAC009S0903W0356", "wav": "./aishell/wav/test/S0903/BAC009S0903W0356.wav", "txt": "激发广大群众对排球的热情"} -{"key": "BAC009S0903W0357", "wav": "./aishell/wav/test/S0903/BAC009S0903W0357.wav", "txt": "为将其打造成具有影响力的群众性品牌赛事"} -{"key": "BAC009S0903W0358", "wav": "./aishell/wav/test/S0903/BAC009S0903W0358.wav", "txt": "在社会主义核心价值观的指引下"} -{"key": "BAC009S0903W0359", "wav": "./aishell/wav/test/S0903/BAC009S0903W0359.wav", "txt": "突出弘扬北京排球文化"} -{"key": "BAC009S0903W0360", "wav": "./aishell/wav/test/S0903/BAC009S0903W0360.wav", "txt": "组委会在部门设置上调整了人员分工"} -{"key": "BAC009S0903W0361", "wav": 
"./aishell/wav/test/S0903/BAC009S0903W0361.wav", "txt": "组委会工作机构共分为四部一室"} -{"key": "BAC009S0903W0362", "wav": "./aishell/wav/test/S0903/BAC009S0903W0362.wav", "txt": "并且全部采用有经验的工作人员参与竞赛组织工作"} -{"key": "BAC009S0903W0363", "wav": "./aishell/wav/test/S0903/BAC009S0903W0363.wav", "txt": "在制定竞赛规程方面严格遵循规范化专业化原则"} -{"key": "BAC009S0903W0364", "wav": "./aishell/wav/test/S0903/BAC009S0903W0364.wav", "txt": "不仅能够将业馀排球与职业排球严格地区分开"} -{"key": "BAC009S0903W0365", "wav": "./aishell/wav/test/S0903/BAC009S0903W0365.wav", "txt": "而且满足了绝大多数业馀排球爱好者的参赛需求"} -{"key": "BAC009S0903W0366", "wav": "./aishell/wav/test/S0903/BAC009S0903W0366.wav", "txt": "充分做到公平公正公开"} -{"key": "BAC009S0903W0367", "wav": "./aishell/wav/test/S0903/BAC009S0903W0367.wav", "txt": "其中国际级裁判员两名"} -{"key": "BAC009S0903W0368", "wav": "./aishell/wav/test/S0903/BAC009S0903W0368.wav", "txt": "结合业馀排球特点做出细微调整制定而成"} -{"key": "BAC009S0903W0369", "wav": "./aishell/wav/test/S0903/BAC009S0903W0369.wav", "txt": "营造出良好的比赛氛围"} -{"key": "BAC009S0903W0370", "wav": "./aishell/wav/test/S0903/BAC009S0903W0370.wav", "txt": "悬挂于场馆醒目位置"} -{"key": "BAC009S0903W0371", "wav": "./aishell/wav/test/S0903/BAC009S0903W0371.wav", "txt": "增强参赛者的荣誉感与积极性的同时"} -{"key": "BAC009S0903W0372", "wav": "./aishell/wav/test/S0903/BAC009S0903W0372.wav", "txt": "进一步提升了比赛品质"} -{"key": "BAC009S0903W0373", "wav": "./aishell/wav/test/S0903/BAC009S0903W0373.wav", "txt": "要将北京市业馀排球联赛打造成群众性品牌赛事"} -{"key": "BAC009S0903W0374", "wav": "./aishell/wav/test/S0903/BAC009S0903W0374.wav", "txt": "离不开广大媒体的支持"} -{"key": "BAC009S0903W0375", "wav": "./aishell/wav/test/S0903/BAC009S0903W0375.wav", "txt": "组委会特意举办隆重的开幕式"} -{"key": "BAC009S0903W0376", "wav": "./aishell/wav/test/S0903/BAC009S0903W0376.wav", "txt": "并邀请京城排球界全部媒体参加报道"} -{"key": "BAC009S0903W0377", "wav": "./aishell/wav/test/S0903/BAC009S0903W0377.wav", "txt": "并制作了精美的秩序册发给媒体及参赛队"} -{"key": "BAC009S0903W0379", "wav": "./aishell/wav/test/S0903/BAC009S0903W0379.wav", "txt": "以大球套小球为设计理念"} -{"key": "BAC009S0903W0380", "wav": "./aishell/wav/test/S0903/BAC009S0903W0380.wav", "txt": "为振兴三大球贡献自己的一份力量"} -{"key": "BAC009S0903W0381", "wav": "./aishell/wav/test/S0903/BAC009S0903W0381.wav", "txt": "他们的造型由排球的五个经典动作组成"} -{"key": "BAC009S0903W0382", "wav": "./aishell/wav/test/S0903/BAC009S0903W0382.wav", "txt": "分别是发接传垫扣"} -{"key": "BAC009S0903W0383", "wav": "./aishell/wav/test/S0903/BAC009S0903W0383.wav", "txt": "颜色则是由代表着运动精神的奥运五环色组成"} -{"key": "BAC009S0903W0384", "wav": "./aishell/wav/test/S0903/BAC009S0903W0384.wav", "txt": "来自全国各地的业馀排球爱好者纷纷前来踊跃报名"} -{"key": "BAC009S0903W0385", "wav": "./aishell/wav/test/S0903/BAC009S0903W0385.wav", "txt": "半个月的报名期限未到"} -{"key": "BAC009S0903W0386", "wav": "./aishell/wav/test/S0903/BAC009S0903W0386.wav", "txt": "二四个参赛名额就已经全部报满"} -{"key": "BAC009S0903W0387", "wav": "./aishell/wav/test/S0903/BAC009S0903W0387.wav", "txt": "共有三百二十三名业馀排球爱好者报名参加比赛"} -{"key": "BAC009S0903W0388", "wav": "./aishell/wav/test/S0903/BAC009S0903W0388.wav", "txt": "其中年龄最小的年仅十四岁"} -{"key": "BAC009S0903W0389", "wav": "./aishell/wav/test/S0903/BAC009S0903W0389.wav", "txt": "最大的已经年过半百"} -{"key": "BAC009S0903W0390", "wav": "./aishell/wav/test/S0903/BAC009S0903W0390.wav", "txt": "另外还有两名来自加拿大和美国的外籍华侨"} -{"key": "BAC009S0903W0391", "wav": "./aishell/wav/test/S0903/BAC009S0903W0391.wav", "txt": "由此可见北京市业馀排球联赛的影响力与号召力"} -{"key": "BAC009S0903W0392", "wav": "./aishell/wav/test/S0903/BAC009S0903W0392.wav", "txt": "在参赛的二四支队伍中"} -{"key": "BAC009S0903W0393", "wav": "./aishell/wav/test/S0903/BAC009S0903W0393.wav", "txt": "有一些临时组建的球队"} -{"key": "BAC009S0903W0394", "wav": 
"./aishell/wav/test/S0903/BAC009S0903W0394.wav", "txt": "但大部分都是常年活跃在业馀排球圈里成熟队球"} -{"key": "BAC009S0903W0395", "wav": "./aishell/wav/test/S0903/BAC009S0903W0395.wav", "txt": "而且多次参加过业馀排球比赛"} -{"key": "BAC009S0903W0396", "wav": "./aishell/wav/test/S0903/BAC009S0903W0396.wav", "txt": "相信有这些高水平业馀排球队的参与"} -{"key": "BAC009S0903W0397", "wav": "./aishell/wav/test/S0903/BAC009S0903W0397.wav", "txt": "这一届北京市业馀排球联赛一定会精彩纷呈"} -{"key": "BAC009S0903W0398", "wav": "./aishell/wav/test/S0903/BAC009S0903W0398.wav", "txt": "为期五天的比赛全部结束后"} -{"key": "BAC009S0903W0399", "wav": "./aishell/wav/test/S0903/BAC009S0903W0399.wav", "txt": "将举行隆重的颁奖仪式"} -{"key": "BAC009S0903W0400", "wav": "./aishell/wav/test/S0903/BAC009S0903W0400.wav", "txt": "从四分之一决赛开始每场比赛评选出一名优秀运动员"} -{"key": "BAC009S0903W0401", "wav": "./aishell/wav/test/S0903/BAC009S0903W0401.wav", "txt": "为参加联赛的吸引力"} -{"key": "BAC009S0903W0402", "wav": "./aishell/wav/test/S0903/BAC009S0903W0402.wav", "txt": "提升参赛队的积极性"} -{"key": "BAC009S0903W0403", "wav": "./aishell/wav/test/S0903/BAC009S0903W0403.wav", "txt": "组委会提高了前三名的含金量"} -{"key": "BAC009S0903W0404", "wav": "./aishell/wav/test/S0903/BAC009S0903W0404.wav", "txt": "这也是该片首次亮相大荧幕"} -{"key": "BAC009S0903W0405", "wav": "./aishell/wav/test/S0903/BAC009S0903W0405.wav", "txt": "影片的正式公映要到圣诞节当天"} -{"key": "BAC009S0903W0406", "wav": "./aishell/wav/test/S0903/BAC009S0903W0406.wav", "txt": "但本月评论界就可以知道该片的真实成色"} -{"key": "BAC009S0903W0407", "wav": "./aishell/wav/test/S0903/BAC009S0903W0407.wav", "txt": "曾在二零一零年获得空前成功"} -{"key": "BAC009S0903W0408", "wav": "./aishell/wav/test/S0903/BAC009S0903W0408.wav", "txt": "据香港媒体报道"} -{"key": "BAC009S0903W0409", "wav": "./aishell/wav/test/S0903/BAC009S0903W0409.wav", "txt": "因参演剧集殭而与陈嘉宝及赖慰玲成为好姐妹"} -{"key": "BAC009S0903W0410", "wav": "./aishell/wav/test/S0903/BAC009S0903W0410.wav", "txt": "众人一起为寿寿星女庆生"} -{"key": "BAC009S0903W0411", "wav": "./aishell/wav/test/S0903/BAC009S0903W0411.wav", "txt": "陈嘉宝昨天六月二十三日将大合照上传个人主页"} -{"key": "BAC009S0903W0412", "wav": "./aishell/wav/test/S0903/BAC009S0903W0412.wav", "txt": "除了看见陈嘉宝及赖慰玲外"} -{"key": "BAC009S0903W0413", "wav": "./aishell/wav/test/S0903/BAC009S0903W0413.wav", "txt": "亮点正是与陈凯琳互相了解中郑嘉颖也有出席"} -{"key": "BAC009S0903W0414", "wav": "./aishell/wav/test/S0903/BAC009S0903W0414.wav", "txt": "并做陈凯琳背后的男人"} -{"key": "BAC009S0903W0415", "wav": "./aishell/wav/test/S0903/BAC009S0903W0415.wav", "txt": "网友纷纷将焦点转移到这对情侣身上"} -{"key": "BAC009S0903W0416", "wav": "./aishell/wav/test/S0903/BAC009S0903W0416.wav", "txt": "中新网七月二十八日电据香港明报消息"} -{"key": "BAC009S0903W0417", "wav": "./aishell/wav/test/S0903/BAC009S0903W0417.wav", "txt": "陈凯琳田心妮等出席新剧开机机仪式"} -{"key": "BAC009S0903W0418", "wav": "./aishell/wav/test/S0903/BAC009S0903W0418.wav", "txt": "谈及此前她曾到横店探班郑嘉颖"} -{"key": "BAC009S0903W0419", "wav": "./aishell/wav/test/S0903/BAC009S0903W0419.wav", "txt": "因为新剧的厂景和外景推迟了"} -{"key": "BAC009S0903W0420", "wav": "./aishell/wav/test/S0903/BAC009S0903W0420.wav", "txt": "才有时间去探班"} -{"key": "BAC009S0903W0421", "wav": "./aishell/wav/test/S0903/BAC009S0903W0421.wav", "txt": "在当地逗留了三四天"} -{"key": "BAC009S0903W0422", "wav": "./aishell/wav/test/S0903/BAC009S0903W0422.wav", "txt": "自己也有带剧本去看"} -{"key": "BAC009S0903W0423", "wav": "./aishell/wav/test/S0903/BAC009S0903W0423.wav", "txt": "搜狐娱乐讯北京时间十月二十六日消息"} -{"key": "BAC009S0903W0424", "wav": "./aishell/wav/test/S0903/BAC009S0903W0424.wav", "txt": "据香港媒体报导"} -{"key": "BAC009S0903W0425", "wav": "./aishell/wav/test/S0903/BAC009S0903W0425.wav", "txt": "昨晚张保仔播映大结局故演员齐集饭局以及庆祝"} -{"key": "BAC009S0903W0426", "wav": 
"./aishell/wav/test/S0903/BAC009S0903W0426.wav", "txt": "陈展鹏风骚到场"} -{"key": "BAC009S0903W0427", "wav": "./aishell/wav/test/S0903/BAC009S0903W0427.wav", "txt": "他要赶进厂开工"} -{"key": "BAC009S0903W0428", "wav": "./aishell/wav/test/S0903/BAC009S0903W0428.wav", "txt": "因此开香槟后要先离场"} -{"key": "BAC009S0903W0429", "wav": "./aishell/wav/test/S0903/BAC009S0903W0429.wav", "txt": "一直传他跟洪永城不和"} -{"key": "BAC009S0903W0430", "wav": "./aishell/wav/test/S0903/BAC009S0903W0430.wav", "txt": "两人在台下分枱坐欠交流"} -{"key": "BAC009S0903W0431", "wav": "./aishell/wav/test/S0903/BAC009S0903W0431.wav", "txt": "公安局的决定书说不对他做出行政处罚"} -{"key": "BAC009S0903W0432", "wav": "./aishell/wav/test/S0903/BAC009S0903W0432.wav", "txt": "我们才按照正常程序给他转为副院长一职的"} -{"key": "BAC009S0903W0433", "wav": "./aishell/wav/test/S0903/BAC009S0903W0433.wav", "txt": "温岭鞋厂倒塌事故已一四人遇难鞋厂老板被控制"} -{"key": "BAC009S0903W0434", "wav": "./aishell/wav/test/S0903/BAC009S0903W0434.wav", "txt": "新京报快讯记者杨锋七月四日下午四时零八分"} -{"key": "BAC009S0903W0435", "wav": "./aishell/wav/test/S0903/BAC009S0903W0435.wav", "txt": "浙江台州温岭市一一零指挥中心接警称"} -{"key": "BAC009S0903W0436", "wav": "./aishell/wav/test/S0903/BAC009S0903W0436.wav", "txt": "新京报记者从温岭市政府新闻办获悉"} -{"key": "BAC009S0903W0437", "wav": "./aishell/wav/test/S0903/BAC009S0903W0437.wav", "txt": "早前通报的五名失联人员已全部找到"} -{"key": "BAC009S0903W0438", "wav": "./aishell/wav/test/S0903/BAC009S0903W0438.wav", "txt": "死亡人数上升至一四人"} -{"key": "BAC009S0903W0439", "wav": "./aishell/wav/test/S0903/BAC009S0903W0439.wav", "txt": "涉事企业老板已被警方控制"} -{"key": "BAC009S0903W0440", "wav": "./aishell/wav/test/S0903/BAC009S0903W0440.wav", "txt": "温州二零位面包师制出二五米蛋糕或申报吉尼斯纪录"} -{"key": "BAC009S0903W0441", "wav": "./aishell/wav/test/S0903/BAC009S0903W0441.wav", "txt": "前往温州龙湾万达广场游玩的市民"} -{"key": "BAC009S0903W0442", "wav": "./aishell/wav/test/S0903/BAC009S0903W0442.wav", "txt": "无不发出这样的惊叹"} -{"key": "BAC009S0903W0443", "wav": "./aishell/wav/test/S0903/BAC009S0903W0443.wav", "txt": "一糕点店派出二零位面包师傅"} -{"key": "BAC009S0903W0444", "wav": "./aishell/wav/test/S0903/BAC009S0903W0444.wav", "txt": "耗时一四个小时打造的二米五超长蛋糕"} -{"key": "BAC009S0903W0445", "wav": "./aishell/wav/test/S0903/BAC009S0903W0445.wav", "txt": "吸引众多市民驻足观看"} -{"key": "BAC009S0903W0446", "wav": "./aishell/wav/test/S0903/BAC009S0903W0446.wav", "txt": "温州二名已婚男为争美女驾豪车互撞四个回合"} -{"key": "BAC009S0903W0447", "wav": "./aishell/wav/test/S0903/BAC009S0903W0447.wav", "txt": "车子被撞得破烂不堪七月四日凌晨"} -{"key": "BAC009S0903W0448", "wav": "./aishell/wav/test/S0903/BAC009S0903W0448.wav", "txt": "宝马奔驰连续四次相撞"} -{"key": "BAC009S0903W0449", "wav": "./aishell/wav/test/S0903/BAC009S0903W0449.wav", "txt": "两车驾驶员一度下车大打出手"} -{"key": "BAC009S0903W0450", "wav": "./aishell/wav/test/S0903/BAC009S0903W0450.wav", "txt": "起因是为了一名年轻的刘姓美女"} -{"key": "BAC009S0903W0451", "wav": "./aishell/wav/test/S0903/BAC009S0903W0451.wav", "txt": "经保险公司初步估算"} -{"key": "BAC009S0903W0452", "wav": "./aishell/wav/test/S0903/BAC009S0903W0452.wav", "txt": "两车损失高达三四十万元"} -{"key": "BAC009S0903W0453", "wav": "./aishell/wav/test/S0903/BAC009S0903W0453.wav", "txt": "温州七人涉嫌百倍抬杠非法经营期货三二亿被批货"} -{"key": "BAC009S0903W0454", "wav": "./aishell/wav/test/S0903/BAC009S0903W0454.wav", "txt": "浙江温州一公司安装虚拟交易系统"} -{"key": "BAC009S0903W0455", "wav": "./aishell/wav/test/S0903/BAC009S0903W0455.wav", "txt": "以一一零倍的杠杆吸引社会公众投资"} -{"key": "BAC009S0903W0456", "wav": "./aishell/wav/test/S0903/BAC009S0903W0456.wav", "txt": "非法经营期货金额共计人民币三二亿元"} -{"key": "BAC009S0903W0457", "wav": "./aishell/wav/test/S0903/BAC009S0903W0457.wav", "txt": "七名犯罪嫌疑人因涉嫌非法经营罪被批准逮捕"} -{"key": "BAC009S0903W0458", "wav": 
"./aishell/wav/test/S0903/BAC009S0903W0458.wav", "txt": "温州城管掌掴女清洁工已被停职检查"} -{"key": "BAC009S0903W0459", "wav": "./aishell/wav/test/S0903/BAC009S0903W0459.wav", "txt": "温州天价馒头续店方称顾客要狭索赔三条中华烟"} -{"key": "BAC009S0903W0460", "wav": "./aishell/wav/test/S0903/BAC009S0903W0460.wav", "txt": "荞麦窝窝头一零月二零日"} -{"key": "BAC009S0903W0461", "wav": "./aishell/wav/test/S0903/BAC009S0903W0461.wav", "txt": "网络上一张永嘉桥头国际饭店的结帐单十分引人注目"} -{"key": "BAC009S0903W0462", "wav": "./aishell/wav/test/S0903/BAC009S0903W0462.wav", "txt": "菜单显示该饭店的荞麦窝窝头卖三八元一个"} -{"key": "BAC009S0903W0463", "wav": "./aishell/wav/test/S0903/BAC009S0903W0463.wav", "txt": "三零馀位食客吃了四五个窝窝头"} -{"key": "BAC009S0903W0464", "wav": "./aishell/wav/test/S0903/BAC009S0903W0464.wav", "txt": "发现事情并没有这么简单"} -{"key": "BAC009S0903W0465", "wav": "./aishell/wav/test/S0903/BAC009S0903W0465.wav", "txt": "温州火锅先生后续涉案者父亲写公开道歉信"} -{"key": "BAC009S0903W0466", "wav": "./aishell/wav/test/S0903/BAC009S0903W0466.wav", "txt": "温州网八月二十七日讯记者项锐见习记者黄梦思"} -{"key": "BAC009S0903W0467", "wav": "./aishell/wav/test/S0903/BAC009S0903W0467.wav", "txt": "温州一七月大女童接种疫苗抽搐省疾控专家调查"} -{"key": "BAC009S0903W0468", "wav": "./aishell/wav/test/S0903/BAC009S0903W0468.wav", "txt": "金报讯记者蓝莹九月一一日上午"} -{"key": "BAC009S0903W0469", "wav": "./aishell/wav/test/S0903/BAC009S0903W0469.wav", "txt": "随即被送到儿童医院进行救治"} -{"key": "BAC009S0903W0470", "wav": "./aishell/wav/test/S0903/BAC009S0903W0470.wav", "txt": "经过连续三天的抢救"} -{"key": "BAC009S0903W0471", "wav": "./aishell/wav/test/S0903/BAC009S0903W0471.wav", "txt": "孩子仍处于昏迷阶段"} -{"key": "BAC009S0903W0472", "wav": "./aishell/wav/test/S0903/BAC009S0903W0472.wav", "txt": "省市区三三级疾控部门专家已介入调查"} -{"key": "BAC009S0903W0473", "wav": "./aishell/wav/test/S0903/BAC009S0903W0473.wav", "txt": "温州一中学门口氢气罐爆炸卖气球摊贩不治身亡"} -{"key": "BAC009S0903W0474", "wav": "./aishell/wav/test/S0903/BAC009S0903W0474.wav", "txt": "温州一住持被免政协委员遭准儿媳举报娶妻开路虎"} -{"key": "BAC009S0903W0475", "wav": "./aishell/wav/test/S0903/BAC009S0903W0475.wav", "txt": "关于中国嵩山少林寺方丈齐永信的举报风波尚未停歇"} -{"key": "BAC009S0903W0476", "wav": "./aishell/wav/test/S0903/BAC009S0903W0476.wav", "txt": "因准儿媳的举报跌下神坛"} -{"key": "BAC009S0903W0477", "wav": "./aishell/wav/test/S0903/BAC009S0903W0477.wav", "txt": "位于温州苍南龙港镇水门村的一个仓库发生火灾"} -{"key": "BAC009S0903W0478", "wav": "./aishell/wav/test/S0903/BAC009S0903W0478.wav", "txt": "记者从消防部门处了解到"} -{"key": "BAC009S0903W0479", "wav": "./aishell/wav/test/S0903/BAC009S0903W0479.wav", "txt": "这里存放着乙酯和工业酒精等化工品"} -{"key": "BAC009S0903W0480", "wav": "./aishell/wav/test/S0903/BAC009S0903W0480.wav", "txt": "温州一夫妻非法集资五亿丈夫将赃款送给情妇洗钱"} -{"key": "BAC009S0903W0481", "wav": "./aishell/wav/test/S0903/BAC009S0903W0481.wav", "txt": "温州一女士洗澡被已婚男多次偷窥二年后才报警"} -{"key": "BAC009S0903W0482", "wav": "./aishell/wav/test/S0903/BAC009S0903W0482.wav", "txt": "温州网讯洗澡被偷窥却一忍再忍"} -{"key": "BAC009S0903W0483", "wav": "./aishell/wav/test/S0903/BAC009S0903W0483.wav", "txt": "但愿望总是照不进现实"} -{"key": "BAC009S0903W0484", "wav": "./aishell/wav/test/S0903/BAC009S0903W0484.wav", "txt": "称一名男子时常偷窥自己洗澡"} -{"key": "BAC009S0903W0485", "wav": "./aishell/wav/test/S0903/BAC009S0903W0485.wav", "txt": "且时间已长达两年多了"} -{"key": "BAC009S0903W0486", "wav": "./aishell/wav/test/S0903/BAC009S0903W0486.wav", "txt": "温州一家六口吃毒蘑菇身亡女婴拒吃面躲过死神"} -{"key": "BAC009S0903W0487", "wav": "./aishell/wav/test/S0903/BAC009S0903W0487.wav", "txt": "温州一家六口误食毒菌五人已死亡"} -{"key": "BAC009S0903W0488", "wav": "./aishell/wav/test/S0903/BAC009S0903W0488.wav", "txt": "温州永嘉县桥下镇吴山村的潘老伯一家六口"} -{"key": "BAC009S0903W0489", "wav": "./aishell/wav/test/S0903/BAC009S0903W0489.wav", "txt": 
"半个月前因误食有毒野生菌中毒"} -{"key": "BAC009S0903W0490", "wav": "./aishell/wav/test/S0903/BAC009S0903W0490.wav", "txt": "至七月一六日中午已有五人去世"} -{"key": "BAC009S0903W0491", "wav": "./aishell/wav/test/S0903/BAC009S0903W0491.wav", "txt": "潘老伯二六岁的外孙还在医院重症监护室治疗"} -{"key": "BAC009S0903W0492", "wav": "./aishell/wav/test/S0903/BAC009S0903W0492.wav", "txt": "仍处于深度昏迷状态"} -{"key": "BAC009S0903W0493", "wav": "./aishell/wav/test/S0903/BAC009S0903W0493.wav", "txt": "温州一村主任台风夜转移群众其妻子遇难"} -{"key": "BAC009S0903W0494", "wav": "./aishell/wav/test/S0903/BAC009S0903W0494.wav", "txt": "石柱村泥石流塌方现场"} -{"key": "BAC009S0903W0495", "wav": "./aishell/wav/test/S0903/BAC009S0903W0495.wav", "txt": "温州一男子在医院放置假炸弹被判处有期徒刑一年"} -{"key": "BAC009S0904W0121", "wav": "./aishell/wav/test/S0904/BAC009S0904W0121.wav", "txt": "为了解决额度荒的问题"} -{"key": "BAC009S0904W0122", "wav": "./aishell/wav/test/S0904/BAC009S0904W0122.wav", "txt": "近期广州和南京已经开始启动公转商贷款贴息模式"} -{"key": "BAC009S0904W0123", "wav": "./aishell/wav/test/S0904/BAC009S0904W0123.wav", "txt": "即由商业银行向市民发放执行公积金利率的贷款"} -{"key": "BAC009S0904W0124", "wav": "./aishell/wav/test/S0904/BAC009S0904W0124.wav", "txt": "公积金贷款与商业贷款之间的利息差额"} -{"key": "BAC009S0904W0125", "wav": "./aishell/wav/test/S0904/BAC009S0904W0125.wav", "txt": "由公积金中心向银行支付"} -{"key": "BAC009S0904W0126", "wav": "./aishell/wav/test/S0904/BAC009S0904W0126.wav", "txt": "重点城市公积金贷额款度也有限制"} -{"key": "BAC009S0904W0127", "wav": "./aishell/wav/test/S0904/BAC009S0904W0127.wav", "txt": "比如北京和上海家庭最高贷款额度均为一百万元"} -{"key": "BAC009S0904W0128", "wav": "./aishell/wav/test/S0904/BAC009S0904W0128.wav", "txt": "广州和深圳分别为五万元和七万元"} -{"key": "BAC009S0904W0129", "wav": "./aishell/wav/test/S0904/BAC009S0904W0129.wav", "txt": "在近期房价持续回升的背景下"} -{"key": "BAC009S0904W0130", "wav": "./aishell/wav/test/S0904/BAC009S0904W0130.wav", "txt": "多数二线城市和三四线城市"} -{"key": "BAC009S0904W0131", "wav": "./aishell/wav/test/S0904/BAC009S0904W0131.wav", "txt": "公积金贷款上限能够复盖单套房总价"} -{"key": "BAC009S0904W0132", "wav": "./aishell/wav/test/S0904/BAC009S0904W0132.wav", "txt": "这些城市公积金贷款买房的比例也比较高"} -{"key": "BAC009S0904W0133", "wav": "./aishell/wav/test/S0904/BAC009S0904W0133.wav", "txt": "此次政策调整也有较好的针对性"} -{"key": "BAC009S0904W0134", "wav": "./aishell/wav/test/S0904/BAC009S0904W0134.wav", "txt": "包括此次三部委发布公积金新政"} -{"key": "BAC009S0904W0135", "wav": "./aishell/wav/test/S0904/BAC009S0904W0135.wav", "txt": "再结合近期限外政策全面松绑"} -{"key": "BAC009S0904W0136", "wav": "./aishell/wav/test/S0904/BAC009S0904W0136.wav", "txt": "具有实时性合理性和较强的针对性"} -{"key": "BAC009S0904W0137", "wav": "./aishell/wav/test/S0904/BAC009S0904W0137.wav", "txt": "有助于发挥内需在稳增长中的积极作用"} -{"key": "BAC009S0904W0138", "wav": "./aishell/wav/test/S0904/BAC009S0904W0138.wav", "txt": "住建部等三部委联合发文"} -{"key": "BAC009S0904W0139", "wav": "./aishell/wav/test/S0904/BAC009S0904W0139.wav", "txt": "再次降低公积金贷款的门槛"} -{"key": "BAC009S0904W0140", "wav": "./aishell/wav/test/S0904/BAC009S0904W0140.wav", "txt": "还清首套房公积金贷款"} -{"key": "BAC009S0904W0141", "wav": "./aishell/wav/test/S0904/BAC009S0904W0141.wav", "txt": "在公积金贷款额度上调后一个月内"} -{"key": "BAC009S0904W0142", "wav": "./aishell/wav/test/S0904/BAC009S0904W0142.wav", "txt": "北京公积金贷款成交量上涨百分之五"} -{"key": "BAC009S0904W0143", "wav": "./aishell/wav/test/S0904/BAC009S0904W0143.wav", "txt": "中小户型住房去化速度明显加快"} -{"key": "BAC009S0904W0144", "wav": "./aishell/wav/test/S0904/BAC009S0904W0144.wav", "txt": "北京公积金贷款额度提高"} -{"key": "BAC009S0904W0145", "wav": "./aishell/wav/test/S0904/BAC009S0904W0145.wav", "txt": "虽有助于使刚需购房者长期受益"} -{"key": "BAC009S0904W0146", "wav": "./aishell/wav/test/S0904/BAC009S0904W0146.wav", "txt": 
"但仍存在七万最高贷款额申请难等落地问题"} -{"key": "BAC009S0904W0147", "wav": "./aishell/wav/test/S0904/BAC009S0904W0147.wav", "txt": "公积金政策放宽对楼市成交的短期刺激作用将难以持续"} -{"key": "BAC009S0904W0148", "wav": "./aishell/wav/test/S0904/BAC009S0904W0148.wav", "txt": "各地公积金政策步入频繁调整期"} -{"key": "BAC009S0904W0149", "wav": "./aishell/wav/test/S0904/BAC009S0904W0149.wav", "txt": "一向严格收紧购房政策的北京也加入此阵营"} -{"key": "BAC009S0904W0150", "wav": "./aishell/wav/test/S0904/BAC009S0904W0150.wav", "txt": "据中新网房产频道梳理"} -{"key": "BAC009S0904W0151", "wav": "./aishell/wav/test/S0904/BAC009S0904W0151.wav", "txt": "北京针对公积金的调整次数便达到五次"} -{"key": "BAC009S0904W0152", "wav": "./aishell/wav/test/S0904/BAC009S0904W0152.wav", "txt": "美丽北京大型绿色公益品牌项目"} -{"key": "BAC009S0904W0153", "wav": "./aishell/wav/test/S0904/BAC009S0904W0153.wav", "txt": "调整公积金年度缴存上下限和缴存比例"} -{"key": "BAC009S0904W0154", "wav": "./aishell/wav/test/S0904/BAC009S0904W0154.wav", "txt": "放宽公积金贷款二套房认定标准"} -{"key": "BAC009S0904W0155", "wav": "./aishell/wav/test/S0904/BAC009S0904W0155.wav", "txt": "将公积金贷款利率下调"} -{"key": "BAC009S0904W0156", "wav": "./aishell/wav/test/S0904/BAC009S0904W0156.wav", "txt": "公积金政策的调整从未这么频繁"} -{"key": "BAC009S0904W0157", "wav": "./aishell/wav/test/S0904/BAC009S0904W0157.wav", "txt": "从一系列公积金政策看来"} -{"key": "BAC009S0904W0158", "wav": "./aishell/wav/test/S0904/BAC009S0904W0158.wav", "txt": "扶持刚需客群已经成为北京房地产调控的主要方向"} -{"key": "BAC009S0904W0159", "wav": "./aishell/wav/test/S0904/BAC009S0904W0159.wav", "txt": "未来政策层面将继续保持宽松"} -{"key": "BAC009S0904W0160", "wav": "./aishell/wav/test/S0904/BAC009S0904W0160.wav", "txt": "在上海易居房地产研究院研究员严跃进看来"} -{"key": "BAC009S0904W0161", "wav": "./aishell/wav/test/S0904/BAC009S0904W0161.wav", "txt": "这一系列公积金政策的放宽"} -{"key": "BAC009S0904W0162", "wav": "./aishell/wav/test/S0904/BAC009S0904W0162.wav", "txt": "与目前房地产救市的市场导向相吻合"} -{"key": "BAC009S0904W0163", "wav": "./aishell/wav/test/S0904/BAC009S0904W0163.wav", "txt": "盘活各地公积金资源"} -{"key": "BAC009S0904W0164", "wav": "./aishell/wav/test/S0904/BAC009S0904W0164.wav", "txt": "年初选择使用公积金贷款的购房者占比环比增多"} -{"key": "BAC009S0904W0165", "wav": "./aishell/wav/test/S0904/BAC009S0904W0165.wav", "txt": "伟嘉安捷提供数据显示"} -{"key": "BAC009S0904W0166", "wav": "./aishell/wav/test/S0904/BAC009S0904W0166.wav", "txt": "七月北京公积金贷款成交量提升了百分之五"} -{"key": "BAC009S0904W0167", "wav": "./aishell/wav/test/S0904/BAC009S0904W0167.wav", "txt": "贷款需求将在下个月继续释放"} -{"key": "BAC009S0904W0168", "wav": "./aishell/wav/test/S0904/BAC009S0904W0168.wav", "txt": "北京七月楼市的成交情况"} -{"key": "BAC009S0904W0169", "wav": "./aishell/wav/test/S0904/BAC009S0904W0169.wav", "txt": "也佐证了公积金政策放宽刺激作用的显现"} -{"key": "BAC009S0904W0170", "wav": "./aishell/wav/test/S0904/BAC009S0904W0170.wav", "txt": "在总成交中占比环比增加五个百分点"} -{"key": "BAC009S0904W0171", "wav": "./aishell/wav/test/S0904/BAC009S0904W0171.wav", "txt": "且低于七十平米的小户型住房成交明显上升"} -{"key": "BAC009S0904W0172", "wav": "./aishell/wav/test/S0904/BAC009S0904W0172.wav", "txt": "北京调整首套房公积金贷款最高额度"} -{"key": "BAC009S0904W0173", "wav": "./aishell/wav/test/S0904/BAC009S0904W0173.wav", "txt": "伟业我爱我家集团副总裁胡景晖分析"} -{"key": "BAC009S0904W0174", "wav": "./aishell/wav/test/S0904/BAC009S0904W0174.wav", "txt": "刚需人群观望心理正逐步消散"} -{"key": "BAC009S0904W0175", "wav": "./aishell/wav/test/S0904/BAC009S0904W0175.wav", "txt": "开始加速进入新房市场"} -{"key": "BAC009S0904W0176", "wav": "./aishell/wav/test/S0904/BAC009S0904W0176.wav", "txt": "公积金政策的放宽对二手房市场也产生了影响"} -{"key": "BAC009S0904W0177", "wav": "./aishell/wav/test/S0904/BAC009S0904W0177.wav", "txt": "据伟业我爱我家市场研究院测算"} -{"key": "BAC009S0904W0178", "wav": "./aishell/wav/test/S0904/BAC009S0904W0178.wav", 
"txt": "在过去六个月中处于高点"} -{"key": "BAC009S0904W0179", "wav": "./aishell/wav/test/S0904/BAC009S0904W0179.wav", "txt": "虽然刚需购房者入市积极性有所提升"} -{"key": "BAC009S0904W0180", "wav": "./aishell/wav/test/S0904/BAC009S0904W0180.wav", "txt": "但不少业内人士认为"} -{"key": "BAC009S0904W0181", "wav": "./aishell/wav/test/S0904/BAC009S0904W0181.wav", "txt": "这一刺激作用并不会长时间延续"} -{"key": "BAC009S0904W0182", "wav": "./aishell/wav/test/S0904/BAC009S0904W0182.wav", "txt": "上调公积金贷款额度对市场的刺激是短期的"} -{"key": "BAC009S0904W0183", "wav": "./aishell/wav/test/S0904/BAC009S0904W0183.wav", "txt": "公积金短期拉动的购房需求有限"} -{"key": "BAC009S0904W0184", "wav": "./aishell/wav/test/S0904/BAC009S0904W0184.wav", "txt": "更多是原本计划购房的客群享受到了政策利好"} -{"key": "BAC009S0904W0185", "wav": "./aishell/wav/test/S0904/BAC009S0904W0185.wav", "txt": "原本短期内不考虑购房的客群"} -{"key": "BAC009S0904W0186", "wav": "./aishell/wav/test/S0904/BAC009S0904W0186.wav", "txt": "在这一政策出台后匆忙购房"} -{"key": "BAC009S0904W0187", "wav": "./aishell/wav/test/S0904/BAC009S0904W0187.wav", "txt": "加强农产品质量安全监管"} -{"key": "BAC009S0904W0188", "wav": "./aishell/wav/test/S0904/BAC009S0904W0188.wav", "txt": "建立协调配合检打联动联防联控应急处置机制"} -{"key": "BAC009S0904W0189", "wav": "./aishell/wav/test/S0904/BAC009S0904W0189.wav", "txt": "实行农产品产地安全分级管理"} -{"key": "BAC009S0904W0190", "wav": "./aishell/wav/test/S0904/BAC009S0904W0190.wav", "txt": "推动农产品生产加工和流通企业建立诚信制度"} -{"key": "BAC009S0904W0191", "wav": "./aishell/wav/test/S0904/BAC009S0904W0191.wav", "txt": "提高农业产业化和规模化经营水平"} -{"key": "BAC009S0904W0192", "wav": "./aishell/wav/test/S0904/BAC009S0904W0192.wav", "txt": "推进农业产业化经营跨越式发展"} -{"key": "BAC009S0904W0193", "wav": "./aishell/wav/test/S0904/BAC009S0904W0193.wav", "txt": "制定扶持农业产业化龙头企业发展的综合性政策"} -{"key": "BAC009S0904W0194", "wav": "./aishell/wav/test/S0904/BAC009S0904W0194.wav", "txt": "启动实施农业产业化经营跨越发展行动"} -{"key": "BAC009S0904W0195", "wav": "./aishell/wav/test/S0904/BAC009S0904W0195.wav", "txt": "按照扶优扶大扶强的原则"} -{"key": "BAC009S0904W0196", "wav": "./aishell/wav/test/S0904/BAC009S0904W0196.wav", "txt": "依托农产品加工物流等各类农业园区"} -{"key": "BAC009S0904W0197", "wav": "./aishell/wav/test/S0904/BAC009S0904W0197.wav", "txt": "选建一批农业产业化示范基地"} -{"key": "BAC009S0904W0198", "wav": "./aishell/wav/test/S0904/BAC009S0904W0198.wav", "txt": "推进龙头企业集群发展"} -{"key": "BAC009S0904W0199", "wav": "./aishell/wav/test/S0904/BAC009S0904W0199.wav", "txt": "引导龙头企业采取兼并重组参股收购等方式"} -{"key": "BAC009S0904W0200", "wav": "./aishell/wav/test/S0904/BAC009S0904W0200.wav", "txt": "支持龙头企业跨区域经营"} -{"key": "BAC009S0904W0201", "wav": "./aishell/wav/test/S0904/BAC009S0904W0201.wav", "txt": "提升产品研发精深加工技术水平和装备能力"} -{"key": "BAC009S0904W0202", "wav": "./aishell/wav/test/S0904/BAC009S0904W0202.wav", "txt": "鼓励龙头企业采取参股合作等方式"} -{"key": "BAC009S0904W0203", "wav": "./aishell/wav/test/S0904/BAC009S0904W0203.wav", "txt": "与农户建立紧密型利益联联结关系"} -{"key": "BAC009S0904W0204", "wav": "./aishell/wav/test/S0904/BAC009S0904W0204.wav", "txt": "强化农民专业合作社组织带动能力"} -{"key": "BAC009S0904W0205", "wav": "./aishell/wav/test/S0904/BAC009S0904W0205.wav", "txt": "广泛开展示范社建设行动"} -{"key": "BAC009S0904W0206", "wav": "./aishell/wav/test/S0904/BAC009S0904W0206.wav", "txt": "加大合作社经营管理人员培训培养力度"} -{"key": "BAC009S0904W0207", "wav": "./aishell/wav/test/S0904/BAC009S0904W0207.wav", "txt": "加强合作社辅导员队伍建设"} -{"key": "BAC009S0904W0208", "wav": "./aishell/wav/test/S0904/BAC009S0904W0208.wav", "txt": "支持农民专业合作社参加农产品展示展销活动"} -{"key": "BAC009S0904W0209", "wav": "./aishell/wav/test/S0904/BAC009S0904W0209.wav", "txt": "建立稳定的产销关系"} -{"key": "BAC009S0904W0210", "wav": "./aishell/wav/test/S0904/BAC009S0904W0210.wav", "txt": 
"鼓励农民专业合作社开展信用合作"} -{"key": "BAC009S0904W0211", "wav": "./aishell/wav/test/S0904/BAC009S0904W0211.wav", "txt": "在自愿基础上组建联合社"} -{"key": "BAC009S0904W0212", "wav": "./aishell/wav/test/S0904/BAC009S0904W0212.wav", "txt": "提高生产经营和市场开拓能力"} -{"key": "BAC009S0904W0213", "wav": "./aishell/wav/test/S0904/BAC009S0904W0213.wav", "txt": "扶持合作社建设农产品仓储冷藏初加工等设施"} -{"key": "BAC009S0904W0214", "wav": "./aishell/wav/test/S0904/BAC009S0904W0214.wav", "txt": "发展多种形式的适度规模经营"} -{"key": "BAC009S0904W0215", "wav": "./aishell/wav/test/S0904/BAC009S0904W0215.wav", "txt": "在依法自愿有偿和加强服务基础上"} -{"key": "BAC009S0904W0216", "wav": "./aishell/wav/test/S0904/BAC009S0904W0216.wav", "txt": "完善土地承包经营权流转市场"} -{"key": "BAC009S0904W0217", "wav": "./aishell/wav/test/S0904/BAC009S0904W0217.wav", "txt": "发展多种形式的规模化专业化生产经营"} -{"key": "BAC009S0904W0218", "wav": "./aishell/wav/test/S0904/BAC009S0904W0218.wav", "txt": "引导土地承包经营权向生产和经营能手集中"} -{"key": "BAC009S0904W0219", "wav": "./aishell/wav/test/S0904/BAC009S0904W0219.wav", "txt": "大力培育和发展种养大户家庭农牧场"} -{"key": "BAC009S0904W0220", "wav": "./aishell/wav/test/S0904/BAC009S0904W0220.wav", "txt": "实施一村一品强村富民工程"} -{"key": "BAC009S0904W0221", "wav": "./aishell/wav/test/S0904/BAC009S0904W0221.wav", "txt": "大力发展农业社会化服务"} -{"key": "BAC009S0904W0222", "wav": "./aishell/wav/test/S0904/BAC009S0904W0222.wav", "txt": "增强农业公益性服务能力"} -{"key": "BAC009S0904W0223", "wav": "./aishell/wav/test/S0904/BAC009S0904W0223.wav", "txt": "加快基层农技推广体系改革和建施"} -{"key": "BAC009S0904W0224", "wav": "./aishell/wav/test/S0904/BAC009S0904W0224.wav", "txt": "健全公益性农业技术推广服务体系"} -{"key": "BAC009S0904W0225", "wav": "./aishell/wav/test/S0904/BAC009S0904W0225.wav", "txt": "加强农业有害生物监测预警和防控能力建设"} -{"key": "BAC009S0904W0226", "wav": "./aishell/wav/test/S0904/BAC009S0904W0226.wav", "txt": "加强农业资源和生态环境保护"} -{"key": "BAC009S0904W0227", "wav": "./aishell/wav/test/S0904/BAC009S0904W0227.wav", "txt": "继续实行最严格的耕地保护制度"} -{"key": "BAC009S0904W0228", "wav": "./aishell/wav/test/S0904/BAC009S0904W0228.wav", "txt": "确保耕地保有量保持在十亿亩"} -{"key": "BAC009S0904W0229", "wav": "./aishell/wav/test/S0904/BAC009S0904W0229.wav", "txt": "基本农田不低于十亿亩"} -{"key": "BAC009S0904W0230", "wav": "./aishell/wav/test/S0904/BAC009S0904W0230.wav", "txt": "科学保护和合理利用水资源"} -{"key": "BAC009S0904W0231", "wav": "./aishell/wav/test/S0904/BAC009S0904W0231.wav", "txt": "大力发展节水增效农业"} -{"key": "BAC009S0904W0232", "wav": "./aishell/wav/test/S0904/BAC009S0904W0232.wav", "txt": "继续建设国家级旱作农业示范区"} -{"key": "BAC009S0904W0233", "wav": "./aishell/wav/test/S0904/BAC009S0904W0233.wav", "txt": "坚持基本草原保护制度"} -{"key": "BAC009S0904W0234", "wav": "./aishell/wav/test/S0904/BAC009S0904W0234.wav", "txt": "推行禁牧休牧和划区轮牧"} -{"key": "BAC009S0904W0235", "wav": "./aishell/wav/test/S0904/BAC009S0904W0235.wav", "txt": "实施草原保护重大工程"} -{"key": "BAC009S0904W0236", "wav": "./aishell/wav/test/S0904/BAC009S0904W0236.wav", "txt": "加大水生生物资源养护力度"} -{"key": "BAC009S0904W0237", "wav": "./aishell/wav/test/S0904/BAC009S0904W0237.wav", "txt": "强化水生生态修复和建设"} -{"key": "BAC009S0904W0238", "wav": "./aishell/wav/test/S0904/BAC009S0904W0238.wav", "txt": "加强畜禽遗传资源和农业野生植物资源保护"} -{"key": "BAC009S0904W0239", "wav": "./aishell/wav/test/S0904/BAC009S0904W0239.wav", "txt": "加强农业生态环境治理"} -{"key": "BAC009S0904W0240", "wav": "./aishell/wav/test/S0904/BAC009S0904W0240.wav", "txt": "鼓励使用生物农药高效低毒低残留农药和有机肥料"} -{"key": "BAC009S0904W0241", "wav": "./aishell/wav/test/S0904/BAC009S0904W0241.wav", "txt": "回收再利用农膜和农药包装物"} -{"key": "BAC009S0904W0242", "wav": "./aishell/wav/test/S0904/BAC009S0904W0242.wav", "txt": "加快规模养殖场粪污处理利用"} -{"key": 
"BAC009S0904W0243", "wav": "./aishell/wav/test/S0904/BAC009S0904W0243.wav", "txt": "治理和控制农业面源污染"} -{"key": "BAC009S0904W0244", "wav": "./aishell/wav/test/S0904/BAC009S0904W0244.wav", "txt": "培育门类丰富层次齐用的综合利用产业"} -{"key": "BAC009S0904W0245", "wav": "./aishell/wav/test/S0904/BAC009S0904W0245.wav", "txt": "建立秸秆禁烧和综合利用的长效机制"} -{"key": "BAC009S0904W0246", "wav": "./aishell/wav/test/S0904/BAC009S0904W0246.wav", "txt": "继续实施农村沼气工程"} -{"key": "BAC009S0904W0247", "wav": "./aishell/wav/test/S0904/BAC009S0904W0247.wav", "txt": "大力推进农村清洁工程建设"} -{"key": "BAC009S0904W0248", "wav": "./aishell/wav/test/S0904/BAC009S0904W0248.wav", "txt": "清洁水源田园和家园"} -{"key": "BAC009S0904W0249", "wav": "./aishell/wav/test/S0904/BAC009S0904W0249.wav", "txt": "大力推进农业节能减排"} -{"key": "BAC009S0904W0250", "wav": "./aishell/wav/test/S0904/BAC009S0904W0250.wav", "txt": "树立绿色低碳发展理念"} -{"key": "BAC009S0904W0251", "wav": "./aishell/wav/test/S0904/BAC009S0904W0251.wav", "txt": "积极发展资源节约型和环境友好型农业"} -{"key": "BAC009S0904W0252", "wav": "./aishell/wav/test/S0904/BAC009S0904W0252.wav", "txt": "淘汰报废高耗能老旧农业机械"} -{"key": "BAC009S0904W0253", "wav": "./aishell/wav/test/S0904/BAC009S0904W0253.wav", "txt": "应该也是个提醒中国再也不能仅仅满足于组装了"} -{"key": "BAC009S0904W0254", "wav": "./aishell/wav/test/S0904/BAC009S0904W0254.wav", "txt": "我们在科技创新方面的进步非常显着"} -{"key": "BAC009S0904W0255", "wav": "./aishell/wav/test/S0904/BAC009S0904W0255.wav", "txt": "一项项领先世界的科技成果"} -{"key": "BAC009S0904W0256", "wav": "./aishell/wav/test/S0904/BAC009S0904W0256.wav", "txt": "不断刷新中国创造的精度高度深度"} -{"key": "BAC009S0904W0257", "wav": "./aishell/wav/test/S0904/BAC009S0904W0257.wav", "txt": "成为一个个响亮的中国品牌"} -{"key": "BAC009S0904W0258", "wav": "./aishell/wav/test/S0904/BAC009S0904W0258.wav", "txt": "我们的自主创新能力还不够强"} -{"key": "BAC009S0904W0259", "wav": "./aishell/wav/test/S0904/BAC009S0904W0259.wav", "txt": "与世界先进水平相比还有明显差距"} -{"key": "BAC009S0904W0260", "wav": "./aishell/wav/test/S0904/BAC009S0904W0260.wav", "txt": "特别是企业自主创新方面"} -{"key": "BAC009S0904W0261", "wav": "./aishell/wav/test/S0904/BAC009S0904W0261.wav", "txt": "具有重大影响的科技产品还不是很多"} -{"key": "BAC009S0904W0262", "wav": "./aishell/wav/test/S0904/BAC009S0904W0262.wav", "txt": "与世界第二经济大国的地位还不相称"} -{"key": "BAC009S0904W0263", "wav": "./aishell/wav/test/S0904/BAC009S0904W0263.wav", "txt": "希望中国品牌在国际市场的知名度和影响力越来越大"} -{"key": "BAC009S0904W0264", "wav": "./aishell/wav/test/S0904/BAC009S0904W0264.wav", "txt": "中国人从来不缺乏创新创造的基因"} -{"key": "BAC009S0904W0265", "wav": "./aishell/wav/test/S0904/BAC009S0904W0265.wav", "txt": "创新是中华民族最鲜明的禀赋"} -{"key": "BAC009S0904W0266", "wav": "./aishell/wav/test/S0904/BAC009S0904W0266.wav", "txt": "我们完全有理由树立创新自信"} -{"key": "BAC009S0904W0267", "wav": "./aishell/wav/test/S0904/BAC009S0904W0267.wav", "txt": "上一次工业革命我们落在了西方发达国家后面很远"} -{"key": "BAC009S0904W0268", "wav": "./aishell/wav/test/S0904/BAC009S0904W0268.wav", "txt": "面对以网络和数字技术为标志的信息技术发展"} -{"key": "BAC009S0904W0269", "wav": "./aishell/wav/test/S0904/BAC009S0904W0269.wav", "txt": "我们迎来了赶超发达国家的难得机遇"} -{"key": "BAC009S0904W0270", "wav": "./aishell/wav/test/S0904/BAC009S0904W0270.wav", "txt": "我国拥有近一四亿人口"} -{"key": "BAC009S0904W0271", "wav": "./aishell/wav/test/S0904/BAC009S0904W0271.wav", "txt": "手机网民近五点六亿"} -{"key": "BAC009S0904W0272", "wav": "./aishell/wav/test/S0904/BAC009S0904W0272.wav", "txt": "这样的规模没有任何一个国家可以比拟"} -{"key": "BAC009S0904W0273", "wav": "./aishell/wav/test/S0904/BAC009S0904W0273.wav", "txt": "他们的消费需求是拉动创新创业的巨大牵引力"} -{"key": "BAC009S0904W0274", "wav": "./aishell/wav/test/S0904/BAC009S0904W0274.wav", "txt": "规模超大的人才群体更是创新创造无与伦比的重要资源"} -{"key": 
"BAC009S0904W0275", "wav": "./aishell/wav/test/S0904/BAC009S0904W0275.wav", "txt": "我国经济发展进入新常态"} -{"key": "BAC009S0904W0276", "wav": "./aishell/wav/test/S0904/BAC009S0904W0276.wav", "txt": "双目标不仅包括保持中高速增长"} -{"key": "BAC009S0904W0277", "wav": "./aishell/wav/test/S0904/BAC009S0904W0277.wav", "txt": "还包括迈向中高端水平"} -{"key": "BAC009S0904W0278", "wav": "./aishell/wav/test/S0904/BAC009S0904W0278.wav", "txt": "我国的经济处在爬坡过坎的重要关口"} -{"key": "BAC009S0904W0279", "wav": "./aishell/wav/test/S0904/BAC009S0904W0279.wav", "txt": "我们也许不用像以前那样为了追求某个数字赶紧赶慢了"} -{"key": "BAC009S0904W0280", "wav": "./aishell/wav/test/S0904/BAC009S0904W0280.wav", "txt": "但松一口气的想法是没有出路的"} -{"key": "BAC009S0904W0281", "wav": "./aishell/wav/test/S0904/BAC009S0904W0281.wav", "txt": "恰恰更需要我们有所作为"} -{"key": "BAC009S0904W0282", "wav": "./aishell/wav/test/S0904/BAC009S0904W0282.wav", "txt": "就是在创新驱动上下功夫"} -{"key": "BAC009S0904W0283", "wav": "./aishell/wav/test/S0904/BAC009S0904W0283.wav", "txt": "在转型发展上下功夫"} -{"key": "BAC009S0904W0284", "wav": "./aishell/wav/test/S0904/BAC009S0904W0284.wav", "txt": "不断提高技术创新对经济发展的贡献率"} -{"key": "BAC009S0904W0285", "wav": "./aishell/wav/test/S0904/BAC009S0904W0285.wav", "txt": "如果说过去的这些年"} -{"key": "BAC009S0904W0286", "wav": "./aishell/wav/test/S0904/BAC009S0904W0286.wav", "txt": "我们成为世界工厂是不可逾越的发展阶段"} -{"key": "BAC009S0904W0287", "wav": "./aishell/wav/test/S0904/BAC009S0904W0287.wav", "txt": "那么未来的五年十年二十年"} -{"key": "BAC009S0904W0288", "wav": "./aishell/wav/test/S0904/BAC009S0904W0288.wav", "txt": "我们肯定不能再沾沾自喜于世界工厂"} -{"key": "BAC009S0904W0289", "wav": "./aishell/wav/test/S0904/BAC009S0904W0289.wav", "txt": "也不能一直被贴上中国组装的标签"} -{"key": "BAC009S0904W0290", "wav": "./aishell/wav/test/S0904/BAC009S0904W0290.wav", "txt": "长期处在产业链的末端"} -{"key": "BAC009S0904W0291", "wav": "./aishell/wav/test/S0904/BAC009S0904W0291.wav", "txt": "期待着越来越多中国设计的产品不断涌现并享誉国际"} -{"key": "BAC009S0904W0292", "wav": "./aishell/wav/test/S0904/BAC009S0904W0292.wav", "txt": "未必印在每个产品上"} -{"key": "BAC009S0904W0293", "wav": "./aishell/wav/test/S0904/BAC009S0904W0293.wav", "txt": "但应刻在每个中国企业家甚至每个中国人心里"} -{"key": "BAC009S0904W0295", "wav": "./aishell/wav/test/S0904/BAC009S0904W0295.wav", "txt": "中国经营网注有国外媒体报道称"} -{"key": "BAC009S0904W0296", "wav": "./aishell/wav/test/S0904/BAC009S0904W0296.wav", "txt": "苹果市场价值达到七千亿美元刚刚过去几个月"} -{"key": "BAC009S0904W0297", "wav": "./aishell/wav/test/S0904/BAC009S0904W0297.wav", "txt": "已经有股票经纪公司预测"} -{"key": "BAC009S0904W0298", "wav": "./aishell/wav/test/S0904/BAC009S0904W0298.wav", "txt": "那么苹果能突破一万亿大关吗"} -{"key": "BAC009S0904W0299", "wav": "./aishell/wav/test/S0904/BAC009S0904W0299.wav", "txt": "苹果公司上次发布全新产品是在五年以前"} -{"key": "BAC009S0904W0301", "wav": "./aishell/wav/test/S0904/BAC009S0904W0301.wav", "txt": "苹果的目标股价也开始相应地上涨"} -{"key": "BAC009S0904W0302", "wav": "./aishell/wav/test/S0904/BAC009S0904W0302.wav", "txt": "苹果的市值可能将突破万亿美元"} -{"key": "BAC009S0904W0303", "wav": "./aishell/wav/test/S0904/BAC009S0904W0303.wav", "txt": "现在市面上的电话手表功能最主要有两个通话和定位"} -{"key": "BAC009S0904W0304", "wav": "./aishell/wav/test/S0904/BAC009S0904W0304.wav", "txt": "儿童电话手表还推出了其他更多人性化的创新功能"} -{"key": "BAC009S0904W0305", "wav": "./aishell/wav/test/S0904/BAC009S0904W0305.wav", "txt": "对手表的大力普及也起到了至关重要的作用"} -{"key": "BAC009S0904W0306", "wav": "./aishell/wav/test/S0904/BAC009S0904W0306.wav", "txt": "以小天才电话手表为例"} -{"key": "BAC009S0904W0307", "wav": "./aishell/wav/test/S0904/BAC009S0904W0307.wav", "txt": "除了能和手机一样接打电话"} -{"key": "BAC009S0904W0308", "wav": "./aishell/wav/test/S0904/BAC009S0904W0308.wav", "txt": "做到全方位亲子沟通"} -{"key": 
"BAC009S0904W0310", "wav": "./aishell/wav/test/S0904/BAC009S0904W0310.wav", "txt": "击掌成为加好友等功能也一应俱全"} -{"key": "BAC009S0904W0311", "wav": "./aishell/wav/test/S0904/BAC009S0904W0311.wav", "txt": "电话手表就相当于一部简化的智能手机"} -{"key": "BAC009S0904W0312", "wav": "./aishell/wav/test/S0904/BAC009S0904W0312.wav", "txt": "主要在于将通信和定位的模块大大缩小到方寸之间"} -{"key": "BAC009S0904W0313", "wav": "./aishell/wav/test/S0904/BAC009S0904W0313.wav", "txt": "置入只有手机几分之一大小的手表表盘"} -{"key": "BAC009S0904W0314", "wav": "./aishell/wav/test/S0904/BAC009S0904W0314.wav", "txt": "还要保证与手机一样的通话质量呢"} -{"key": "BAC009S0904W0315", "wav": "./aishell/wav/test/S0904/BAC009S0904W0315.wav", "txt": "这是摆在行业面前最大的技术难题"} -{"key": "BAC009S0904W0316", "wav": "./aishell/wav/test/S0904/BAC009S0904W0316.wav", "txt": "小天才产品负责人表示"} -{"key": "BAC009S0904W0317", "wav": "./aishell/wav/test/S0904/BAC009S0904W0317.wav", "txt": "以小天才电话手表为例"} -{"key": "BAC009S0904W0318", "wav": "./aishell/wav/test/S0904/BAC009S0904W0318.wav", "txt": "公司超百位研发人员历经半年多时间"} -{"key": "BAC009S0904W0319", "wav": "./aishell/wav/test/S0904/BAC009S0904W0319.wav", "txt": "投入巨资研究经费攻关"} -{"key": "BAC009S0904W0320", "wav": "./aishell/wav/test/S0904/BAC009S0904W0320.wav", "txt": "最后找到芬兰的高级技术团队"} -{"key": "BAC009S0904W0321", "wav": "./aishell/wav/test/S0904/BAC009S0904W0321.wav", "txt": "才解决电话手表的内线内置问题"} -{"key": "BAC009S0904W0322", "wav": "./aishell/wav/test/S0904/BAC009S0904W0322.wav", "txt": "对于这种突破性的天线内置方案"} -{"key": "BAC009S0904W0323", "wav": "./aishell/wav/test/S0904/BAC009S0904W0323.wav", "txt": "我们进行了极为严谨的测试"} -{"key": "BAC009S0904W0324", "wav": "./aishell/wav/test/S0904/BAC009S0904W0324.wav", "txt": "确保信号与手机相当才真正投放市场"} -{"key": "BAC009S0904W0325", "wav": "./aishell/wav/test/S0904/BAC009S0904W0325.wav", "txt": "对于创新成果的实证和检验"} -{"key": "BAC009S0904W0326", "wav": "./aishell/wav/test/S0904/BAC009S0904W0326.wav", "txt": "电话手表对儿童安全吗"} -{"key": "BAC009S0904W0327", "wav": "./aishell/wav/test/S0904/BAC009S0904W0327.wav", "txt": "儿童电话手表的辐射对儿童的健康安全是否存在隐患呢"} -{"key": "BAC009S0904W0328", "wav": "./aishell/wav/test/S0904/BAC009S0904W0328.wav", "txt": "这种说法到底有无科学依据呢"} -{"key": "BAC009S0904W0329", "wav": "./aishell/wav/test/S0904/BAC009S0904W0329.wav", "txt": "关于手机等产品的辐射问题"} -{"key": "BAC009S0904W0330", "wav": "./aishell/wav/test/S0904/BAC009S0904W0330.wav", "txt": "任何家用电器只要通电就会产生电磁辐射"} -{"key": "BAC009S0904W0331", "wav": "./aishell/wav/test/S0904/BAC009S0904W0331.wav", "txt": "大到空调电视机电脑微波炉加湿器"} -{"key": "BAC009S0904W0332", "wav": "./aishell/wav/test/S0904/BAC009S0904W0332.wav", "txt": "小到吹风机充电器甚至接线板都会产生电磁辐射"} -{"key": "BAC009S0904W0333", "wav": "./aishell/wav/test/S0904/BAC009S0904W0333.wav", "txt": "虽然电磁辐射无处不在"} -{"key": "BAC009S0904W0334", "wav": "./aishell/wav/test/S0904/BAC009S0904W0334.wav", "txt": "并非所有的电磁辐射都会对人体产生危害"} -{"key": "BAC009S0904W0335", "wav": "./aishell/wav/test/S0904/BAC009S0904W0335.wav", "txt": "中国电力科学研究院高级工程师邬雄表示"} -{"key": "BAC009S0904W0336", "wav": "./aishell/wav/test/S0904/BAC009S0904W0336.wav", "txt": "比如阳光也是一种电磁辐射"} -{"key": "BAC009S0904W0337", "wav": "./aishell/wav/test/S0904/BAC009S0904W0337.wav", "txt": "根据国际非电离辐射防护委员会制定的标准"} -{"key": "BAC009S0904W0338", "wav": "./aishell/wav/test/S0904/BAC009S0904W0338.wav", "txt": "北京市业馀排球联赛未来每年都将举办一届"} -{"key": "BAC009S0904W0339", "wav": "./aishell/wav/test/S0904/BAC009S0904W0339.wav", "txt": "并且会逐渐扩大比赛规模"} -{"key": "BAC009S0904W0340", "wav": "./aishell/wav/test/S0904/BAC009S0904W0340.wav", "txt": "筹备时间和比赛周期都将延长"} -{"key": "BAC009S0904W0341", "wav": "./aishell/wav/test/S0904/BAC009S0904W0341.wav", "txt": "参赛队伍数量也会有所提升"} -{"key": 
"BAC009S0904W0342", "wav": "./aishell/wav/test/S0904/BAC009S0904W0342.wav", "txt": "明年北京市业馀排球联赛将在中国排球协会备案"} -{"key": "BAC009S0904W0343", "wav": "./aishell/wav/test/S0904/BAC009S0904W0343.wav", "txt": "北京市排球协会与天津排协已经初步达成合作意向"} -{"key": "BAC009S0904W0344", "wav": "./aishell/wav/test/S0904/BAC009S0904W0344.wav", "txt": "今后北京与天津两地可能会联合办赛"} -{"key": "BAC009S0904W0345", "wav": "./aishell/wav/test/S0904/BAC009S0904W0345.wav", "txt": "通过冠军赛季后赛垫场赛等形式"} -{"key": "BAC009S0904W0346", "wav": "./aishell/wav/test/S0904/BAC009S0904W0346.wav", "txt": "通过未来几年的发展"} -{"key": "BAC009S0904W0347", "wav": "./aishell/wav/test/S0904/BAC009S0904W0347.wav", "txt": "影响力强的全国性比赛"} -{"key": "BAC009S0904W0348", "wav": "./aishell/wav/test/S0904/BAC009S0904W0348.wav", "txt": "高清图女排凯旋郎平受热捧"} -{"key": "BAC009S0904W0349", "wav": "./aishell/wav/test/S0904/BAC009S0904W0349.wav", "txt": "时隔一二年重夺世界杯冠军的中国女排"} -{"key": "BAC009S0904W0350", "wav": "./aishell/wav/test/S0904/BAC009S0904W0350.wav", "txt": "新队长曾春蕾揭秘了角色转变前后的幕后故事"} -{"key": "BAC009S0904W0351", "wav": "./aishell/wav/test/S0904/BAC009S0904W0351.wav", "txt": "并且介绍自己是如何通过实战调整状态而渐入佳境的"} -{"key": "BAC009S0904W0352", "wav": "./aishell/wav/test/S0904/BAC009S0904W0352.wav", "txt": "后两轮死磕俄罗斯和日本更是有红了眼的感觉"} -{"key": "BAC009S0904W0353", "wav": "./aishell/wav/test/S0904/BAC009S0904W0353.wav", "txt": "回忆起当时临危受命接班队长一职的情况"} -{"key": "BAC009S0904W0354", "wav": "./aishell/wav/test/S0904/BAC009S0904W0354.wav", "txt": "曾春蕾介绍是在中国女排出发的前一天"} -{"key": "BAC009S0904W0355", "wav": "./aishell/wav/test/S0904/BAC009S0904W0355.wav", "txt": "主教练郎平训练结束后通知她的"} -{"key": "BAC009S0904W0356", "wav": "./aishell/wav/test/S0904/BAC009S0904W0356.wav", "txt": "当时确实没有什么心理准备"} -{"key": "BAC009S0904W0357", "wav": "./aishell/wav/test/S0904/BAC009S0904W0357.wav", "txt": "虽然知道惠若琪的心脏不太好"} -{"key": "BAC009S0904W0358", "wav": "./aishell/wav/test/S0904/BAC009S0904W0358.wav", "txt": "但是也不好过问太多"} -{"key": "BAC009S0904W0359", "wav": "./aishell/wav/test/S0904/BAC009S0904W0359.wav", "txt": "结果等到的消息是她不能去世界杯"} -{"key": "BAC009S0904W0360", "wav": "./aishell/wav/test/S0904/BAC009S0904W0360.wav", "txt": "其实在二零一四年女排大奖赛的总决赛"} -{"key": "BAC009S0904W0361", "wav": "./aishell/wav/test/S0904/BAC009S0904W0361.wav", "txt": "曾春蕾就曾经临时客串过队长职务"} -{"key": "BAC009S0904W0362", "wav": "./aishell/wav/test/S0904/BAC009S0904W0362.wav", "txt": "不过和这次在世界杯当队长相比压力明显不同"} -{"key": "BAC009S0904W0363", "wav": "./aishell/wav/test/S0904/BAC009S0904W0363.wav", "txt": "这位北京姑娘直言在三大赛当队长的感觉很特殊"} -{"key": "BAC009S0904W0364", "wav": "./aishell/wav/test/S0904/BAC009S0904W0364.wav", "txt": "是心智上的一个考验"} -{"key": "BAC009S0904W0365", "wav": "./aishell/wav/test/S0904/BAC009S0904W0365.wav", "txt": "刚开始无谓的心理压力很大"} -{"key": "BAC009S0904W0366", "wav": "./aishell/wav/test/S0904/BAC009S0904W0366.wav", "txt": "甚至在头一场的比赛还影响到自己的技术发挥"} -{"key": "BAC009S0904W0367", "wav": "./aishell/wav/test/S0904/BAC009S0904W0367.wav", "txt": "好在队友们相互弥补得非常出色"} -{"key": "BAC009S0904W0368", "wav": "./aishell/wav/test/S0904/BAC009S0904W0368.wav", "txt": "曾春蕾通过自我调节而让竞技状态渐入佳境"} -{"key": "BAC009S0904W0369", "wav": "./aishell/wav/test/S0904/BAC009S0904W0369.wav", "txt": "在保障好技术稳定发挥的同时"} -{"key": "BAC009S0904W0370", "wav": "./aishell/wav/test/S0904/BAC009S0904W0370.wav", "txt": "还能够在情绪上带动队友"} -{"key": "BAC009S0904W0371", "wav": "./aishell/wav/test/S0904/BAC009S0904W0371.wav", "txt": "谈及当队长的责任感"} -{"key": "BAC009S0904W0372", "wav": "./aishell/wav/test/S0904/BAC009S0904W0372.wav", "txt": "曾春蕾认为中国女排的困难体现在伤病多"} -{"key": "BAC009S0904W0373", "wav": "./aishell/wav/test/S0904/BAC009S0904W0373.wav", "txt": 
"需要不停地告诫自己要淡定下来"} -{"key": "BAC009S0904W0374", "wav": "./aishell/wav/test/S0904/BAC009S0904W0374.wav", "txt": "毕竟她本人是经历过伦敦奥运会的"} -{"key": "BAC009S0904W0375", "wav": "./aishell/wav/test/S0904/BAC009S0904W0375.wav", "txt": "当队长的一举一动都会带来情绪上影响"} -{"key": "BAC009S0904W0376", "wav": "./aishell/wav/test/S0904/BAC009S0904W0376.wav", "txt": "因此一个眼神一个动作"} -{"key": "BAC009S0904W0377", "wav": "./aishell/wav/test/S0904/BAC009S0904W0377.wav", "txt": "都要给队友们传递乐观和放松的讯号"} -{"key": "BAC009S0904W0378", "wav": "./aishell/wav/test/S0904/BAC009S0904W0378.wav", "txt": "曾春蕾一记五米线的调整攻打得非常漂亮"} -{"key": "BAC009S0904W0379", "wav": "./aishell/wav/test/S0904/BAC009S0904W0379.wav", "txt": "评价自己发挥的最好一场其实就是本场比赛"} -{"key": "BAC009S0904W0380", "wav": "./aishell/wav/test/S0904/BAC009S0904W0380.wav", "txt": "因为和高手过招有种红了眼的感觉"} -{"key": "BAC009S0904W0381", "wav": "./aishell/wav/test/S0904/BAC009S0904W0381.wav", "txt": "个别球更是像释放怒火一般"} -{"key": "BAC009S0904W0382", "wav": "./aishell/wav/test/S0904/BAC009S0904W0382.wav", "txt": "桎梏挣脱开了就敢于发挥"} -{"key": "BAC009S0904W0383", "wav": "./aishell/wav/test/S0904/BAC009S0904W0383.wav", "txt": "由于中国女排的前期准备特别充分"} -{"key": "BAC009S0904W0384", "wav": "./aishell/wav/test/S0904/BAC009S0904W0384.wav", "txt": "这在曾春蕾看来打俄罗斯很有底"} -{"key": "BAC009S0904W0385", "wav": "./aishell/wav/test/S0904/BAC009S0904W0385.wav", "txt": "发挥也很从容和淡定"} -{"key": "BAC009S0904W0386", "wav": "./aishell/wav/test/S0904/BAC009S0904W0386.wav", "txt": "曾春蕾坦言打关键分的状态很忘我"} -{"key": "BAC009S0904W0387", "wav": "./aishell/wav/test/S0904/BAC009S0904W0387.wav", "txt": "打日本从来都不需要动员"} -{"key": "BAC009S0904W0388", "wav": "./aishell/wav/test/S0904/BAC009S0904W0388.wav", "txt": "队友彼此之间需要相互鼓励"} -{"key": "BAC009S0904W0389", "wav": "./aishell/wav/test/S0904/BAC009S0904W0389.wav", "txt": "但更多的是落实在技术环节的细腻方面"} -{"key": "BAC009S0904W0390", "wav": "./aishell/wav/test/S0904/BAC009S0904W0390.wav", "txt": "因为想要捧起来冠军奖杯的欲望太强烈"} -{"key": "BAC009S0904W0391", "wav": "./aishell/wav/test/S0904/BAC009S0904W0391.wav", "txt": "直通里约奥运会的目标也近在咫尺"} -{"key": "BAC009S0904W0392", "wav": "./aishell/wav/test/S0904/BAC009S0904W0392.wav", "txt": "身为大队员就会去提醒大家"} -{"key": "BAC009S0904W0393", "wav": "./aishell/wav/test/S0904/BAC009S0904W0393.wav", "txt": "将去年输球的原因作为教训反思"} -{"key": "BAC009S0904W0394", "wav": "./aishell/wav/test/S0904/BAC009S0904W0394.wav", "txt": "对垒日本女排有这样一个小细节"} -{"key": "BAC009S0904W0395", "wav": "./aishell/wav/test/S0904/BAC009S0904W0395.wav", "txt": "曾春蕾在刘晓彤一传失误后直接说我来"} -{"key": "BAC009S0904W0396", "wav": "./aishell/wav/test/S0904/BAC009S0904W0396.wav", "txt": "表明队长角色转换完成得还不错"} -{"key": "BAC009S0904W0397", "wav": "./aishell/wav/test/S0904/BAC009S0904W0397.wav", "txt": "自言就应该去承担更多的任务"} -{"key": "BAC009S0904W0398", "wav": "./aishell/wav/test/S0904/BAC009S0904W0398.wav", "txt": "曾春蕾保持着清醒的头脑"} -{"key": "BAC009S0904W0399", "wav": "./aishell/wav/test/S0904/BAC009S0904W0399.wav", "txt": "深知世界杯夺冠是对过去努力的肯定"} -{"key": "BAC009S0904W0400", "wav": "./aishell/wav/test/S0904/BAC009S0904W0400.wav", "txt": "但更多的是看到了自己的不足"} -{"key": "BAC009S0904W0401", "wav": "./aishell/wav/test/S0904/BAC009S0904W0401.wav", "txt": "也知道了未来需要努力的方向"} -{"key": "BAC009S0904W0402", "wav": "./aishell/wav/test/S0904/BAC009S0904W0402.wav", "txt": "视频中国三比一大胜俄罗斯独占女排世界杯榜首"} -{"key": "BAC009S0904W0403", "wav": "./aishell/wav/test/S0904/BAC009S0904W0403.wav", "txt": "日本二零一五女排世界杯单循环赛战至第十轮"} -{"key": "BAC009S0904W0404", "wav": "./aishell/wav/test/S0904/BAC009S0904W0404.wav", "txt": "不仅在全球收回十亿美元票房"} -{"key": "BAC009S0904W0405", "wav": "./aishell/wav/test/S0904/BAC009S0904W0405.wav", "txt": "且获得奥斯卡最佳动画片大奖"} 
-{"key": "BAC009S0904W0406", "wav": "./aishell/wav/test/S0904/BAC009S0904W0406.wav", "txt": "皮克斯终于有了拍摄玩具总动员四的计划"} -{"key": "BAC009S0904W0407", "wav": "./aishell/wav/test/S0904/BAC009S0904W0407.wav", "txt": "这部正在酝酿中的续集敲定了导演"} -{"key": "BAC009S0904W0408", "wav": "./aishell/wav/test/S0904/BAC009S0904W0408.wav", "txt": "但上台祝酒时都会交足戏"} -{"key": "BAC009S0904W0409", "wav": "./aishell/wav/test/S0904/BAC009S0904W0409.wav", "txt": "洪永城还主动跟陈展鹏碰杯"} -{"key": "BAC009S0904W0411", "wav": "./aishell/wav/test/S0904/BAC009S0904W0411.wav", "txt": "她自言最近在拍戏"} -{"key": "BAC009S0904W0412", "wav": "./aishell/wav/test/S0904/BAC009S0904W0412.wav", "txt": "戏中的角色常常以性感打扮示人"} -{"key": "BAC009S0904W0413", "wav": "./aishell/wav/test/S0904/BAC009S0904W0413.wav", "txt": "所以自己也很喜欢性感打扮"} -{"key": "BAC009S0904W0414", "wav": "./aishell/wav/test/S0904/BAC009S0904W0414.wav", "txt": "问及男友郑嘉颖会不会介意这么性感"} -{"key": "BAC009S0904W0416", "wav": "./aishell/wav/test/S0904/BAC009S0904W0416.wav", "txt": "这个程度是美的"} -{"key": "BAC009S0904W0417", "wav": "./aishell/wav/test/S0904/BAC009S0904W0417.wav", "txt": "他应该也是喜欢"} -{"key": "BAC009S0904W0418", "wav": "./aishell/wav/test/S0904/BAC009S0904W0418.wav", "txt": "问及最近是否有跟男友见面"} -{"key": "BAC009S0904W0420", "wav": "./aishell/wav/test/S0904/BAC009S0904W0420.wav", "txt": "自己也有一段时间没有跟他见面了"} -{"key": "BAC009S0904W0421", "wav": "./aishell/wav/test/S0904/BAC009S0904W0421.wav", "txt": "两人都是依赖电话沟通"} -{"key": "BAC009S0904W0422", "wav": "./aishell/wav/test/S0904/BAC009S0904W0422.wav", "txt": "自己也很期待九月中旬和男友见面"} -{"key": "BAC009S0904W0423", "wav": "./aishell/wav/test/S0904/BAC009S0904W0423.wav", "txt": "并大呼我自己也非常期待他回来"} -{"key": "BAC009S0904W0424", "wav": "./aishell/wav/test/S0904/BAC009S0904W0424.wav", "txt": "因为很久了很想念他"} -{"key": "BAC009S0904W0425", "wav": "./aishell/wav/test/S0904/BAC009S0904W0425.wav", "txt": "问及见面后两人怎样庆祝"} -{"key": "BAC009S0904W0426", "wav": "./aishell/wav/test/S0904/BAC009S0904W0426.wav", "txt": "她表示应该是吃吃饭看电影之类的"} -{"key": "BAC009S0904W0428", "wav": "./aishell/wav/test/S0904/BAC009S0904W0428.wav", "txt": "是否会请教男友拍戏上的问题"} -{"key": "BAC009S0904W0429", "wav": "./aishell/wav/test/S0904/BAC009S0904W0429.wav", "txt": "她透露有些不懂的会问男友郑嘉颖"} -{"key": "BAC009S0904W0430", "wav": "./aishell/wav/test/S0904/BAC009S0904W0430.wav", "txt": "对方给了她很大的帮助"} -{"key": "BAC009S0904W0431", "wav": "./aishell/wav/test/S0904/BAC009S0904W0431.wav", "txt": "图自网络温州网讯有网友爆料"} -{"key": "BAC009S0904W0432", "wav": "./aishell/wav/test/S0904/BAC009S0904W0432.wav", "txt": "温州瑞安一驾考考生在科目三考试中突然晕了过去"} -{"key": "BAC009S0904W0433", "wav": "./aishell/wav/test/S0904/BAC009S0904W0433.wav", "txt": "送到医院时已没有呼吸"} -{"key": "BAC009S0904W0434", "wav": "./aishell/wav/test/S0904/BAC009S0904W0434.wav", "txt": "现场图温都讯今天下午四时许"} -{"key": "BAC009S0904W0435", "wav": "./aishell/wav/test/S0904/BAC009S0904W0435.wav", "txt": "看来温州市区电梯也该大整修了"} -{"key": "BAC009S0904W0436", "wav": "./aishell/wav/test/S0904/BAC009S0904W0436.wav", "txt": "温州一网友造谣苏迪罗登陆期间水库崩塌被拘"} -{"key": "BAC009S0904W0437", "wav": "./aishell/wav/test/S0904/BAC009S0904W0437.wav", "txt": "澎湃新闻八月一零日从浙江温州平阳警方获悉"} -{"key": "BAC009S0904W0438", "wav": "./aishell/wav/test/S0904/BAC009S0904W0438.wav", "txt": "因在台风苏迪罗登陆期间在网络散布水库崩塌谣言"} -{"key": "BAC009S0904W0439", "wav": "./aishell/wav/test/S0904/BAC009S0904W0439.wav", "txt": "温州一路虎店隐瞒新车维修史被判赔三一四万"} -{"key": "BAC009S0904W0440", "wav": "./aishell/wav/test/S0904/BAC009S0904W0440.wav", "txt": "温州新力虎汽车销售公司展示台"} -{"key": "BAC009S0904W0441", "wav": "./aishell/wav/test/S0904/BAC009S0904W0441.wav", "txt": "温州一酒店窝头三八元一个三盘消费一七一零元"} -{"key": "BAC009S0904W0442", 
"wav": "./aishell/wav/test/S0904/BAC009S0904W0442.wav", "txt": "温州一闲置地块填满垃圾臭味浓烈附近居民不敢开窗"} -{"key": "BAC009S0904W0443", "wav": "./aishell/wav/test/S0904/BAC009S0904W0443.wav", "txt": "小区外的空置地上填满垃圾近日"} -{"key": "BAC009S0904W0444", "wav": "./aishell/wav/test/S0904/BAC009S0904W0444.wav", "txt": "与小区只有一河之隔的东边"} -{"key": "BAC009S0904W0445", "wav": "./aishell/wav/test/S0904/BAC009S0904W0445.wav", "txt": "因惠民路南段从去年开通后"} -{"key": "BAC009S0904W0446", "wav": "./aishell/wav/test/S0904/BAC009S0904W0446.wav", "txt": "对一块闲置地监管没有跟上"} -{"key": "BAC009S0904W0447", "wav": "./aishell/wav/test/S0904/BAC009S0904W0447.wav", "txt": "近一年来每天晚上有垃圾倒在这块闲置地上"} -{"key": "BAC009S0904W0448", "wav": "./aishell/wav/test/S0904/BAC009S0904W0448.wav", "txt": "垃圾刺鼻的臭味害得住户们连窗户都不敢打开"} -{"key": "BAC009S0904W0449", "wav": "./aishell/wav/test/S0904/BAC009S0904W0449.wav", "txt": "此前温州政协委员连续两届提出要求整治垃圾污染问题"} -{"key": "BAC009S0904W0450", "wav": "./aishell/wav/test/S0904/BAC009S0904W0450.wav", "txt": "日前本报记者前往实地调查了解"} -{"key": "BAC009S0904W0451", "wav": "./aishell/wav/test/S0904/BAC009S0904W0451.wav", "txt": "温州三学生为庆生爬上浙江第一高楼玩自拍"} -{"key": "BAC009S0904W0452", "wav": "./aishell/wav/test/S0904/BAC009S0904W0452.wav", "txt": "再上到楼顶一座高约四零米的铁塔上"} -{"key": "BAC009S0904W0453", "wav": "./aishell/wav/test/S0904/BAC009S0904W0453.wav", "txt": "并在铁塔上借助自拍杆合影"} -{"key": "BAC009S0904W0454", "wav": "./aishell/wav/test/S0904/BAC009S0904W0454.wav", "txt": "一则长达一分五七秒的视频在网络上热传"} -{"key": "BAC009S0904W0455", "wav": "./aishell/wav/test/S0904/BAC009S0904W0455.wav", "txt": "有网友称之为青春任性"} -{"key": "BAC009S0904W0456", "wav": "./aishell/wav/test/S0904/BAC009S0904W0456.wav", "txt": "温州三家熟食店摊主被捕为求卖相好添加日落黄"} -{"key": "BAC009S0904W0457", "wav": "./aishell/wav/test/S0904/BAC009S0904W0457.wav", "txt": "本报讯记者范跃红通讯员瓯文为了卖相好"} -{"key": "BAC009S0904W0458", "wav": "./aishell/wav/test/S0904/BAC009S0904W0458.wav", "txt": "温州两女孩溺水救护车因车多路堵错过救援时间"} -{"key": "BAC009S0904W0459", "wav": "./aishell/wav/test/S0904/BAC009S0904W0459.wav", "txt": "温州两男子为争女人驾奔驰宝马街头四次对撞"} -{"key": "BAC009S0904W0460", "wav": "./aishell/wav/test/S0904/BAC009S0904W0460.wav", "txt": "瑞安市商业街和联中路交叉口"} -{"key": "BAC009S0904W0461", "wav": "./aishell/wav/test/S0904/BAC009S0904W0461.wav", "txt": "一辆宝马和一辆奔驰四次相撞"} -{"key": "BAC009S0904W0462", "wav": "./aishell/wav/test/S0904/BAC009S0904W0462.wav", "txt": "两车损失高达数十万元"} -{"key": "BAC009S0904W0463", "wav": "./aishell/wav/test/S0904/BAC009S0904W0463.wav", "txt": "温州企业家卖房建养老院捐给当地却被闲置三年"} -{"key": "BAC009S0904W0464", "wav": "./aishell/wav/test/S0904/BAC009S0904W0464.wav", "txt": "浙江温州乐清七五岁的企业家虞一杰退休之后"} -{"key": "BAC009S0904W0465", "wav": "./aishell/wav/test/S0904/BAC009S0904W0465.wav", "txt": "拿出了自己全部的积蓄"} -{"key": "BAC009S0904W0466", "wav": "./aishell/wav/test/S0904/BAC009S0904W0466.wav", "txt": "还卖了自己在杭州和乐清的房子"} -{"key": "BAC009S0904W0467", "wav": "./aishell/wav/test/S0904/BAC009S0904W0467.wav", "txt": "但是养老院建成至今已经有三年了"} -{"key": "BAC009S0904W0468", "wav": "./aishell/wav/test/S0904/BAC009S0904W0468.wav", "txt": "那原因到底在哪里呢"} -{"key": "BAC009S0904W0469", "wav": "./aishell/wav/test/S0904/BAC009S0904W0469.wav", "txt": "温州体育局官员逼女教练陪酒当地纪委介入调查"} -{"key": "BAC009S0904W0470", "wav": "./aishell/wav/test/S0904/BAC009S0904W0470.wav", "txt": "以给编制五险一金等为借口"} -{"key": "BAC009S0904W0471", "wav": "./aishell/wav/test/S0904/BAC009S0904W0471.wav", "txt": "诱逼女教练陪他喝酒吃饭唱歌"} -{"key": "BAC009S0904W0472", "wav": "./aishell/wav/test/S0904/BAC009S0904W0472.wav", "txt": "并贴出多张聊天记录截图"} -{"key": "BAC009S0904W0473", "wav": "./aishell/wav/test/S0904/BAC009S0904W0473.wav", "txt": "立即引起众多网友关注"} -{"key": 
"BAC009S0904W0474", "wav": "./aishell/wav/test/S0904/BAC009S0904W0474.wav", "txt": "温州六旬老人辗转各地看公厕一二年还债七六万元"} -{"key": "BAC009S0904W0475", "wav": "./aishell/wav/test/S0904/BAC009S0904W0475.wav", "txt": "温州网讯我不想死后给后人说闲话"} -{"key": "BAC009S0904W0476", "wav": "./aishell/wav/test/S0904/BAC009S0904W0476.wav", "txt": "省吃俭用也要把该还的钱尽力还掉"} -{"key": "BAC009S0904W0477", "wav": "./aishell/wav/test/S0904/BAC009S0904W0477.wav", "txt": "让借给我钱的好心人"} -{"key": "BAC009S0904W0478", "wav": "./aishell/wav/test/S0904/BAC009S0904W0478.wav", "txt": "这是富林愚老人发自内心的一句话"} -{"key": "BAC009S0904W0479", "wav": "./aishell/wav/test/S0904/BAC009S0904W0479.wav", "txt": "温州农贸市场现注胶虾业内人称为增加重量"} -{"key": "BAC009S0904W0480", "wav": "./aishell/wav/test/S0904/BAC009S0904W0480.wav", "txt": "虾里有明显的胶状物质图片来源网友微信日前"} -{"key": "BAC009S0904W0481", "wav": "./aishell/wav/test/S0904/BAC009S0904W0481.wav", "txt": "回家后发现大虾体内竟然被注射了不明胶状物"} -{"key": "BAC009S0904W0482", "wav": "./aishell/wav/test/S0904/BAC009S0904W0482.wav", "txt": "瑞安市市场监管局玉海所介入调查"} -{"key": "BAC009S0904W0483", "wav": "./aishell/wav/test/S0904/BAC009S0904W0483.wav", "txt": "当事水产摊贩已退还郑女士一零零元购虾款"} -{"key": "BAC009S0904W0484", "wav": "./aishell/wav/test/S0904/BAC009S0904W0484.wav", "txt": "温州化工仓库起火殃及附近河流大量死鱼漂河面"} -{"key": "BAC009S0904W0485", "wav": "./aishell/wav/test/S0904/BAC009S0904W0485.wav", "txt": "图为几天前村民拍到的河面"} -{"key": "BAC009S0904W0486", "wav": "./aishell/wav/test/S0904/BAC009S0904W0486.wav", "txt": "温州医生夫妇贩婴被批捕女儿欲捐款替父赎罪"} -{"key": "BAC009S0904W0487", "wav": "./aishell/wav/test/S0904/BAC009S0904W0487.wav", "txt": "温州医生涉贩卖儿童谎称活婴是死婴骗父母放弃"} -{"key": "BAC009S0904W0488", "wav": "./aishell/wav/test/S0904/BAC009S0904W0488.wav", "txt": "参考消息网九月二五日报道新报称"} -{"key": "BAC009S0904W0489", "wav": "./aishell/wav/test/S0904/BAC009S0904W0489.wav", "txt": "继陕西富平妇产科医生张淑侠因贩卖婴儿被判刑之后"} -{"key": "BAC009S0904W0490", "wav": "./aishell/wav/test/S0904/BAC009S0904W0490.wav", "txt": "中国再现医生涉嫌拐卖婴儿的案例"} -{"key": "BAC009S0904W0491", "wav": "./aishell/wav/test/S0904/BAC009S0904W0491.wav", "txt": "一对来自浙江温州的医生夫妇涉案被捕"} -{"key": "BAC009S0904W0492", "wav": "./aishell/wav/test/S0904/BAC009S0904W0492.wav", "txt": "温州瑞安市发生一起违停女司机故意伤害交通协警案件"} -{"key": "BAC009S0904W0493", "wav": "./aishell/wav/test/S0904/BAC009S0904W0493.wav", "txt": "温州城管协管员掌掴女清洁工被停职"} -{"key": "BAC009S0904W0494", "wav": "./aishell/wav/test/S0904/BAC009S0904W0494.wav", "txt": "该段视频时长仅有六秒"} -{"key": "BAC009S0904W0495", "wav": "./aishell/wav/test/S0904/BAC009S0904W0495.wav", "txt": "一名路人疾步上前将男子拦开"} -{"key": "BAC009S0905W0121", "wav": "./aishell/wav/test/S0905/BAC009S0905W0121.wav", "txt": "公积金贷款额度的提高"} -{"key": "BAC009S0905W0122", "wav": "./aishell/wav/test/S0905/BAC009S0905W0122.wav", "txt": "确实降低了刚需人群购房成本"} -{"key": "BAC009S0905W0123", "wav": "./aishell/wav/test/S0905/BAC009S0905W0123.wav", "txt": "对房地产市场的利好影响将是长期的"} -{"key": "BAC009S0905W0124", "wav": "./aishell/wav/test/S0905/BAC009S0905W0124.wav", "txt": "而这一落地难题也会影响其对刚需市场的支持效力"} -{"key": "BAC009S0905W0125", "wav": "./aishell/wav/test/S0905/BAC009S0905W0125.wav", "txt": "这一公积金新政实际上仍然存在很多门槛"} -{"key": "BAC009S0905W0126", "wav": "./aishell/wav/test/S0905/BAC009S0905W0126.wav", "txt": "在住房公积金贷款的申请过程中"} -{"key": "BAC009S0905W0127", "wav": "./aishell/wav/test/S0905/BAC009S0905W0127.wav", "txt": "有些要与房企具体项目挂钩"} -{"key": "BAC009S0905W0128", "wav": "./aishell/wav/test/S0905/BAC009S0905W0128.wav", "txt": "在公积金贷款额度上调后一个月内"} -{"key": "BAC009S0905W0129", "wav": "./aishell/wav/test/S0905/BAC009S0905W0129.wav", "txt": "北京公积金贷款成交量上涨百分之五"} -{"key": "BAC009S0905W0130", "wav": 
"./aishell/wav/test/S0905/BAC009S0905W0130.wav", "txt": "金融市场总体平稳鲁指冲高回落"} -{"key": "BAC009S0905W0131", "wav": "./aishell/wav/test/S0905/BAC009S0905W0131.wav", "txt": "但专家预计短期央行仍可能会积极维稳"} -{"key": "BAC009S0905W0132", "wav": "./aishell/wav/test/S0905/BAC009S0905W0132.wav", "txt": "汇率较大概率维持双向"} -{"key": "BAC009S0905W0133", "wav": "./aishell/wav/test/S0905/BAC009S0905W0133.wav", "txt": "相关公司股票走势招商银行"} -{"key": "BAC009S0905W0134", "wav": "./aishell/wav/test/S0905/BAC009S0905W0134.wav", "txt": "降准降息或再掀收益率的下降潮"} -{"key": "BAC009S0905W0135", "wav": "./aishell/wav/test/S0905/BAC009S0905W0135.wav", "txt": "双降加上广州公积金贷款新政落地"} -{"key": "BAC009S0905W0136", "wav": "./aishell/wav/test/S0905/BAC009S0905W0136.wav", "txt": "上周末成为潜在买家们争相咨询看楼的时机"} -{"key": "BAC009S0905W0137", "wav": "./aishell/wav/test/S0905/BAC009S0905W0137.wav", "txt": "期待岁末能有更多利好出现"} -{"key": "BAC009S0905W0138", "wav": "./aishell/wav/test/S0905/BAC009S0905W0138.wav", "txt": "第四季度二手住宅成交量将环比增幅在百分之七以内"} -{"key": "BAC009S0905W0139", "wav": "./aishell/wav/test/S0905/BAC009S0905W0139.wav", "txt": "价格要到明年初才出现上涨"} -{"key": "BAC009S0905W0140", "wav": "./aishell/wav/test/S0905/BAC009S0905W0140.wav", "txt": "广州日报讯记者林琳上周五"} -{"key": "BAC009S0905W0141", "wav": "./aishell/wav/test/S0905/BAC009S0905W0141.wav", "txt": "再加上广州公积金贷款新政终于落地"} -{"key": "BAC009S0905W0142", "wav": "./aishell/wav/test/S0905/BAC009S0905W0142.wav", "txt": "一系列利好消息影响下的首个周末"} -{"key": "BAC009S0905W0143", "wav": "./aishell/wav/test/S0905/BAC009S0905W0143.wav", "txt": "买家积极咨询看楼"} -{"key": "BAC009S0905W0144", "wav": "./aishell/wav/test/S0905/BAC009S0905W0144.wav", "txt": "降息消息传出后首日"} -{"key": "BAC009S0905W0145", "wav": "./aishell/wav/test/S0905/BAC009S0905W0145.wav", "txt": "地铺门店咨询量与七月同期相比约有百分之七左右的增幅"} -{"key": "BAC009S0905W0146", "wav": "./aishell/wav/test/S0905/BAC009S0905W0146.wav", "txt": "满堂红链家市场研究部高级经理周峰透露"} -{"key": "BAC009S0905W0147", "wav": "./aishell/wav/test/S0905/BAC009S0905W0147.wav", "txt": "店均电话咨询量比上一个周末增加十一百分之左右"} -{"key": "BAC009S0905W0148", "wav": "./aishell/wav/test/S0905/BAC009S0905W0148.wav", "txt": "看楼量对比上一周末大概增加百分之七左右"} -{"key": "BAC009S0905W0149", "wav": "./aishell/wav/test/S0905/BAC009S0905W0149.wav", "txt": "不过他认为这种增幅并不算太明显"} -{"key": "BAC009S0905W0150", "wav": "./aishell/wav/test/S0905/BAC009S0905W0150.wav", "txt": "搜房网广州二手房电商集团市场部总监罗来平发现"} -{"key": "BAC009S0905W0151", "wav": "./aishell/wav/test/S0905/BAC009S0905W0151.wav", "txt": "市场上约有两成业主反价"} -{"key": "BAC009S0905W0152", "wav": "./aishell/wav/test/S0905/BAC009S0905W0152.wav", "txt": "一个天河区的中介人士告诉记者"} -{"key": "BAC009S0905W0153", "wav": "./aishell/wav/test/S0905/BAC009S0905W0153.wav", "txt": "市民对连续多次降息已经麻木了"} -{"key": "BAC009S0905W0154", "wav": "./aishell/wav/test/S0905/BAC009S0905W0154.wav", "txt": "公积金贷款新政出台"} -{"key": "BAC009S0905W0155", "wav": "./aishell/wav/test/S0905/BAC009S0905W0155.wav", "txt": "市场不可能那么快有反应"} -{"key": "BAC009S0905W0156", "wav": "./aishell/wav/test/S0905/BAC009S0905W0156.wav", "txt": "七月广州二手楼市交投升温的态势已相当明确"} -{"key": "BAC009S0905W0157", "wav": "./aishell/wav/test/S0905/BAC009S0905W0157.wav", "txt": "按照这一趋势发展下去"} -{"key": "BAC009S0905W0158", "wav": "./aishell/wav/test/S0905/BAC009S0905W0158.wav", "txt": "再加上央行降息以及公积金新政等利好的叠加效应"} -{"key": "BAC009S0905W0159", "wav": "./aishell/wav/test/S0905/BAC009S0905W0159.wav", "txt": "有望进一步激活买家在接近年底这段时间的入市积极性"} -{"key": "BAC009S0905W0160", "wav": "./aishell/wav/test/S0905/BAC009S0905W0160.wav", "txt": "据阳光家缘网站公布数据统计"} -{"key": "BAC009S0905W0161", "wav": "./aishell/wav/test/S0905/BAC009S0905W0161.wav", "txt": "广州二手住宅市场七月的网签量已达一千套"} -{"key": 
"BAC009S0905W0162", "wav": "./aishell/wav/test/S0905/BAC009S0905W0162.wav", "txt": "广州二手住宅市场网签量达一千套"} -{"key": "BAC009S0905W0163", "wav": "./aishell/wav/test/S0905/BAC009S0905W0163.wav", "txt": "超过五月七千套的水平"} -{"key": "BAC009S0905W0164", "wav": "./aishell/wav/test/S0905/BAC009S0905W0164.wav", "txt": "目前市场上的低价房源已基本消耗完毕"} -{"key": "BAC009S0905W0165", "wav": "./aishell/wav/test/S0905/BAC009S0905W0165.wav", "txt": "广州二手房迎来新一轮涨价潮"} -{"key": "BAC009S0905W0166", "wav": "./aishell/wav/test/S0905/BAC009S0905W0166.wav", "txt": "搜房网广州二手房统计中心数据显示"} -{"key": "BAC009S0905W0167", "wav": "./aishell/wav/test/S0905/BAC009S0905W0167.wav", "txt": "广州五月二手房均价为一千元每平方米"} -{"key": "BAC009S0905W0168", "wav": "./aishell/wav/test/S0905/BAC009S0905W0168.wav", "txt": "比月初增长了一百元每平方米"} -{"key": "BAC009S0905W0169", "wav": "./aishell/wav/test/S0905/BAC009S0905W0169.wav", "txt": "因此判断随着利好政策的实施和成交量的增加"} -{"key": "BAC009S0905W0170", "wav": "./aishell/wav/test/S0905/BAC009S0905W0170.wav", "txt": "今年的房价还会有上升空间"} -{"key": "BAC009S0905W0171", "wav": "./aishell/wav/test/S0905/BAC009S0905W0171.wav", "txt": "广州还是在执行严厉的限购政策"} -{"key": "BAC009S0905W0172", "wav": "./aishell/wav/test/S0905/BAC009S0905W0172.wav", "txt": "我预计市场成交量会有所增加"} -{"key": "BAC009S0905W0173", "wav": "./aishell/wav/test/S0905/BAC009S0905W0173.wav", "txt": "但增加的幅度不会太大"} -{"key": "BAC009S0905W0174", "wav": "./aishell/wav/test/S0905/BAC009S0905W0174.wav", "txt": "他预测今年剩馀的两个月中"} -{"key": "BAC009S0905W0175", "wav": "./aishell/wav/test/S0905/BAC009S0905W0175.wav", "txt": "昨日人民币汇率小幅走弱"} -{"key": "BAC009S0905W0176", "wav": "./aishell/wav/test/S0905/BAC009S0905W0176.wav", "txt": "人民币中间价"} -{"key": "BAC009S0905W0177", "wav": "./aishell/wav/test/S0905/BAC009S0905W0177.wav", "txt": "美丽北京大型绿色公益品牌项目"} -{"key": "BAC009S0905W0178", "wav": "./aishell/wav/test/S0905/BAC009S0905W0178.wav", "txt": "在岸人民币兑美元收盘下跌百分之一"} -{"key": "BAC009S0905W0179", "wav": "./aishell/wav/test/S0905/BAC009S0905W0179.wav", "txt": "双降后首日在岸人民币由弱转强"} -{"key": "BAC009S0905W0180", "wav": "./aishell/wav/test/S0905/BAC009S0905W0180.wav", "txt": "人民币成交额减少百分之一"} -{"key": "BAC009S0905W0181", "wav": "./aishell/wav/test/S0905/BAC009S0905W0181.wav", "txt": "报七千亿美元"} -{"key": "BAC009S0905W0182", "wav": "./aishell/wav/test/S0905/BAC009S0905W0182.wav", "txt": "上周五的双降政策让市场担忧"} -{"key": "BAC009S0905W0183", "wav": "./aishell/wav/test/S0905/BAC009S0905W0183.wav", "txt": "投金或在经济增长速度放缓形势下加速外流"} -{"key": "BAC009S0905W0184", "wav": "./aishell/wav/test/S0905/BAC009S0905W0184.wav", "txt": "投资者担心这将加重人民币所面临的压力"} -{"key": "BAC009S0905W0185", "wav": "./aishell/wav/test/S0905/BAC009S0905W0185.wav", "txt": "就在上周五双降公布之后"} -{"key": "BAC009S0905W0186", "wav": "./aishell/wav/test/S0905/BAC009S0905W0186.wav", "txt": "招商银行同业金融部高级分析师刘东亮指出"} -{"key": "BAC009S0905W0187", "wav": "./aishell/wav/test/S0905/BAC009S0905W0187.wav", "txt": "加快老旧渔船更新改造"} -{"key": "BAC009S0905W0188", "wav": "./aishell/wav/test/S0905/BAC009S0905W0188.wav", "txt": "不断增强农业可持续发展能力"} -{"key": "BAC009S0905W0189", "wav": "./aishell/wav/test/S0905/BAC009S0905W0189.wav", "txt": "创建国家现代农业示范区"} -{"key": "BAC009S0905W0190", "wav": "./aishell/wav/test/S0905/BAC009S0905W0190.wav", "txt": "加大示范区建设力度"} -{"key": "BAC009S0905W0191", "wav": "./aishell/wav/test/S0905/BAC009S0905W0191.wav", "txt": "加大示范目建设投入力度"} -{"key": "BAC009S0905W0192", "wav": "./aishell/wav/test/S0905/BAC009S0905W0192.wav", "txt": "努力打造现代农业发展的典型和样板"} -{"key": "BAC009S0905W0193", "wav": "./aishell/wav/test/S0905/BAC009S0905W0193.wav", "txt": "发挥示范区引领作用"} -{"key": "BAC009S0905W0194", "wav": 
"./aishell/wav/test/S0905/BAC009S0905W0194.wav", "txt": "通过产业拉动技术辐射和人员培训等"} -{"key": "BAC009S0905W0195", "wav": "./aishell/wav/test/S0905/BAC009S0905W0195.wav", "txt": "带动周边地区现代农业加快发展"} -{"key": "BAC009S0905W0196", "wav": "./aishell/wav/test/S0905/BAC009S0905W0196.wav", "txt": "引导各地鉴借示范区发展现代农业的好做法和好经验"} -{"key": "BAC009S0905W0197", "wav": "./aishell/wav/test/S0905/BAC009S0905W0197.wav", "txt": "推动创建不同层次特色鲜明的现代农业示范区"} -{"key": "BAC009S0905W0198", "wav": "./aishell/wav/test/S0905/BAC009S0905W0198.wav", "txt": "按照分类指导突出重点梯次推进的思路"} -{"key": "BAC009S0905W0199", "wav": "./aishell/wav/test/S0905/BAC009S0905W0199.wav", "txt": "以七区二十三带农业战略格局为核心"} -{"key": "BAC009S0905W0200", "wav": "./aishell/wav/test/S0905/BAC009S0905W0200.wav", "txt": "着力建设重点推进率先实现和稳步发展三类区域"} -{"key": "BAC009S0905W0201", "wav": "./aishell/wav/test/S0905/BAC009S0905W0201.wav", "txt": "引领全国现代农业加快发展"} -{"key": "BAC009S0905W0202", "wav": "./aishell/wav/test/S0905/BAC009S0905W0202.wav", "txt": "重点推进区域"} -{"key": "BAC009S0905W0203", "wav": "./aishell/wav/test/S0905/BAC009S0905W0203.wav", "txt": "农业生产技术较为成熟"} -{"key": "BAC009S0905W0204", "wav": "./aishell/wav/test/S0905/BAC009S0905W0204.wav", "txt": "农业生产条件具有良好基础"} -{"key": "BAC009S0905W0205", "wav": "./aishell/wav/test/S0905/BAC009S0905W0205.wav", "txt": "承担着主要农产品供给保证的主体功能"} -{"key": "BAC009S0905W0206", "wav": "./aishell/wav/test/S0905/BAC009S0905W0206.wav", "txt": "加快推进该区域现代农业建设"} -{"key": "BAC009S0905W0207", "wav": "./aishell/wav/test/S0905/BAC009S0905W0207.wav", "txt": "事关全国农业现代化进程和国家粮食安全大局"} -{"key": "BAC009S0905W0208", "wav": "./aishell/wav/test/S0905/BAC009S0905W0208.wav", "txt": "继续发挥该区域粮食安全基础保障作用"} -{"key": "BAC009S0905W0209", "wav": "./aishell/wav/test/S0905/BAC009S0905W0209.wav", "txt": "调动各方发展粮食生产积极性"} -{"key": "BAC009S0905W0210", "wav": "./aishell/wav/test/S0905/BAC009S0905W0210.wav", "txt": "以建设小麦玉米水稻大豆优势产业带为重点"} -{"key": "BAC009S0905W0211", "wav": "./aishell/wav/test/S0905/BAC009S0905W0211.wav", "txt": "深入开展粮食稳定增产行动"} -{"key": "BAC009S0905W0212", "wav": "./aishell/wav/test/S0905/BAC009S0905W0212.wav", "txt": "加强农田水利和高标准农田建设"} -{"key": "BAC009S0905W0213", "wav": "./aishell/wav/test/S0905/BAC009S0905W0213.wav", "txt": "提高农机装备和作业水平"} -{"key": "BAC009S0905W0214", "wav": "./aishell/wav/test/S0905/BAC009S0905W0214.wav", "txt": "大力开展高产创建和科技指导服务"} -{"key": "BAC009S0905W0215", "wav": "./aishell/wav/test/S0905/BAC009S0905W0215.wav", "txt": "推广防灾减灾增产关键技术"} -{"key": "BAC009S0905W0216", "wav": "./aishell/wav/test/S0905/BAC009S0905W0216.wav", "txt": "加快选育应用优良品种"} -{"key": "BAC009S0905W0217", "wav": "./aishell/wav/test/S0905/BAC009S0905W0217.wav", "txt": "大幅度提升粮食综合生产能力和现代化生产水平"} -{"key": "BAC009S0905W0218", "wav": "./aishell/wav/test/S0905/BAC009S0905W0218.wav", "txt": "大力发展粮食精深加工及仓储物流业"} -{"key": "BAC009S0905W0219", "wav": "./aishell/wav/test/S0905/BAC009S0905W0219.wav", "txt": "完善粮食仓储运输设备"} -{"key": "BAC009S0905W0220", "wav": "./aishell/wav/test/S0905/BAC009S0905W0220.wav", "txt": "引导龙头企业向优势产区集聚"} -{"key": "BAC009S0905W0221", "wav": "./aishell/wav/test/S0905/BAC009S0905W0221.wav", "txt": "提高粮食生产综合效益"} -{"key": "BAC009S0905W0222", "wav": "./aishell/wav/test/S0905/BAC009S0905W0222.wav", "txt": "其他主要农产品优势区"} -{"key": "BAC009S0905W0223", "wav": "./aishell/wav/test/S0905/BAC009S0905W0223.wav", "txt": "以及蔬菜蚕卓等农产品生产的主体区域"} -{"key": "BAC009S0905W0224", "wav": "./aishell/wav/test/S0905/BAC009S0905W0224.wav", "txt": "以建设区域内各类农产品优势产业带为重点"} -{"key": "BAC009S0905W0225", "wav": "./aishell/wav/test/S0905/BAC009S0905W0225.wav", "txt": "提高资源利用率和加工转化率"} -{"key": "BAC009S0905W0226", "wav": 
"./aishell/wav/test/S0905/BAC009S0905W0226.wav", "txt": "继续巩固棉油糖水果和蔬菜等产品供给保证地位"} -{"key": "BAC009S0905W0227", "wav": "./aishell/wav/test/S0905/BAC009S0905W0227.wav", "txt": "着力强化技术装备支撑"} -{"key": "BAC009S0905W0228", "wav": "./aishell/wav/test/S0905/BAC009S0905W0228.wav", "txt": "提高现代化生产水平"} -{"key": "BAC009S0905W0229", "wav": "./aishell/wav/test/S0905/BAC009S0905W0229.wav", "txt": "强化出口水产品生产基地功能"} -{"key": "BAC009S0905W0230", "wav": "./aishell/wav/test/S0905/BAC009S0905W0230.wav", "txt": "加快现代养殖业发展"} -{"key": "BAC009S0905W0231", "wav": "./aishell/wav/test/S0905/BAC009S0905W0231.wav", "txt": "率先实现区域"} -{"key": "BAC009S0905W0232", "wav": "./aishell/wav/test/S0905/BAC009S0905W0232.wav", "txt": "该区域交通区位市场和人力资源优势明显"} -{"key": "BAC009S0905W0233", "wav": "./aishell/wav/test/S0905/BAC009S0905W0233.wav", "txt": "资本技术等现代化生产要素集约化程度高"} -{"key": "BAC009S0905W0234", "wav": "./aishell/wav/test/S0905/BAC009S0905W0234.wav", "txt": "加快该区域现代农业建设"} -{"key": "BAC009S0905W0235", "wav": "./aishell/wav/test/S0905/BAC009S0905W0235.wav", "txt": "对于引领全国现代农业加快发展具有重要意义"} -{"key": "BAC009S0905W0236", "wav": "./aishell/wav/test/S0905/BAC009S0905W0236.wav", "txt": "东部沿海先导农业区"} -{"key": "BAC009S0905W0237", "wav": "./aishell/wav/test/S0905/BAC009S0905W0237.wav", "txt": "大力发展资本技术密集型农业"} -{"key": "BAC009S0905W0238", "wav": "./aishell/wav/test/S0905/BAC009S0905W0238.wav", "txt": "保持耕地面积不减少"} -{"key": "BAC009S0905W0239", "wav": "./aishell/wav/test/S0905/BAC009S0905W0239.wav", "txt": "探索企业化集团化发展模式"} -{"key": "BAC009S0905W0240", "wav": "./aishell/wav/test/S0905/BAC009S0905W0240.wav", "txt": "大力推进标准化生产和集约化经营"} -{"key": "BAC009S0905W0241", "wav": "./aishell/wav/test/S0905/BAC009S0905W0241.wav", "txt": "提高信息化优质化和品牌化水平"} -{"key": "BAC009S0905W0242", "wav": "./aishell/wav/test/S0905/BAC009S0905W0242.wav", "txt": "提升产品的科技含量和附加值"} -{"key": "BAC009S0905W0243", "wav": "./aishell/wav/test/S0905/BAC009S0905W0243.wav", "txt": "大城市郊区多功能农业区"} -{"key": "BAC009S0905W0244", "wav": "./aishell/wav/test/S0905/BAC009S0905W0244.wav", "txt": "主要指沿海地区以外的直辖市省会城市等大城市郊区"} -{"key": "BAC009S0905W0245", "wav": "./aishell/wav/test/S0905/BAC009S0905W0245.wav", "txt": "统筹推进新一轮菜篮子工程建设"} -{"key": "BAC009S0905W0246", "wav": "./aishell/wav/test/S0905/BAC009S0905W0246.wav", "txt": "合理确定大城市郊区菜篮子产品生产用地保有数量"} -{"key": "BAC009S0905W0247", "wav": "./aishell/wav/test/S0905/BAC009S0905W0247.wav", "txt": "提高大城市菜篮子产品的自给率"} -{"key": "BAC009S0905W0248", "wav": "./aishell/wav/test/S0905/BAC009S0905W0248.wav", "txt": "在稳定城市副食品供应保证能力的基础上"} -{"key": "BAC009S0905W0249", "wav": "./aishell/wav/test/S0905/BAC009S0905W0249.wav", "txt": "全面推进机械化标准化品牌化产业化发展"} -{"key": "BAC009S0905W0250", "wav": "./aishell/wav/test/S0905/BAC009S0905W0250.wav", "txt": "加快农田基础设备和现代农业装备建设"} -{"key": "BAC009S0905W0251", "wav": "./aishell/wav/test/S0905/BAC009S0905W0251.wav", "txt": "着力建设国家商品粮供给重点保证区"} -{"key": "BAC009S0905W0252", "wav": "./aishell/wav/test/S0905/BAC009S0905W0252.wav", "txt": "提升垦区现代农业发展水平"} -{"key": "BAC009S0905W0253", "wav": "./aishell/wav/test/S0905/BAC009S0905W0253.wav", "txt": "业界首次开始认真讨论苹果市值晋升万亿大关的潜力"} -{"key": "BAC009S0905W0254", "wav": "./aishell/wav/test/S0905/BAC009S0905W0254.wav", "txt": "苹果股票价格创下历史新高"} -{"key": "BAC009S0905W0255", "wav": "./aishell/wav/test/S0905/BAC009S0905W0255.wav", "txt": "苹果市值超过七千亿美元"} -{"key": "BAC009S0905W0256", "wav": "./aishell/wav/test/S0905/BAC009S0905W0256.wav", "txt": "如果按照每股一二七美元的股价来算"} -{"key": "BAC009S0905W0257", "wav": "./aishell/wav/test/S0905/BAC009S0905W0257.wav", "txt": "那么苹果市价约为七四四十亿美元"} -{"key": "BAC009S0905W0258", "wav": 
"./aishell/wav/test/S0905/BAC009S0905W0258.wav", "txt": "这一价格也是目前华尔街给出的最高估值"} -{"key": "BAC009S0905W0261", "wav": "./aishell/wav/test/S0905/BAC009S0905W0261.wav", "txt": "随着四克网络的在中国的展开"} -{"key": "BAC009S0905W0262", "wav": "./aishell/wav/test/S0905/BAC009S0905W0262.wav", "txt": "苹果对电动汽车表现出的浓厚兴趣"} -{"key": "BAC009S0905W0263", "wav": "./aishell/wav/test/S0905/BAC009S0905W0263.wav", "txt": "也能够给股票市场来带更多兴奋"} -{"key": "BAC009S0905W0264", "wav": "./aishell/wav/test/S0905/BAC009S0905W0264.wav", "txt": "苹果将继续向股东返还现金"} -{"key": "BAC009S0905W0265", "wav": "./aishell/wav/test/S0905/BAC009S0905W0265.wav", "txt": "四月份或将采取更多的举动"} -{"key": "BAC009S0905W0266", "wav": "./aishell/wav/test/S0905/BAC009S0905W0266.wav", "txt": "这些力量的结合将会推动苹果的市盈率大幅上正"} -{"key": "BAC009S0905W0267", "wav": "./aishell/wav/test/S0905/BAC009S0905W0267.wav", "txt": "苹果公司的市价将突破一万亿美金大关"} -{"key": "BAC009S0905W0268", "wav": "./aishell/wav/test/S0905/BAC009S0905W0268.wav", "txt": "这只是最乐观的估计"} -{"key": "BAC009S0905W0269", "wav": "./aishell/wav/test/S0905/BAC009S0905W0269.wav", "txt": "苹果在成长为万亿美元市场的巨无霸之前"} -{"key": "BAC009S0905W0270", "wav": "./aishell/wav/test/S0905/BAC009S0905W0270.wav", "txt": "还有很多阻碍要解决"} -{"key": "BAC009S0905W0271", "wav": "./aishell/wav/test/S0905/BAC009S0905W0271.wav", "txt": "先是价格昂贵功能鸡肋的特点遭到一众业内人士吐槽"} -{"key": "BAC009S0905W0273", "wav": "./aishell/wav/test/S0905/BAC009S0905W0273.wav", "txt": "屏幕良品率仅在百分之三十至百分之四十之间"} -{"key": "BAC009S0905W0274", "wav": "./aishell/wav/test/S0905/BAC009S0905W0274.wav", "txt": "苹果公司现在已将约三百万的原始订单削减了一半"} -{"key": "BAC009S0905W0275", "wav": "./aishell/wav/test/S0905/BAC009S0905W0275.wav", "txt": "准备和特斯拉一较高下"} -{"key": "BAC009S0905W0276", "wav": "./aishell/wav/test/S0905/BAC009S0905W0276.wav", "txt": "但相对于传统的汽车制造工业"} -{"key": "BAC009S0905W0277", "wav": "./aishell/wav/test/S0905/BAC009S0905W0277.wav", "txt": "苹果作为消费数码产品的公司是否具备造车能力"} -{"key": "BAC009S0905W0279", "wav": "./aishell/wav/test/S0905/BAC009S0905W0279.wav", "txt": "目前大部分华尔街分析师们都对苹果的未来保持乐观"} -{"key": "BAC009S0905W0280", "wav": "./aishell/wav/test/S0905/BAC009S0905W0280.wav", "txt": "仅有三点百分之四的分析师建议卖出"} -{"key": "BAC009S0905W0281", "wav": "./aishell/wav/test/S0905/BAC009S0905W0281.wav", "txt": "中国经营网注有国外媒体报道称"} -{"key": "BAC009S0905W0282", "wav": "./aishell/wav/test/S0905/BAC009S0905W0282.wav", "txt": "苹果市场价值达到七千亿美元刚刚过去几个月"} -{"key": "BAC009S0905W0283", "wav": "./aishell/wav/test/S0905/BAC009S0905W0283.wav", "txt": "已经有股票经纪公司预测"} -{"key": "BAC009S0905W0284", "wav": "./aishell/wav/test/S0905/BAC009S0905W0284.wav", "txt": "苹果能否摆脱王者魔咒"} -{"key": "BAC009S0905W0285", "wav": "./aishell/wav/test/S0905/BAC009S0905W0285.wav", "txt": "苹果晋身道指固属众望所归"} -{"key": "BAC009S0905W0287", "wav": "./aishell/wav/test/S0905/BAC009S0905W0287.wav", "txt": "而苹果得以顺利跻身道指"} -{"key": "BAC009S0905W0288", "wav": "./aishell/wav/test/S0905/BAC009S0905W0288.wav", "txt": "亦拜股份去年六月一拆七所赐"} -{"key": "BAC009S0905W0289", "wav": "./aishell/wav/test/S0905/BAC009S0905W0289.wav", "txt": "却完全不足以彰显编制机构与时并进"} -{"key": "BAC009S0905W0290", "wav": "./aishell/wav/test/S0905/BAC009S0905W0290.wav", "txt": "苹果固然不会因此而升格"} -{"key": "BAC009S0905W0292", "wav": "./aishell/wav/test/S0905/BAC009S0905W0292.wav", "txt": "毕竟还有许多人的心愿"} -{"key": "BAC009S0905W0293", "wav": "./aishell/wav/test/S0905/BAC009S0905W0293.wav", "txt": "老毕于跟苹果押注太阳能一文问过大家"} -{"key": "BAC009S0905W0294", "wav": "./aishell/wav/test/S0905/BAC009S0905W0294.wav", "txt": "苹果股价在说不准的时间内有望上升三成"} -{"key": "BAC009S0905W0295", "wav": "./aishell/wav/test/S0905/BAC009S0905W0295.wav", "txt": "是否能令捧场客心满意足"} -{"key": "BAC009S0905W0296", "wav": 
"./aishell/wav/test/S0905/BAC009S0905W0296.wav", "txt": "问题焦点若是太阳能"} -{"key": "BAC009S0905W0297", "wav": "./aishell/wav/test/S0905/BAC009S0905W0297.wav", "txt": "诸位自然不会满足于前面提及的潜在回报"} -{"key": "BAC009S0905W0298", "wav": "./aishell/wav/test/S0905/BAC009S0905W0298.wav", "txt": "这家市值离万亿美元不远的股王"} -{"key": "BAC009S0905W0299", "wav": "./aishell/wav/test/S0905/BAC009S0905W0299.wav", "txt": "难不成真能第三期发育"} -{"key": "BAC009S0905W0300", "wav": "./aishell/wav/test/S0905/BAC009S0905W0300.wav", "txt": "读者若信经济学人"} -{"key": "BAC009S0905W0302", "wav": "./aishell/wav/test/S0905/BAC009S0905W0302.wav", "txt": "若定苹果第三期发育的立场已呼之欲出"} -{"key": "BAC009S0905W0303", "wav": "./aishell/wav/test/S0905/BAC009S0905W0303.wav", "txt": "手机辐射的比吸收率最高限值为二瓦特每千克"} -{"key": "BAC009S0905W0304", "wav": "./aishell/wav/test/S0905/BAC009S0905W0304.wav", "txt": "我国的标准和国际差不多"} -{"key": "BAC009S0905W0306", "wav": "./aishell/wav/test/S0905/BAC009S0905W0306.wav", "txt": "对生活中的电磁辐射进行了全面健康风险评估"} -{"key": "BAC009S0905W0307", "wav": "./aishell/wav/test/S0905/BAC009S0905W0307.wav", "txt": "不存在实际健康问题"} -{"key": "BAC009S0905W0308", "wav": "./aishell/wav/test/S0905/BAC009S0905W0308.wav", "txt": "辐射吸收率在国家的安全标准范围之内"} -{"key": "BAC009S0905W0309", "wav": "./aishell/wav/test/S0905/BAC009S0905W0309.wav", "txt": "电话手表的辐射主要来自天线"} -{"key": "BAC009S0905W0310", "wav": "./aishell/wav/test/S0905/BAC009S0905W0310.wav", "txt": "包括外置天线和内置天线"} -{"key": "BAC009S0905W0311", "wav": "./aishell/wav/test/S0905/BAC009S0905W0311.wav", "txt": "正规厂家生产的电话手表辐射一般符合国家标准"} -{"key": "BAC009S0905W0312", "wav": "./aishell/wav/test/S0905/BAC009S0905W0312.wav", "txt": "以小天才电话手表为例"} -{"key": "BAC009S0905W0313", "wav": "./aishell/wav/test/S0905/BAC009S0905W0313.wav", "txt": "根据权威机构检测报告显示"} -{"key": "BAC009S0905W0314", "wav": "./aishell/wav/test/S0905/BAC009S0905W0314.wav", "txt": "小天才电话手表辐射远小于国家标准二瓦特每千克"} -{"key": "BAC009S0905W0315", "wav": "./aishell/wav/test/S0905/BAC009S0905W0315.wav", "txt": "只要辐射值小于或等于国家标准值"} -{"key": "BAC009S0905W0316", "wav": "./aishell/wav/test/S0905/BAC009S0905W0316.wav", "txt": "就是符合国家标准的"} -{"key": "BAC009S0905W0317", "wav": "./aishell/wav/test/S0905/BAC009S0905W0317.wav", "txt": "小天才负责人介绍说"} -{"key": "BAC009S0905W0318", "wav": "./aishell/wav/test/S0905/BAC009S0905W0318.wav", "txt": "手机是直接贴着耳朵使用"} -{"key": "BAC009S0905W0319", "wav": "./aishell/wav/test/S0905/BAC009S0905W0319.wav", "txt": "而电话手表通话时离头部还有一百零一百一十五厘米的距离"} -{"key": "BAC009S0905W0320", "wav": "./aishell/wav/test/S0905/BAC009S0905W0320.wav", "txt": "可见电话手表的辐射比手机还小"} -{"key": "BAC009S0905W0321", "wav": "./aishell/wav/test/S0905/BAC009S0905W0321.wav", "txt": "不排除有一些杂牌的电话手表辐射会超标"} -{"key": "BAC009S0905W0322", "wav": "./aishell/wav/test/S0905/BAC009S0905W0322.wav", "txt": "建议家长通过正规渠道购买正规厂家生产的产品"} -{"key": "BAC009S0905W0323", "wav": "./aishell/wav/test/S0905/BAC009S0905W0323.wav", "txt": "电话手表应如何选购"} -{"key": "BAC009S0905W0324", "wav": "./aishell/wav/test/S0905/BAC009S0905W0324.wav", "txt": "关于儿童电话手表应该如何选购"} -{"key": "BAC009S0905W0325", "wav": "./aishell/wav/test/S0905/BAC009S0905W0325.wav", "txt": "也是众多家长特别想了解的"} -{"key": "BAC009S0905W0326", "wav": "./aishell/wav/test/S0905/BAC009S0905W0326.wav", "txt": "除了之前提到的关于辐射的测试报告外"} -{"key": "BAC009S0905W0327", "wav": "./aishell/wav/test/S0905/BAC009S0905W0327.wav", "txt": "专家提醒相关的产品认证也是消费者必须要关注的"} -{"key": "BAC009S0905W0328", "wav": "./aishell/wav/test/S0905/BAC009S0905W0328.wav", "txt": "所有在中国境内销售及使用的无线电组件产品"} -{"key": "BAC009S0905W0329", "wav": "./aishell/wav/test/S0905/BAC009S0905W0329.wav", "txt": "必须取得无线电型号的核准认证"} -{"key": "BAC009S0905W0330", "wav": 
"./aishell/wav/test/S0905/BAC009S0905W0330.wav", "txt": "没有该认证的产品属于违法产品"} -{"key": "BAC009S0905W0331", "wav": "./aishell/wav/test/S0905/BAC009S0905W0331.wav", "txt": "未获得进网许可证的"} -{"key": "BAC009S0905W0332", "wav": "./aishell/wav/test/S0905/BAC009S0905W0332.wav", "txt": "不得接入公用电信网使用和在国内销售"} -{"key": "BAC009S0905W0333", "wav": "./aishell/wav/test/S0905/BAC009S0905W0333.wav", "txt": "小天才电话手表等国内几个大品牌都有"} -{"key": "BAC009S0905W0334", "wav": "./aishell/wav/test/S0905/BAC009S0905W0334.wav", "txt": "这也是选购电话手表要注意关注的"} -{"key": "BAC009S0905W0335", "wav": "./aishell/wav/test/S0905/BAC009S0905W0335.wav", "txt": "很多家长都在给孩子购置各种学习用"} -{"key": "BAC009S0905W0336", "wav": "./aishell/wav/test/S0905/BAC009S0905W0336.wav", "txt": "网络安全漏洞挡道车联网阴霾笼罩搜狐科技"} -{"key": "BAC009S0905W0337", "wav": "./aishell/wav/test/S0905/BAC009S0905W0337.wav", "txt": "对频频的骚扰电话显得无可奈何"} -{"key": "BAC009S0905W0338", "wav": "./aishell/wav/test/S0905/BAC009S0905W0338.wav", "txt": "由郎平挂帅的中国女排在名古屋赛区"} -{"key": "BAC009S0905W0339", "wav": "./aishell/wav/test/S0905/BAC009S0905W0339.wav", "txt": "提升战绩为九胜一负反超至榜首位置"} -{"key": "BAC009S0905W0340", "wav": "./aishell/wav/test/S0905/BAC009S0905W0340.wav", "txt": "只要在明天的最后一战中赢下东道主日本"} -{"key": "BAC009S0905W0341", "wav": "./aishell/wav/test/S0905/BAC009S0905W0341.wav", "txt": "高清女排力擒俄罗斯夺冠占主动众将喜极而泣"} -{"key": "BAC009S0905W0342", "wav": "./aishell/wav/test/S0905/BAC009S0905W0342.wav", "txt": "能够赢得比赛真的很开心"} -{"key": "BAC009S0905W0343", "wav": "./aishell/wav/test/S0905/BAC009S0905W0343.wav", "txt": "对手给我们制造了非常多的困难"} -{"key": "BAC009S0905W0344", "wav": "./aishell/wav/test/S0905/BAC009S0905W0344.wav", "txt": "我和队友们一起团结努力克服了这些困难"} -{"key": "BAC009S0905W0345", "wav": "./aishell/wav/test/S0905/BAC009S0905W0345.wav", "txt": "在今天的比赛中曾春蕾首发出场"} -{"key": "BAC009S0905W0346", "wav": "./aishell/wav/test/S0905/BAC009S0905W0346.wav", "txt": "凭借十三分位列本队和扣球榜第二位"} -{"key": "BAC009S0905W0347", "wav": "./aishell/wav/test/S0905/BAC009S0905W0347.wav", "txt": "而主教练郎平则在全面性方面对大家做了更多要求"} -{"key": "BAC009S0905W0348", "wav": "./aishell/wav/test/S0905/BAC009S0905W0348.wav", "txt": "说到今天获胜的原因"} -{"key": "BAC009S0905W0349", "wav": "./aishell/wav/test/S0905/BAC009S0905W0349.wav", "txt": "作为队长出席新闻发布会的曾春蕾提到了凝聚力三个字"} -{"key": "BAC009S0905W0350", "wav": "./aishell/wav/test/S0905/BAC009S0905W0350.wav", "txt": "凝聚力一直都是中国女排的传统"} -{"key": "BAC009S0905W0351", "wav": "./aishell/wav/test/S0905/BAC009S0905W0351.wav", "txt": "它都是女排精神的一部分"} -{"key": "BAC009S0905W0352", "wav": "./aishell/wav/test/S0905/BAC009S0905W0352.wav", "txt": "当队伍遇到一些困难的时候"} -{"key": "BAC009S0905W0353", "wav": "./aishell/wav/test/S0905/BAC009S0905W0353.wav", "txt": "我们不需要教练要求就会团结在一起"} -{"key": "BAC009S0905W0354", "wav": "./aishell/wav/test/S0905/BAC009S0905W0354.wav", "txt": "像这种无形的向心力是在队伍中一直存在的"} -{"key": "BAC009S0905W0355", "wav": "./aishell/wav/test/S0905/BAC009S0905W0355.wav", "txt": "在今天的比赛中中国女排始终相互鼓励相互扶持"} -{"key": "BAC009S0905W0356", "wav": "./aishell/wav/test/S0905/BAC009S0905W0356.wav", "txt": "在几度遇险的情况下顽强咬住"} -{"key": "BAC009S0905W0357", "wav": "./aishell/wav/test/S0905/BAC009S0905W0357.wav", "txt": "无论年轻队员还是老队员都可能在比赛中出现起伏"} -{"key": "BAC009S0905W0358", "wav": "./aishell/wav/test/S0905/BAC009S0905W0358.wav", "txt": "我们要做的就是相互弥补"} -{"key": "BAC009S0905W0359", "wav": "./aishell/wav/test/S0905/BAC009S0905W0359.wav", "txt": "今天作为队长我更多是在精神层面上提醒大家"} -{"key": "BAC009S0905W0360", "wav": "./aishell/wav/test/S0905/BAC009S0905W0360.wav", "txt": "而在技术上年轻队员也弥补了我的不足"} -{"key": "BAC009S0905W0361", "wav": "./aishell/wav/test/S0905/BAC009S0905W0361.wav", "txt": "这是我们每个人都应该做的"} -{"key": 
"BAC009S0905W0362", "wav": "./aishell/wav/test/S0905/BAC009S0905W0362.wav", "txt": "如果能够战而胜之的话"} -{"key": "BAC009S0905W0363", "wav": "./aishell/wav/test/S0905/BAC009S0905W0363.wav", "txt": "明天还剩最后一场比赛"} -{"key": "BAC009S0905W0364", "wav": "./aishell/wav/test/S0905/BAC009S0905W0364.wav", "txt": "对我们来讲最重要的就是兢兢业业"} -{"key": "BAC009S0905W0365", "wav": "./aishell/wav/test/S0905/BAC009S0905W0365.wav", "txt": "大家回去之后将马上投入到对日本的准备中"} -{"key": "BAC009S0905W0366", "wav": "./aishell/wav/test/S0905/BAC009S0905W0366.wav", "txt": "明天比赛里我们会冷静下来落实到细节"} -{"key": "BAC009S0905W0367", "wav": "./aishell/wav/test/S0905/BAC009S0905W0367.wav", "txt": "一分分和对手拼到最后"} -{"key": "BAC009S0905W0368", "wav": "./aishell/wav/test/S0905/BAC009S0905W0368.wav", "txt": "北京时间明天晚间十八点"} -{"key": "BAC009S0905W0369", "wav": "./aishell/wav/test/S0905/BAC009S0905W0369.wav", "txt": "中国女排将应战日本队"} -{"key": "BAC009S0905W0370", "wav": "./aishell/wav/test/S0905/BAC009S0905W0370.wav", "txt": "搜狐体育郭健文"} -{"key": "BAC009S0905W0371", "wav": "./aishell/wav/test/S0905/BAC009S0905W0371.wav", "txt": "女排三零阿根廷朱婷复出扣杀状态神勇"} -{"key": "BAC009S0905W0372", "wav": "./aishell/wav/test/S0905/BAC009S0905W0372.wav", "txt": "搜狐体育郭健九月一日发自日本冈山今天下午"} -{"key": "BAC009S0905W0373", "wav": "./aishell/wav/test/S0905/BAC009S0905W0373.wav", "txt": "二零一五年第十二届女排世界杯单循环赛战至第八轮"} -{"key": "BAC009S0905W0374", "wav": "./aishell/wav/test/S0905/BAC009S0905W0374.wav", "txt": "从而将战绩提升为七胜一负积二十一分"} -{"key": "BAC009S0905W0375", "wav": "./aishell/wav/test/S0905/BAC009S0905W0375.wav", "txt": "本场比赛朱婷复出担任首发主攻并当选为当场最佳"} -{"key": "BAC009S0905W0376", "wav": "./aishell/wav/test/S0905/BAC009S0905W0376.wav", "txt": "虽然在比赛中没有得到出场机会"} -{"key": "BAC009S0905W0377", "wav": "./aishell/wav/test/S0905/BAC009S0905W0377.wav", "txt": "但曾春蕾赛后还是以队长身份出席了新闻发布会"} -{"key": "BAC009S0905W0378", "wav": "./aishell/wav/test/S0905/BAC009S0905W0378.wav", "txt": "很开心赢得今天的比赛"} -{"key": "BAC009S0905W0379", "wav": "./aishell/wav/test/S0905/BAC009S0905W0379.wav", "txt": "队伍凭借稳定的整体发挥获得了三零的胜利"} -{"key": "BAC009S0905W0380", "wav": "./aishell/wav/test/S0905/BAC009S0905W0380.wav", "txt": "曾春蕾表示阿根廷是一支拥有良好防守能力的球队"} -{"key": "BAC009S0905W0381", "wav": "./aishell/wav/test/S0905/BAC009S0905W0381.wav", "txt": "这一点也值得中国女排学习"} -{"key": "BAC009S0905W0382", "wav": "./aishell/wav/test/S0905/BAC009S0905W0382.wav", "txt": "中国女排队长坦言不仅是后面的几场比赛"} -{"key": "BAC009S0905W0383", "wav": "./aishell/wav/test/S0905/BAC009S0905W0383.wav", "txt": "每场较量对球队都很关键"} -{"key": "BAC009S0905W0384", "wav": "./aishell/wav/test/S0905/BAC009S0905W0384.wav", "txt": "我们球员要做的就是立足于自己"} -{"key": "BAC009S0905W0385", "wav": "./aishell/wav/test/S0905/BAC009S0905W0385.wav", "txt": "争取把自身水平发挥出来"} -{"key": "BAC009S0905W0386", "wav": "./aishell/wav/test/S0905/BAC009S0905W0386.wav", "txt": "至于其他球队的比赛结果"} -{"key": "BAC009S0905W0387", "wav": "./aishell/wav/test/S0905/BAC009S0905W0387.wav", "txt": "阿根廷队队长索萨认为"} -{"key": "BAC009S0905W0388", "wav": "./aishell/wav/test/S0905/BAC009S0905W0388.wav", "txt": "中国队的快速打法给自己的球队制造了很大的麻烦"} -{"key": "BAC009S0905W0389", "wav": "./aishell/wav/test/S0905/BAC009S0905W0389.wav", "txt": "像她们这样的亚洲对手速度很快"} -{"key": "BAC009S0905W0390", "wav": "./aishell/wav/test/S0905/BAC009S0905W0390.wav", "txt": "对我们来说比赛很困难"} -{"key": "BAC009S0905W0391", "wav": "./aishell/wav/test/S0905/BAC009S0905W0391.wav", "txt": "还有三场非常重要的比赛"} -{"key": "BAC009S0905W0392", "wav": "./aishell/wav/test/S0905/BAC009S0905W0392.wav", "txt": "希望得到想要的结果"} -{"key": "BAC009S0905W0393", "wav": "./aishell/wav/test/S0905/BAC009S0905W0393.wav", "txt": "对阵中国这样的球队是非常困难的"} -{"key": "BAC009S0905W0394", 
"wav": "./aishell/wav/test/S0905/BAC009S0905W0394.wav", "txt": "令我满意的是球队能够以一个积极的态度进行比赛"} -{"key": "BAC009S0905W0395", "wav": "./aishell/wav/test/S0905/BAC009S0905W0395.wav", "txt": "以前接触比较多的巴西队速度也很快"} -{"key": "BAC009S0905W0396", "wav": "./aishell/wav/test/S0905/BAC009S0905W0396.wav", "txt": "我们应该多和亚洲球队比赛来适应这样的打法"} -{"key": "BAC009S0905W0397", "wav": "./aishell/wav/test/S0905/BAC009S0905W0397.wav", "txt": "接下来中国女排将转战名古屋"} -{"key": "BAC009S0905W0398", "wav": "./aishell/wav/test/S0905/BAC009S0905W0398.wav", "txt": "从九月四日起迎接多米尼加俄罗斯和日本的挑战"} -{"key": "BAC009S0905W0399", "wav": "./aishell/wav/test/S0905/BAC009S0905W0399.wav", "txt": "搜狐体育郭健文"} -{"key": "BAC009S0905W0400", "wav": "./aishell/wav/test/S0905/BAC009S0905W0400.wav", "txt": "广州日报社记者许胚日前"} -{"key": "BAC009S0905W0401", "wav": "./aishell/wav/test/S0905/BAC009S0905W0401.wav", "txt": "英国人保拉拉德克利夫公开了自己的血液检测结果"} -{"key": "BAC009S0905W0402", "wav": "./aishell/wav/test/S0905/BAC009S0905W0402.wav", "txt": "以此证明自己并没有使用过违禁药物"} -{"key": "BAC009S0905W0403", "wav": "./aishell/wav/test/S0905/BAC009S0905W0403.wav", "txt": "在英国议会关于血液兴奋剂的听证会中"} -{"key": "BAC009S0905W0404", "wav": "./aishell/wav/test/S0905/BAC009S0905W0404.wav", "txt": "将出任玩具总动员四的导演"} -{"key": "BAC009S0905W0405", "wav": "./aishell/wav/test/S0905/BAC009S0905W0405.wav", "txt": "影片将在二零一七年登陆全国"} -{"key": "BAC009S0905W0406", "wav": "./aishell/wav/test/S0905/BAC009S0905W0406.wav", "txt": "来源时光网昨日"} -{"key": "BAC009S0905W0407", "wav": "./aishell/wav/test/S0905/BAC009S0905W0407.wav", "txt": "在英格兰多塞特群的波维顿坦克博物馆"} -{"key": "BAC009S0905W0408", "wav": "./aishell/wav/test/S0905/BAC009S0905W0408.wav", "txt": "至于有传拍台庆剧很容易获奖"} -{"key": "BAC009S0905W0410", "wav": "./aishell/wav/test/S0905/BAC009S0905W0410.wav", "txt": "她笑称我不想说我没有信心"} -{"key": "BAC009S0905W0411", "wav": "./aishell/wav/test/S0905/BAC009S0905W0411.wav", "txt": "很多演员都非常棒"} -{"key": "BAC009S0905W0412", "wav": "./aishell/wav/test/S0905/BAC009S0905W0412.wav", "txt": "搜狐娱乐讯北京时间七月二十日消息"} -{"key": "BAC009S0905W0413", "wav": "./aishell/wav/test/S0905/BAC009S0905W0413.wav", "txt": "据香港媒体报导"} -{"key": "BAC009S0905W0417", "wav": "./aishell/wav/test/S0905/BAC009S0905W0417.wav", "txt": "不到几个月的时间已爱得如此火热了"} -{"key": "BAC009S0905W0418", "wav": "./aishell/wav/test/S0905/BAC009S0905W0418.wav", "txt": "两人不想恋情变得高调"} -{"key": "BAC009S0905W0419", "wav": "./aishell/wav/test/S0905/BAC009S0905W0419.wav", "txt": "却多次被身边的人将他们的行踪暴露出来"} -{"key": "BAC009S0905W0420", "wav": "./aishell/wav/test/S0905/BAC009S0905W0420.wav", "txt": "两人被传媒追问恋情时都要求给予空间"} -{"key": "BAC009S0905W0421", "wav": "./aishell/wav/test/S0905/BAC009S0905W0421.wav", "txt": "看来他们需要身边的朋友保密他们的行踪"} -{"key": "BAC009S0905W0422", "wav": "./aishell/wav/test/S0905/BAC009S0905W0422.wav", "txt": "这样做反而更实际"} -{"key": "BAC009S0905W0423", "wav": "./aishell/wav/test/S0905/BAC009S0905W0423.wav", "txt": "搜狐娱乐讯北京时间六月三十日消息"} -{"key": "BAC009S0905W0424", "wav": "./aishell/wav/test/S0905/BAC009S0905W0424.wav", "txt": "据香港媒体报道"} -{"key": "BAC009S0905W0425", "wav": "./aishell/wav/test/S0905/BAC009S0905W0425.wav", "txt": "陈凯琳的心被郑嘉颖成功俘虏"} -{"key": "BAC009S0905W0426", "wav": "./aishell/wav/test/S0905/BAC009S0905W0426.wav", "txt": "更是郑嘉颖愿意公开承认的女友"} -{"key": "BAC009S0905W0427", "wav": "./aishell/wav/test/S0905/BAC009S0905W0427.wav", "txt": "不过二人因给陈嘉宝把生日合照在网上公开才泄露恋情"} -{"key": "BAC009S0905W0428", "wav": "./aishell/wav/test/S0905/BAC009S0905W0428.wav", "txt": "对此陈凯琳没有怪责陈嘉宝"} -{"key": "BAC009S0905W0429", "wav": "./aishell/wav/test/S0905/BAC009S0905W0429.wav", "txt": "觉得对方只是分享生日上的喜悦"} -{"key": "BAC009S0905W0430", "wav": 
"./aishell/wav/test/S0905/BAC009S0905W0430.wav", "txt": "陈凯琳之前说没交过男友"} -{"key": "BAC009S0905W0431", "wav": "./aishell/wav/test/S0905/BAC009S0905W0431.wav", "txt": "温州鹿城区宣传部官微做出回应"} -{"key": "BAC009S0905W0432", "wav": "./aishell/wav/test/S0905/BAC009S0905W0432.wav", "txt": "称涉事男子为某街道协管员"} -{"key": "BAC009S0905W0433", "wav": "./aishell/wav/test/S0905/BAC009S0905W0433.wav", "txt": "其发现清洁工保洁不到位"} -{"key": "BAC009S0905W0434", "wav": "./aishell/wav/test/S0905/BAC009S0905W0434.wav", "txt": "因此与清洁工引发争执"} -{"key": "BAC009S0905W0435", "wav": "./aishell/wav/test/S0905/BAC009S0905W0435.wav", "txt": "进一步导致肢体冲突"} -{"key": "BAC009S0905W0436", "wav": "./aishell/wav/test/S0905/BAC009S0905W0436.wav", "txt": "目前该协管已经停职"} -{"key": "BAC009S0905W0437", "wav": "./aishell/wav/test/S0905/BAC009S0905W0437.wav", "txt": "温州多地商户拉横幅求降租导购不少店亏本经营"} -{"key": "BAC009S0905W0438", "wav": "./aishell/wav/test/S0905/BAC009S0905W0438.wav", "txt": "东越花苑不少商铺都关门转租记者谢国林摄"} -{"key": "BAC009S0905W0439", "wav": "./aishell/wav/test/S0905/BAC009S0905W0439.wav", "txt": "温州大妈年逾半百冒充女儿成功骗婚多名小鲜肉"} -{"key": "BAC009S0905W0440", "wav": "./aishell/wav/test/S0905/BAC009S0905W0440.wav", "txt": "该女子已经行骗多地"} -{"key": "BAC009S0905W0441", "wav": "./aishell/wav/test/S0905/BAC009S0905W0441.wav", "txt": "她一直假冒的林某竟是她的女儿"} -{"key": "BAC009S0905W0442", "wav": "./aishell/wav/test/S0905/BAC009S0905W0442.wav", "txt": "而且她还是已婚身份"} -{"key": "BAC009S0905W0443", "wav": "./aishell/wav/test/S0905/BAC009S0905W0443.wav", "txt": "凭着远比真实年龄看起来要年经许多的容貌"} -{"key": "BAC009S0905W0444", "wav": "./aishell/wav/test/S0905/BAC009S0905W0444.wav", "txt": "雷某一直在河北邢台衡水等地干着游走骗婚的勾当"} -{"key": "BAC009S0905W0445", "wav": "./aishell/wav/test/S0905/BAC009S0905W0445.wav", "txt": "温州天价窝头事件背后顾客要持持赔三条中华"} -{"key": "BAC009S0905W0446", "wav": "./aishell/wav/test/S0905/BAC009S0905W0446.wav", "txt": "网络上一张永嘉桥头国际饭店的结帐单十分引人注目"} -{"key": "BAC009S0905W0447", "wav": "./aishell/wav/test/S0905/BAC009S0905W0447.wav", "txt": "菜单显示该饭店的荞麦窝窝头卖三八元一个"} -{"key": "BAC009S0905W0448", "wav": "./aishell/wav/test/S0905/BAC009S0905W0448.wav", "txt": "三零馀位食客吃了四五个窝窝头"} -{"key": "BAC009S0905W0449", "wav": "./aishell/wav/test/S0905/BAC009S0905W0449.wav", "txt": "发现事情并没有这么简单"} -{"key": "BAC009S0905W0450", "wav": "./aishell/wav/test/S0905/BAC009S0905W0450.wav", "txt": "温州女协管员侮辱环卫工行尸走肉已辞职"} -{"key": "BAC009S0905W0451", "wav": "./aishell/wav/test/S0905/BAC009S0905W0451.wav", "txt": "温州女协管员发伪辱性文字环卫节一群行尸走肉"} -{"key": "BAC009S0905W0452", "wav": "./aishell/wav/test/S0905/BAC009S0905W0452.wav", "txt": "温州女婴打疫苗后口吐白沫抽搐昏迷"} -{"key": "BAC009S0905W0453", "wav": "./aishell/wav/test/S0905/BAC009S0905W0453.wav", "txt": "温州网讯在温医大附属育英儿童医院的重监护室里"} -{"key": "BAC009S0905W0454", "wav": "./aishell/wav/test/S0905/BAC009S0905W0454.wav", "txt": "才七个月大的女童腾腾化名已昏迷了两天时间"} -{"key": "BAC009S0905W0455", "wav": "./aishell/wav/test/S0905/BAC009S0905W0455.wav", "txt": "随即被送到儿童医院进行抢救"} -{"key": "BAC009S0905W0456", "wav": "./aishell/wav/test/S0905/BAC009S0905W0456.wav", "txt": "区市省三级疾控部门专家已介入调查"} -{"key": "BAC009S0905W0457", "wav": "./aishell/wav/test/S0905/BAC009S0905W0457.wav", "txt": "温州家庭误食毒蘑菇后续小女儿已确诊脑死亡"} -{"key": "BAC009S0905W0458", "wav": "./aishell/wav/test/S0905/BAC009S0905W0458.wav", "txt": "温州少年峡谷失踪续二零万馀元赔偿款执行到位"} -{"key": "BAC009S0905W0459", "wav": "./aishell/wav/test/S0905/BAC009S0905W0459.wav", "txt": "金报讯记者蓝莹还记得小温吗"} -{"key": "BAC009S0905W0460", "wav": "./aishell/wav/test/S0905/BAC009S0905W0460.wav", "txt": "二零一三六二三"} -{"key": "BAC009S0905W0461", "wav": "./aishell/wav/test/S0905/BAC009S0905W0461.wav", "txt": "温州一四岁少年小温迷失莒溪大峡谷"} -{"key": 
"BAC009S0905W0462", "wav": "./aishell/wav/test/S0905/BAC009S0905W0462.wav", "txt": "浙江省史上规模最大的户外救援行动开始了"} -{"key": "BAC009S0905W0463", "wav": "./aishell/wav/test/S0905/BAC009S0905W0463.wav", "txt": "经过长达四个月的搜救"} -{"key": "BAC009S0905W0464", "wav": "./aishell/wav/test/S0905/BAC009S0905W0464.wav", "txt": "最终在峡谷上游的石头夹缝下"} -{"key": "BAC009S0905W0465", "wav": "./aishell/wav/test/S0905/BAC009S0905W0465.wav", "txt": "发现小温残缺的遗骸"} -{"key": "BAC009S0905W0466", "wav": "./aishell/wav/test/S0905/BAC009S0905W0466.wav", "txt": "温州市场现胶注虾业内不仅增重卖相更好"} -{"key": "BAC009S0905W0467", "wav": "./aishell/wav/test/S0905/BAC009S0905W0467.wav", "txt": "温州市民郑女士在农贸市场购买了三只大虾"} -{"key": "BAC009S0905W0468", "wav": "./aishell/wav/test/S0905/BAC009S0905W0468.wav", "txt": "回家后发现大虾体内居然被注射了不明胶状物"} -{"key": "BAC009S0905W0469", "wav": "./aishell/wav/test/S0905/BAC009S0905W0469.wav", "txt": "生活经验让郑女士起了疑心"} -{"key": "BAC009S0905W0470", "wav": "./aishell/wav/test/S0905/BAC009S0905W0470.wav", "txt": "她将几只虾的图片通过微博发布"} -{"key": "BAC009S0905W0471", "wav": "./aishell/wav/test/S0905/BAC009S0905W0471.wav", "txt": "迅速引起了网友以及当地监管部门的关注"} -{"key": "BAC009S0905W0472", "wav": "./aishell/wav/test/S0905/BAC009S0905W0472.wav", "txt": "温州市域铁路将成为全国第一条城市交通铁路"} -{"key": "BAC009S0905W0475", "wav": "./aishell/wav/test/S0905/BAC009S0905W0475.wav", "txt": "温州市治堵办的负责人表示"} -{"key": "BAC009S0905W0477", "wav": "./aishell/wav/test/S0905/BAC009S0905W0477.wav", "txt": "温州开水浇头服务员被批捕涉嫌故意伤害罪"} -{"key": "BAC009S0905W0478", "wav": "./aishell/wav/test/S0905/BAC009S0905W0478.wav", "txt": "京华时报讯昨天下午"} -{"key": "BAC009S0905W0479", "wav": "./aishell/wav/test/S0905/BAC009S0905W0479.wav", "txt": "浙江温州鹿城区检察院通报九月六日"} -{"key": "BAC009S0905W0480", "wav": "./aishell/wav/test/S0905/BAC009S0905W0480.wav", "txt": "开水淋顾客的火锅店服务员朱某被依法批准逮捕"} -{"key": "BAC009S0905W0481", "wav": "./aishell/wav/test/S0905/BAC009S0905W0481.wav", "txt": "温州惊现注胶虾续苍南再查六公斤注胶大虾"} -{"key": "BAC009S0905W0482", "wav": "./aishell/wav/test/S0905/BAC009S0905W0482.wav", "txt": "温州一菜场惊现注胶虾追踪"} -{"key": "BAC009S0905W0483", "wav": "./aishell/wav/test/S0905/BAC009S0905W0483.wav", "txt": "温州昆明出现注胶虾产地均指向广东湛江"} -{"key": "BAC009S0905W0484", "wav": "./aishell/wav/test/S0905/BAC009S0905W0484.wav", "txt": "浙江温州市一位市民一零零元买回三只斑节虾"} -{"key": "BAC009S0905W0485", "wav": "./aishell/wav/test/S0905/BAC009S0905W0485.wav", "txt": "在虾体内发现疑似胶状物质七月二十一日"} -{"key": "BAC009S0905W0486", "wav": "./aishell/wav/test/S0905/BAC009S0905W0486.wav", "txt": "云南昆明市同样发现类似注胶虾"} -{"key": "BAC009S0905W0487", "wav": "./aishell/wav/test/S0905/BAC009S0905W0487.wav", "txt": "国内两地出现注胶虾踪迹"} -{"key": "BAC009S0905W0488", "wav": "./aishell/wav/test/S0905/BAC009S0905W0488.wav", "txt": "且产地均指向广东省湛江市"} -{"key": "BAC009S0905W0489", "wav": "./aishell/wav/test/S0905/BAC009S0905W0489.wav", "txt": "温州景山花木市场发生大火火势已得到基本控制"} -{"key": "BAC009S0905W0490", "wav": "./aishell/wav/test/S0905/BAC009S0905W0490.wav", "txt": "温州服务员向顾客头上泼开水继而已被批捕"} -{"key": "BAC009S0905W0491", "wav": "./aishell/wav/test/S0905/BAC009S0905W0491.wav", "txt": "今天九月八日下午"} -{"key": "BAC009S0905W0492", "wav": "./aishell/wav/test/S0905/BAC009S0905W0492.wav", "txt": "因火锅加水问题与顾客发生争执"} -{"key": "BAC009S0905W0493", "wav": "./aishell/wav/test/S0905/BAC009S0905W0493.wav", "txt": "为泄愤将开水淋到顾客头上"} -{"key": "BAC009S0905W0494", "wav": "./aishell/wav/test/S0905/BAC009S0905W0494.wav", "txt": "并将其摁倒在地殴打"} -{"key": "BAC009S0905W0495", "wav": "./aishell/wav/test/S0905/BAC009S0905W0495.wav", "txt": "火锅店服务员朱某被温州市鹿城区检察院依法批准逮捕"} -{"key": "BAC009S0906W0121", "wav": "./aishell/wav/test/S0906/BAC009S0906W0121.wav", "txt": 
"双降会令市场看贬人民币的情绪持续"} -{"key": "BAC009S0906W0122", "wav": "./aishell/wav/test/S0906/BAC009S0906W0122.wav", "txt": "人民币未来贬值压力依然较大"} -{"key": "BAC009S0906W0123", "wav": "./aishell/wav/test/S0906/BAC009S0906W0123.wav", "txt": "预计短期央行仍可能会积极维稳"} -{"key": "BAC009S0906W0124", "wav": "./aishell/wav/test/S0906/BAC009S0906W0124.wav", "txt": "汇率较大概率维持双向波动"} -{"key": "BAC009S0906W0125", "wav": "./aishell/wav/test/S0906/BAC009S0906W0125.wav", "txt": "公积金松绑接棒释压房价下跌动力趋缓至搜狐财经"} -{"key": "BAC009S0906W0126", "wav": "./aishell/wav/test/S0906/BAC009S0906W0126.wav", "txt": "上海南昌等城市近期继续松绑了公积金贷款政策"} -{"key": "BAC009S0906W0127", "wav": "./aishell/wav/test/S0906/BAC009S0906W0127.wav", "txt": "而南昌除了放松首套房界定标准"} -{"key": "BAC009S0906W0128", "wav": "./aishell/wav/test/S0906/BAC009S0906W0128.wav", "txt": "还降低了首套房公积金首付比例"} -{"key": "BAC009S0906W0129", "wav": "./aishell/wav/test/S0906/BAC009S0906W0129.wav", "txt": "公积金贷款首付款比例不低于百分之七"} -{"key": "BAC009S0906W0130", "wav": "./aishell/wav/test/S0906/BAC009S0906W0130.wav", "txt": "上海易居研究院研究院严跃进认为"} -{"key": "BAC009S0906W0131", "wav": "./aishell/wav/test/S0906/BAC009S0906W0131.wav", "txt": "存销比已经见顶回落"} -{"key": "BAC009S0906W0132", "wav": "./aishell/wav/test/S0906/BAC009S0906W0132.wav", "txt": "房价下跌压力将趋于缓解"} -{"key": "BAC009S0906W0133", "wav": "./aishell/wav/test/S0906/BAC009S0906W0133.wav", "txt": "公积金大力度松绑相关商业银行信贷政策"} -{"key": "BAC009S0906W0134", "wav": "./aishell/wav/test/S0906/BAC009S0906W0134.wav", "txt": "各地对公积金贷款的松绑力度更大"} -{"key": "BAC009S0906W0135", "wav": "./aishell/wav/test/S0906/BAC009S0906W0135.wav", "txt": "江苏省对省级机关住房公积金政策做出了调整"} -{"key": "BAC009S0906W0136", "wav": "./aishell/wav/test/S0906/BAC009S0906W0136.wav", "txt": "昆明市住房公积金管理中心出台三项公积金新政"} -{"key": "BAC009S0906W0137", "wav": "./aishell/wav/test/S0906/BAC009S0906W0137.wav", "txt": "上海市公积金管理中心公布公积金新政"} -{"key": "BAC009S0906W0138", "wav": "./aishell/wav/test/S0906/BAC009S0906W0138.wav", "txt": "有一套住房并已结清公积金贷款"} -{"key": "BAC009S0906W0139", "wav": "./aishell/wav/test/S0906/BAC009S0906W0139.wav", "txt": "再次申请公积金贷款购房的"} -{"key": "BAC009S0906W0140", "wav": "./aishell/wav/test/S0906/BAC009S0906W0140.wav", "txt": "参照首套房贷款政策"} -{"key": "BAC009S0906W0141", "wav": "./aishell/wav/test/S0906/BAC009S0906W0141.wav", "txt": "中原地产市场研究部统计数据显示截至目前"} -{"key": "BAC009S0906W0142", "wav": "./aishell/wav/test/S0906/BAC009S0906W0142.wav", "txt": "二套执行认贷不认房"} -{"key": "BAC009S0906W0143", "wav": "./aishell/wav/test/S0906/BAC009S0906W0143.wav", "txt": "二套首付降比百分之七"} -{"key": "BAC009S0906W0144", "wav": "./aishell/wav/test/S0906/BAC009S0906W0144.wav", "txt": "南京武汉市放宽第二套房公积金贷款门槛"} -{"key": "BAC009S0906W0145", "wav": "./aishell/wav/test/S0906/BAC009S0906W0145.wav", "txt": "扬州杭州成都无锡等地"} -{"key": "BAC009S0906W0146", "wav": "./aishell/wav/test/S0906/BAC009S0906W0146.wav", "txt": "已有一套住房并结清贷款馀额的家庭"} -{"key": "BAC009S0906W0147", "wav": "./aishell/wav/test/S0906/BAC009S0906W0147.wav", "txt": "再购房执行首套房贷款政策"} -{"key": "BAC009S0906W0148", "wav": "./aishell/wav/test/S0906/BAC009S0906W0148.wav", "txt": "中原地产分析师张大伟认为"} -{"key": "BAC009S0906W0149", "wav": "./aishell/wav/test/S0906/BAC009S0906W0149.wav", "txt": "公积金是地方政府可以直接通过政策调整动用的资金"} -{"key": "BAC009S0906W0150", "wav": "./aishell/wav/test/S0906/BAC009S0906W0150.wav", "txt": "用公积金政策刺激市场是地方政府最习惯的举措"} -{"key": "BAC009S0906W0151", "wav": "./aishell/wav/test/S0906/BAC009S0906W0151.wav", "txt": "对购房者心理影响也非常大"} -{"key": "BAC009S0906W0152", "wav": "./aishell/wav/test/S0906/BAC009S0906W0152.wav", "txt": "由于公积金贷款利率相当于市场贷款利率的七折"} -{"key": "BAC009S0906W0153", "wav": "./aishell/wav/test/S0906/BAC009S0906W0153.wav", 
"txt": "对需求拉动作用比较大"} -{"key": "BAC009S0906W0154", "wav": "./aishell/wav/test/S0906/BAC009S0906W0154.wav", "txt": "上海作为一线城市代表"} -{"key": "BAC009S0906W0155", "wav": "./aishell/wav/test/S0906/BAC009S0906W0155.wav", "txt": "对房地产市场的心理影响比较大"} -{"key": "BAC009S0906W0156", "wav": "./aishell/wav/test/S0906/BAC009S0906W0156.wav", "txt": "预计还有其他城市将发布同类型松绑政策"} -{"key": "BAC009S0906W0157", "wav": "./aishell/wav/test/S0906/BAC009S0906W0157.wav", "txt": "房价下跌压力缓解各地救市政策不断"} -{"key": "BAC009S0906W0158", "wav": "./aishell/wav/test/S0906/BAC009S0906W0158.wav", "txt": "房企促销力度也在加大"} -{"key": "BAC009S0906W0159", "wav": "./aishell/wav/test/S0906/BAC009S0906W0159.wav", "txt": "各城市库存压力正在减小"} -{"key": "BAC009S0906W0160", "wav": "./aishell/wav/test/S0906/BAC009S0906W0160.wav", "txt": "房价下跌压力趋于缓解"} -{"key": "BAC009S0906W0161", "wav": "./aishell/wav/test/S0906/BAC009S0906W0161.wav", "txt": "上海易居房地产研究院数据显示"} -{"key": "BAC009S0906W0162", "wav": "./aishell/wav/test/S0906/BAC009S0906W0162.wav", "txt": "同比增长百分之七"} -{"key": "BAC009S0906W0163", "wav": "./aishell/wav/test/S0906/BAC009S0906W0163.wav", "txt": "这是今年五月份以来库存环比增幅最小的一次"} -{"key": "BAC009S0906W0164", "wav": "./aishell/wav/test/S0906/BAC009S0906W0164.wav", "txt": "环比增长百分之七"} -{"key": "BAC009S0906W0165", "wav": "./aishell/wav/test/S0906/BAC009S0906W0165.wav", "txt": "同比减小百分之七"} -{"key": "BAC009S0906W0166", "wav": "./aishell/wav/test/S0906/BAC009S0906W0166.wav", "txt": "五月份的供求关系是今年前五个月最均衡的一次"} -{"key": "BAC009S0906W0167", "wav": "./aishell/wav/test/S0906/BAC009S0906W0167.wav", "txt": "存销比见顶的态势基本确立"} -{"key": "BAC009S0906W0168", "wav": "./aishell/wav/test/S0906/BAC009S0906W0168.wav", "txt": "五个城市新建商品住宅存销比为七个月"} -{"key": "BAC009S0906W0169", "wav": "./aishell/wav/test/S0906/BAC009S0906W0169.wav", "txt": "该存销比数值为七个月"} -{"key": "BAC009S0906W0170", "wav": "./aishell/wav/test/S0906/BAC009S0906W0170.wav", "txt": "这直接利好去库存目标的实现"} -{"key": "BAC009S0906W0171", "wav": "./aishell/wav/test/S0906/BAC009S0906W0171.wav", "txt": "二到五个城市的总体水平看"} -{"key": "BAC009S0906W0172", "wav": "./aishell/wav/test/S0906/BAC009S0906W0172.wav", "txt": "库存去化周期依然偏大"} -{"key": "BAC009S0906W0173", "wav": "./aishell/wav/test/S0906/BAC009S0906W0173.wav", "txt": "说明各城市涨价的时机还不成熟"} -{"key": "BAC009S0906W0174", "wav": "./aishell/wav/test/S0906/BAC009S0906W0174.wav", "txt": "一至七月份大多数城市还是会采取积极降价的策略"} -{"key": "BAC009S0906W0175", "wav": "./aishell/wav/test/S0906/BAC009S0906W0175.wav", "txt": "房价未来可能会略微有下跌"} -{"key": "BAC009S0906W0176", "wav": "./aishell/wav/test/S0906/BAC009S0906W0176.wav", "txt": "一线城市由于需求面大"} -{"key": "BAC009S0906W0177", "wav": "./aishell/wav/test/S0906/BAC009S0906W0177.wav", "txt": "未来住宅价格会企稳回升"} -{"key": "BAC009S0906W0178", "wav": "./aishell/wav/test/S0906/BAC009S0906W0178.wav", "txt": "一些库存量较大的三四线城市"} -{"key": "BAC009S0906W0179", "wav": "./aishell/wav/test/S0906/BAC009S0906W0179.wav", "txt": "房价继续下行的可能性仍然比较大"} -{"key": "BAC009S0906W0180", "wav": "./aishell/wav/test/S0906/BAC009S0906W0180.wav", "txt": "同策咨询研究部总监张宏伟认为"} -{"key": "BAC009S0906W0181", "wav": "./aishell/wav/test/S0906/BAC009S0906W0181.wav", "txt": "月度市场成交量开始出现环比回升"} -{"key": "BAC009S0906W0182", "wav": "./aishell/wav/test/S0906/BAC009S0906W0182.wav", "txt": "市场去库存的速度在适度提高"} -{"key": "BAC009S0906W0183", "wav": "./aishell/wav/test/S0906/BAC009S0906W0183.wav", "txt": "从一线城市及存销比在七个月以下的城市来看"} -{"key": "BAC009S0906W0184", "wav": "./aishell/wav/test/S0906/BAC009S0906W0184.wav", "txt": "市场基本面有可能会率先好转"} -{"key": "BAC009S0906W0185", "wav": "./aishell/wav/test/S0906/BAC009S0906W0185.wav", "txt": "年底将出现翘尾行情"} -{"key": "BAC009S0906W0186", "wav": 
"./aishell/wav/test/S0906/BAC009S0906W0186.wav", "txt": "但年底出现翘尾行情并不代表楼市已经回暖"} -{"key": "BAC009S0906W0187", "wav": "./aishell/wav/test/S0906/BAC009S0906W0187.wav", "txt": "示范带动周边地区发展"} -{"key": "BAC009S0906W0188", "wav": "./aishell/wav/test/S0906/BAC009S0906W0188.wav", "txt": "并在农业走出去方面发挥重要作用"} -{"key": "BAC009S0906W0189", "wav": "./aishell/wav/test/S0906/BAC009S0906W0189.wav", "txt": "稳步发展区域"} -{"key": "BAC009S0906W0190", "wav": "./aishell/wav/test/S0906/BAC009S0906W0190.wav", "txt": "主要指草原生态经济区"} -{"key": "BAC009S0906W0191", "wav": "./aishell/wav/test/S0906/BAC009S0906W0191.wav", "txt": "包括北方干旱半干旱草原地区和青藏高原草原地区"} -{"key": "BAC009S0906W0192", "wav": "./aishell/wav/test/S0906/BAC009S0906W0192.wav", "txt": "加快该地区域现代农业建设"} -{"key": "BAC009S0906W0193", "wav": "./aishell/wav/test/S0906/BAC009S0906W0193.wav", "txt": "对于保障全国生态安全具有不可代替的战略作用"} -{"key": "BAC009S0906W0194", "wav": "./aishell/wav/test/S0906/BAC009S0906W0194.wav", "txt": "牢固树立生产生态有机结合生态优先的基本方针"} -{"key": "BAC009S0906W0195", "wav": "./aishell/wav/test/S0906/BAC009S0906W0195.wav", "txt": "加强草原生态环境保护和建设"} -{"key": "BAC009S0906W0196", "wav": "./aishell/wav/test/S0906/BAC009S0906W0196.wav", "txt": "稳步推进退牧还草和游牧民定居工程"} -{"key": "BAC009S0906W0197", "wav": "./aishell/wav/test/S0906/BAC009S0906W0197.wav", "txt": "加强以节水灌溉饲草地为重点的牧区水利建设"} -{"key": "BAC009S0906W0198", "wav": "./aishell/wav/test/S0906/BAC009S0906W0198.wav", "txt": "建立草原增加碳汇和生态补偿机制"} -{"key": "BAC009S0906W0199", "wav": "./aishell/wav/test/S0906/BAC009S0906W0199.wav", "txt": "转变畜牧业发展方式"} -{"key": "BAC009S0906W0200", "wav": "./aishell/wav/test/S0906/BAC009S0906W0200.wav", "txt": "优化生产布局和畜群结构"} -{"key": "BAC009S0906W0201", "wav": "./aishell/wav/test/S0906/BAC009S0906W0201.wav", "txt": "提高科学饲养和经营水平"} -{"key": "BAC009S0906W0202", "wav": "./aishell/wav/test/S0906/BAC009S0906W0202.wav", "txt": "加强农牧互补牧养结合"} -{"key": "BAC009S0906W0203", "wav": "./aishell/wav/test/S0906/BAC009S0906W0203.wav", "txt": "以最急需最关键最薄弱的环节和领域为重点"} -{"key": "BAC009S0906W0204", "wav": "./aishell/wav/test/S0906/BAC009S0906W0204.wav", "txt": "组织实施一批重大工程"} -{"key": "BAC009S0906W0205", "wav": "./aishell/wav/test/S0906/BAC009S0906W0205.wav", "txt": "全面分实现代农业发展的物质基础"} -{"key": "BAC009S0906W0206", "wav": "./aishell/wav/test/S0906/BAC009S0906W0206.wav", "txt": "一旱涝保收高标准农田建设工程"} -{"key": "BAC009S0906W0207", "wav": "./aishell/wav/test/S0906/BAC009S0906W0207.wav", "txt": "落实土壤改良地力培肥等措施"} -{"key": "BAC009S0906W0208", "wav": "./aishell/wav/test/S0906/BAC009S0906W0208.wav", "txt": "加快先进适用耕作技术推广应用"} -{"key": "BAC009S0906W0209", "wav": "./aishell/wav/test/S0906/BAC009S0906W0209.wav", "txt": "新建旱涝保收高标准农田四亿亩"} -{"key": "BAC009S0906W0210", "wav": "./aishell/wav/test/S0906/BAC009S0906W0210.wav", "txt": "新增千亿斤粮食生产能力建设工程"} -{"key": "BAC009S0906W0211", "wav": "./aishell/wav/test/S0906/BAC009S0906W0211.wav", "txt": "棉油糖生产基地建设工程"} -{"key": "BAC009S0906W0212", "wav": "./aishell/wav/test/S0906/BAC009S0906W0212.wav", "txt": "加强新疆黄淮海地区长江流域棉花生产基地建设"} -{"key": "BAC009S0906W0213", "wav": "./aishell/wav/test/S0906/BAC009S0906W0213.wav", "txt": "支持南方甘蔗和北方甜菜生产基地建设"} -{"key": "BAC009S0906W0214", "wav": "./aishell/wav/test/S0906/BAC009S0906W0214.wav", "txt": "着力改善田间基础设施良种科研繁育设施等生产条件"} -{"key": "BAC009S0906W0215", "wav": "./aishell/wav/test/S0906/BAC009S0906W0215.wav", "txt": "新一轮菜篮子建设工程"} -{"key": "BAC009S0906W0216", "wav": "./aishell/wav/test/S0906/BAC009S0906W0216.wav", "txt": "加强园艺作物标准园建设"} -{"key": "BAC009S0906W0217", "wav": "./aishell/wav/test/S0906/BAC009S0906W0217.wav", "txt": "引导建设优质农产品物流配送中心"} -{"key": "BAC009S0906W0218", "wav": 
"./aishell/wav/test/S0906/BAC009S0906W0218.wav", "txt": "发展农产品电子商务"} -{"key": "BAC009S0906W0219", "wav": "./aishell/wav/test/S0906/BAC009S0906W0219.wav", "txt": "健全农作物种质资源和畜禽遗传资源保存体系"} -{"key": "BAC009S0906W0220", "wav": "./aishell/wav/test/S0906/BAC009S0906W0220.wav", "txt": "建设动植物基因信息库"} -{"key": "BAC009S0906W0221", "wav": "./aishell/wav/test/S0906/BAC009S0906W0221.wav", "txt": "建立转基因生物安全保障体系"} -{"key": "BAC009S0906W0222", "wav": "./aishell/wav/test/S0906/BAC009S0906W0222.wav", "txt": "建设国家级农作物育制种基地"} -{"key": "BAC009S0906W0223", "wav": "./aishell/wav/test/S0906/BAC009S0906W0223.wav", "txt": "完善农作物品种试验和种子检测设施条件"} -{"key": "BAC009S0906W0224", "wav": "./aishell/wav/test/S0906/BAC009S0906W0224.wav", "txt": "建设水产遗传育种中心和原良种场"} -{"key": "BAC009S0906W0225", "wav": "./aishell/wav/test/S0906/BAC009S0906W0225.wav", "txt": "渔政渔港建设工程"} -{"key": "BAC009S0906W0226", "wav": "./aishell/wav/test/S0906/BAC009S0906W0226.wav", "txt": "建设一批大型渔政船"} -{"key": "BAC009S0906W0227", "wav": "./aishell/wav/test/S0906/BAC009S0906W0227.wav", "txt": "加强渔政基地和管理信息系统建设"} -{"key": "BAC009S0906W0228", "wav": "./aishell/wav/test/S0906/BAC009S0906W0228.wav", "txt": "动植物保护工程"} -{"key": "BAC009S0906W0229", "wav": "./aishell/wav/test/S0906/BAC009S0906W0229.wav", "txt": "健全六级动物疫病防控体系"} -{"key": "BAC009S0906W0230", "wav": "./aishell/wav/test/S0906/BAC009S0906W0230.wav", "txt": "健全兽药质量安全监管和动物防疫技术支撑体系"} -{"key": "BAC009S0906W0231", "wav": "./aishell/wav/test/S0906/BAC009S0906W0231.wav", "txt": "建设四级农作物病虫疫情监测防控体系"} -{"key": "BAC009S0906W0232", "wav": "./aishell/wav/test/S0906/BAC009S0906W0232.wav", "txt": "完善监测防控监管等设施设备"} -{"key": "BAC009S0906W0233", "wav": "./aishell/wav/test/S0906/BAC009S0906W0233.wav", "txt": "农产品质量安全检验检测能力建设工程"} -{"key": "BAC009S0906W0234", "wav": "./aishell/wav/test/S0906/BAC009S0906W0234.wav", "txt": "改扩建检验检测实验室"} -{"key": "BAC009S0906W0235", "wav": "./aishell/wav/test/S0906/BAC009S0906W0235.wav", "txt": "建设部级水产品质量安全研究中心"} -{"key": "BAC009S0906W0236", "wav": "./aishell/wav/test/S0906/BAC009S0906W0236.wav", "txt": "补充建设一批部级专业质检中心"} -{"key": "BAC009S0906W0237", "wav": "./aishell/wav/test/S0906/BAC009S0906W0237.wav", "txt": "构建全国农产品质量安全监测信息预警平台"} -{"key": "BAC009S0906W0238", "wav": "./aishell/wav/test/S0906/BAC009S0906W0238.wav", "txt": "乡镇农业公共服务能力建设工程"} -{"key": "BAC009S0906W0239", "wav": "./aishell/wav/test/S0906/BAC009S0906W0239.wav", "txt": "农业机械化推进工程"} -{"key": "BAC009S0906W0240", "wav": "./aishell/wav/test/S0906/BAC009S0906W0240.wav", "txt": "加大对秸秆机械化还田和收集打捆机具配套的支持力度"} -{"key": "BAC009S0906W0241", "wav": "./aishell/wav/test/S0906/BAC009S0906W0241.wav", "txt": "完善农业气象等方面的航空站和作业起降点基础设施"} -{"key": "BAC009S0906W0242", "wav": "./aishell/wav/test/S0906/BAC009S0906W0242.wav", "txt": "扶持农机服务组织发展"} -{"key": "BAC009S0906W0243", "wav": "./aishell/wav/test/S0906/BAC009S0906W0243.wav", "txt": "农业信息化建设工程"} -{"key": "BAC009S0906W0244", "wav": "./aishell/wav/test/S0906/BAC009S0906W0244.wav", "txt": "开展农业物物联网应用示范"} -{"key": "BAC009S0906W0245", "wav": "./aishell/wav/test/S0906/BAC009S0906W0245.wav", "txt": "加大天然草原退牧还草工程实施力度"} -{"key": "BAC009S0906W0246", "wav": "./aishell/wav/test/S0906/BAC009S0906W0246.wav", "txt": "加强京津风沙源区草地治理"} -{"key": "BAC009S0906W0247", "wav": "./aishell/wav/test/S0906/BAC009S0906W0247.wav", "txt": "继续加强三江源等地区草原生态建设"} -{"key": "BAC009S0906W0248", "wav": "./aishell/wav/test/S0906/BAC009S0906W0248.wav", "txt": "开展草原自然保护区建设和南方草地综合治理"} -{"key": "BAC009S0906W0249", "wav": "./aishell/wav/test/S0906/BAC009S0906W0249.wav", "txt": "加快实施游牧民定居工程"} -{"key": "BAC009S0906W0250", "wav": 
"./aishell/wav/test/S0906/BAC009S0906W0250.wav", "txt": "人工种草五亿亩"} -{"key": "BAC009S0906W0251", "wav": "./aishell/wav/test/S0906/BAC009S0906W0251.wav", "txt": "新型农村人才培养工程"} -{"key": "BAC009S0906W0252", "wav": "./aishell/wav/test/S0906/BAC009S0906W0252.wav", "txt": "必须从我国国情和农业发展实际出发"} -{"key": "BAC009S0906W0253", "wav": "./aishell/wav/test/S0906/BAC009S0906W0253.wav", "txt": "亦不可能跟自然规律抗衡"} -{"key": "BAC009S0906W0254", "wav": "./aishell/wav/test/S0906/BAC009S0906W0254.wav", "txt": "无止境地重复过去十年的惊人增长"} -{"key": "BAC009S0906W0256", "wav": "./aishell/wav/test/S0906/BAC009S0906W0256.wav", "txt": "企业于某个领域称王称霸的一刻"} -{"key": "BAC009S0906W0257", "wav": "./aishell/wav/test/S0906/BAC009S0906W0257.wav", "txt": "往往就是公司陷入灾难的开始"} -{"key": "BAC009S0906W0258", "wav": "./aishell/wav/test/S0906/BAC009S0906W0258.wav", "txt": "市场给予它的估值却异常克制"} -{"key": "BAC009S0906W0259", "wav": "./aishell/wav/test/S0906/BAC009S0906W0259.wav", "txt": "以二零一五年度每股八点五美元的盈利预测为准"} -{"key": "BAC009S0906W0260", "wav": "./aishell/wav/test/S0906/BAC009S0906W0260.wav", "txt": "苹果市盈率仅一五倍"} -{"key": "BAC009S0906W0261", "wav": "./aishell/wav/test/S0906/BAC009S0906W0261.wav", "txt": "莫说跟其他创意十足的科技股相提并论"} -{"key": "BAC009S0906W0262", "wav": "./aishell/wav/test/S0906/BAC009S0906W0262.wav", "txt": "比之大市亦有所不如"} -{"key": "BAC009S0906W0263", "wav": "./aishell/wav/test/S0906/BAC009S0906W0263.wav", "txt": "苹果早晚将步之前过气股王的后尘"} -{"key": "BAC009S0906W0265", "wav": "./aishell/wav/test/S0906/BAC009S0906W0265.wav", "txt": "不同意的地方多于同意"} -{"key": "BAC009S0906W0266", "wav": "./aishell/wav/test/S0906/BAC009S0906W0266.wav", "txt": "从随身听到智能电话"} -{"key": "BAC009S0906W0267", "wav": "./aishell/wav/test/S0906/BAC009S0906W0267.wav", "txt": "苹果的拿手好戏是把市场上原霸主拉下马"} -{"key": "BAC009S0906W0268", "wav": "./aishell/wav/test/S0906/BAC009S0906W0268.wav", "txt": "确认消费者喜新厌旧后"} -{"key": "BAC009S0906W0269", "wav": "./aishell/wav/test/S0906/BAC009S0906W0269.wav", "txt": "快速建立以苹果产品服务为核心的生态系统"} -{"key": "BAC009S0906W0270", "wav": "./aishell/wav/test/S0906/BAC009S0906W0270.wav", "txt": "透过不断的更新换代"} -{"key": "BAC009S0906W0271", "wav": "./aishell/wav/test/S0906/BAC009S0906W0271.wav", "txt": "索尼黑莓以至诺基亚"} -{"key": "BAC009S0906W0272", "wav": "./aishell/wav/test/S0906/BAC009S0906W0272.wav", "txt": "在最风光的时候看不见来自颠复者的威胁"} -{"key": "BAC009S0906W0273", "wav": "./aishell/wav/test/S0906/BAC009S0906W0273.wav", "txt": "从不可一世到遭对手边缘化"} -{"key": "BAC009S0906W0274", "wav": "./aishell/wav/test/S0906/BAC009S0906W0274.wav", "txt": "消费者贪新忘旧虽亦可能适用于苹果"} -{"key": "BAC009S0906W0277", "wav": "./aishell/wav/test/S0906/BAC009S0906W0277.wav", "txt": "对投资者大有参考价值"} -{"key": "BAC009S0906W0278", "wav": "./aishell/wav/test/S0906/BAC009S0906W0278.wav", "txt": "一九八三至二零零五年"} -{"key": "BAC009S0906W0279", "wav": "./aishell/wav/test/S0906/BAC009S0906W0279.wav", "txt": "标普五百指数市值冠军宝座"} -{"key": "BAC009S0906W0281", "wav": "./aishell/wav/test/S0906/BAC009S0906W0281.wav", "txt": "四大天王平均累计回报高达一千二百分之八十二"} -{"key": "BAC009S0906W0282", "wav": "./aishell/wav/test/S0906/BAC009S0906W0282.wav", "txt": "四倍于标指同期的三十百分之二"} -{"key": "BAC009S0906W0283", "wav": "./aishell/wav/test/S0906/BAC009S0906W0283.wav", "txt": "四大天王平均回报仅一百分之二十五"} -{"key": "BAC009S0906W0284", "wav": "./aishell/wav/test/S0906/BAC009S0906W0284.wav", "txt": "明显跑输标普五百指数的一百分之九十九"} -{"key": "BAC009S0906W0286", "wav": "./aishell/wav/test/S0906/BAC009S0906W0286.wav", "txt": "销售也总有饱和的一天"} -{"key": "BAC009S0906W0287", "wav": "./aishell/wav/test/S0906/BAC009S0906W0287.wav", "txt": "苹果能否第三期发育"} -{"key": "BAC009S0906W0290", "wav": "./aishell/wav/test/S0906/BAC009S0906W0290.wav", "txt": 
"从市场始终不愿给予苹果较高估值可见"} -{"key": "BAC009S0906W0291", "wav": "./aishell/wav/test/S0906/BAC009S0906W0291.wav", "txt": "管理层眼光得再高一点"} -{"key": "BAC009S0906W0292", "wav": "./aishell/wav/test/S0906/BAC009S0906W0292.wav", "txt": "苹果有意进军汽车产业"} -{"key": "BAC009S0906W0294", "wav": "./aishell/wav/test/S0906/BAC009S0906W0294.wav", "txt": "老毕对此说甚有保留"} -{"key": "BAC009S0906W0295", "wav": "./aishell/wav/test/S0906/BAC009S0906W0295.wav", "txt": "而库克若真有此意"} -{"key": "BAC009S0906W0297", "wav": "./aishell/wav/test/S0906/BAC009S0906W0297.wav", "txt": "汽车是苹果下一个颠复目标"} -{"key": "BAC009S0906W0298", "wav": "./aishell/wav/test/S0906/BAC009S0906W0298.wav", "txt": "马斯克乃商界新一代万人迷"} -{"key": "BAC009S0906W0299", "wav": "./aishell/wav/test/S0906/BAC009S0906W0299.wav", "txt": "人气不逊乔布斯在世之时"} -{"key": "BAC009S0906W0300", "wav": "./aishell/wav/test/S0906/BAC009S0906W0300.wav", "txt": "三藩市纪事报指此君曾与库克碰头"} -{"key": "BAC009S0906W0302", "wav": "./aishell/wav/test/S0906/BAC009S0906W0302.wav", "txt": "越多人讲往往越难成事"} -{"key": "BAC009S0906W0303", "wav": "./aishell/wav/test/S0906/BAC009S0906W0303.wav", "txt": "有黑客在网络上兜售车主信息"} -{"key": "BAC009S0906W0304", "wav": "./aishell/wav/test/S0906/BAC009S0906W0304.wav", "txt": "雪铁龙车主信息泄露规模或超十万条"} -{"key": "BAC009S0906W0305", "wav": "./aishell/wav/test/S0906/BAC009S0906W0305.wav", "txt": "该平台上显示的漏洞状态是"} -{"key": "BAC009S0906W0306", "wav": "./aishell/wav/test/S0906/BAC009S0906W0306.wav", "txt": "漏洞已通知厂商但厂商忽略该漏洞"} -{"key": "BAC009S0906W0307", "wav": "./aishell/wav/test/S0906/BAC009S0906W0307.wav", "txt": "该公司内部相关人士回应称"} -{"key": "BAC009S0906W0308", "wav": "./aishell/wav/test/S0906/BAC009S0906W0308.wav", "txt": "东风雪铁龙的客户数据存放在专业数据库中"} -{"key": "BAC009S0906W0309", "wav": "./aishell/wav/test/S0906/BAC009S0906W0309.wav", "txt": "对数据库设有监控及记录"} -{"key": "BAC009S0906W0310", "wav": "./aishell/wav/test/S0906/BAC009S0906W0310.wav", "txt": "对用户信息做足了保密工作"} -{"key": "BAC009S0906W0311", "wav": "./aishell/wav/test/S0906/BAC009S0906W0311.wav", "txt": "有业内人士分析指出"} -{"key": "BAC009S0906W0312", "wav": "./aishell/wav/test/S0906/BAC009S0906W0312.wav", "txt": "车企在信息安全方面的投入不足已经越来越成为其软肋"} -{"key": "BAC009S0906W0313", "wav": "./aishell/wav/test/S0906/BAC009S0906W0313.wav", "txt": "其中近一半的漏洞都可能造成网站用户的信息泄露"} -{"key": "BAC009S0906W0314", "wav": "./aishell/wav/test/S0906/BAC009S0906W0314.wav", "txt": "背后涉及到百万车主的信息安全"} -{"key": "BAC009S0906W0315", "wav": "./aishell/wav/test/S0906/BAC009S0906W0315.wav", "txt": "而绝大多数漏洞状态都是未联系到厂商或厂商忽略"} -{"key": "BAC009S0906W0316", "wav": "./aishell/wav/test/S0906/BAC009S0906W0316.wav", "txt": "汽车这个行业缺乏成熟的网络安全管理体系"} -{"key": "BAC009S0906W0317", "wav": "./aishell/wav/test/S0906/BAC009S0906W0317.wav", "txt": "网络运营人员的安全素质有待提高"} -{"key": "BAC009S0906W0318", "wav": "./aishell/wav/test/S0906/BAC009S0906W0318.wav", "txt": "很多车企网站是外包给第三方公司开发的"} -{"key": "BAC009S0906W0319", "wav": "./aishell/wav/test/S0906/BAC009S0906W0319.wav", "txt": "没有交付信息安全公司进行评估"} -{"key": "BAC009S0906W0320", "wav": "./aishell/wav/test/S0906/BAC009S0906W0320.wav", "txt": "因此更有可能留下信息安全风险"} -{"key": "BAC009S0906W0321", "wav": "./aishell/wav/test/S0906/BAC009S0906W0321.wav", "txt": "用户隐私遭泄露的问题日益突出"} -{"key": "BAC009S0906W0322", "wav": "./aishell/wav/test/S0906/BAC009S0906W0322.wav", "txt": "如果许多传统制造行业中的企业一样"} -{"key": "BAC009S0906W0323", "wav": "./aishell/wav/test/S0906/BAC009S0906W0323.wav", "txt": "车企诚待转化互联网思维以及加强互联网安全管控等"} -{"key": "BAC009S0906W0324", "wav": "./aishell/wav/test/S0906/BAC009S0906W0324.wav", "txt": "要跟上互联网发展的步伐不太容易"} -{"key": "BAC009S0906W0325", "wav": "./aishell/wav/test/S0906/BAC009S0906W0325.wav", "txt": "随着互联网快速发展"} 
-{"key": "BAC009S0906W0326", "wav": "./aishell/wav/test/S0906/BAC009S0906W0326.wav", "txt": "这类专业人才往往集中在互联网企业"} -{"key": "BAC009S0906W0327", "wav": "./aishell/wav/test/S0906/BAC009S0906W0327.wav", "txt": "而车企相对缺乏这类人才"} -{"key": "BAC009S0906W0328", "wav": "./aishell/wav/test/S0906/BAC009S0906W0328.wav", "txt": "网络安全管理体系方面投资非常大"} -{"key": "BAC009S0906W0329", "wav": "./aishell/wav/test/S0906/BAC009S0906W0329.wav", "txt": "涉及人才软件硬件服务以及管理等方面"} -{"key": "BAC009S0906W0330", "wav": "./aishell/wav/test/S0906/BAC009S0906W0330.wav", "txt": "互联网企业也是一步步投入不断完善"} -{"key": "BAC009S0906W0331", "wav": "./aishell/wav/test/S0906/BAC009S0906W0331.wav", "txt": "不同行业在网络安全方面投入比例不一"} -{"key": "BAC009S0906W0332", "wav": "./aishell/wav/test/S0906/BAC009S0906W0332.wav", "txt": "预计汽车行业在网络安全方面投入往往较少"} -{"key": "BAC009S0906W0333", "wav": "./aishell/wav/test/S0906/BAC009S0906W0333.wav", "txt": "一些车企为了节约成本"} -{"key": "BAC009S0906W0334", "wav": "./aishell/wav/test/S0906/BAC009S0906W0334.wav", "txt": "往往将数据库服务器都放在公网上"} -{"key": "BAC009S0906W0335", "wav": "./aishell/wav/test/S0906/BAC009S0906W0335.wav", "txt": "这样很容易被黑客攻破"} -{"key": "BAC009S0906W0336", "wav": "./aishell/wav/test/S0906/BAC009S0906W0336.wav", "txt": "一旦发现系统有漏洞"} -{"key": "BAC009S0906W0337", "wav": "./aishell/wav/test/S0906/BAC009S0906W0337.wav", "txt": "将及时采取主动或被动措施"} -{"key": "BAC009S0906W0338", "wav": "./aishell/wav/test/S0906/BAC009S0906W0338.wav", "txt": "拉德克利夫认为自己被暗指有问题"} -{"key": "BAC009S0906W0339", "wav": "./aishell/wav/test/S0906/BAC009S0906W0339.wav", "txt": "但检测结果的异常并不能就证实运动员使用违禁药物"} -{"key": "BAC009S0906W0340", "wav": "./aishell/wav/test/S0906/BAC009S0906W0340.wav", "txt": "因为导致这项数值波动的原因有很多"} -{"key": "BAC009S0906W0341", "wav": "./aishell/wav/test/S0906/BAC009S0906W0341.wav", "txt": "包括高原训练或身体过度消耗后立刻接受检测"} -{"key": "BAC009S0906W0342", "wav": "./aishell/wav/test/S0906/BAC009S0906W0342.wav", "txt": "因此我请求世界反兴奋剂机构回顾前后所有的数据"} -{"key": "BAC009S0906W0343", "wav": "./aishell/wav/test/S0906/BAC009S0906W0343.wav", "txt": "盘点昆仑决二零一五五大飙血之战搜狐体育"} -{"key": "BAC009S0906W0344", "wav": "./aishell/wav/test/S0906/BAC009S0906W0344.wav", "txt": "无疑是擂台上最能引爆肾上腺素的震撼一幕"} -{"key": "BAC009S0906W0345", "wav": "./aishell/wav/test/S0906/BAC009S0906W0345.wav", "txt": "令拳迷记忆犹新的飙血之战不计其数"} -{"key": "BAC009S0906W0346", "wav": "./aishell/wav/test/S0906/BAC009S0906W0346.wav", "txt": "而这些战斗也成为了圈内久聊不厌的经典谈资"} -{"key": "BAC009S0906W0347", "wav": "./aishell/wav/test/S0906/BAC009S0906W0347.wav", "txt": "在数百场真枪实弹的巅峰对决中"} -{"key": "BAC009S0906W0348", "wav": "./aishell/wav/test/S0906/BAC009S0906W0348.wav", "txt": "不乏诸多脍炙人口的飙血之战"} -{"key": "BAC009S0906W0349", "wav": "./aishell/wav/test/S0906/BAC009S0906W0349.wav", "txt": "十月二十八日与三十一日"} -{"key": "BAC009S0906W0350", "wav": "./aishell/wav/test/S0906/BAC009S0906W0350.wav", "txt": "下面小编将盘点本年度迄今为止昆仑决五大惨烈血战"} -{"key": "BAC009S0906W0353", "wav": "./aishell/wav/test/S0906/BAC009S0906W0353.wav", "txt": "北京时间七月二十八日晚"} -{"key": "BAC009S0906W0354", "wav": "./aishell/wav/test/S0906/BAC009S0906W0354.wav", "txt": "这场对决的惨烈程度超出了所有人的想象"} -{"key": "BAC009S0906W0355", "wav": "./aishell/wav/test/S0906/BAC009S0906W0355.wav", "txt": "比赛开始后仅仅十馀秒"} -{"key": "BAC009S0906W0356", "wav": "./aishell/wav/test/S0906/BAC009S0906W0356.wav", "txt": "播求的头部便被对方的肘击割破"} -{"key": "BAC009S0906W0357", "wav": "./aishell/wav/test/S0906/BAC009S0906W0357.wav", "txt": "打出一道深深的血口"} -{"key": "BAC009S0906W0358", "wav": "./aishell/wav/test/S0906/BAC009S0906W0358.wav", "txt": "伴随着双方激战的火爆升级"} -{"key": "BAC009S0906W0359", "wav": "./aishell/wav/test/S0906/BAC009S0906W0359.wav", "txt": 
"播求头部的伤口进一步扩大"} -{"key": "BAC009S0906W0360", "wav": "./aishell/wav/test/S0906/BAC009S0906W0360.wav", "txt": "几乎全部被鲜血复盖的半边身体令人触目惊心"} -{"key": "BAC009S0906W0361", "wav": "./aishell/wav/test/S0906/BAC009S0906W0361.wav", "txt": "双方的肘击对轰场面接连上演"} -{"key": "BAC009S0906W0362", "wav": "./aishell/wav/test/S0906/BAC009S0906W0362.wav", "txt": "哈亚的肘击刁钻狠辣"} -{"key": "BAC009S0906W0363", "wav": "./aishell/wav/test/S0906/BAC009S0906W0363.wav", "txt": "直肘反肘交替使用"} -{"key": "BAC009S0906W0364", "wav": "./aishell/wav/test/S0906/BAC009S0906W0364.wav", "txt": "令人防不胜防播求的肘击则更具王者霸气"} -{"key": "BAC009S0906W0365", "wav": "./aishell/wav/test/S0906/BAC009S0906W0365.wav", "txt": "以大刀阔斧的摆肘砸肘为主"} -{"key": "BAC009S0906W0366", "wav": "./aishell/wav/test/S0906/BAC009S0906W0366.wav", "txt": "凶悍直接大开大合"} -{"key": "BAC009S0906W0367", "wav": "./aishell/wav/test/S0906/BAC009S0906W0367.wav", "txt": "加之其半身浴血的黝黑健美体魄"} -{"key": "BAC009S0906W0368", "wav": "./aishell/wav/test/S0906/BAC009S0906W0368.wav", "txt": "颇似从地狱中走出的修罗帝王"} -{"key": "BAC009S0906W0369", "wav": "./aishell/wav/test/S0906/BAC009S0906W0369.wav", "txt": "播求久负盛名的扫腿与冲膝技术开始发威"} -{"key": "BAC009S0906W0370", "wav": "./aishell/wav/test/S0906/BAC009S0906W0370.wav", "txt": "令对手不再敢贸然近身"} -{"key": "BAC009S0906W0371", "wav": "./aishell/wav/test/S0906/BAC009S0906W0371.wav", "txt": "不得不暂停比赛进行处理"} -{"key": "BAC009S0906W0372", "wav": "./aishell/wav/test/S0906/BAC009S0906W0372.wav", "txt": "双方均向对手发起了不遗馀力的猛攻"} -{"key": "BAC009S0906W0373", "wav": "./aishell/wav/test/S0906/BAC009S0906W0373.wav", "txt": "这场史诗级的双王血战在两大强者最后的对决中"} -{"key": "BAC009S0906W0374", "wav": "./aishell/wav/test/S0906/BAC009S0906W0374.wav", "txt": "迎来了结束铃声的敲响"} -{"key": "BAC009S0906W0375", "wav": "./aishell/wav/test/S0906/BAC009S0906W0375.wav", "txt": "哈立以争议性的点数优势宣告获胜"} -{"key": "BAC009S0906W0376", "wav": "./aishell/wav/test/S0906/BAC009S0906W0376.wav", "txt": "浑身是血的播求由于头部三处动脉破裂失血过多"} -{"key": "BAC009S0906W0377", "wav": "./aishell/wav/test/S0906/BAC009S0906W0377.wav", "txt": "被立刻送往医院接受紧急输血治疗"} -{"key": "BAC009S0906W0378", "wav": "./aishell/wav/test/S0906/BAC009S0906W0378.wav", "txt": "也被送往医院进行抢救"} -{"key": "BAC009S0906W0379", "wav": "./aishell/wav/test/S0906/BAC009S0906W0379.wav", "txt": "对于任何一个目睹了整场比赛过程的人来讲"} -{"key": "BAC009S0906W0380", "wav": "./aishell/wav/test/S0906/BAC009S0906W0380.wav", "txt": "这场史诗级惊天血战中没有失败者"} -{"key": "BAC009S0906W0383", "wav": "./aishell/wav/test/S0906/BAC009S0906W0383.wav", "txt": "二零一五年六月七日"} -{"key": "BAC009S0906W0384", "wav": "./aishell/wav/test/S0906/BAC009S0906W0384.wav", "txt": "昆仑决雄霸山城在重庆江南体育馆重装上阵"} -{"key": "BAC009S0906W0385", "wav": "./aishell/wav/test/S0906/BAC009S0906W0385.wav", "txt": "面对身高臂展明显占优的对手"} -{"key": "BAC009S0906W0386", "wav": "./aishell/wav/test/S0906/BAC009S0906W0386.wav", "txt": "雅桑克莱并没有采取矮个子拳手惯用的闪击式打法"} -{"key": "BAC009S0906W0387", "wav": "./aishell/wav/test/S0906/BAC009S0906W0387.wav", "txt": "而是王气十足地向对手进行正面逼近"} -{"key": "BAC009S0906W0388", "wav": "./aishell/wav/test/S0906/BAC009S0906W0388.wav", "txt": "雅桑克莱的优势继续在扩大"} -{"key": "BAC009S0906W0389", "wav": "./aishell/wav/test/S0906/BAC009S0906W0389.wav", "txt": "标志性的扫腿重击力道沉猛的后手重拳纷纷呼啸而出"} -{"key": "BAC009S0906W0390", "wav": "./aishell/wav/test/S0906/BAC009S0906W0390.wav", "txt": "在其左扫腿无情踢击之下"} -{"key": "BAC009S0906W0391", "wav": "./aishell/wav/test/S0906/BAC009S0906W0391.wav", "txt": "祖耶夫的右肋很快便被踢出大片鲜红的淤血斑痕"} -{"key": "BAC009S0906W0392", "wav": "./aishell/wav/test/S0906/BAC009S0906W0392.wav", "txt": "经验丰富的雅桑克莱开始刻意放缓节奏"} -{"key": "BAC009S0906W0393", "wav": "./aishell/wav/test/S0906/BAC009S0906W0393.wav", "txt": 
"对已是强弩之末的对手进行消耗"} -{"key": "BAC009S0906W0394", "wav": "./aishell/wav/test/S0906/BAC009S0906W0394.wav", "txt": "此时的祖耶夫右眼已经肿胀得完全封闭"} -{"key": "BAC009S0906W0395", "wav": "./aishell/wav/test/S0906/BAC009S0906W0395.wav", "txt": "只能依靠顽强的意志进行支撑"} -{"key": "BAC009S0906W0396", "wav": "./aishell/wav/test/S0906/BAC009S0906W0396.wav", "txt": "雅桑克莱的组合拳将祖耶夫重重击倒然而"} -{"key": "BAC009S0906W0397", "wav": "./aishell/wav/test/S0906/BAC009S0906W0397.wav", "txt": "意志力惊人的白俄罗斯特种兵被没有就此放弃"} -{"key": "BAC009S0906W0398", "wav": "./aishell/wav/test/S0906/BAC009S0906W0398.wav", "txt": "顽强的意志力博得了对手以及全场观众致意"} -{"key": "BAC009S0906W0399", "wav": "./aishell/wav/test/S0906/BAC009S0906W0399.wav", "txt": "比赛在两名王者最后的对决中"} -{"key": "BAC009S0906W0400", "wav": "./aishell/wav/test/S0906/BAC009S0906W0400.wav", "txt": "比赛结果已经无需裁判的裁定"} -{"key": "BAC009S0906W0401", "wav": "./aishell/wav/test/S0906/BAC009S0906W0401.wav", "txt": "但看两人比赛后的面部状况"} -{"key": "BAC009S0906W0402", "wav": "./aishell/wav/test/S0906/BAC009S0906W0402.wav", "txt": "夺得了自己在昆仑拳坛上的第二场重要胜利"} -{"key": "BAC009S0906W0404", "wav": "./aishell/wav/test/S0906/BAC009S0906W0404.wav", "txt": "布拉德皮特新片狂怒接受了宣传媒体拍照"} -{"key": "BAC009S0906W0405", "wav": "./aishell/wav/test/S0906/BAC009S0906W0405.wav", "txt": "我们可以清晰看到皮特的结婚戒指"} -{"key": "BAC009S0906W0406", "wav": "./aishell/wav/test/S0906/BAC009S0906W0406.wav", "txt": "今天确定了上映日期二零一七年四月十七日"} -{"key": "BAC009S0906W0407", "wav": "./aishell/wav/test/S0906/BAC009S0906W0407.wav", "txt": "这是后年春季档的一个黄金上映期"} -{"key": "BAC009S0906W0408", "wav": "./aishell/wav/test/S0906/BAC009S0906W0408.wav", "txt": "看来郑嘉颖是她的初恋"} -{"key": "BAC009S0906W0409", "wav": "./aishell/wav/test/S0906/BAC009S0906W0409.wav", "txt": "问到他们在法国拍戏定情的细节"} -{"key": "BAC009S0906W0410", "wav": "./aishell/wav/test/S0906/BAC009S0906W0410.wav", "txt": "陈凯琳也拒绝回答"} -{"key": "BAC009S0906W0411", "wav": "./aishell/wav/test/S0906/BAC009S0906W0411.wav", "txt": "但就希望外界多给予他们发展空间"} -{"key": "BAC009S0906W0412", "wav": "./aishell/wav/test/S0906/BAC009S0906W0412.wav", "txt": "搜狐娱乐讯据香港媒体报道"} -{"key": "BAC009S0906W0413", "wav": "./aishell/wav/test/S0906/BAC009S0906W0413.wav", "txt": "早前有传媒更拍到陈凯琳直上嘉颖住所短聚"} -{"key": "BAC009S0906W0414", "wav": "./aishell/wav/test/S0906/BAC009S0906W0414.wav", "txt": "父女恋纸包不住火"} -{"key": "BAC009S0906W0415", "wav": "./aishell/wav/test/S0906/BAC009S0906W0415.wav", "txt": "两人于异国拍外景晨夕相对"} -{"key": "BAC009S0906W0416", "wav": "./aishell/wav/test/S0906/BAC009S0906W0416.wav", "txt": "感情一日千里"} -{"key": "BAC009S0906W0418", "wav": "./aishell/wav/test/S0906/BAC009S0906W0418.wav", "txt": "陈势安两天一夜没洗澡刷牙"} -{"key": "BAC009S0906W0419", "wav": "./aishell/wav/test/S0906/BAC009S0906W0419.wav", "txt": "猛嗑薄荷喉糖"} -{"key": "BAC009S0906W0420", "wav": "./aishell/wav/test/S0906/BAC009S0906W0420.wav", "txt": "搜狐娱乐讯据台湾媒体报道"} -{"key": "BAC009S0906W0421", "wav": "./aishell/wav/test/S0906/BAC009S0906W0421.wav", "txt": "香港女星吴君如与导演陈可辛爱情长跑十八年"} -{"key": "BAC009S0906W0422", "wav": "./aishell/wav/test/S0906/BAC009S0906W0422.wav", "txt": "虽然没有注册结婚"} -{"key": "BAC009S0906W0423", "wav": "./aishell/wav/test/S0906/BAC009S0906W0423.wav", "txt": "但两人关系比一般夫妻更加紧密"} -{"key": "BAC009S0906W0424", "wav": "./aishell/wav/test/S0906/BAC009S0906W0424.wav", "txt": "她日前被媒体目击与陈可辛在大街上逛街血拼"} -{"key": "BAC009S0906W0425", "wav": "./aishell/wav/test/S0906/BAC009S0906W0425.wav", "txt": "且沿途有说有笑"} -{"key": "BAC009S0906W0426", "wav": "./aishell/wav/test/S0906/BAC009S0906W0426.wav", "txt": "一路上都十指紧扣"} -{"key": "BAC009S0906W0427", "wav": "./aishell/wav/test/S0906/BAC009S0906W0427.wav", "txt": "甜蜜恩爱的模样彷彿热恋中的情侣"} -{"key": 
"BAC009S0906W0428", "wav": "./aishell/wav/test/S0906/BAC009S0906W0428.wav", "txt": "搜狐娱乐讯据香港媒体报导"} -{"key": "BAC009S0906W0429", "wav": "./aishell/wav/test/S0906/BAC009S0906W0429.wav", "txt": "一直邀请陈善之担任经理人"} -{"key": "BAC009S0906W0430", "wav": "./aishell/wav/test/S0906/BAC009S0906W0430.wav", "txt": "并兼任李嘉欣经理人及处理旗下其他艺人的合约事宜"} -{"key": "BAC009S0906W0431", "wav": "./aishell/wav/test/S0906/BAC009S0906W0431.wav", "txt": "执法人员将王靖苏押解回温州"} -{"key": "BAC009S0906W0432", "wav": "./aishell/wav/test/S0906/BAC009S0906W0432.wav", "txt": "温州水库沉车案现男女腐尸女方事发前行为古怪"} -{"key": "BAC009S0906W0433", "wav": "./aishell/wav/test/S0906/BAC009S0906W0433.wav", "txt": "温州沙城街道一民房发生火灾已造成四人死亡"} -{"key": "BAC009S0906W0434", "wav": "./aishell/wav/test/S0906/BAC009S0906W0434.wav", "txt": "温州沙城街道一民房今晨发生火灾已造成四人死亡"} -{"key": "BAC009S0906W0436", "wav": "./aishell/wav/test/S0906/BAC009S0906W0436.wav", "txt": "沙城街道七五村永安路一二五号一民房发生火灾"} -{"key": "BAC009S0906W0437", "wav": "./aishell/wav/test/S0906/BAC009S0906W0437.wav", "txt": "一时一零分火势完全扑灭"} -{"key": "BAC009S0906W0438", "wav": "./aishell/wav/test/S0906/BAC009S0906W0438.wav", "txt": "火灾造成四人死亡一人受伤"} -{"key": "BAC009S0906W0439", "wav": "./aishell/wav/test/S0906/BAC009S0906W0439.wav", "txt": "伤者目前在解放军第一一八医院进行治疗"} -{"key": "BAC009S0906W0441", "wav": "./aishell/wav/test/S0906/BAC009S0906W0441.wav", "txt": "温州惊现最牛菜场温州的状元农贸市场"} -{"key": "BAC009S0906W0443", "wav": "./aishell/wav/test/S0906/BAC009S0906W0443.wav", "txt": "买菜用支付宝扫码付钱"} -{"key": "BAC009S0906W0444", "wav": "./aishell/wav/test/S0906/BAC009S0906W0444.wav", "txt": "听说过段时间还要上场智能秤"} -{"key": "BAC009S0906W0445", "wav": "./aishell/wav/test/S0906/BAC009S0906W0445.wav", "txt": "用智能秤称重将自动生成二维码"} -{"key": "BAC009S0906W0446", "wav": "./aishell/wav/test/S0906/BAC009S0906W0446.wav", "txt": "用支付宝扫一下就能付款"} -{"key": "BAC009S0906W0447", "wav": "./aishell/wav/test/S0906/BAC009S0906W0447.wav", "txt": "温州美女学霸将赴非洲支教教当地小学生汉语"} -{"key": "BAC009S0906W0448", "wav": "./aishell/wav/test/S0906/BAC009S0906W0448.wav", "txt": "麻丽贤等一七位志愿者将远赴非洲支教"} -{"key": "BAC009S0906W0449", "wav": "./aishell/wav/test/S0906/BAC009S0906W0449.wav", "txt": "温州老人卖房筹四八零零万建养老院赠政府遭闲置"} -{"key": "BAC009S0906W0450", "wav": "./aishell/wav/test/S0906/BAC009S0906W0450.wav", "txt": "为了实现退休后能建一座养老机构"} -{"key": "BAC009S0906W0451", "wav": "./aishell/wav/test/S0906/BAC009S0906W0451.wav", "txt": "为更多的老人安度往年的心愿"} -{"key": "BAC009S0906W0452", "wav": "./aishell/wav/test/S0906/BAC009S0906W0452.wav", "txt": "浙江温州一老人拿出全部积蓄并卖掉两套房子"} -{"key": "BAC009S0906W0453", "wav": "./aishell/wav/test/S0906/BAC009S0906W0453.wav", "txt": "筹款四八零零万经六年建成养老院"} -{"key": "BAC009S0906W0454", "wav": "./aishell/wav/test/S0906/BAC009S0906W0454.wav", "txt": "捐给当地慈善部门后却遭闲置三年"} -{"key": "BAC009S0906W0455", "wav": "./aishell/wav/test/S0906/BAC009S0906W0455.wav", "txt": "温州苍南县看守所民警宿舍楼起火无人员伤亡"} -{"key": "BAC009S0906W0456", "wav": "./aishell/wav/test/S0906/BAC009S0906W0456.wav", "txt": "八月三日上午一一时左右"} -{"key": "BAC009S0906W0457", "wav": "./aishell/wav/test/S0906/BAC009S0906W0457.wav", "txt": "温州苍南县看守所一宿舍起火"} -{"key": "BAC009S0906W0458", "wav": "./aishell/wav/test/S0906/BAC009S0906W0458.wav", "txt": "该市苍南县公安局直属县看守所突发火情"} -{"key": "BAC009S0906W0459", "wav": "./aishell/wav/test/S0906/BAC009S0906W0459.wav", "txt": "所内民警宿舍楼突发大火"} -{"key": "BAC009S0906W0460", "wav": "./aishell/wav/test/S0906/BAC009S0906W0460.wav", "txt": "在看守所干警及消防人员的扑救下火势很快被扑灭"} -{"key": "BAC009S0906W0461", "wav": "./aishell/wav/test/S0906/BAC009S0906W0461.wav", "txt": "温州话到底有多难懂"} -{"key": "BAC009S0906W0463", "wav": "./aishell/wav/test/S0906/BAC009S0906W0463.wav", "txt": 
"大家对温州话难懂这事儿略有耳闻"} -{"key": "BAC009S0906W0464", "wav": "./aishell/wav/test/S0906/BAC009S0906W0464.wav", "txt": "一直被认为是全中国最难学习的方言之一"} -{"key": "BAC009S0906W0465", "wav": "./aishell/wav/test/S0906/BAC009S0906W0465.wav", "txt": "温州贩卖婴儿大案女医生假称婴儿已死然后卖掉"} -{"key": "BAC009S0906W0466", "wav": "./aishell/wav/test/S0906/BAC009S0906W0466.wav", "txt": "警方先后解救了一六名婴儿"} -{"key": "BAC009S0906W0467", "wav": "./aishell/wav/test/S0906/BAC009S0906W0467.wav", "txt": "有六个被送往苍南福利院"} -{"key": "BAC009S0906W0468", "wav": "./aishell/wav/test/S0906/BAC009S0906W0468.wav", "txt": "图为其中一名被解救的孩子"} -{"key": "BAC009S0906W0469", "wav": "./aishell/wav/test/S0906/BAC009S0906W0469.wav", "txt": "温州集资诈骗案犯汇给情人四千万小三被诉"} -{"key": "BAC009S0906W0470", "wav": "./aishell/wav/test/S0906/BAC009S0906W0470.wav", "txt": "二九岁的章某被控洗钱一二二万元"} -{"key": "BAC009S0906W0471", "wav": "./aishell/wav/test/S0906/BAC009S0906W0471.wav", "txt": "温州鞋业总经理遭追杀凶手行凶过程中被打死"} -{"key": "BAC009S0906W0472", "wav": "./aishell/wav/test/S0906/BAC009S0906W0472.wav", "txt": "陆续有人从乐清赶往平阳法院"} -{"key": "BAC009S0906W0473", "wav": "./aishell/wav/test/S0906/BAC009S0906W0473.wav", "txt": "平阳法院内外已聚集了三零零多人等待开庭"} -{"key": "BAC009S0906W0474", "wav": "./aishell/wav/test/S0906/BAC009S0906W0474.wav", "txt": "温州首家支付宝菜市场启动一周很多摊主不会用"} -{"key": "BAC009S0906W0475", "wav": "./aishell/wav/test/S0906/BAC009S0906W0475.wav", "txt": "状元农贸市场内挂着支付宝的宣传牌"} -{"key": "BAC009S0906W0476", "wav": "./aishell/wav/test/S0906/BAC009S0906W0476.wav", "txt": "温州高三男生坠楼身亡事发前无异常刚从家返校"} -{"key": "BAC009S0906W0477", "wav": "./aishell/wav/test/S0906/BAC009S0906W0477.wav", "txt": "龙湾永强中学一名高三男生从宿舍楼五楼楼顶坠楼身亡"} -{"key": "BAC009S0906W0478", "wav": "./aishell/wav/test/S0906/BAC009S0906W0478.wav", "txt": "永强中学校长也是坠楼学生的语文老师"} -{"key": "BAC009S0906W0479", "wav": "./aishell/wav/test/S0906/BAC009S0906W0479.wav", "txt": "印象里他性格是比较开朗的"} -{"key": "BAC009S0906W0480", "wav": "./aishell/wav/test/S0906/BAC009S0906W0480.wav", "txt": "没有发现近期有异常变化目前"} -{"key": "BAC009S0906W0481", "wav": "./aishell/wav/test/S0906/BAC009S0906W0481.wav", "txt": "龙湾警方已对此事展开调查"} -{"key": "BAC009S0906W0482", "wav": "./aishell/wav/test/S0906/BAC009S0906W0482.wav", "txt": "温州高速公路大米遭抢续五名涉案人员已落网"} -{"key": "BAC009S0906W0483", "wav": "./aishell/wav/test/S0906/BAC009S0906W0483.wav", "txt": "白花花的大米洒了一地"} -{"key": "BAC009S0906W0484", "wav": "./aishell/wav/test/S0906/BAC009S0906W0484.wav", "txt": "引来周边大批村民哄抢"} -{"key": "BAC009S0906W0485", "wav": "./aishell/wav/test/S0906/BAC009S0906W0485.wav", "txt": "一场考验道德与良知的大米保卫战悄然打响"} -{"key": "BAC009S0906W0486", "wav": "./aishell/wav/test/S0906/BAC009S0906W0486.wav", "txt": "温州鹿城警方发布通报称"} -{"key": "BAC009S0906W0487", "wav": "./aishell/wav/test/S0906/BAC009S0906W0487.wav", "txt": "五名涉嫌参与抢米的犯罪嫌疑人先后被抓获并拘留"} -{"key": "BAC009S0906W0488", "wav": "./aishell/wav/test/S0906/BAC009S0906W0488.wav", "txt": "民警仍在对其馀涉事人员进行调查"} -{"key": "BAC009S0906W0489", "wav": "./aishell/wav/test/S0906/BAC009S0906W0489.wav", "txt": "温州高速车祸九二包大米遭哄抢续带头者被拘"} -{"key": "BAC009S0906W0490", "wav": "./aishell/wav/test/S0906/BAC009S0906W0490.wav", "txt": "一辆货车在金丽温高速温州段发生事故"} -{"key": "BAC009S0906W0491", "wav": "./aishell/wav/test/S0906/BAC009S0906W0491.wav", "txt": "涉案的其中两名嫌疑人陈某女"} -{"key": "BAC009S0906W0492", "wav": "./aishell/wav/test/S0906/BAC009S0906W0492.wav", "txt": "永嘉县人谢某女"} -{"key": "BAC009S0906W0493", "wav": "./aishell/wav/test/S0906/BAC009S0906W0493.wav", "txt": "永嘉县人已被鹿城警方依法行政拘留"} -{"key": "BAC009S0906W0494", "wav": "./aishell/wav/test/S0906/BAC009S0906W0494.wav", "txt": "港京航班六名乘客推撞地勤四人被判九至一一天监禁"} -{"key": "BAC009S0907W0121", "wav": 
"./aishell/wav/test/S0907/BAC009S0907W0121.wav", "txt": "也不代表开发商资金面已经不再紧张"} -{"key": "BAC009S0907W0122", "wav": "./aishell/wav/test/S0907/BAC009S0907W0122.wav", "txt": "背后可能蕴含着开发商更多的窘境"} -{"key": "BAC009S0907W0123", "wav": "./aishell/wav/test/S0907/BAC009S0907W0123.wav", "txt": "本世纪网至本世纪经济报道"} -{"key": "BAC009S0907W0124", "wav": "./aishell/wav/test/S0907/BAC009S0907W0124.wav", "txt": "上海南昌等城市近期继续松绑了公积金贷款政策"} -{"key": "BAC009S0907W0125", "wav": "./aishell/wav/test/S0907/BAC009S0907W0125.wav", "txt": "而南昌除了放松首套房界定标准"} -{"key": "BAC009S0907W0126", "wav": "./aishell/wav/test/S0907/BAC009S0907W0126.wav", "txt": "还降低了首套房公积金首付"} -{"key": "BAC009S0907W0127", "wav": "./aishell/wav/test/S0907/BAC009S0907W0127.wav", "txt": "国家住房银行箭在弦上"} -{"key": "BAC009S0907W0128", "wav": "./aishell/wav/test/S0907/BAC009S0907W0128.wav", "txt": "住建部官员发表文章指出"} -{"key": "BAC009S0907W0129", "wav": "./aishell/wav/test/S0907/BAC009S0907W0129.wav", "txt": "以住房公积金制度为基础"} -{"key": "BAC009S0907W0130", "wav": "./aishell/wav/test/S0907/BAC009S0907W0130.wav", "txt": "设立国家住房银行条件已经基本成熟"} -{"key": "BAC009S0907W0131", "wav": "./aishell/wav/test/S0907/BAC009S0907W0131.wav", "txt": "国家住房银行是否箭在弦上"} -{"key": "BAC009S0907W0132", "wav": "./aishell/wav/test/S0907/BAC009S0907W0132.wav", "txt": "其成立需具备哪些条件"} -{"key": "BAC009S0907W0133", "wav": "./aishell/wav/test/S0907/BAC009S0907W0133.wav", "txt": "以住房公积金制度为基础"} -{"key": "BAC009S0907W0134", "wav": "./aishell/wav/test/S0907/BAC009S0907W0134.wav", "txt": "设立政策性住宅金融机构"} -{"key": "BAC009S0907W0135", "wav": "./aishell/wav/test/S0907/BAC009S0907W0135.wav", "txt": "此机构即是住房银行"} -{"key": "BAC009S0907W0136", "wav": "./aishell/wav/test/S0907/BAC009S0907W0136.wav", "txt": "设立住房银行的条件已基本成熟"} -{"key": "BAC009S0907W0137", "wav": "./aishell/wav/test/S0907/BAC009S0907W0137.wav", "txt": "改进住房公积金提取使用监管机制"} -{"key": "BAC009S0907W0138", "wav": "./aishell/wav/test/S0907/BAC009S0907W0138.wav", "txt": "全国住房公积金七万亿元"} -{"key": "BAC009S0907W0139", "wav": "./aishell/wav/test/S0907/BAC009S0907W0139.wav", "txt": "住房维修资金约七亿元"} -{"key": "BAC009S0907W0140", "wav": "./aishell/wav/test/S0907/BAC009S0907W0140.wav", "txt": "如允许每年发行专项金融债券七万亿元"} -{"key": "BAC009S0907W0141", "wav": "./aishell/wav/test/S0907/BAC009S0907W0141.wav", "txt": "今年资金规模接近七万亿元"} -{"key": "BAC009S0907W0142", "wav": "./aishell/wav/test/S0907/BAC009S0907W0142.wav", "txt": "明年预计达到七万亿元"} -{"key": "BAC009S0907W0143", "wav": "./aishell/wav/test/S0907/BAC009S0907W0143.wav", "txt": "可基本满足首套和改善性自住住房的低息贷款需求"} -{"key": "BAC009S0907W0144", "wav": "./aishell/wav/test/S0907/BAC009S0907W0144.wav", "txt": "三是已有人员和机构"} -{"key": "BAC009S0907W0145", "wav": "./aishell/wav/test/S0907/BAC009S0907W0145.wav", "txt": "全国共有管理中心一百个"} -{"key": "BAC009S0907W0146", "wav": "./aishell/wav/test/S0907/BAC009S0907W0146.wav", "txt": "业务网点一千个"} -{"key": "BAC009S0907W0147", "wav": "./aishell/wav/test/S0907/BAC009S0907W0147.wav", "txt": "从业人员五万人"} -{"key": "BAC009S0907W0148", "wav": "./aishell/wav/test/S0907/BAC009S0907W0148.wav", "txt": "可充分利用这些机构网点和人员"} -{"key": "BAC009S0907W0149", "wav": "./aishell/wav/test/S0907/BAC009S0907W0149.wav", "txt": "组建国家住房银行分行和支行"} -{"key": "BAC009S0907W0150", "wav": "./aishell/wav/test/S0907/BAC009S0907W0150.wav", "txt": "对各地分支机构实行垂直管理"} -{"key": "BAC009S0907W0151", "wav": "./aishell/wav/test/S0907/BAC009S0907W0151.wav", "txt": "全国住房公积金贷款风险准备金已接近一百亿元"} -{"key": "BAC009S0907W0152", "wav": "./aishell/wav/test/S0907/BAC009S0907W0152.wav", "txt": "其中五亿元为超额拨备"} -{"key": "BAC009S0907W0153", "wav": "./aishell/wav/test/S0907/BAC009S0907W0153.wav", "txt": "可转化为住房银行资本金"} 
-{"key": "BAC009S0907W0154", "wav": "./aishell/wav/test/S0907/BAC009S0907W0154.wav", "txt": "设立住房银行好处多多"} -{"key": "BAC009S0907W0155", "wav": "./aishell/wav/test/S0907/BAC009S0907W0155.wav", "txt": "提高家庭购房能力"} -{"key": "BAC009S0907W0156", "wav": "./aishell/wav/test/S0907/BAC009S0907W0156.wav", "txt": "通过国家住房银行提供低息贷款"} -{"key": "BAC009S0907W0157", "wav": "./aishell/wav/test/S0907/BAC009S0907W0157.wav", "txt": "可以解决贷款难和贷款贵问题"} -{"key": "BAC009S0907W0158", "wav": "./aishell/wav/test/S0907/BAC009S0907W0158.wav", "txt": "有效提高家庭购房能力"} -{"key": "BAC009S0907W0159", "wav": "./aishell/wav/test/S0907/BAC009S0907W0159.wav", "txt": "完善宏观调控机制"} -{"key": "BAC009S0907W0160", "wav": "./aishell/wav/test/S0907/BAC009S0907W0160.wav", "txt": "可以有效解决商业银行顺周期操作问题"} -{"key": "BAC009S0907W0161", "wav": "./aishell/wav/test/S0907/BAC009S0907W0161.wav", "txt": "避免房地产市场大起大落"} -{"key": "BAC009S0907W0162", "wav": "./aishell/wav/test/S0907/BAC009S0907W0162.wav", "txt": "拓展货币政策操作空间"} -{"key": "BAC009S0907W0163", "wav": "./aishell/wav/test/S0907/BAC009S0907W0163.wav", "txt": "为利率市场化改革创造条件"} -{"key": "BAC009S0907W0164", "wav": "./aishell/wav/test/S0907/BAC009S0907W0164.wav", "txt": "促进新型城镇化发展"} -{"key": "BAC009S0907W0165", "wav": "./aishell/wav/test/S0907/BAC009S0907W0165.wav", "txt": "将农民工纳入住房公积金制度"} -{"key": "BAC009S0907W0166", "wav": "./aishell/wav/test/S0907/BAC009S0907W0166.wav", "txt": "积累在城镇购房首期付款"} -{"key": "BAC009S0907W0167", "wav": "./aishell/wav/test/S0907/BAC009S0907W0167.wav", "txt": "再由国家住房银行提供低息贷款"} -{"key": "BAC009S0907W0168", "wav": "./aishell/wav/test/S0907/BAC009S0907W0168.wav", "txt": "后续还款用住房公积金支付"} -{"key": "BAC009S0907W0169", "wav": "./aishell/wav/test/S0907/BAC009S0907W0169.wav", "txt": "将有效缓解购房能力不足矛盾"} -{"key": "BAC009S0907W0170", "wav": "./aishell/wav/test/S0907/BAC009S0907W0170.wav", "txt": "提升新型城镇化质量和效益"} -{"key": "BAC009S0907W0171", "wav": "./aishell/wav/test/S0907/BAC009S0907W0171.wav", "txt": "改进住房公积金管理"} -{"key": "BAC009S0907W0172", "wav": "./aishell/wav/test/S0907/BAC009S0907W0172.wav", "txt": "根源是体制机制存在弊端"} -{"key": "BAC009S0907W0173", "wav": "./aishell/wav/test/S0907/BAC009S0907W0173.wav", "txt": "通过设立国家住房银行"} -{"key": "BAC009S0907W0174", "wav": "./aishell/wav/test/S0907/BAC009S0907W0174.wav", "txt": "可以有效提高资金管理集约化专业化和精细化水平"} -{"key": "BAC009S0907W0175", "wav": "./aishell/wav/test/S0907/BAC009S0907W0175.wav", "txt": "充分发挥住房公积金作用"} -{"key": "BAC009S0907W0176", "wav": "./aishell/wav/test/S0907/BAC009S0907W0176.wav", "txt": "住建部官员发表文章指出"} -{"key": "BAC009S0907W0177", "wav": "./aishell/wav/test/S0907/BAC009S0907W0177.wav", "txt": "以住房公积金制度为基础"} -{"key": "BAC009S0907W0178", "wav": "./aishell/wav/test/S0907/BAC009S0907W0178.wav", "txt": "设立国家住房银行条件已经基本成熟"} -{"key": "BAC009S0907W0179", "wav": "./aishell/wav/test/S0907/BAC009S0907W0179.wav", "txt": "国家住房银行是否箭在弦上"} -{"key": "BAC009S0907W0180", "wav": "./aishell/wav/test/S0907/BAC009S0907W0180.wav", "txt": "其成立需具备哪些条件"} -{"key": "BAC009S0907W0181", "wav": "./aishell/wav/test/S0907/BAC009S0907W0181.wav", "txt": "备受刚需购房者关注的公积金政策也频繁迎来调整"} -{"key": "BAC009S0907W0182", "wav": "./aishell/wav/test/S0907/BAC009S0907W0182.wav", "txt": "北京市管国管住房公积金中心先后发布通知"} -{"key": "BAC009S0907W0183", "wav": "./aishell/wav/test/S0907/BAC009S0907W0183.wav", "txt": "贷款最高额度由五万元升至七万元"} -{"key": "BAC009S0907W0184", "wav": "./aishell/wav/test/S0907/BAC009S0907W0184.wav", "txt": "公积金贷款总共可少缴利息三十馀万"} -{"key": "BAC009S0907W0185", "wav": "./aishell/wav/test/S0907/BAC009S0907W0185.wav", "txt": "是对过去住房公积金制度不作为方式的纠正"} -{"key": "BAC009S0907W0186", "wav": 
"./aishell/wav/test/S0907/BAC009S0907W0186.wav", "txt": "而随着各地公积金政策的调整"} -{"key": "BAC009S0907W0187", "wav": "./aishell/wav/test/S0907/BAC009S0907W0187.wav", "txt": "建立健全以工促农以城带乡的长效机制"} -{"key": "BAC009S0907W0188", "wav": "./aishell/wav/test/S0907/BAC009S0907W0188.wav", "txt": "为现代农业建设取得明显进展提供有力保障"} -{"key": "BAC009S0907W0189", "wav": "./aishell/wav/test/S0907/BAC009S0907W0189.wav", "txt": "建立农业投入稳定增长机制"} -{"key": "BAC009S0907W0190", "wav": "./aishell/wav/test/S0907/BAC009S0907W0190.wav", "txt": "按照总量持续增长比例稳步提高的要求"} -{"key": "BAC009S0907W0191", "wav": "./aishell/wav/test/S0907/BAC009S0907W0191.wav", "txt": "预算内固定资产投资要向重大农业农村建设项目倾斜"} -{"key": "BAC009S0907W0192", "wav": "./aishell/wav/test/S0907/BAC009S0907W0192.wav", "txt": "耕地占用税税率提高后"} -{"key": "BAC009S0907W0193", "wav": "./aishell/wav/test/S0907/BAC009S0907W0193.wav", "txt": "新增收入全部用于农业"} -{"key": "BAC009S0907W0194", "wav": "./aishell/wav/test/S0907/BAC009S0907W0194.wav", "txt": "积极推动土地出让收益用于高标准农田建设"} -{"key": "BAC009S0907W0195", "wav": "./aishell/wav/test/S0907/BAC009S0907W0195.wav", "txt": "充分发挥中国农业产业发展基金的引导作用"} -{"key": "BAC009S0907W0196", "wav": "./aishell/wav/test/S0907/BAC009S0907W0196.wav", "txt": "加快农村金融组织产品和服务创新"} -{"key": "BAC009S0907W0197", "wav": "./aishell/wav/test/S0907/BAC009S0907W0197.wav", "txt": "推动发展村镇银行等农村中小金融机构"} -{"key": "BAC009S0907W0198", "wav": "./aishell/wav/test/S0907/BAC009S0907W0198.wav", "txt": "引导金融机构发放农业中长期贷款"} -{"key": "BAC009S0907W0199", "wav": "./aishell/wav/test/S0907/BAC009S0907W0199.wav", "txt": "完善农民专业合作社管理方法"} -{"key": "BAC009S0907W0200", "wav": "./aishell/wav/test/S0907/BAC009S0907W0200.wav", "txt": "支持其开展信用合作"} -{"key": "BAC009S0907W0201", "wav": "./aishell/wav/test/S0907/BAC009S0907W0201.wav", "txt": "落实农民专业合作社和农村金融有关税收优惠政策"} -{"key": "BAC009S0907W0202", "wav": "./aishell/wav/test/S0907/BAC009S0907W0202.wav", "txt": "扶持农业信贷担保组织发展"} -{"key": "BAC009S0907W0203", "wav": "./aishell/wav/test/S0907/BAC009S0907W0203.wav", "txt": "扩大农村担保品范围"} -{"key": "BAC009S0907W0204", "wav": "./aishell/wav/test/S0907/BAC009S0907W0204.wav", "txt": "完善农业保险保费补贴政策"} -{"key": "BAC009S0907W0205", "wav": "./aishell/wav/test/S0907/BAC009S0907W0205.wav", "txt": "健全农业再保险体系"} -{"key": "BAC009S0907W0206", "wav": "./aishell/wav/test/S0907/BAC009S0907W0206.wav", "txt": "探索完善财政支持下的农业大灾风险分散机制"} -{"key": "BAC009S0907W0207", "wav": "./aishell/wav/test/S0907/BAC009S0907W0207.wav", "txt": "引导社会资本投入农业"} -{"key": "BAC009S0907W0208", "wav": "./aishell/wav/test/S0907/BAC009S0907W0208.wav", "txt": "各部门要主动服务三农"} -{"key": "BAC009S0907W0209", "wav": "./aishell/wav/test/S0907/BAC009S0907W0209.wav", "txt": "积极推动建立城乡要素平等交换关系"} -{"key": "BAC009S0907W0210", "wav": "./aishell/wav/test/S0907/BAC009S0907W0210.wav", "txt": "鼓励和促进工业与城市资源要素向农业农村配置"} -{"key": "BAC009S0907W0211", "wav": "./aishell/wav/test/S0907/BAC009S0907W0211.wav", "txt": "调动农民参与农业农村基础设施建设的积极性"} -{"key": "BAC009S0907W0212", "wav": "./aishell/wav/test/S0907/BAC009S0907W0212.wav", "txt": "通过组织动员和政策引导等多种途径"} -{"key": "BAC009S0907W0213", "wav": "./aishell/wav/test/S0907/BAC009S0907W0213.wav", "txt": "鼓励各种社会力量与乡村结对帮扶"} -{"key": "BAC009S0907W0214", "wav": "./aishell/wav/test/S0907/BAC009S0907W0214.wav", "txt": "参与农村产业发展和公共设施建设"} -{"key": "BAC009S0907W0215", "wav": "./aishell/wav/test/S0907/BAC009S0907W0215.wav", "txt": "努力形成多元化投入新格局"} -{"key": "BAC009S0907W0216", "wav": "./aishell/wav/test/S0907/BAC009S0907W0216.wav", "txt": "加大农业支持保护力度"} -{"key": "BAC009S0907W0217", "wav": "./aishell/wav/test/S0907/BAC009S0907W0217.wav", "txt": "坚持和完善农业补贴政策"} -{"key": "BAC009S0907W0218", "wav": 
"./aishell/wav/test/S0907/BAC009S0907W0218.wav", "txt": "建立农业补贴政策后评估机制"} -{"key": "BAC009S0907W0219", "wav": "./aishell/wav/test/S0907/BAC009S0907W0219.wav", "txt": "落实农资综合补贴动态调整机制"} -{"key": "BAC009S0907W0220", "wav": "./aishell/wav/test/S0907/BAC009S0907W0220.wav", "txt": "研究逐步扩大良种补贴品种和范围"} -{"key": "BAC009S0907W0221", "wav": "./aishell/wav/test/S0907/BAC009S0907W0221.wav", "txt": "扩大农机具购置补贴规模"} -{"key": "BAC009S0907W0222", "wav": "./aishell/wav/test/S0907/BAC009S0907W0222.wav", "txt": "加大农机化薄弱环节生产机械补贴力度"} -{"key": "BAC009S0907W0223", "wav": "./aishell/wav/test/S0907/BAC009S0907W0223.wav", "txt": "加大动物强制免疫补贴力度"} -{"key": "BAC009S0907W0224", "wav": "./aishell/wav/test/S0907/BAC009S0907W0224.wav", "txt": "逐步完善农业生产关键技术应用与服务支持政策"} -{"key": "BAC009S0907W0225", "wav": "./aishell/wav/test/S0907/BAC009S0907W0225.wav", "txt": "大幅度增加农业防灾减灾稳产增产关键技术良法补助"} -{"key": "BAC009S0907W0226", "wav": "./aishell/wav/test/S0907/BAC009S0907W0226.wav", "txt": "坚持和完善渔用柴油补贴政策"} -{"key": "BAC009S0907W0227", "wav": "./aishell/wav/test/S0907/BAC009S0907W0227.wav", "txt": "继续实施农业种子种苗种畜种禽免税进口优惠政策"} -{"key": "BAC009S0907W0228", "wav": "./aishell/wav/test/S0907/BAC009S0907W0228.wav", "txt": "建立完善农业生产奖补制度"} -{"key": "BAC009S0907W0229", "wav": "./aishell/wav/test/S0907/BAC009S0907W0229.wav", "txt": "完善主产区利益补偿机制"} -{"key": "BAC009S0907W0230", "wav": "./aishell/wav/test/S0907/BAC009S0907W0230.wav", "txt": "提高中央财政对粮食油料生产大县转移支付水平"} -{"key": "BAC009S0907W0231", "wav": "./aishell/wav/test/S0907/BAC009S0907W0231.wav", "txt": "继续加大对产粮大县生猪调出大县的奖励力度"} -{"key": "BAC009S0907W0232", "wav": "./aishell/wav/test/S0907/BAC009S0907W0232.wav", "txt": "规范粮食主产县涉农投资项目地方资金配套"} -{"key": "BAC009S0907W0233", "wav": "./aishell/wav/test/S0907/BAC009S0907W0233.wav", "txt": "全面取消主产区粮食风险基金地方资金配套"} -{"key": "BAC009S0907W0234", "wav": "./aishell/wav/test/S0907/BAC009S0907W0234.wav", "txt": "稳步提高粮食主产区县级人均财力水平"} -{"key": "BAC009S0907W0235", "wav": "./aishell/wav/test/S0907/BAC009S0907W0235.wav", "txt": "全面实施和完善草原生态保护补助奖励政策"} -{"key": "BAC009S0907W0236", "wav": "./aishell/wav/test/S0907/BAC009S0907W0236.wav", "txt": "扩大草原生态保护面源污染防控生态奖补范围和规模"} -{"key": "BAC009S0907W0237", "wav": "./aishell/wav/test/S0907/BAC009S0907W0237.wav", "txt": "探索实施生物农药低毒农药使用补助政策"} -{"key": "BAC009S0907W0238", "wav": "./aishell/wav/test/S0907/BAC009S0907W0238.wav", "txt": "研究建立高耗能老旧农业机械报废回收制度"} -{"key": "BAC009S0907W0239", "wav": "./aishell/wav/test/S0907/BAC009S0907W0239.wav", "txt": "探索实施报废更新补助"} -{"key": "BAC009S0907W0240", "wav": "./aishell/wav/test/S0907/BAC009S0907W0240.wav", "txt": "加大对农业科研和技术推广的支持力度"} -{"key": "BAC009S0907W0241", "wav": "./aishell/wav/test/S0907/BAC009S0907W0241.wav", "txt": "完善现代农业产业技术体系"} -{"key": "BAC009S0907W0242", "wav": "./aishell/wav/test/S0907/BAC009S0907W0242.wav", "txt": "选择部分农业科研院所予以稳定支持"} -{"key": "BAC009S0907W0243", "wav": "./aishell/wav/test/S0907/BAC009S0907W0243.wav", "txt": "按照种养规模和服务绩效安排工作经费"} -{"key": "BAC009S0907W0244", "wav": "./aishell/wav/test/S0907/BAC009S0907W0244.wav", "txt": "加大动物疫病防控经费投入"} -{"key": "BAC009S0907W0245", "wav": "./aishell/wav/test/S0907/BAC009S0907W0245.wav", "txt": "完善病死动物无害化处理补贴制度"} -{"key": "BAC009S0907W0246", "wav": "./aishell/wav/test/S0907/BAC009S0907W0246.wav", "txt": "建立和完善农作物病虫害专业化统防统治补助政策"} -{"key": "BAC009S0907W0247", "wav": "./aishell/wav/test/S0907/BAC009S0907W0247.wav", "txt": "继续向农民免费提供测土配方施肥服务"} -{"key": "BAC009S0907W0248", "wav": "./aishell/wav/test/S0907/BAC009S0907W0248.wav", "txt": "扩大土壤有机质提升项目实施范围和规模"} -{"key": "BAC009S0907W0249", "wav": "./aishell/wav/test/S0907/BAC009S0907W0249.wav", "txt": 
"继续加大农业农村人才培养力度"} -{"key": "BAC009S0907W0250", "wav": "./aishell/wav/test/S0907/BAC009S0907W0250.wav", "txt": "对大学生涉农创业按规定给予相关政策扶持"} -{"key": "BAC009S0907W0251", "wav": "./aishell/wav/test/S0907/BAC009S0907W0251.wav", "txt": "完善农产品市场调控机制"} -{"key": "BAC009S0907W0252", "wav": "./aishell/wav/test/S0907/BAC009S0907W0252.wav", "txt": "稳步提高稻谷小麦最低收购价"} -{"key": "BAC009S0907W0253", "wav": "./aishell/wav/test/S0907/BAC009S0907W0253.wav", "txt": "没有人提的往往才是真命天子"} -{"key": "BAC009S0907W0254", "wav": "./aishell/wav/test/S0907/BAC009S0907W0254.wav", "txt": "谁是苹果进军汽车市场的合作伙伴收购对象"} -{"key": "BAC009S0907W0258", "wav": "./aishell/wav/test/S0907/BAC009S0907W0258.wav", "txt": "这个问题存在于软件捆绑方式"} -{"key": "BAC009S0907W0259", "wav": "./aishell/wav/test/S0907/BAC009S0907W0259.wav", "txt": "它是软件集成的一种方式"} -{"key": "BAC009S0907W0261", "wav": "./aishell/wav/test/S0907/BAC009S0907W0261.wav", "txt": "他们很快提供了修复软件"} -{"key": "BAC009S0907W0262", "wav": "./aishell/wav/test/S0907/BAC009S0907W0262.wav", "txt": "不管是什么时候推出软件和开发一些超前的东西"} -{"key": "BAC009S0907W0263", "wav": "./aishell/wav/test/S0907/BAC009S0907W0263.wav", "txt": "避免不了出现一些漏洞"} -{"key": "BAC009S0907W0264", "wav": "./aishell/wav/test/S0907/BAC009S0907W0264.wav", "txt": "我们所做的就是发现漏洞后立即修复"} -{"key": "BAC009S0907W0265", "wav": "./aishell/wav/test/S0907/BAC009S0907W0265.wav", "txt": "在苹果发布靓丽的第四财季业绩报告后"} -{"key": "BAC009S0907W0266", "wav": "./aishell/wav/test/S0907/BAC009S0907W0266.wav", "txt": "乔斯维亚克就很少在公众场合露面"} -{"key": "BAC009S0907W0268", "wav": "./aishell/wav/test/S0907/BAC009S0907W0268.wav", "txt": "促使这家公司获得了创记录的第四财季盈利"} -{"key": "BAC009S0907W0269", "wav": "./aishell/wav/test/S0907/BAC009S0907W0269.wav", "txt": "苹果正在全力以赴出售尽可能多的智能手机"} -{"key": "BAC009S0907W0270", "wav": "./aishell/wav/test/S0907/BAC009S0907W0270.wav", "txt": "你必须保证自己了解稳态市场"} -{"key": "BAC009S0907W0271", "wav": "./aishell/wav/test/S0907/BAC009S0907W0271.wav", "txt": "而不仅仅是早期市场"} -{"key": "BAC009S0907W0272", "wav": "./aishell/wav/test/S0907/BAC009S0907W0272.wav", "txt": "大尺寸屏幕设备在亚洲很流行"} -{"key": "BAC009S0907W0273", "wav": "./aishell/wav/test/S0907/BAC009S0907W0273.wav", "txt": "但是在欧洲受欢迎度较低"} -{"key": "BAC009S0907W0274", "wav": "./aishell/wav/test/S0907/BAC009S0907W0274.wav", "txt": "美国市场刚好介于两者之间"} -{"key": "BAC009S0907W0275", "wav": "./aishell/wav/test/S0907/BAC009S0907W0275.wav", "txt": "目前这项服务已经达到了一个里程碑"} -{"key": "BAC009S0907W0277", "wav": "./aishell/wav/test/S0907/BAC009S0907W0277.wav", "txt": "有一百万张信用卡已被激活"} -{"key": "BAC009S0907W0278", "wav": "./aishell/wav/test/S0907/BAC009S0907W0278.wav", "txt": "其中就包括沃尔玛和百思买"} -{"key": "BAC009S0907W0279", "wav": "./aishell/wav/test/S0907/BAC009S0907W0279.wav", "txt": "这两家公司目前正在开发自己的移动支付系统"} -{"key": "BAC009S0907W0280", "wav": "./aishell/wav/test/S0907/BAC009S0907W0280.wav", "txt": "零售商最终都会向消费者妥协"} -{"key": "BAC009S0907W0281", "wav": "./aishell/wav/test/S0907/BAC009S0907W0281.wav", "txt": "想要成功的零售商将考虑消费者的利益"} -{"key": "BAC009S0907W0282", "wav": "./aishell/wav/test/S0907/BAC009S0907W0282.wav", "txt": "并接受消费者想要使用的支付方式"} -{"key": "BAC009S0907W0283", "wav": "./aishell/wav/test/S0907/BAC009S0907W0283.wav", "txt": "乔斯维亚克还谈及了苹果涉足可穿戴设备市场的问题"} -{"key": "BAC009S0907W0286", "wav": "./aishell/wav/test/S0907/BAC009S0907W0286.wav", "txt": "乔斯维亚克还为苹果平板电脑业务做了辩护"} -{"key": "BAC009S0907W0287", "wav": "./aishell/wav/test/S0907/BAC009S0907W0287.wav", "txt": "他拿出了数据作为证据截止目前"} -{"key": "BAC009S0907W0290", "wav": "./aishell/wav/test/S0907/BAC009S0907W0290.wav", "txt": "我们一直都在打造最好的产品"} -{"key": "BAC009S0907W0291", "wav": "./aishell/wav/test/S0907/BAC009S0907W0291.wav", "txt": 
"这次我们同样做到了"} -{"key": "BAC009S0907W0294", "wav": "./aishell/wav/test/S0907/BAC009S0907W0294.wav", "txt": "用户发现系统更新之后"} -{"key": "BAC009S0907W0295", "wav": "./aishell/wav/test/S0907/BAC009S0907W0295.wav", "txt": "心率测量记录没有之前那么频繁了"} -{"key": "BAC009S0907W0297", "wav": "./aishell/wav/test/S0907/BAC009S0907W0297.wav", "txt": "不过苹果官方很快澄清了这个事情"} -{"key": "BAC009S0907W0298", "wav": "./aishell/wav/test/S0907/BAC009S0907W0298.wav", "txt": "根据苹果官方的支持页面显示"} -{"key": "BAC009S0907W0300", "wav": "./aishell/wav/test/S0907/BAC009S0907W0300.wav", "txt": "不过更新后锻炼和运动手臂的时候不会记录心率"} -{"key": "BAC009S0907W0301", "wav": "./aishell/wav/test/S0907/BAC009S0907W0301.wav", "txt": "因此用户看到测量记录的记录要比之前少一些"} -{"key": "BAC009S0907W0302", "wav": "./aishell/wav/test/S0907/BAC009S0907W0302.wav", "txt": "不过这导致了很多新问题"} -{"key": "BAC009S0907W0303", "wav": "./aishell/wav/test/S0907/BAC009S0907W0303.wav", "txt": "在认证授权系统中对服务器设置权限管理"} -{"key": "BAC009S0907W0304", "wav": "./aishell/wav/test/S0907/BAC009S0907W0304.wav", "txt": "以及与经销商汽车垂直网站等签署保密协议等"} -{"key": "BAC009S0907W0305", "wav": "./aishell/wav/test/S0907/BAC009S0907W0305.wav", "txt": "这些措施在一定程度上将可防止用户数据泄露"} -{"key": "BAC009S0907W0306", "wav": "./aishell/wav/test/S0907/BAC009S0907W0306.wav", "txt": "除了投入大这一因素之外"} -{"key": "BAC009S0907W0307", "wav": "./aishell/wav/test/S0907/BAC009S0907W0307.wav", "txt": "往往对网络安全意识也不强"} -{"key": "BAC009S0907W0308", "wav": "./aishell/wav/test/S0907/BAC009S0907W0308.wav", "txt": "毕竟与互联网融合时间不长"} -{"key": "BAC009S0907W0309", "wav": "./aishell/wav/test/S0907/BAC009S0907W0309.wav", "txt": "上述网络安全人士称"} -{"key": "BAC009S0907W0310", "wav": "./aishell/wav/test/S0907/BAC009S0907W0310.wav", "txt": "乌云网合伙人邬迪接受第一财经日报记者采访时称"} -{"key": "BAC009S0907W0311", "wav": "./aishell/wav/test/S0907/BAC009S0907W0311.wav", "txt": "尽管网络安全目前投入成本大"} -{"key": "BAC009S0907W0312", "wav": "./aishell/wav/test/S0907/BAC009S0907W0312.wav", "txt": "又未直接产生经济效益"} -{"key": "BAC009S0907W0313", "wav": "./aishell/wav/test/S0907/BAC009S0907W0313.wav", "txt": "但到将来互联网时代"} -{"key": "BAC009S0907W0314", "wav": "./aishell/wav/test/S0907/BAC009S0907W0314.wav", "txt": "部分传统的车企或许还没有注意到这点"} -{"key": "BAC009S0907W0315", "wav": "./aishell/wav/test/S0907/BAC009S0907W0315.wav", "txt": "乌云上有不少因联网漏洞可导致车辆被控制"} -{"key": "BAC009S0907W0316", "wav": "./aishell/wav/test/S0907/BAC009S0907W0316.wav", "txt": "这将会导致行车安全问题"} -{"key": "BAC009S0907W0317", "wav": "./aishell/wav/test/S0907/BAC009S0907W0317.wav", "txt": "令车企烦恼的不仅是车主信息被泄露这一困扰"} -{"key": "BAC009S0907W0318", "wav": "./aishell/wav/test/S0907/BAC009S0907W0318.wav", "txt": "随着越来越多车企踊跃加入车联网浪潮中"} -{"key": "BAC009S0907W0319", "wav": "./aishell/wav/test/S0907/BAC009S0907W0319.wav", "txt": "信息安全隐患也随之而来"} -{"key": "BAC009S0907W0320", "wav": "./aishell/wav/test/S0907/BAC009S0907W0320.wav", "txt": "负责车辆网络安全问题"} -{"key": "BAC009S0907W0321", "wav": "./aishell/wav/test/S0907/BAC009S0907W0321.wav", "txt": "现在汽车与网络的联系越来越紧密"} -{"key": "BAC009S0907W0322", "wav": "./aishell/wav/test/S0907/BAC009S0907W0322.wav", "txt": "以后将能够与周围环境交流"} -{"key": "BAC009S0907W0323", "wav": "./aishell/wav/test/S0907/BAC009S0907W0323.wav", "txt": "如果车辆被黑客软件侵袭"} -{"key": "BAC009S0907W0324", "wav": "./aishell/wav/test/S0907/BAC009S0907W0324.wav", "txt": "车辆可能会发生严重的交通事故"} -{"key": "BAC009S0907W0325", "wav": "./aishell/wav/test/S0907/BAC009S0907W0325.wav", "txt": "比如现在的汽车一般采用了哪些新技术"} -{"key": "BAC009S0907W0326", "wav": "./aishell/wav/test/S0907/BAC009S0907W0326.wav", "txt": "其中十六家回复发函"} -{"key": "BAC009S0907W0327", "wav": "./aishell/wav/test/S0907/BAC009S0907W0327.wav", "txt": "在接受调查的这些公司中"} -{"key": 
"BAC009S0907W0328", "wav": "./aishell/wav/test/S0907/BAC009S0907W0328.wav", "txt": "有两家表示能够诊断或者反馈黑客入侵后的情况"} -{"key": "BAC009S0907W0329", "wav": "./aishell/wav/test/S0907/BAC009S0907W0329.wav", "txt": "有一家公司表示能够及时检测黑客入侵"} -{"key": "BAC009S0907W0330", "wav": "./aishell/wav/test/S0907/BAC009S0907W0330.wav", "txt": "像车上的信息娱乐系统和导航系统"} -{"key": "BAC009S0907W0331", "wav": "./aishell/wav/test/S0907/BAC009S0907W0331.wav", "txt": "很可能通过联网技术"} -{"key": "BAC009S0907W0332", "wav": "./aishell/wav/test/S0907/BAC009S0907W0332.wav", "txt": "被恶意软件或者黑客攻击"} -{"key": "BAC009S0907W0333", "wav": "./aishell/wav/test/S0907/BAC009S0907W0333.wav", "txt": "二十二二零一五"} -{"key": "BAC009S0907W0335", "wav": "./aishell/wav/test/S0907/BAC009S0907W0335.wav", "txt": "黑客可利用这些漏洞远程打开车门"} -{"key": "BAC009S0907W0336", "wav": "./aishell/wav/test/S0907/BAC009S0907W0336.wav", "txt": "宝马方面表示已经升级该数字系统"} -{"key": "BAC009S0907W0337", "wav": "./aishell/wav/test/S0907/BAC009S0907W0337.wav", "txt": "解决信息安全的问题"} -{"key": "BAC009S0907W0339", "wav": "./aishell/wav/test/S0907/BAC009S0907W0339.wav", "txt": "作为唯一能够入选五大飙血之战的女子比赛"} -{"key": "BAC009S0907W0340", "wav": "./aishell/wav/test/S0907/BAC009S0907W0340.wav", "txt": "正是得益于我国女子散打名将鄂美蝶的惊艳一击"} -{"key": "BAC009S0907W0341", "wav": "./aishell/wav/test/S0907/BAC009S0907W0341.wav", "txt": "在当天女子五二千克级自由搏击超级战中"} -{"key": "BAC009S0907W0342", "wav": "./aishell/wav/test/S0907/BAC009S0907W0342.wav", "txt": "鄂美蝶便毫无保留地将炮火轰向对手"} -{"key": "BAC009S0907W0343", "wav": "./aishell/wav/test/S0907/BAC009S0907W0343.wav", "txt": "三十三岁的大滨芳美在面对强大的火力下"} -{"key": "BAC009S0907W0344", "wav": "./aishell/wav/test/S0907/BAC009S0907W0344.wav", "txt": "比赛很快便呈向一边倒的局面第二回合"} -{"key": "BAC009S0907W0345", "wav": "./aishell/wav/test/S0907/BAC009S0907W0345.wav", "txt": "鄂美蝶继续将自己所学到的新搏击技能尽情展现"} -{"key": "BAC009S0907W0346", "wav": "./aishell/wav/test/S0907/BAC009S0907W0346.wav", "txt": "在一连串的拳腿风暴过后"} -{"key": "BAC009S0907W0347", "wav": "./aishell/wav/test/S0907/BAC009S0907W0347.wav", "txt": "终止时间定格在二分二十一秒"} -{"key": "BAC009S0907W0350", "wav": "./aishell/wav/test/S0907/BAC009S0907W0350.wav", "txt": "二零一五年四月十二"} -{"key": "BAC009S0907W0351", "wav": "./aishell/wav/test/S0907/BAC009S0907W0351.wav", "txt": "一场众星闪耀的群龙赛事震撼打响"} -{"key": "BAC009S0907W0352", "wav": "./aishell/wav/test/S0907/BAC009S0907W0352.wav", "txt": "作为此次赛事上唯一一场纯泰式规则的超级战"} -{"key": "BAC009S0907W0353", "wav": "./aishell/wav/test/S0907/BAC009S0907W0353.wav", "txt": "两位气质迥异的选手展示出了全然不同的擂台风格"} -{"key": "BAC009S0907W0354", "wav": "./aishell/wav/test/S0907/BAC009S0907W0354.wav", "txt": "在前两局僵持不下的情况下"} -{"key": "BAC009S0907W0355", "wav": "./aishell/wav/test/S0907/BAC009S0907W0355.wav", "txt": "面对兵行诡道的波斯弯刀"} -{"key": "BAC009S0907W0356", "wav": "./aishell/wav/test/S0907/BAC009S0907W0356.wav", "txt": "张春雨选择了加强压迫对手的力度"} -{"key": "BAC009S0907W0357", "wav": "./aishell/wav/test/S0907/BAC009S0907W0357.wav", "txt": "此举却导致了一次擂台意外的发生"} -{"key": "BAC009S0907W0358", "wav": "./aishell/wav/test/S0907/BAC009S0907W0358.wav", "txt": "张春雨被对手的一记肘击打破了右侧眉弓"} -{"key": "BAC009S0907W0359", "wav": "./aishell/wav/test/S0907/BAC009S0907W0359.wav", "txt": "经过场上护理人员的医治后"} -{"key": "BAC009S0907W0360", "wav": "./aishell/wav/test/S0907/BAC009S0907W0360.wav", "txt": "张春雨在全场观众的喝彩声中再次投入比赛"} -{"key": "BAC009S0907W0361", "wav": "./aishell/wav/test/S0907/BAC009S0907W0361.wav", "txt": "并向对手发起了凶猛的反扑"} -{"key": "BAC009S0907W0362", "wav": "./aishell/wav/test/S0907/BAC009S0907W0362.wav", "txt": "双方刺刀见红式的对攻中"} -{"key": "BAC009S0907W0363", "wav": "./aishell/wav/test/S0907/BAC009S0907W0363.wav", "txt": "伊萨的眉弓同样被张春雨以牙还牙的肘法击破"} 
-{"key": "BAC009S0907W0364", "wav": "./aishell/wav/test/S0907/BAC009S0907W0364.wav", "txt": "全面引爆现场观众的激情"} -{"key": "BAC009S0907W0365", "wav": "./aishell/wav/test/S0907/BAC009S0907W0365.wav", "txt": "比赛在双方互不相让的对攻中落下了帷幕"} -{"key": "BAC009S0907W0366", "wav": "./aishell/wav/test/S0907/BAC009S0907W0366.wav", "txt": "但对于每一位观赛者来讲"} -{"key": "BAC009S0907W0367", "wav": "./aishell/wav/test/S0907/BAC009S0907W0367.wav", "txt": "能够亲眼见证这场火爆刺激的的精彩大战"} -{"key": "BAC009S0907W0368", "wav": "./aishell/wav/test/S0907/BAC009S0907W0368.wav", "txt": "远比单纯的胜负有意义得多"} -{"key": "BAC009S0907W0369", "wav": "./aishell/wav/test/S0907/BAC009S0907W0369.wav", "txt": "这是一场没有输家的经典比赛"} -{"key": "BAC009S0907W0372", "wav": "./aishell/wav/test/S0907/BAC009S0907W0372.wav", "txt": "二零一五年二月一日"} -{"key": "BAC009S0907W0373", "wav": "./aishell/wav/test/S0907/BAC009S0907W0373.wav", "txt": "昆仑决广州站在广州天河体育中心成功打响"} -{"key": "BAC009S0907W0374", "wav": "./aishell/wav/test/S0907/BAC009S0907W0374.wav", "txt": "多国大神级搏击王者论剑昆仑武道之巅"} -{"key": "BAC009S0907W0375", "wav": "./aishell/wav/test/S0907/BAC009S0907W0375.wav", "txt": "决赛一如期待般精彩绝伦"} -{"key": "BAC009S0907W0376", "wav": "./aishell/wav/test/S0907/BAC009S0907W0376.wav", "txt": "马刀抡击式的中距离组合拳法配合高位膝技"} -{"key": "BAC009S0907W0377", "wav": "./aishell/wav/test/S0907/BAC009S0907W0377.wav", "txt": "打得对手只有招架之功"} -{"key": "BAC009S0907W0378", "wav": "./aishell/wav/test/S0907/BAC009S0907W0378.wav", "txt": "便将对手的眼部击伤"} -{"key": "BAC009S0907W0379", "wav": "./aishell/wav/test/S0907/BAC009S0907W0379.wav", "txt": "严重影响卡尔泽塔的实现"} -{"key": "BAC009S0907W0380", "wav": "./aishell/wav/test/S0907/BAC009S0907W0380.wav", "txt": "令对手无奈放弃比赛"} -{"key": "BAC009S0907W0381", "wav": "./aishell/wav/test/S0907/BAC009S0907W0381.wav", "txt": "他在二零一五年昆仑决诸神之战决赛圈的表现"} -{"key": "BAC009S0907W0382", "wav": "./aishell/wav/test/S0907/BAC009S0907W0382.wav", "txt": "将成为无数武迷接下来最大的期待之一"} -{"key": "BAC009S0907W0383", "wav": "./aishell/wav/test/S0907/BAC009S0907W0383.wav", "txt": "谁也不知道会发生什么"} -{"key": "BAC009S0907W0384", "wav": "./aishell/wav/test/S0907/BAC009S0907W0384.wav", "txt": "这就是竞技体育的魅力"} -{"key": "BAC009S0907W0385", "wav": "./aishell/wav/test/S0907/BAC009S0907W0385.wav", "txt": "在昨晚的女子标枪决赛中"} -{"key": "BAC009S0907W0386", "wav": "./aishell/wav/test/S0907/BAC009S0907W0386.wav", "txt": "然而就是这最后一掷"} -{"key": "BAC009S0907W0387", "wav": "./aishell/wav/test/S0907/BAC009S0907W0387.wav", "txt": "在昨天比赛的第五投"} -{"key": "BAC009S0907W0388", "wav": "./aishell/wav/test/S0907/BAC009S0907W0388.wav", "txt": "吕会会在全场观众的加油助威声中爆发"} -{"key": "BAC009S0907W0389", "wav": "./aishell/wav/test/S0907/BAC009S0907W0389.wav", "txt": "倾尽全力将标枪掷到了六十六米一三"} -{"key": "BAC009S0907W0390", "wav": "./aishell/wav/test/S0907/BAC009S0907W0390.wav", "txt": "然而就在全场仅剩下莫利托一个人的最后一掷时"} -{"key": "BAC009S0907W0391", "wav": "./aishell/wav/test/S0907/BAC009S0907W0391.wav", "txt": "虽然留下了巨大的遗憾"} -{"key": "BAC009S0907W0392", "wav": "./aishell/wav/test/S0907/BAC009S0907W0392.wav", "txt": "不过这依然是吕会会的个人最好成绩"} -{"key": "BAC009S0907W0393", "wav": "./aishell/wav/test/S0907/BAC009S0907W0393.wav", "txt": "吕会会在走到混合区接受记者采访时止住了泪水"} -{"key": "BAC009S0907W0394", "wav": "./aishell/wav/test/S0907/BAC009S0907W0394.wav", "txt": "在大赛中投出这样好的成绩我自己都没有想到"} -{"key": "BAC009S0907W0395", "wav": "./aishell/wav/test/S0907/BAC009S0907W0395.wav", "txt": "其实比赛过程中我也没有多想"} -{"key": "BAC009S0907W0396", "wav": "./aishell/wav/test/S0907/BAC009S0907W0396.wav", "txt": "就是要一枪一枪地投"} -{"key": "BAC009S0907W0397", "wav": "./aishell/wav/test/S0907/BAC009S0907W0397.wav", "txt": "比成这样我其实已经很开心了"} -{"key": "BAC009S0907W0398", "wav": 
"./aishell/wav/test/S0907/BAC009S0907W0398.wav", "txt": "能在北京获得一枚奖牌我很骄傲"} -{"key": "BAC009S0907W0399", "wav": "./aishell/wav/test/S0907/BAC009S0907W0399.wav", "txt": "观众们的鼓励也给了我力量"} -{"key": "BAC009S0907W0400", "wav": "./aishell/wav/test/S0907/BAC009S0907W0400.wav", "txt": "我的泪水主要还是来自于喜悦"} -{"key": "BAC009S0907W0401", "wav": "./aishell/wav/test/S0907/BAC009S0907W0401.wav", "txt": "要说一点儿没有遗憾和失落是假的"} -{"key": "BAC009S0907W0402", "wav": "./aishell/wav/test/S0907/BAC009S0907W0402.wav", "txt": "但总的来说还是高兴多于遗憾"} -{"key": "BAC009S0907W0403", "wav": "./aishell/wav/test/S0907/BAC009S0907W0403.wav", "txt": "文本报记者刘艾林"} -{"key": "BAC009S0907W0404", "wav": "./aishell/wav/test/S0907/BAC009S0907W0404.wav", "txt": "去年美国队长二寒冬战士就曾在四月登陆"} -{"key": "BAC009S0907W0405", "wav": "./aishell/wav/test/S0907/BAC009S0907W0405.wav", "txt": "结果创造了相当可观的票房成绩"} -{"key": "BAC009S0907W0406", "wav": "./aishell/wav/test/S0907/BAC009S0907W0406.wav", "txt": "丛林之书则将在二零一六年四月十五日登场"} -{"key": "BAC009S0907W0408", "wav": "./aishell/wav/test/S0907/BAC009S0907W0408.wav", "txt": "两人合作长达二十年"} -{"key": "BAC009S0907W0409", "wav": "./aishell/wav/test/S0907/BAC009S0907W0409.wav", "txt": "不过天下无不散之筵席"} -{"key": "BAC009S0907W0410", "wav": "./aishell/wav/test/S0907/BAC009S0907W0410.wav", "txt": "原来陈善之最近已离开了百仕活"} -{"key": "BAC009S0907W0411", "wav": "./aishell/wav/test/S0907/BAC009S0907W0411.wav", "txt": "有传他离开是因黎明不满其在挽留艺人方面没有尽力"} -{"key": "BAC009S0907W0412", "wav": "./aishell/wav/test/S0907/BAC009S0907W0412.wav", "txt": "搜狐娱乐讯十月九日"} -{"key": "BAC009S0907W0413", "wav": "./aishell/wav/test/S0907/BAC009S0907W0413.wav", "txt": "表示决定辞职"} -{"key": "BAC009S0907W0414", "wav": "./aishell/wav/test/S0907/BAC009S0907W0414.wav", "txt": "不与无线续约"} -{"key": "BAC009S0907W0415", "wav": "./aishell/wav/test/S0907/BAC009S0907W0415.wav", "txt": "他感叹自己在无线十五年都没有机会"} -{"key": "BAC009S0907W0416", "wav": "./aishell/wav/test/S0907/BAC009S0907W0416.wav", "txt": "眼见后辈爬头"} -{"key": "BAC009S0907W0417", "wav": "./aishell/wav/test/S0907/BAC009S0907W0417.wav", "txt": "希望出去发展"} -{"key": "BAC009S0907W0418", "wav": "./aishell/wav/test/S0907/BAC009S0907W0418.wav", "txt": "他直言不想看见自己变作一潭死水"} -{"key": "BAC009S0907W0419", "wav": "./aishell/wav/test/S0907/BAC009S0907W0419.wav", "txt": "早前演出的舞台剧令他醒觉要出外寻找更多演出机会"} -{"key": "BAC009S0907W0420", "wav": "./aishell/wav/test/S0907/BAC009S0907W0420.wav", "txt": "因此决定出外闯"} -{"key": "BAC009S0907W0421", "wav": "./aishell/wav/test/S0907/BAC009S0907W0421.wav", "txt": "虽然未知去向"} -{"key": "BAC009S0907W0422", "wav": "./aishell/wav/test/S0907/BAC009S0907W0422.wav", "txt": "但坚信有我落脚的地方"} -{"key": "BAC009S0907W0423", "wav": "./aishell/wav/test/S0907/BAC009S0907W0423.wav", "txt": "我便会到那里"} -{"key": "BAC009S0907W0424", "wav": "./aishell/wav/test/S0907/BAC009S0907W0424.wav", "txt": "搜狐娱乐讯北京时间七月二十八日消息"} -{"key": "BAC009S0907W0425", "wav": "./aishell/wav/test/S0907/BAC009S0907W0425.wav", "txt": "据香港媒体报导"} -{"key": "BAC009S0907W0427", "wav": "./aishell/wav/test/S0907/BAC009S0907W0427.wav", "txt": "陈奕迅双手合十认真地向蛋糕许愿"} -{"key": "BAC009S0907W0428", "wav": "./aishell/wav/test/S0907/BAC009S0907W0428.wav", "txt": "搜狐娱乐讯据台湾媒体报道"} -{"key": "BAC009S0907W0429", "wav": "./aishell/wav/test/S0907/BAC009S0907W0429.wav", "txt": "港歌神陈奕迅出道近二十年"} -{"key": "BAC009S0907W0430", "wav": "./aishell/wav/test/S0907/BAC009S0907W0430.wav", "txt": "曾获美国时代杂志形容为影响香港乐坛风格的人物"} -{"key": "BAC009S0907W0431", "wav": "./aishell/wav/test/S0907/BAC009S0907W0431.wav", "txt": "并于当日被香港警方拘捕"} -{"key": "BAC009S0907W0432", "wav": "./aishell/wav/test/S0907/BAC009S0907W0432.wav", "txt": "警方以普通袭击罪对涉事乘客提起诉讼"} -{"key": 
"BAC009S0907W0433", "wav": "./aishell/wav/test/S0907/BAC009S0907W0433.wav", "txt": "其中四名被告分别判即时监禁九至一一天"} -{"key": "BAC009S0907W0434", "wav": "./aishell/wav/test/S0907/BAC009S0907W0434.wav", "txt": "一人被判罚款一五零零元"} -{"key": "BAC009S0907W0435", "wav": "./aishell/wav/test/S0907/BAC009S0907W0435.wav", "txt": "港京航班延误九小时六名内地乘客推撞地勤被捕"} -{"key": "BAC009S0907W0437", "wav": "./aishell/wav/test/S0907/BAC009S0907W0437.wav", "txt": "六名内地乘客与地勤发生肢体冲突"} -{"key": "BAC009S0907W0439", "wav": "./aishell/wav/test/S0907/BAC009S0907W0439.wav", "txt": "将被以普通袭击罪起诉"} -{"key": "BAC009S0907W0440", "wav": "./aishell/wav/test/S0907/BAC009S0907W0440.wav", "txt": "港商在台遭绑三八天获救后痛哭以为必死"} -{"key": "BAC009S0907W0441", "wav": "./aishell/wav/test/S0907/BAC009S0907W0441.wav", "txt": "黄煜坤被警方送到附近医院接受检查"} -{"key": "BAC009S0907W0442", "wav": "./aishell/wav/test/S0907/BAC009S0907W0442.wav", "txt": "惠州公安在金山河捞获一具无头无双手女尸"} -{"key": "BAC009S0907W0443", "wav": "./aishell/wav/test/S0907/BAC009S0907W0443.wav", "txt": "广东惠州惊爆港商杀情妇碎尸凶案"} -{"key": "BAC009S0907W0444", "wav": "./aishell/wav/test/S0907/BAC009S0907W0444.wav", "txt": "五零岁港商疑与其工厂的同龄女主管偷情多年"} -{"key": "BAC009S0907W0445", "wav": "./aishell/wav/test/S0907/BAC009S0907W0445.wav", "txt": "近日再度拒绝女方的逼婚后"} -{"key": "BAC009S0907W0446", "wav": "./aishell/wav/test/S0907/BAC009S0907W0446.wav", "txt": "遭追讨欠款和抚养费共四零万元人民币"} -{"key": "BAC009S0907W0447", "wav": "./aishell/wav/test/S0907/BAC009S0907W0447.wav", "txt": "港商疑恼羞成怒将她杀害"} -{"key": "BAC009S0907W0448", "wav": "./aishell/wav/test/S0907/BAC009S0907W0448.wav", "txt": "并肢解尸体分成多袋抛入河中"} -{"key": "BAC009S0907W0449", "wav": "./aishell/wav/test/S0907/BAC009S0907W0449.wav", "txt": "港商被骗牵出路边地下钱庄涉案资金四三零零亿"} -{"key": "BAC009S0907W0450", "wav": "./aishell/wav/test/S0907/BAC009S0907W0450.wav", "txt": "深圳警方查获的一个地下钱庄窝点"} -{"key": "BAC009S0907W0451", "wav": "./aishell/wav/test/S0907/BAC009S0907W0451.wav", "txt": "由普通商店作为掩护"} -{"key": "BAC009S0907W0452", "wav": "./aishell/wav/test/S0907/BAC009S0907W0452.wav", "txt": "该商店老板郑晓生红衣者涉嫌暗地里兑换外汇"} -{"key": "BAC009S0907W0453", "wav": "./aishell/wav/test/S0907/BAC009S0907W0453.wav", "txt": "替人向境外转移资金"} -{"key": "BAC009S0907W0454", "wav": "./aishell/wav/test/S0907/BAC009S0907W0454.wav", "txt": "港媒关注天价虾店停业破坏青岛形象"} -{"key": "BAC009S0907W0455", "wav": "./aishell/wav/test/S0907/BAC009S0907W0455.wav", "txt": "参考消息网一零月八日报道港媒称"} -{"key": "BAC009S0907W0456", "wav": "./aishell/wav/test/S0907/BAC009S0907W0456.wav", "txt": "备受关注的青岛三八元一只大虾事件有最新发展"} -{"key": "BAC009S0907W0457", "wav": "./aishell/wav/test/S0907/BAC009S0907W0457.wav", "txt": "并责令其立即改正价格违法行为"} -{"key": "BAC009S0907W0458", "wav": "./aishell/wav/test/S0907/BAC009S0907W0458.wav", "txt": "事发后派出所和物价局都互相踢皮球"} -{"key": "BAC009S0907W0459", "wav": "./aishell/wav/test/S0907/BAC009S0907W0459.wav", "txt": "批评职能部门没有将消费者放在第一位"} -{"key": "BAC009S0907W0460", "wav": "./aishell/wav/test/S0907/BAC009S0907W0460.wav", "txt": "港媒关注内地私人美术馆新富人群热衷分享藏品"} -{"key": "BAC009S0907W0461", "wav": "./aishell/wav/test/S0907/BAC009S0907W0461.wav", "txt": "参考消息网七月二九日报道港媒称"} -{"key": "BAC009S0907W0462", "wav": "./aishell/wav/test/S0907/BAC009S0907W0462.wav", "txt": "用来保存他们的藏品"} -{"key": "BAC009S0907W0463", "wav": "./aishell/wav/test/S0907/BAC009S0907W0463.wav", "txt": "其中一些人是近年来国际拍卖会上艺术品的最大买家"} -{"key": "BAC009S0907W0464", "wav": "./aishell/wav/test/S0907/BAC009S0907W0464.wav", "txt": "港媒关注浙江暖男医生手术室播动画片哄小女孩"} -{"key": "BAC009S0907W0465", "wav": "./aishell/wav/test/S0907/BAC009S0907W0465.wav", "txt": "参考消息网九月二二日报道港媒称"} -{"key": "BAC009S0907W0466", "wav": "./aishell/wav/test/S0907/BAC009S0907W0466.wav", "txt": 
"网络上热传一组暖男医生哄小萝莉的温情照片"} -{"key": "BAC009S0907W0467", "wav": "./aishell/wav/test/S0907/BAC009S0907W0467.wav", "txt": "男医生为了安抚即将做手术的小女孩"} -{"key": "BAC009S0907W0468", "wav": "./aishell/wav/test/S0907/BAC009S0907W0468.wav", "txt": "将小女孩抱在腿上并播放手机中的动画片"} -{"key": "BAC009S0907W0469", "wav": "./aishell/wav/test/S0907/BAC009S0907W0469.wav", "txt": "港媒关注重雾霾重回华北罕见蓝天只持续两周"} -{"key": "BAC009S0907W0470", "wav": "./aishell/wav/test/S0907/BAC009S0907W0470.wav", "txt": "参考消息网九月一九日报道港媒称"} -{"key": "BAC009S0907W0471", "wav": "./aishell/wav/test/S0907/BAC009S0907W0471.wav", "txt": "随着严重雾霾卷土重来"} -{"key": "BAC009S0907W0472", "wav": "./aishell/wav/test/S0907/BAC009S0907W0472.wav", "txt": "港媒关注马云回应被逼捐花钱比挣钱难"} -{"key": "BAC009S0907W0474", "wav": "./aishell/wav/test/S0907/BAC009S0907W0474.wav", "txt": "企业应该做好的投资"} -{"key": "BAC009S0907W0475", "wav": "./aishell/wav/test/S0907/BAC009S0907W0475.wav", "txt": "盲目捐款没有益处"} -{"key": "BAC009S0907W0476", "wav": "./aishell/wav/test/S0907/BAC009S0907W0476.wav", "txt": "港媒曝水货客扮残疾人在轮椅中藏钻石月入八万"} -{"key": "BAC009S0907W0477", "wav": "./aishell/wav/test/S0907/BAC009S0907W0477.wav", "txt": "参考消息网七月二九日报道港媒称"} -{"key": "BAC009S0907W0478", "wav": "./aishell/wav/test/S0907/BAC009S0907W0478.wav", "txt": "香港海关严查水货客"} -{"key": "BAC009S0907W0479", "wav": "./aishell/wav/test/S0907/BAC009S0907W0479.wav", "txt": "水货集团看中轮椅人士收入不高"} -{"key": "BAC009S0907W0480", "wav": "./aishell/wav/test/S0907/BAC009S0907W0480.wav", "txt": "以高收入低风险和免缴税等好处利诱对方成为水货客"} -{"key": "BAC009S0907W0481", "wav": "./aishell/wav/test/S0907/BAC009S0907W0481.wav", "txt": "有走私奢侈品的人士月入高达八万港元"} -{"key": "BAC009S0907W0482", "wav": "./aishell/wav/test/S0907/BAC009S0907W0482.wav", "txt": "港媒盘点亚洲千禧一代十大富豪九人是中国人"} -{"key": "BAC009S0907W0483", "wav": "./aishell/wav/test/S0907/BAC009S0907W0483.wav", "txt": "参考消息网七月二二日报道"} -{"key": "BAC009S0907W0484", "wav": "./aishell/wav/test/S0907/BAC009S0907W0484.wav", "txt": "港媒称假沉香充斥内地多以化学香油制成"} -{"key": "BAC009S0907W0485", "wav": "./aishell/wav/test/S0907/BAC009S0907W0485.wav", "txt": "高仿沉香多以化学香精等制成"} -{"key": "BAC009S0907W0486", "wav": "./aishell/wav/test/S0907/BAC009S0907W0486.wav", "txt": "可比黄金的沉香价格每年倍增"} -{"key": "BAC009S0907W0487", "wav": "./aishell/wav/test/S0907/BAC009S0907W0487.wav", "txt": "港媒称内地中产人数猛增有助稳定企望渐进改革"} -{"key": "BAC009S0907W0488", "wav": "./aishell/wav/test/S0907/BAC009S0907W0488.wav", "txt": "一个国家稳定的社会结构呈橄榄形"} -{"key": "BAC009S0907W0489", "wav": "./aishell/wav/test/S0907/BAC009S0907W0489.wav", "txt": "而橄榄形结构是以中产为主的结构"} -{"key": "BAC009S0907W0490", "wav": "./aishell/wav/test/S0907/BAC009S0907W0490.wav", "txt": "中产阶级在一个国家的现代化中起着稳定作用"} -{"key": "BAC009S0907W0491", "wav": "./aishell/wav/test/S0907/BAC009S0907W0491.wav", "txt": "是社会稳定的主要力量"} -{"key": "BAC009S0907W0492", "wav": "./aishell/wav/test/S0907/BAC009S0907W0492.wav", "txt": "港媒称内地为国际市场修改动画片妖怪不能吃唐僧肉"} -{"key": "BAC009S0907W0493", "wav": "./aishell/wav/test/S0907/BAC009S0907W0493.wav", "txt": "参考消息网一一月一日报道港媒称"} -{"key": "BAC009S0907W0494", "wav": "./aishell/wav/test/S0907/BAC009S0907W0494.wav", "txt": "中国的动画工作室越来越看重海外市场"} -{"key": "BAC009S0907W0495", "wav": "./aishell/wav/test/S0907/BAC009S0907W0495.wav", "txt": "港媒称内地人不穷了为何仍爱抢学者抢习惯了"} -{"key": "BAC009S0908W0121", "wav": "./aishell/wav/test/S0908/BAC009S0908W0121.wav", "txt": "将进一步提振刚需购房者入市信心"} -{"key": "BAC009S0908W0122", "wav": "./aishell/wav/test/S0908/BAC009S0908W0122.wav", "txt": "加速今年楼市成交复苏回暖"} -{"key": "BAC009S0908W0123", "wav": "./aishell/wav/test/S0908/BAC009S0908W0123.wav", "txt": "公积金政策利好首套自住住房贷款需求的消息纷至沓来"} -{"key": "BAC009S0908W0124", "wav": 
"./aishell/wav/test/S0908/BAC009S0908W0124.wav", "txt": "并已实施"} -{"key": "BAC009S0908W0125", "wav": "./aishell/wav/test/S0908/BAC009S0908W0125.wav", "txt": "贷款额度上限调整为一百万元"} -{"key": "BAC009S0908W0126", "wav": "./aishell/wav/test/S0908/BAC009S0908W0126.wav", "txt": "购买一百平方米以上非政策性住房或第二套住房"} -{"key": "BAC009S0908W0127", "wav": "./aishell/wav/test/S0908/BAC009S0908W0127.wav", "txt": "贷款最高额度仍为一百万元"} -{"key": "BAC009S0908W0128", "wav": "./aishell/wav/test/S0908/BAC009S0908W0128.wav", "txt": "均规定贷款额度不再依据个人信用等级上浮"} -{"key": "BAC009S0908W0129", "wav": "./aishell/wav/test/S0908/BAC009S0908W0129.wav", "txt": "并对异地缴存住房公积金等政策作出调整"} -{"key": "BAC009S0908W0130", "wav": "./aishell/wav/test/S0908/BAC009S0908W0130.wav", "txt": "北京市公积金管理中心明确取消新建商品房评估"} -{"key": "BAC009S0908W0131", "wav": "./aishell/wav/test/S0908/BAC009S0908W0131.wav", "txt": "国管住房公积金中心则表示取消担保服务费"} -{"key": "BAC009S0908W0132", "wav": "./aishell/wav/test/S0908/BAC009S0908W0132.wav", "txt": "这一系列公积金门槛放低额度提高的调整"} -{"key": "BAC009S0908W0133", "wav": "./aishell/wav/test/S0908/BAC009S0908W0133.wav", "txt": "是对过去住房公积金制度不作为方式的纠正"} -{"key": "BAC009S0908W0134", "wav": "./aishell/wav/test/S0908/BAC009S0908W0134.wav", "txt": "即使去年十一月公积金贷款利率降至百分之七"} -{"key": "BAC009S0908W0135", "wav": "./aishell/wav/test/S0908/BAC009S0908W0135.wav", "txt": "很多人需要支付大额首付"} -{"key": "BAC009S0908W0136", "wav": "./aishell/wav/test/S0908/BAC009S0908W0136.wav", "txt": "使用公积金制度的作用和效果没有得到有效的发挥"} -{"key": "BAC009S0908W0137", "wav": "./aishell/wav/test/S0908/BAC009S0908W0137.wav", "txt": "此番公积金政策调整"} -{"key": "BAC009S0908W0138", "wav": "./aishell/wav/test/S0908/BAC009S0908W0138.wav", "txt": "将在诸多方面惠及刚需购房者"} -{"key": "BAC009S0908W0139", "wav": "./aishell/wav/test/S0908/BAC009S0908W0139.wav", "txt": "之前的公积金贷款额只有一百万"} -{"key": "BAC009S0908W0140", "wav": "./aishell/wav/test/S0908/BAC009S0908W0140.wav", "txt": "而最高额度提升至一百万后"} -{"key": "BAC009S0908W0141", "wav": "./aishell/wav/test/S0908/BAC009S0908W0141.wav", "txt": "大部分刚需购房者都可以选择公积金贷款"} -{"key": "BAC009S0908W0142", "wav": "./aishell/wav/test/S0908/BAC009S0908W0142.wav", "txt": "中原地产首席分析师张大伟分析"} -{"key": "BAC009S0908W0143", "wav": "./aishell/wav/test/S0908/BAC009S0908W0143.wav", "txt": "公积金额贷款额度升至一百万可以节省很多少利息"} -{"key": "BAC009S0908W0144", "wav": "./aishell/wav/test/S0908/BAC009S0908W0144.wav", "txt": "公积金贷款可少缴三十馀万"} -{"key": "BAC009S0908W0145", "wav": "./aishell/wav/test/S0908/BAC009S0908W0145.wav", "txt": "而额度最高一百万时"} -{"key": "BAC009S0908W0146", "wav": "./aishell/wav/test/S0908/BAC009S0908W0146.wav", "txt": "这一数值为二十馀万"} -{"key": "BAC009S0908W0147", "wav": "./aishell/wav/test/S0908/BAC009S0908W0147.wav", "txt": "这将使更多购房者具备买房支付能力"} -{"key": "BAC009S0908W0148", "wav": "./aishell/wav/test/S0908/BAC009S0908W0148.wav", "txt": "可以使用公积金贷款的购房者将起码增加百分之七"} -{"key": "BAC009S0908W0149", "wav": "./aishell/wav/test/S0908/BAC009S0908W0149.wav", "txt": "也将在一定程度上降低刚需购房者支付负担"} -{"key": "BAC009S0908W0150", "wav": "./aishell/wav/test/S0908/BAC009S0908W0150.wav", "txt": "公积金政策调整对于楼市成交刺激作用已初见瑞尔"} -{"key": "BAC009S0908W0151", "wav": "./aishell/wav/test/S0908/BAC009S0908W0151.wav", "txt": "链家地产市场研究部数据显示"} -{"key": "BAC009S0908W0152", "wav": "./aishell/wav/test/S0908/BAC009S0908W0152.wav", "txt": "北京市公积金额度提升后的元旦时期"} -{"key": "BAC009S0908W0153", "wav": "./aishell/wav/test/S0908/BAC009S0908W0153.wav", "txt": "近郊小户型楼盘及城区部分公房社区客户咨询量上升"} -{"key": "BAC009S0908W0154", "wav": "./aishell/wav/test/S0908/BAC009S0908W0154.wav", "txt": "而其房源多在一百平方米以下"} -{"key": "BAC009S0908W0155", "wav": "./aishell/wav/test/S0908/BAC009S0908W0155.wav", "txt": "中原地产市场研究部数据显示"} -{"key": 
"BAC009S0908W0156", "wav": "./aishell/wav/test/S0908/BAC009S0908W0156.wav", "txt": "以北京去年纯商品房成交结构为例"} -{"key": "BAC009S0908W0157", "wav": "./aishell/wav/test/S0908/BAC009S0908W0157.wav", "txt": "一百平均单套总价约一百万元左右"} -{"key": "BAC009S0908W0158", "wav": "./aishell/wav/test/S0908/BAC009S0908W0158.wav", "txt": "公积金贷款上限调整后"} -{"key": "BAC009S0908W0159", "wav": "./aishell/wav/test/S0908/BAC009S0908W0159.wav", "txt": "一百万元的贷款额度能满足大部分首套刚需的贷款需求"} -{"key": "BAC009S0908W0160", "wav": "./aishell/wav/test/S0908/BAC009S0908W0160.wav", "txt": "链家地产市场研究部张旭表示"} -{"key": "BAC009S0908W0161", "wav": "./aishell/wav/test/S0908/BAC009S0908W0161.wav", "txt": "此番公积金贷款政策调整将进一步提升振刚需"} -{"key": "BAC009S0908W0162", "wav": "./aishell/wav/test/S0908/BAC009S0908W0162.wav", "txt": "促进楼市预期向好发展"} -{"key": "BAC009S0908W0163", "wav": "./aishell/wav/test/S0908/BAC009S0908W0163.wav", "txt": "去年已有不少城市对公积金政策进行放松调整"} -{"key": "BAC009S0908W0164", "wav": "./aishell/wav/test/S0908/BAC009S0908W0164.wav", "txt": "公积金政策调整对购房者心理层面影响较大"} -{"key": "BAC009S0908W0165", "wav": "./aishell/wav/test/S0908/BAC009S0908W0165.wav", "txt": "将加速今年楼市成交复苏回暖"} -{"key": "BAC009S0908W0166", "wav": "./aishell/wav/test/S0908/BAC009S0908W0166.wav", "txt": "备受刚需购房者关注的公积金政策也频繁迎来调整"} -{"key": "BAC009S0908W0167", "wav": "./aishell/wav/test/S0908/BAC009S0908W0167.wav", "txt": "北京市管国管住房公积金中心先后发布通知"} -{"key": "BAC009S0908W0168", "wav": "./aishell/wav/test/S0908/BAC009S0908W0168.wav", "txt": "通知指出除北上广深一线城市外"} -{"key": "BAC009S0908W0169", "wav": "./aishell/wav/test/S0908/BAC009S0908W0169.wav", "txt": "对拥有一套住房并已结清相应购房贷款的居民家庭"} -{"key": "BAC009S0908W0170", "wav": "./aishell/wav/test/S0908/BAC009S0908W0170.wav", "txt": "申请公积金购买第二套住房"} -{"key": "BAC009S0908W0171", "wav": "./aishell/wav/test/S0908/BAC009S0908W0171.wav", "txt": "最低首付款比例由百分之七降低至百分之五"} -{"key": "BAC009S0908W0172", "wav": "./aishell/wav/test/S0908/BAC009S0908W0172.wav", "txt": "公积金首付的再次降低实际影响有限"} -{"key": "BAC009S0908W0173", "wav": "./aishell/wav/test/S0908/BAC009S0908W0173.wav", "txt": "但对购房者预期有积极响应"} -{"key": "BAC009S0908W0174", "wav": "./aishell/wav/test/S0908/BAC009S0908W0174.wav", "txt": "这将有利于稳定房地产市场"} -{"key": "BAC009S0908W0175", "wav": "./aishell/wav/test/S0908/BAC009S0908W0175.wav", "txt": "进而对稳定中国经济有正面作用"} -{"key": "BAC009S0908W0176", "wav": "./aishell/wav/test/S0908/BAC009S0908W0176.wav", "txt": "为进一步完善住房公积金个人住房贷款政策"} -{"key": "BAC009S0908W0177", "wav": "./aishell/wav/test/S0908/BAC009S0908W0177.wav", "txt": "对拥有一套住房并已结清相应购房贷款的居民家庭"} -{"key": "BAC009S0908W0178", "wav": "./aishell/wav/test/S0908/BAC009S0908W0178.wav", "txt": "最低首付款比例由百分之七降低至百分之五"} -{"key": "BAC009S0908W0179", "wav": "./aishell/wav/test/S0908/BAC009S0908W0179.wav", "txt": "该政策对于一线城市并不强制执行"} -{"key": "BAC009S0908W0180", "wav": "./aishell/wav/test/S0908/BAC009S0908W0180.wav", "txt": "而是北京上海广州深圳可在国家统一政策基础上"} -{"key": "BAC009S0908W0181", "wav": "./aishell/wav/test/S0908/BAC009S0908W0181.wav", "txt": "易居研究院智库中心研究总监严跃进认为"} -{"key": "BAC009S0908W0182", "wav": "./aishell/wav/test/S0908/BAC009S0908W0182.wav", "txt": "此次住建部财政部和中央联合发文"} -{"key": "BAC009S0908W0183", "wav": "./aishell/wav/test/S0908/BAC009S0908W0183.wav", "txt": "反映出政策层面较大的刺激力度"} -{"key": "BAC009S0908W0184", "wav": "./aishell/wav/test/S0908/BAC009S0908W0184.wav", "txt": "这是自去年以来除降息外"} -{"key": "BAC009S0908W0185", "wav": "./aishell/wav/test/S0908/BAC009S0908W0185.wav", "txt": "相关部门对公积金贷款政策的第三次放松"} -{"key": "BAC009S0908W0186", "wav": "./aishell/wav/test/S0908/BAC009S0908W0186.wav", "txt": "美丽北京大型绿色公益品牌项目"} -{"key": "BAC009S0908W0187", "wav": 
"./aishell/wav/test/S0908/BAC009S0908W0187.wav", "txt": "完善玉米大豆油菜籽棉花等农产品临时收储政策"} -{"key": "BAC009S0908W0188", "wav": "./aishell/wav/test/S0908/BAC009S0908W0188.wav", "txt": "完善主要农产品吞吐和调节机制"} -{"key": "BAC009S0908W0189", "wav": "./aishell/wav/test/S0908/BAC009S0908W0189.wav", "txt": "健全重要农产品储备制度"} -{"key": "BAC009S0908W0190", "wav": "./aishell/wav/test/S0908/BAC009S0908W0190.wav", "txt": "发挥骨干企业稳定市场的作用"} -{"key": "BAC009S0908W0191", "wav": "./aishell/wav/test/S0908/BAC009S0908W0191.wav", "txt": "完善生猪棉花食糖边销茶等调控预案"} -{"key": "BAC009S0908W0192", "wav": "./aishell/wav/test/S0908/BAC009S0908W0192.wav", "txt": "制定鲜活农产品调控办法"} -{"key": "BAC009S0908W0193", "wav": "./aishell/wav/test/S0908/BAC009S0908W0193.wav", "txt": "探索建立目标价格为核心的反周期补贴制度"} -{"key": "BAC009S0908W0194", "wav": "./aishell/wav/test/S0908/BAC009S0908W0194.wav", "txt": "加强农业科技交流合作"} -{"key": "BAC009S0908W0195", "wav": "./aishell/wav/test/S0908/BAC009S0908W0195.wav", "txt": "提高农业利用外资水平"} -{"key": "BAC009S0908W0196", "wav": "./aishell/wav/test/S0908/BAC009S0908W0196.wav", "txt": "继续用好国外优惠贷款和赠款"} -{"key": "BAC009S0908W0197", "wav": "./aishell/wav/test/S0908/BAC009S0908W0197.wav", "txt": "加大先进适用技术装备的引进消化和吸收力度"} -{"key": "BAC009S0908W0198", "wav": "./aishell/wav/test/S0908/BAC009S0908W0198.wav", "txt": "强化多双边和区域农业磋商谈判和贸易促进"} -{"key": "BAC009S0908W0199", "wav": "./aishell/wav/test/S0908/BAC009S0908W0199.wav", "txt": "做好涉农国际贸易规定制动工作"} -{"key": "BAC009S0908W0200", "wav": "./aishell/wav/test/S0908/BAC009S0908W0200.wav", "txt": "进一步强化贸易促进公共服务能力"} -{"key": "BAC009S0908W0201", "wav": "./aishell/wav/test/S0908/BAC009S0908W0201.wav", "txt": "积极推动优势农产品出口"} -{"key": "BAC009S0908W0202", "wav": "./aishell/wav/test/S0908/BAC009S0908W0202.wav", "txt": "积极应对国际贸易摩擦"} -{"key": "BAC009S0908W0203", "wav": "./aishell/wav/test/S0908/BAC009S0908W0203.wav", "txt": "支持行业协会办企业维护合法权益"} -{"key": "BAC009S0908W0204", "wav": "./aishell/wav/test/S0908/BAC009S0908W0204.wav", "txt": "进一步完善农业产业损害监测预警机制"} -{"key": "BAC009S0908W0205", "wav": "./aishell/wav/test/S0908/BAC009S0908W0205.wav", "txt": "运用符合世界贸易组织规定的相关措施"} -{"key": "BAC009S0908W0206", "wav": "./aishell/wav/test/S0908/BAC009S0908W0206.wav", "txt": "灵活有效调控农业产品进出口"} -{"key": "BAC009S0908W0207", "wav": "./aishell/wav/test/S0908/BAC009S0908W0207.wav", "txt": "积极推动种业农垦等方面改革"} -{"key": "BAC009S0908W0208", "wav": "./aishell/wav/test/S0908/BAC009S0908W0208.wav", "txt": "发展农村服务业和乡村企业"} -{"key": "BAC009S0908W0209", "wav": "./aishell/wav/test/S0908/BAC009S0908W0209.wav", "txt": "制定农村二三产业加快发展的鼓励政策"} -{"key": "BAC009S0908W0210", "wav": "./aishell/wav/test/S0908/BAC009S0908W0210.wav", "txt": "落实和完善有关税收政策"} -{"key": "BAC009S0908W0211", "wav": "./aishell/wav/test/S0908/BAC009S0908W0211.wav", "txt": "统筹城乡基础设施建和公共服务"} -{"key": "BAC009S0908W0212", "wav": "./aishell/wav/test/S0908/BAC009S0908W0212.wav", "txt": "逐步建立城乡统一的公共服务制度"} -{"key": "BAC009S0908W0213", "wav": "./aishell/wav/test/S0908/BAC009S0908W0213.wav", "txt": "积极稳妥推进户籍制度改革"} -{"key": "BAC009S0908W0214", "wav": "./aishell/wav/test/S0908/BAC009S0908W0214.wav", "txt": "推进省直接管理县市财政体制改革"} -{"key": "BAC009S0908W0215", "wav": "./aishell/wav/test/S0908/BAC009S0908W0215.wav", "txt": "优先将农业大县纳入改革范围"} -{"key": "BAC009S0908W0216", "wav": "./aishell/wav/test/S0908/BAC009S0908W0216.wav", "txt": "强化农业法制保障"} -{"key": "BAC009S0908W0217", "wav": "./aishell/wav/test/S0908/BAC009S0908W0217.wav", "txt": "坚持米袋子省长负责制和菜篮子市长负责制"} -{"key": "BAC009S0908W0218", "wav": "./aishell/wav/test/S0908/BAC009S0908W0218.wav", "txt": "全面落实耕地和基本农田保护领导干部离任审计制度"} -{"key": "BAC009S0908W0219", "wav": 
"./aishell/wav/test/S0908/BAC009S0908W0219.wav", "txt": "各有关部门和地方各级人民政府要围绕规划目标任务"} -{"key": "BAC009S0908W0220", "wav": "./aishell/wav/test/S0908/BAC009S0908W0220.wav", "txt": "研究落实各项强农惠农富农政策"} -{"key": "BAC009S0908W0221", "wav": "./aishell/wav/test/S0908/BAC009S0908W0221.wav", "txt": "统筹协调推动重大工程的实施"} -{"key": "BAC009S0908W0222", "wav": "./aishell/wav/test/S0908/BAC009S0908W0222.wav", "txt": "努力开创我国农业现代化发展新局面"} -{"key": "BAC009S0908W0223", "wav": "./aishell/wav/test/S0908/BAC009S0908W0223.wav", "txt": "农业农村信息化十二五规划"} -{"key": "BAC009S0908W0224", "wav": "./aishell/wav/test/S0908/BAC009S0908W0224.wav", "txt": "关于印发十二五规划的通知"} -{"key": "BAC009S0908W0225", "wav": "./aishell/wav/test/S0908/BAC009S0908W0225.wav", "txt": "中国老龄十二五规划"} -{"key": "BAC009S0908W0226", "wav": "./aishell/wav/test/S0908/BAC009S0908W0226.wav", "txt": "新农村十二五发展规划"} -{"key": "BAC009S0908W0227", "wav": "./aishell/wav/test/S0908/BAC009S0908W0227.wav", "txt": "国家林业十二五规划"} -{"key": "BAC009S0908W0228", "wav": "./aishell/wav/test/S0908/BAC009S0908W0228.wav", "txt": "十二五医药发展规划"} -{"key": "BAC009S0908W0229", "wav": "./aishell/wav/test/S0908/BAC009S0908W0229.wav", "txt": "老龄事业十二五规划"} -{"key": "BAC009S0908W0230", "wav": "./aishell/wav/test/S0908/BAC009S0908W0230.wav", "txt": "国务院总理温家宝五日主持召开国务院常务会议"} -{"key": "BAC009S0908W0231", "wav": "./aishell/wav/test/S0908/BAC009S0908W0231.wav", "txt": "再次听取全国民用核设施综合安全检查情况汇报"} -{"key": "BAC009S0908W0232", "wav": "./aishell/wav/test/S0908/BAC009S0908W0232.wav", "txt": "核电重启的曙光越来越近"} -{"key": "BAC009S0908W0233", "wav": "./aishell/wav/test/S0908/BAC009S0908W0233.wav", "txt": "国务院二零一一年五月"} -{"key": "BAC009S0908W0234", "wav": "./aishell/wav/test/S0908/BAC009S0908W0234.wav", "txt": "相关公司股票走势国海证券"} -{"key": "BAC009S0908W0235", "wav": "./aishell/wav/test/S0908/BAC009S0908W0235.wav", "txt": "决定对全国核设施进行安全检查"} -{"key": "BAC009S0908W0236", "wav": "./aishell/wav/test/S0908/BAC009S0908W0236.wav", "txt": "有关部门组织核安全地震海洋等方面专家"} -{"key": "BAC009S0908W0237", "wav": "./aishell/wav/test/S0908/BAC009S0908W0237.wav", "txt": "用五个多月时间对全国七十台运行在建核电机组"} -{"key": "BAC009S0908W0238", "wav": "./aishell/wav/test/S0908/BAC009S0908W0238.wav", "txt": "以及所有民用研究堆和核燃燃料循环设施等"} -{"key": "BAC009S0908W0239", "wav": "./aishell/wav/test/S0908/BAC009S0908W0239.wav", "txt": "进行了综合安全检查"} -{"key": "BAC009S0908W0240", "wav": "./aishell/wav/test/S0908/BAC009S0908W0240.wav", "txt": "形成了新形势下我国核电发展的建议阶段研究报告"} -{"key": "BAC009S0908W0241", "wav": "./aishell/wav/test/S0908/BAC009S0908W0241.wav", "txt": "国务院常务会议听取了综合安全检查情况汇报"} -{"key": "BAC009S0908W0242", "wav": "./aishell/wav/test/S0908/BAC009S0908W0242.wav", "txt": "对进一步深入检查及落实整改措施作了部署"} -{"key": "BAC009S0908W0243", "wav": "./aishell/wav/test/S0908/BAC009S0908W0243.wav", "txt": "核安全法规标准体系与国际接轨"} -{"key": "BAC009S0908W0244", "wav": "./aishell/wav/test/S0908/BAC009S0908W0244.wav", "txt": "具备一定的严重事故预防和缓解能力"} -{"key": "BAC009S0908W0245", "wav": "./aishell/wav/test/S0908/BAC009S0908W0245.wav", "txt": "部分核电厂未制定实施严重事故预防和缓解规程"} -{"key": "BAC009S0908W0246", "wav": "./aishell/wav/test/S0908/BAC009S0908W0246.wav", "txt": "海啸问题评估和应付基础比较薄弱等"} -{"key": "BAC009S0908W0247", "wav": "./aishell/wav/test/S0908/BAC009S0908W0247.wav", "txt": "有关部门和企业迅速组织整改"} -{"key": "BAC009S0908W0248", "wav": "./aishell/wav/test/S0908/BAC009S0908W0248.wav", "txt": "目前已取得阶段性成效"} -{"key": "BAC009S0908W0249", "wav": "./aishell/wav/test/S0908/BAC009S0908W0249.wav", "txt": "基本原则是预防为主纵深防御"} -{"key": "BAC009S0908W0250", "wav": "./aishell/wav/test/S0908/BAC009S0908W0250.wav", "txt": "新老并重防结结合"} -{"key": "BAC009S0908W0251", "wav": 
"./aishell/wav/test/S0908/BAC009S0908W0251.wav", "txt": "依靠科技持续改进"} -{"key": "BAC009S0908W0252", "wav": "./aishell/wav/test/S0908/BAC009S0908W0252.wav", "txt": "坚持法治严格监管"} -{"key": "BAC009S0908W0253", "wav": "./aishell/wav/test/S0908/BAC009S0908W0253.wav", "txt": "比如用户抱怨升级之后设施无法像以前那样工作了"} -{"key": "BAC009S0908W0254", "wav": "./aishell/wav/test/S0908/BAC009S0908W0254.wav", "txt": "甚至还不如原来的一点零版本系统好用"} -{"key": "BAC009S0908W0255", "wav": "./aishell/wav/test/S0908/BAC009S0908W0255.wav", "txt": "苹果此举是为了节约用电量"} -{"key": "BAC009S0908W0256", "wav": "./aishell/wav/test/S0908/BAC009S0908W0256.wav", "txt": "有人给出了解决方法"} -{"key": "BAC009S0908W0258", "wav": "./aishell/wav/test/S0908/BAC009S0908W0258.wav", "txt": "强制不断的心率测量"} -{"key": "BAC009S0908W0259", "wav": "./aishell/wav/test/S0908/BAC009S0908W0259.wav", "txt": "只是这种情况下心率传感器会每隔十秒进行一次"} -{"key": "BAC009S0908W0261", "wav": "./aishell/wav/test/S0908/BAC009S0908W0261.wav", "txt": "苹果表这么火爆微软也该出智能手表吗"} -{"key": "BAC009S0908W0262", "wav": "./aishell/wav/test/S0908/BAC009S0908W0262.wav", "txt": "刚开始微软因谨慎起见"} -{"key": "BAC009S0908W0264", "wav": "./aishell/wav/test/S0908/BAC009S0908W0264.wav", "txt": "最近才开始向其他市场推广销售"} -{"key": "BAC009S0908W0265", "wav": "./aishell/wav/test/S0908/BAC009S0908W0265.wav", "txt": "在谷歌与苹果相机推出智能手表后"} -{"key": "BAC009S0908W0266", "wav": "./aishell/wav/test/S0908/BAC009S0908W0266.wav", "txt": "微软目前仍局限于健身手环领域"} -{"key": "BAC009S0908W0267", "wav": "./aishell/wav/test/S0908/BAC009S0908W0267.wav", "txt": "但它的确算不上是智能手表"} -{"key": "BAC009S0908W0268", "wav": "./aishell/wav/test/S0908/BAC009S0908W0268.wav", "txt": "拥有内部存储空间与完整的应用平台"} -{"key": "BAC009S0908W0269", "wav": "./aishell/wav/test/S0908/BAC009S0908W0269.wav", "txt": "支持开发者为其编写应用"} -{"key": "BAC009S0908W0270", "wav": "./aishell/wav/test/S0908/BAC009S0908W0270.wav", "txt": "但它对开发者来说限制太多"} -{"key": "BAC009S0908W0272", "wav": "./aishell/wav/test/S0908/BAC009S0908W0272.wav", "txt": "微软正在向外界推广一次编写"} -{"key": "BAC009S0908W0273", "wav": "./aishell/wav/test/S0908/BAC009S0908W0273.wav", "txt": "跨设备使用的通用应用"} -{"key": "BAC009S0908W0274", "wav": "./aishell/wav/test/S0908/BAC009S0908W0274.wav", "txt": "但至今唯独没有提升智能手表平台"} -{"key": "BAC009S0908W0275", "wav": "./aishell/wav/test/S0908/BAC009S0908W0275.wav", "txt": "具体如下方视频介绍所示"} -{"key": "BAC009S0908W0277", "wav": "./aishell/wav/test/S0908/BAC009S0908W0277.wav", "txt": "刚开始微软因谨慎起见"} -{"key": "BAC009S0908W0280", "wav": "./aishell/wav/test/S0908/BAC009S0908W0280.wav", "txt": "原告当地时间周二在法庭上表示"} -{"key": "BAC009S0908W0281", "wav": "./aishell/wav/test/S0908/BAC009S0908W0281.wav", "txt": "苹果通过发布不必要的软件升级包"} -{"key": "BAC009S0908W0283", "wav": "./aishell/wav/test/S0908/BAC009S0908W0283.wav", "txt": "一起针对苹果的集体反垄断案两名原告的律师称"} -{"key": "BAC009S0908W0284", "wav": "./aishell/wav/test/S0908/BAC009S0908W0284.wav", "txt": "由于苹果要打压竞争对手"} -{"key": "BAC009S0908W0286", "wav": "./aishell/wav/test/S0908/BAC009S0908W0286.wav", "txt": "但却损害了消费者的利益"} -{"key": "BAC009S0908W0287", "wav": "./aishell/wav/test/S0908/BAC009S0908W0287.wav", "txt": "这次庭审将持续九天时间"} -{"key": "BAC009S0908W0288", "wav": "./aishell/wav/test/S0908/BAC009S0908W0288.wav", "txt": "给一桩近十年之久的诉讼一个定论"} -{"key": "BAC009S0908W0292", "wav": "./aishell/wav/test/S0908/BAC009S0908W0292.wav", "txt": "不过这些政策现在已经被废除"} -{"key": "BAC009S0908W0293", "wav": "./aishell/wav/test/S0908/BAC009S0908W0293.wav", "txt": "苹果打压了市场竞争"} -{"key": "BAC009S0908W0297", "wav": "./aishell/wav/test/S0908/BAC009S0908W0297.wav", "txt": "苹果担忧这会蚕食其市场份额"} -{"key": "BAC009S0908W0298", "wav": "./aishell/wav/test/S0908/BAC009S0908W0298.wav", "txt": 
"生态链中插入其他公司产品会造成问题"} -{"key": "BAC009S0908W0299", "wav": "./aishell/wav/test/S0908/BAC009S0908W0299.wav", "txt": "这会危及用户体验和产品质量"} -{"key": "BAC009S0908W0301", "wav": "./aishell/wav/test/S0908/BAC009S0908W0301.wav", "txt": "价格要么下降要么维持不变"} -{"key": "BAC009S0908W0302", "wav": "./aishell/wav/test/S0908/BAC009S0908W0302.wav", "txt": "苹果没有危害消费者利益"} -{"key": "BAC009S0908W0303", "wav": "./aishell/wav/test/S0908/BAC009S0908W0303.wav", "txt": "对频频的骚扰电话显示无可奈何"} -{"key": "BAC009S0908W0304", "wav": "./aishell/wav/test/S0908/BAC009S0908W0304.wav", "txt": "有黑客在网络上兜售车主信"} -{"key": "BAC009S0908W0305", "wav": "./aishell/wav/test/S0908/BAC009S0908W0305.wav", "txt": "美的摆稳棋局过冬搜狐科技"} -{"key": "BAC009S0908W0306", "wav": "./aishell/wav/test/S0908/BAC009S0908W0306.wav", "txt": "白电行业将进入最惨烈的一年"} -{"key": "BAC009S0908W0307", "wav": "./aishell/wav/test/S0908/BAC009S0908W0307.wav", "txt": "昔日巨头格力美的海尔也将沉浮于其中"} -{"key": "BAC009S0908W0308", "wav": "./aishell/wav/test/S0908/BAC009S0908W0308.wav", "txt": "从本年度第一份季报来看"} -{"key": "BAC009S0908W0309", "wav": "./aishell/wav/test/S0908/BAC009S0908W0309.wav", "txt": "三巨头中的格力海尔均出现不同程度下滑"} -{"key": "BAC009S0908W0310", "wav": "./aishell/wav/test/S0908/BAC009S0908W0310.wav", "txt": "实现净利营收双增长"} -{"key": "BAC009S0908W0311", "wav": "./aishell/wav/test/S0908/BAC009S0908W0311.wav", "txt": "美的吸取了当年大跃进的教训"} -{"key": "BAC009S0908W0312", "wav": "./aishell/wav/test/S0908/BAC009S0908W0312.wav", "txt": "一位买家电的朋友晒出一张销量清单"} -{"key": "BAC009S0908W0313", "wav": "./aishell/wav/test/S0908/BAC009S0908W0313.wav", "txt": "他担心自己马上就要被辞退了"} -{"key": "BAC009S0908W0314", "wav": "./aishell/wav/test/S0908/BAC009S0908W0314.wav", "txt": "发改委约谈各大空调企业的高管"} -{"key": "BAC009S0908W0315", "wav": "./aishell/wav/test/S0908/BAC009S0908W0315.wav", "txt": "媒体采访的电话打到各空调企业的市场负责人那里"} -{"key": "BAC009S0908W0316", "wav": "./aishell/wav/test/S0908/BAC009S0908W0316.wav", "txt": "各公司市场部都在卖场忙活"} -{"key": "BAC009S0908W0317", "wav": "./aishell/wav/test/S0908/BAC009S0908W0317.wav", "txt": "今年的促销从三月份就启动了"} -{"key": "BAC009S0908W0318", "wav": "./aishell/wav/test/S0908/BAC009S0908W0318.wav", "txt": "一位商场场内部人士称"} -{"key": "BAC009S0908W0319", "wav": "./aishell/wav/test/S0908/BAC009S0908W0319.wav", "txt": "注定是白色家电行业最惨烈的一年"} -{"key": "BAC009S0908W0320", "wav": "./aishell/wav/test/S0908/BAC009S0908W0320.wav", "txt": "现实的残酷落到报表上"} -{"key": "BAC009S0908W0321", "wav": "./aishell/wav/test/S0908/BAC009S0908W0321.wav", "txt": "是白电上市企业今年的一季报几乎全部沦陷"} -{"key": "BAC009S0908W0322", "wav": "./aishell/wav/test/S0908/BAC009S0908W0322.wav", "txt": "两大龙头企业格力和海尔"} -{"key": "BAC009S0908W0323", "wav": "./aishell/wav/test/S0908/BAC009S0908W0323.wav", "txt": "格力电器一季报营收为二百四十五亿元"} -{"key": "BAC009S0908W0324", "wav": "./aishell/wav/test/S0908/BAC009S0908W0324.wav", "txt": "同比去年降零点六百分之六"} -{"key": "BAC009S0908W0325", "wav": "./aishell/wav/test/S0908/BAC009S0908W0325.wav", "txt": "净利润为二十七点七五亿元"} -{"key": "BAC009S0908W0326", "wav": "./aishell/wav/test/S0908/BAC009S0908W0326.wav", "txt": "同比上升百分之二十三点零六"} -{"key": "BAC009S0908W0327", "wav": "./aishell/wav/test/S0908/BAC009S0908W0327.wav", "txt": "上一次是金融危机期间的二零零九一季度"} -{"key": "BAC009S0908W0328", "wav": "./aishell/wav/test/S0908/BAC009S0908W0328.wav", "txt": "另一白电巨头青岛海尔"} -{"key": "BAC009S0908W0329", "wav": "./aishell/wav/test/S0908/BAC009S0908W0329.wav", "txt": "一季度营收为二十一八点七亿元"} -{"key": "BAC009S0908W0330", "wav": "./aishell/wav/test/S0908/BAC009S0908W0330.wav", "txt": "净利润为九点七亿元"} -{"key": "BAC009S0908W0331", "wav": "./aishell/wav/test/S0908/BAC009S0908W0331.wav", "txt": "同比增百分之十三点一一"} -{"key": "BAC009S0908W0332", "wav": 
"./aishell/wav/test/S0908/BAC009S0908W0332.wav", "txt": "海信科龙和惠而浦则是营收增"} -{"key": "BAC009S0908W0333", "wav": "./aishell/wav/test/S0908/BAC009S0908W0333.wav", "txt": "海信科龙一季报营收为六十四点三亿元"} -{"key": "BAC009S0908W0334", "wav": "./aishell/wav/test/S0908/BAC009S0908W0334.wav", "txt": "净利润出现百分之一的下滑"} -{"key": "BAC009S0908W0335", "wav": "./aishell/wav/test/S0908/BAC009S0908W0335.wav", "txt": "净利出现七点三百分之一的降幅"} -{"key": "BAC009S0908W0337", "wav": "./aishell/wav/test/S0908/BAC009S0908W0337.wav", "txt": "实现营收净利双增长"} -{"key": "BAC009S0908W0338", "wav": "./aishell/wav/test/S0908/BAC009S0908W0338.wav", "txt": "十多天压抑的情感终于爆发"} -{"key": "BAC009S0908W0339", "wav": "./aishell/wav/test/S0908/BAC009S0908W0339.wav", "txt": "女排姑娘们在日本的最后一夜"} -{"key": "BAC009S0908W0340", "wav": "./aishell/wav/test/S0908/BAC009S0908W0340.wav", "txt": "大家才安安稳稳地睡了一觉"} -{"key": "BAC009S0908W0341", "wav": "./aishell/wav/test/S0908/BAC009S0908W0341.wav", "txt": "如果要数一下中国女排谁最红"} -{"key": "BAC009S0908W0342", "wav": "./aishell/wav/test/S0908/BAC009S0908W0342.wav", "txt": "张晓雅的人气肯定在前三名"} -{"key": "BAC009S0908W0343", "wav": "./aishell/wav/test/S0908/BAC009S0908W0343.wav", "txt": "她以最帅国手走红网络"} -{"key": "BAC009S0908W0344", "wav": "./aishell/wav/test/S0908/BAC009S0908W0344.wav", "txt": "网友大呼她帅过林丹"} -{"key": "BAC009S0908W0345", "wav": "./aishell/wav/test/S0908/BAC009S0908W0345.wav", "txt": "这位英气十足的九零后很有人缘"} -{"key": "BAC009S0908W0346", "wav": "./aishell/wav/test/S0908/BAC009S0908W0346.wav", "txt": "张晓雅最大的优点是有想法"} -{"key": "BAC009S0908W0347", "wav": "./aishell/wav/test/S0908/BAC009S0908W0347.wav", "txt": "张晓雅这个娃娃训练很自觉"} -{"key": "BAC009S0908W0348", "wav": "./aishell/wav/test/S0908/BAC009S0908W0348.wav", "txt": "在球场上的思路比较清楚"} -{"key": "BAC009S0908W0349", "wav": "./aishell/wav/test/S0908/BAC009S0908W0349.wav", "txt": "是一个在球场上有想法的球员"} -{"key": "BAC009S0908W0350", "wav": "./aishell/wav/test/S0908/BAC009S0908W0350.wav", "txt": "这个娃娃打球时很有思想"} -{"key": "BAC009S0908W0351", "wav": "./aishell/wav/test/S0908/BAC009S0908W0351.wav", "txt": "中国最帅的竞走冠军陈定将亮相苏州吴中"} -{"key": "BAC009S0908W0352", "wav": "./aishell/wav/test/S0908/BAC009S0908W0352.wav", "txt": "一九九二年八月五日出生于云南省保山市龙陵县"} -{"key": "BAC009S0908W0353", "wav": "./aishell/wav/test/S0908/BAC009S0908W0353.wav", "txt": "这个二十三岁的云南小伙子"} -{"key": "BAC009S0908W0354", "wav": "./aishell/wav/test/S0908/BAC009S0908W0354.wav", "txt": "取得瑞士卢加诺竞走挑战赛男子二十公里竞走银牌"} -{"key": "BAC009S0908W0355", "wav": "./aishell/wav/test/S0908/BAC009S0908W0355.wav", "txt": "夺得国际田联竞走世界杯男子二十公里竞走银牌"} -{"key": "BAC009S0908W0356", "wav": "./aishell/wav/test/S0908/BAC009S0908W0356.wav", "txt": "参加全国竞走大奖赛暨世锦赛选拔赛"} -{"key": "BAC009S0908W0357", "wav": "./aishell/wav/test/S0908/BAC009S0908W0357.wav", "txt": "以一小时二十一分十一秒成绩获铜牌"} -{"key": "BAC009S0908W0358", "wav": "./aishell/wav/test/S0908/BAC009S0908W0358.wav", "txt": "并取得世锦赛参赛资格"} -{"key": "BAC009S0908W0359", "wav": "./aishell/wav/test/S0908/BAC009S0908W0359.wav", "txt": "仰泳选手在比赛中"} -{"key": "BAC009S0908W0360", "wav": "./aishell/wav/test/S0908/BAC009S0908W0360.wav", "txt": "本次比赛使用最新的仰泳出发壁架"} -{"key": "BAC009S0908W0361", "wav": "./aishell/wav/test/S0908/BAC009S0908W0361.wav", "txt": "帮助仰泳运动员改善自己的出发技术"} -{"key": "BAC009S0908W0362", "wav": "./aishell/wav/test/S0908/BAC009S0908W0362.wav", "txt": "欧米茄计时管理委员会成员彼得许尔泽勒介绍说"} -{"key": "BAC009S0908W0363", "wav": "./aishell/wav/test/S0908/BAC009S0908W0363.wav", "txt": "可以帮助他们在出发时增加自己距离水面的高度"} -{"key": "BAC009S0908W0364", "wav": "./aishell/wav/test/S0908/BAC009S0908W0364.wav", "txt": "可以防止运动员出发时手部滑落"} -{"key": "BAC009S0908W0365", "wav": 
"./aishell/wav/test/S0908/BAC009S0908W0365.wav", "txt": "得到了仰泳选手的广泛好评"} -{"key": "BAC009S0908W0366", "wav": "./aishell/wav/test/S0908/BAC009S0908W0366.wav", "txt": "这是它第一次在游泳世界杯上亮相"} -{"key": "BAC009S0908W0367", "wav": "./aishell/wav/test/S0908/BAC009S0908W0367.wav", "txt": "也为背后的关键技术提供开发支持"} -{"key": "BAC009S0908W0368", "wav": "./aishell/wav/test/S0908/BAC009S0908W0368.wav", "txt": "从而确保高度精准地记录竞赛成绩"} -{"key": "BAC009S0908W0369", "wav": "./aishell/wav/test/S0908/BAC009S0908W0369.wav", "txt": "新科世界冠军宁泽涛领衔中国队出战"} -{"key": "BAC009S0908W0370", "wav": "./aishell/wav/test/S0908/BAC009S0908W0370.wav", "txt": "身材傲人颜值爆表的她魅力席卷整个亚洲"} -{"key": "BAC009S0908W0373", "wav": "./aishell/wav/test/S0908/BAC009S0908W0373.wav", "txt": "现年十八岁的莎宾娜身高达一百八十二厘米"} -{"key": "BAC009S0908W0374", "wav": "./aishell/wav/test/S0908/BAC009S0908W0374.wav", "txt": "腿长足足十二厘米"} -{"key": "BAC009S0908W0375", "wav": "./aishell/wav/test/S0908/BAC009S0908W0375.wav", "txt": "去年在亚青赛上亮相后"} -{"key": "BAC009S0908W0376", "wav": "./aishell/wav/test/S0908/BAC009S0908W0376.wav", "txt": "瞬间成为各国媒体的焦点"} -{"key": "BAC009S0908W0377", "wav": "./aishell/wav/test/S0908/BAC009S0908W0377.wav", "txt": "成为宅男心目中的排球女神"} -{"key": "BAC009S0908W0378", "wav": "./aishell/wav/test/S0908/BAC009S0908W0378.wav", "txt": "莎宾娜也凭借兼具清纯和性感气质的漂亮外形走红日本"} -{"key": "BAC009S0908W0379", "wav": "./aishell/wav/test/S0908/BAC009S0908W0379.wav", "txt": "甚至有日本的大牌经纪公司希望与其签约"} -{"key": "BAC009S0908W0380", "wav": "./aishell/wav/test/S0908/BAC009S0908W0380.wav", "txt": "做客日本电视台的新闻节目"} -{"key": "BAC009S0908W0381", "wav": "./aishell/wav/test/S0908/BAC009S0908W0381.wav", "txt": "不少媒体追问她是否有男朋友"} -{"key": "BAC009S0908W0382", "wav": "./aishell/wav/test/S0908/BAC009S0908W0382.wav", "txt": "莎宾娜透露目前单身理想型是喜欢运动"} -{"key": "BAC009S0908W0383", "wav": "./aishell/wav/test/S0908/BAC009S0908W0383.wav", "txt": "身材高挑并且不抽烟喝酒的男生"} -{"key": "BAC009S0908W0384", "wav": "./aishell/wav/test/S0908/BAC009S0908W0384.wav", "txt": "当下想把注意力集中在打球上"} -{"key": "BAC009S0908W0385", "wav": "./aishell/wav/test/S0908/BAC009S0908W0385.wav", "txt": "暂时不考虑恋爱的问题"} -{"key": "BAC009S0908W0386", "wav": "./aishell/wav/test/S0908/BAC009S0908W0386.wav", "txt": "这一单身宣言更加激发了日本粉丝对她的痴迷"} -{"key": "BAC009S0908W0387", "wav": "./aishell/wav/test/S0908/BAC009S0908W0387.wav", "txt": "希望可以见到她本人"} -{"key": "BAC009S0908W0388", "wav": "./aishell/wav/test/S0908/BAC009S0908W0388.wav", "txt": "该球队在官方博客上"} -{"key": "BAC009S0908W0389", "wav": "./aishell/wav/test/S0908/BAC009S0908W0389.wav", "txt": "但喜欢欧美音乐爱吃西红柿意大利面"} -{"key": "BAC009S0908W0391", "wav": "./aishell/wav/test/S0908/BAC009S0908W0391.wav", "txt": "看好她成为日本排球的新女神"} -{"key": "BAC009S0908W0392", "wav": "./aishell/wav/test/S0908/BAC009S0908W0392.wav", "txt": "美貌和实力并存的选手太稀罕了"} -{"key": "BAC009S0908W0393", "wav": "./aishell/wav/test/S0908/BAC009S0908W0393.wav", "txt": "莎宾娜已经在今年八月秘密抵达日本"} -{"key": "BAC009S0908W0395", "wav": "./aishell/wav/test/S0908/BAC009S0908W0395.wav", "txt": "她的母亲在采访中表示莎宾娜为了提升自己的实力"} -{"key": "BAC009S0908W0396", "wav": "./aishell/wav/test/S0908/BAC009S0908W0396.wav", "txt": "以哈萨克斯坦排协特派选手的方式加盟日本的球队"} -{"key": "BAC009S0908W0397", "wav": "./aishell/wav/test/S0908/BAC009S0908W0397.wav", "txt": "日本的排球训练是出了名的严厉"} -{"key": "BAC009S0908W0398", "wav": "./aishell/wav/test/S0908/BAC009S0908W0398.wav", "txt": "对此莎宾娜已经做好了吃苦的心理准备"} -{"key": "BAC009S0908W0399", "wav": "./aishell/wav/test/S0908/BAC009S0908W0399.wav", "txt": "家人和哈排协也表明了全力支持她的态度"} -{"key": "BAC009S0908W0400", "wav": "./aishell/wav/test/S0908/BAC009S0908W0400.wav", "txt": "不仅在各国网络社区和比赛中表现活跃"} -{"key": "BAC009S0908W0401", "wav": 
"./aishell/wav/test/S0908/BAC009S0908W0401.wav", "txt": "也成为哈萨克斯坦的宣传大使"} -{"key": "BAC009S0908W0402", "wav": "./aishell/wav/test/S0908/BAC009S0908W0402.wav", "txt": "日本排球界的人士指出"} -{"key": "BAC009S0908W0404", "wav": "./aishell/wav/test/S0908/BAC009S0908W0404.wav", "txt": "但是由于加朵要为蝙蝠侠大战超人忙碌"} -{"key": "BAC009S0908W0405", "wav": "./aishell/wav/test/S0908/BAC009S0908W0405.wav", "txt": "档期遇到了不可调和的冲突"} -{"key": "BAC009S0908W0406", "wav": "./aishell/wav/test/S0908/BAC009S0908W0406.wav", "txt": "因此不得不放弃宾虚的演出"} -{"key": "BAC009S0908W0407", "wav": "./aishell/wav/test/S0908/BAC009S0908W0407.wav", "txt": "这对她来说也是一个巨大的遗憾"} -{"key": "BAC009S0908W0408", "wav": "./aishell/wav/test/S0908/BAC009S0908W0408.wav", "txt": "私底下对歌迷亲切和善"} -{"key": "BAC009S0908W0409", "wav": "./aishell/wav/test/S0908/BAC009S0908W0409.wav", "txt": "最近人在大陆举行巡回演唱会的他"} -{"key": "BAC009S0908W0410", "wav": "./aishell/wav/test/S0908/BAC009S0908W0410.wav", "txt": "却被曝出在机场大发飙"} -{"key": "BAC009S0908W0411", "wav": "./aishell/wav/test/S0908/BAC009S0908W0411.wav", "txt": "有网友则晒出当天现场情况"} -{"key": "BAC009S0908W0412", "wav": "./aishell/wav/test/S0908/BAC009S0908W0412.wav", "txt": "搜狐娱乐讯据香港媒体报道"} -{"key": "BAC009S0908W0413", "wav": "./aishell/wav/test/S0908/BAC009S0908W0413.wav", "txt": "分享入行二十年的感受"} -{"key": "BAC009S0908W0414", "wav": "./aishell/wav/test/S0908/BAC009S0908W0414.wav", "txt": "陈奕迅坦言自己一直有情绪病"} -{"key": "BAC009S0908W0415", "wav": "./aishell/wav/test/S0908/BAC009S0908W0415.wav", "txt": "而且是一个爱哭鬼"} -{"key": "BAC009S0908W0416", "wav": "./aishell/wav/test/S0908/BAC009S0908W0416.wav", "txt": "常常在看电影和新闻时流泪"} -{"key": "BAC009S0908W0417", "wav": "./aishell/wav/test/S0908/BAC009S0908W0417.wav", "txt": "不开心时会找太太徐濠所倾诉"} -{"key": "BAC009S0908W0418", "wav": "./aishell/wav/test/S0908/BAC009S0908W0418.wav", "txt": "搜狐娱乐讯据香港媒体报道"} -{"key": "BAC009S0908W0419", "wav": "./aishell/wav/test/S0908/BAC009S0908W0419.wav", "txt": "陈奕迅在香港出席品牌活动"} -{"key": "BAC009S0908W0420", "wav": "./aishell/wav/test/S0908/BAC009S0908W0420.wav", "txt": "现场他透露道近日忙于内地巡演"} -{"key": "BAC009S0908W0421", "wav": "./aishell/wav/test/S0908/BAC009S0908W0421.wav", "txt": "对于天津爆炸时间"} -{"key": "BAC009S0908W0422", "wav": "./aishell/wav/test/S0908/BAC009S0908W0422.wav", "txt": "他表示感到伤痛"} -{"key": "BAC009S0908W0423", "wav": "./aishell/wav/test/S0908/BAC009S0908W0423.wav", "txt": "又透露去年曾在天津举办演唱会"} -{"key": "BAC009S0908W0424", "wav": "./aishell/wav/test/S0908/BAC009S0908W0424.wav", "txt": "希望送上歌曲今日为受害者打气"} -{"key": "BAC009S0908W0425", "wav": "./aishell/wav/test/S0908/BAC009S0908W0425.wav", "txt": "也祝福伤者早日康复"} -{"key": "BAC009S0908W0426", "wav": "./aishell/wav/test/S0908/BAC009S0908W0426.wav", "txt": "搜狐娱乐讯四月三十日"} -{"key": "BAC009S0908W0427", "wav": "./aishell/wav/test/S0908/BAC009S0908W0427.wav", "txt": "称这二人总是可以把自己逗笑"} -{"key": "BAC009S0908W0428", "wav": "./aishell/wav/test/S0908/BAC009S0908W0428.wav", "txt": "照片中二人坐在沙发上"} -{"key": "BAC009S0908W0429", "wav": "./aishell/wav/test/S0908/BAC009S0908W0429.wav", "txt": "谢霆锋戴着帽子"} -{"key": "BAC009S0908W0430", "wav": "./aishell/wav/test/S0908/BAC009S0908W0430.wav", "txt": "穿着白背心黑色短裤"} -{"key": "BAC009S0908W0431", "wav": "./aishell/wav/test/S0908/BAC009S0908W0431.wav", "txt": "数万只黄色小鸡散落路上"} -{"key": "BAC009S0908W0432", "wav": "./aishell/wav/test/S0908/BAC009S0908W0432.wav", "txt": "村民蜂拥而至捉小鸡"} -{"key": "BAC009S0908W0433", "wav": "./aishell/wav/test/S0908/BAC009S0908W0433.wav", "txt": "香港明报参考消息网八月二十九日报道港媒称"} -{"key": "BAC009S0908W0434", "wav": "./aishell/wav/test/S0908/BAC009S0908W0434.wav", "txt": "近日又出现疯抢水果捡漏等事"} -{"key": "BAC009S0908W0435", "wav": 
"./aishell/wav/test/S0908/BAC009S0908W0435.wav", "txt": "有内地学者分析背后心态"} -{"key": "BAC009S0908W0436", "wav": "./aishell/wav/test/S0908/BAC009S0908W0436.wav", "txt": "是因为国民抢习惯了"} -{"key": "BAC009S0908W0437", "wav": "./aishell/wav/test/S0908/BAC009S0908W0437.wav", "txt": "港媒称马云向浙江商人发出警告永远不要行贿"} -{"key": "BAC009S0908W0438", "wav": "./aishell/wav/test/S0908/BAC009S0908W0438.wav", "txt": "港媒评助学达人性侵女童案加强监管是关键"} -{"key": "BAC009S0908W0439", "wav": "./aishell/wav/test/S0908/BAC009S0908W0439.wav", "txt": "资料图王杰图片来源于网络"} -{"key": "BAC009S0908W0440", "wav": "./aishell/wav/test/S0908/BAC009S0908W0440.wav", "txt": "港媒评中国游客全球爆买旅游幼稚病"} -{"key": "BAC009S0908W0441", "wav": "./aishell/wav/test/S0908/BAC009S0908W0441.wav", "txt": "参考消息网一零月八日报道国庆长假结束"} -{"key": "BAC009S0908W0442", "wav": "./aishell/wav/test/S0908/BAC009S0908W0442.wav", "txt": "媒体再次盘点长假期间的各种热点新闻"} -{"key": "BAC009S0908W0443", "wav": "./aishell/wav/test/S0908/BAC009S0908W0443.wav", "txt": "其中一组中国旅游购物者全面攻陷日本的图片"} -{"key": "BAC009S0908W0444", "wav": "./aishell/wav/test/S0908/BAC009S0908W0444.wav", "txt": "多家媒体就这组图片中的场景和现象作出评论"} -{"key": "BAC009S0908W0445", "wav": "./aishell/wav/test/S0908/BAC009S0908W0445.wav", "txt": "并提出多种思考和提示"} -{"key": "BAC009S0908W0446", "wav": "./aishell/wav/test/S0908/BAC009S0908W0446.wav", "txt": "比如就中国游客热衷日本药品"} -{"key": "BAC009S0908W0447", "wav": "./aishell/wav/test/S0908/BAC009S0908W0447.wav", "txt": "歧视中国药企改进质量提高信誉改善用户体验"} -{"key": "BAC009S0908W0448", "wav": "./aishell/wav/test/S0908/BAC009S0908W0448.wav", "txt": "以便提高药品竞争力等等"} -{"key": "BAC009S0908W0449", "wav": "./aishell/wav/test/S0908/BAC009S0908W0449.wav", "txt": "港媒道士下山被批引发网友广泛讨论"} -{"key": "BAC009S0908W0450", "wav": "./aishell/wav/test/S0908/BAC009S0908W0450.wav", "txt": "参考消息网七月二零日报道"} -{"key": "BAC009S0908W0451", "wav": "./aishell/wav/test/S0908/BAC009S0908W0451.wav", "txt": "港媒上海成为亚洲奢华生活最昂贵的城市"} -{"key": "BAC009S0908W0452", "wav": "./aishell/wav/test/S0908/BAC009S0908W0452.wav", "txt": "参考消息网一零月二九日报道港媒称"} -{"key": "BAC009S0908W0453", "wav": "./aishell/wav/test/S0908/BAC009S0908W0453.wav", "txt": "上海已成为全亚洲奢华生活最昂贵的城市"} -{"key": "BAC009S0908W0454", "wav": "./aishell/wav/test/S0908/BAC009S0908W0454.wav", "txt": "垫底的是印度城市孟买"} -{"key": "BAC009S0908W0455", "wav": "./aishell/wav/test/S0908/BAC009S0908W0455.wav", "txt": "港媒东莞工地连续两次坍塌路面似被吸入地底"} -{"key": "BAC009S0908W0456", "wav": "./aishell/wav/test/S0908/BAC009S0908W0456.wav", "txt": "东莞常平一地盘两日两度地陷"} -{"key": "BAC009S0908W0457", "wav": "./aishell/wav/test/S0908/BAC009S0908W0457.wav", "txt": "网上流传的视频可见"} -{"key": "BAC009S0908W0458", "wav": "./aishell/wav/test/S0908/BAC009S0908W0458.wav", "txt": "地面在几秒内迅速塌陷成一个大坑"} -{"key": "BAC009S0908W0459", "wav": "./aishell/wav/test/S0908/BAC009S0908W0459.wav", "txt": "恐影响旁边大厦的基地"} -{"key": "BAC009S0908W0460", "wav": "./aishell/wav/test/S0908/BAC009S0908W0460.wav", "txt": "网络图片参考消息网八月一四日报道港媒称"} -{"key": "BAC009S0908W0461", "wav": "./aishell/wav/test/S0908/BAC009S0908W0461.wav", "txt": "一三日上午一零时许"} -{"key": "BAC009S0908W0462", "wav": "./aishell/wav/test/S0908/BAC009S0908W0462.wav", "txt": "东莞常平住宅大厦联邦花园旁边发生大面积地陷"} -{"key": "BAC009S0908W0463", "wav": "./aishell/wav/test/S0908/BAC009S0908W0463.wav", "txt": "面积达逾三零零平方米"} -{"key": "BAC009S0908W0464", "wav": "./aishell/wav/test/S0908/BAC009S0908W0464.wav", "txt": "造成一名井下工人死亡"} -{"key": "BAC009S0908W0465", "wav": "./aishell/wav/test/S0908/BAC009S0908W0465.wav", "txt": "该地盘曾发生地陷事故"} -{"key": "BAC009S0908W0466", "wav": "./aishell/wav/test/S0908/BAC009S0908W0466.wav", "txt": "现场流出的短片显示"} -{"key": "BAC009S0908W0467", "wav": 
"./aishell/wav/test/S0908/BAC009S0908W0467.wav", "txt": "每一次塌陷的区域前已有一个大坑"} -{"key": "BAC009S0908W0468", "wav": "./aishell/wav/test/S0908/BAC009S0908W0468.wav", "txt": "港媒中国人启动营养革命养生书籍热卖"} -{"key": "BAC009S0908W0469", "wav": "./aishell/wav/test/S0908/BAC009S0908W0469.wav", "txt": "参考消息网八月一五日报道"} -{"key": "BAC009S0908W0470", "wav": "./aishell/wav/test/S0908/BAC009S0908W0470.wav", "txt": "港媒中国出现多中心大都市郊区需要更多移民"} -{"key": "BAC009S0908W0471", "wav": "./aishell/wav/test/S0908/BAC009S0908W0471.wav", "txt": "参考消息网八月二五日报道"} -{"key": "BAC009S0908W0472", "wav": "./aishell/wav/test/S0908/BAC009S0908W0472.wav", "txt": "港媒中国发布金牌月嫂标准实用性遭质疑"} -{"key": "BAC009S0908W0473", "wav": "./aishell/wav/test/S0908/BAC009S0908W0473.wav", "txt": "参考消息网七月八日报道"} -{"key": "BAC009S0908W0474", "wav": "./aishell/wav/test/S0908/BAC009S0908W0474.wav", "txt": "港媒中国城市告别血汗工厂经济转型见成效"} -{"key": "BAC009S0908W0475", "wav": "./aishell/wav/test/S0908/BAC009S0908W0475.wav", "txt": "参考消息网八月一二日报道"} -{"key": "BAC009S0908W0476", "wav": "./aishell/wav/test/S0908/BAC009S0908W0476.wav", "txt": "港媒中国患者年底可在线上美国医生咨询病情"} -{"key": "BAC009S0908W0477", "wav": "./aishell/wav/test/S0908/BAC009S0908W0477.wav", "txt": "参考消息网九月二五日报道港媒称"} -{"key": "BAC009S0908W0478", "wav": "./aishell/wav/test/S0908/BAC009S0908W0478.wav", "txt": "在中国某个在线医疗平台增设一项新服务之后"} -{"key": "BAC009S0908W0479", "wav": "./aishell/wav/test/S0908/BAC009S0908W0479.wav", "txt": "中国正在逐步拥抱智能技术和数字至上创业精神"} -{"key": "BAC009S0908W0480", "wav": "./aishell/wav/test/S0908/BAC009S0908W0480.wav", "txt": "港媒中国成访日第一大客源国还会持续增加"} -{"key": "BAC009S0908W0481", "wav": "./aishell/wav/test/S0908/BAC009S0908W0481.wav", "txt": "参考消息网八月二日报道外媒称"} -{"key": "BAC009S0908W0482", "wav": "./aishell/wav/test/S0908/BAC009S0908W0482.wav", "txt": "访日外国游客突破千万"} -{"key": "BAC009S0908W0483", "wav": "./aishell/wav/test/S0908/BAC009S0908W0483.wav", "txt": "其中上半年中国访日游客接近翻倍"} -{"key": "BAC009S0908W0484", "wav": "./aishell/wav/test/S0908/BAC009S0908W0484.wav", "txt": "超过韩国成为访日最大客源国"} -{"key": "BAC009S0908W0485", "wav": "./aishell/wav/test/S0908/BAC009S0908W0485.wav", "txt": "更是扭转日本旅游赤字"} -{"key": "BAC009S0908W0486", "wav": "./aishell/wav/test/S0908/BAC009S0908W0486.wav", "txt": "港媒中国科学家研究蜈蚣毒液发现新止痛药"} -{"key": "BAC009S0908W0487", "wav": "./aishell/wav/test/S0908/BAC009S0908W0487.wav", "txt": "蜈蚣资料图参考消息网一零月二二日报道中国科学家称"} -{"key": "BAC009S0908W0488", "wav": "./aishell/wav/test/S0908/BAC009S0908W0488.wav", "txt": "港媒中式教学不可复制中国学生在哪都能拿高分"} -{"key": "BAC009S0908W0489", "wav": "./aishell/wav/test/S0908/BAC009S0908W0489.wav", "txt": "参考消息网九月二三日报道港媒称"} -{"key": "BAC009S0908W0490", "wav": "./aishell/wav/test/S0908/BAC009S0908W0490.wav", "txt": "宣传的重点是中国教育和英国教育之战"} -{"key": "BAC009S0908W0491", "wav": "./aishell/wav/test/S0908/BAC009S0908W0491.wav", "txt": "港媒中秋赏月航班受热捧部分靠窗座位售罄"} -{"key": "BAC009S0908W0492", "wav": "./aishell/wav/test/S0908/BAC009S0908W0492.wav", "txt": "参考消息网九月一三日报道港媒称"} -{"key": "BAC009S0908W0493", "wav": "./aishell/wav/test/S0908/BAC009S0908W0493.wav", "txt": "很多人都已为赏月做准备"} -{"key": "BAC009S0908W0494", "wav": "./aishell/wav/test/S0908/BAC009S0908W0494.wav", "txt": "如果对一般登高赏月仍未满足"} -{"key": "BAC009S0908W0495", "wav": "./aishell/wav/test/S0908/BAC009S0908W0495.wav", "txt": "可以考虑一下空中赏月"} -{"key": "BAC009S0912W0121", "wav": "./aishell/wav/test/S0912/BAC009S0912W0121.wav", "txt": "房地产相关领域问题频发"} -{"key": "BAC009S0912W0122", "wav": "./aishell/wav/test/S0912/BAC009S0912W0122.wav", "txt": "东地产财经周刊新一年度审计工作报告出炉"} -{"key": "BAC009S0912W0123", "wav": "./aishell/wav/test/S0912/BAC009S0912W0123.wav", "txt": "审计署审计长刘家义受国务院委托"} -{"key": 
"BAC009S0912W0124", "wav": "./aishell/wav/test/S0912/BAC009S0912W0124.wav", "txt": "土地相关的审查成为重点之一"} -{"key": "BAC009S0912W0125", "wav": "./aishell/wav/test/S0912/BAC009S0912W0125.wav", "txt": "刘家义在报告中指出"} -{"key": "BAC009S0912W0126", "wav": "./aishell/wav/test/S0912/BAC009S0912W0126.wav", "txt": "共审计二十个省本级和二百个市"} -{"key": "BAC009S0912W0127", "wav": "./aishell/wav/test/S0912/BAC009S0912W0127.wav", "txt": "二零零八年至二零一五年"} -{"key": "BAC009S0912W0128", "wav": "./aishell/wav/test/S0912/BAC009S0912W0128.wav", "txt": "这些地区批准建设用地二百万公顷"} -{"key": "BAC009S0912W0129", "wav": "./aishell/wav/test/S0912/BAC009S0912W0129.wav", "txt": "取得土地出让收入十三万亿元"} -{"key": "BAC009S0912W0130", "wav": "./aishell/wav/test/S0912/BAC009S0912W0130.wav", "txt": "支出十二万亿元"} -{"key": "BAC009S0912W0131", "wav": "./aishell/wav/test/S0912/BAC009S0912W0131.wav", "txt": "为经济社会发展提供了重要基础和支持"} -{"key": "BAC009S0912W0132", "wav": "./aishell/wav/test/S0912/BAC009S0912W0132.wav", "txt": "土地出入收入累计结馀五千亿元"} -{"key": "BAC009S0912W0133", "wav": "./aishell/wav/test/S0912/BAC009S0912W0133.wav", "txt": "主要是土地出让收入少征三千亿元"} -{"key": "BAC009S0912W0134", "wav": "./aishell/wav/test/S0912/BAC009S0912W0134.wav", "txt": "一些地方和单位少支付补偿一亿元"} -{"key": "BAC009S0912W0135", "wav": "./aishell/wav/test/S0912/BAC009S0912W0135.wav", "txt": "编造虚假资料等套取或骗取补偿一亿元"} -{"key": "BAC009S0912W0136", "wav": "./aishell/wav/test/S0912/BAC009S0912W0136.wav", "txt": "一些地方土地出让收支核算不够规范"} -{"key": "BAC009S0912W0137", "wav": "./aishell/wav/test/S0912/BAC009S0912W0137.wav", "txt": "减免或返还土地出让收入一亿元"} -{"key": "BAC009S0912W0138", "wav": "./aishell/wav/test/S0912/BAC009S0912W0138.wav", "txt": "建设用地方面也暴露了不少问题"} -{"key": "BAC009S0912W0139", "wav": "./aishell/wav/test/S0912/BAC009S0912W0139.wav", "txt": "违规以租代征改变规划条件等用地一万公顷"} -{"key": "BAC009S0912W0140", "wav": "./aishell/wav/test/S0912/BAC009S0912W0140.wav", "txt": "有一个突破土地或城市规划"} -{"key": "BAC009S0912W0141", "wav": "./aishell/wav/test/S0912/BAC009S0912W0141.wav", "txt": "还有一个违规扩区一万公顷"} -{"key": "BAC009S0912W0142", "wav": "./aishell/wav/test/S0912/BAC009S0912W0142.wav", "txt": "虚增耕地质量不达标的分别占百分之十和百分之三十"} -{"key": "BAC009S0912W0143", "wav": "./aishell/wav/test/S0912/BAC009S0912W0143.wav", "txt": "整治资金被挤占挪用等一亿元"} -{"key": "BAC009S0912W0144", "wav": "./aishell/wav/test/S0912/BAC009S0912W0144.wav", "txt": "纠正违法用地一万起"} -{"key": "BAC009S0912W0145", "wav": "./aishell/wav/test/S0912/BAC009S0912W0145.wav", "txt": "制定完善制度一百多项"} -{"key": "BAC009S0912W0146", "wav": "./aishell/wav/test/S0912/BAC009S0912W0146.wav", "txt": "审计已向有关部门移送重大违法违纪问题三百起"} -{"key": "BAC009S0912W0147", "wav": "./aishell/wav/test/S0912/BAC009S0912W0147.wav", "txt": "各级政府安排财政资金一亿元"} -{"key": "BAC009S0912W0148", "wav": "./aishell/wav/test/S0912/BAC009S0912W0148.wav", "txt": "为安居工程建设提供了资金保障"} -{"key": "BAC009S0912W0149", "wav": "./aishell/wav/test/S0912/BAC009S0912W0149.wav", "txt": "还有一亿元被套取或用于弥补经费不足等"} -{"key": "BAC009S0912W0150", "wav": "./aishell/wav/test/S0912/BAC009S0912W0150.wav", "txt": "有关地方追回资金或补贴一亿元"} -{"key": "BAC009S0912W0151", "wav": "./aishell/wav/test/S0912/BAC009S0912W0151.wav", "txt": "清理收回住房二十套"} -{"key": "BAC009S0912W0152", "wav": "./aishell/wav/test/S0912/BAC009S0912W0152.wav", "txt": "取消一万户家庭的保障资格"} -{"key": "BAC009S0912W0153", "wav": "./aishell/wav/test/S0912/BAC009S0912W0153.wav", "txt": "审计已向有关部门移送重大违法违纪问题三十起"} -{"key": "BAC009S0912W0154", "wav": "./aishell/wav/test/S0912/BAC009S0912W0154.wav", "txt": "在对央企的审计也发现了不少问题"} -{"key": "BAC009S0912W0155", "wav": "./aishell/wav/test/S0912/BAC009S0912W0155.wav", "txt": "中粮集团违规投资四亿元对原培训中心进行改扩建"} -{"key": "BAC009S0912W0156", 
"wav": "./aishell/wav/test/S0912/BAC009S0912W0156.wav", "txt": "受土地开发政策和土地规划限制未开发建设"} -{"key": "BAC009S0912W0157", "wav": "./aishell/wav/test/S0912/BAC009S0912W0157.wav", "txt": "六年土地收入十三万度审计报告中"} -{"key": "BAC009S0912W0158", "wav": "./aishell/wav/test/S0912/BAC009S0912W0158.wav", "txt": "房地产相关领域问题频发"} -{"key": "BAC009S0912W0159", "wav": "./aishell/wav/test/S0912/BAC009S0912W0159.wav", "txt": "东地产财经周度审计工作报告出炉"} -{"key": "BAC009S0912W0160", "wav": "./aishell/wav/test/S0912/BAC009S0912W0160.wav", "txt": "审计署审计长刘家义受国务院委托"} -{"key": "BAC009S0912W0161", "wav": "./aishell/wav/test/S0912/BAC009S0912W0161.wav", "txt": "羊年置业小调查的调查结果截图"} -{"key": "BAC009S0912W0162", "wav": "./aishell/wav/test/S0912/BAC009S0912W0162.wav", "txt": "二初楼市迎来多项利好政策"} -{"key": "BAC009S0912W0163", "wav": "./aishell/wav/test/S0912/BAC009S0912W0163.wav", "txt": "在多项政策的支持下"} -{"key": "BAC009S0912W0164", "wav": "./aishell/wav/test/S0912/BAC009S0912W0164.wav", "txt": "今年楼市将走向何方"} -{"key": "BAC009S0912W0165", "wav": "./aishell/wav/test/S0912/BAC009S0912W0165.wav", "txt": "中新网房产频道推置业小调查"} -{"key": "BAC009S0912W0167", "wav": "./aishell/wav/test/S0912/BAC009S0912W0167.wav", "txt": "十位网友参与了本次调查"} -{"key": "BAC009S0912W0168", "wav": "./aishell/wav/test/S0912/BAC009S0912W0168.wav", "txt": "在参与调查的网友中"} -{"key": "BAC009S0912W0169", "wav": "./aishell/wav/test/S0912/BAC009S0912W0169.wav", "txt": "约六成网友看涨全国的商品房价格"} -{"key": "BAC009S0912W0170", "wav": "./aishell/wav/test/S0912/BAC009S0912W0170.wav", "txt": "万科获选性价比最高的房企"} -{"key": "BAC009S0912W0171", "wav": "./aishell/wav/test/S0912/BAC009S0912W0171.wav", "txt": "房价的一涨一跌都牵动着购房者的神经"} -{"key": "BAC009S0912W0172", "wav": "./aishell/wav/test/S0912/BAC009S0912W0172.wav", "txt": "百分之五的网友认为房价将普遍上涨"} -{"key": "BAC009S0912W0173", "wav": "./aishell/wav/test/S0912/BAC009S0912W0173.wav", "txt": "百分之五的网友认为房价将普遍下跌"} -{"key": "BAC009S0912W0174", "wav": "./aishell/wav/test/S0912/BAC009S0912W0174.wav", "txt": "百分之五的网友认为房价走势不好判断"} -{"key": "BAC009S0912W0175", "wav": "./aishell/wav/test/S0912/BAC009S0912W0175.wav", "txt": "作为楼市政策的风向标"} -{"key": "BAC009S0912W0176", "wav": "./aishell/wav/test/S0912/BAC009S0912W0176.wav", "txt": "二全国两会或将楼市基调"} -{"key": "BAC009S0912W0177", "wav": "./aishell/wav/test/S0912/BAC009S0912W0177.wav", "txt": "国务院总理李克强在二政府工作报告中表示"} -{"key": "BAC009S0912W0178", "wav": "./aishell/wav/test/S0912/BAC009S0912W0178.wav", "txt": "支持居民自住和改善住房需求"} -{"key": "BAC009S0912W0179", "wav": "./aishell/wav/test/S0912/BAC009S0912W0179.wav", "txt": "促进房地产市场平稳健康发展"} -{"key": "BAC009S0912W0180", "wav": "./aishell/wav/test/S0912/BAC009S0912W0180.wav", "txt": "这也从宏观层面明确了政府对于房地产市场的态度"} -{"key": "BAC009S0912W0181", "wav": "./aishell/wav/test/S0912/BAC009S0912W0181.wav", "txt": "在今年两会是否会开启新一轮楼市调控这个问题上"} -{"key": "BAC009S0912W0182", "wav": "./aishell/wav/test/S0912/BAC009S0912W0182.wav", "txt": "中新网的调查结果显示"} -{"key": "BAC009S0912W0183", "wav": "./aishell/wav/test/S0912/BAC009S0912W0183.wav", "txt": "百分之五的网友认为不会"} -{"key": "BAC009S0912W0184", "wav": "./aishell/wav/test/S0912/BAC009S0912W0184.wav", "txt": "百分之五的网友认为会"} -{"key": "BAC009S0912W0185", "wav": "./aishell/wav/test/S0912/BAC009S0912W0185.wav", "txt": "百分之五的网友认为不好说"} -{"key": "BAC009S0912W0186", "wav": "./aishell/wav/test/S0912/BAC009S0912W0186.wav", "txt": "楼市政策也深刻影响着房地产行业的走向"} -{"key": "BAC009S0912W0187", "wav": "./aishell/wav/test/S0912/BAC009S0912W0187.wav", "txt": "抓紧做好故调查处理工作"} -{"key": "BAC009S0912W0188", "wav": "./aishell/wav/test/S0912/BAC009S0912W0188.wav", "txt": "督促责任单位彻底排查溢油风险点"} -{"key": "BAC009S0912W0189", "wav": 
"./aishell/wav/test/S0912/BAC009S0912W0189.wav", "txt": "并重新编报海洋环境影响报告书"} -{"key": "BAC009S0912W0190", "wav": "./aishell/wav/test/S0912/BAC009S0912W0190.wav", "txt": "彻底查明事故原因"} -{"key": "BAC009S0912W0191", "wav": "./aishell/wav/test/S0912/BAC009S0912W0191.wav", "txt": "查清事故造成的危害及损失"} -{"key": "BAC009S0912W0192", "wav": "./aishell/wav/test/S0912/BAC009S0912W0192.wav", "txt": "维护受损各方合法权益"} -{"key": "BAC009S0912W0193", "wav": "./aishell/wav/test/S0912/BAC009S0912W0193.wav", "txt": "立即部署开展海洋石油勘探开发安全生产检查"} -{"key": "BAC009S0912W0194", "wav": "./aishell/wav/test/S0912/BAC009S0912W0194.wav", "txt": "全面加强海洋环境监视监测和监督管理"} -{"key": "BAC009S0912W0195", "wav": "./aishell/wav/test/S0912/BAC009S0912W0195.wav", "txt": "全面准确及时发布事故处置相关信息"} -{"key": "BAC009S0912W0196", "wav": "./aishell/wav/test/S0912/BAC009S0912W0196.wav", "txt": "抓紧研究完善海洋环境保护的法律法规"} -{"key": "BAC009S0912W0197", "wav": "./aishell/wav/test/S0912/BAC009S0912W0197.wav", "txt": "入海污染物排放总量下降"} -{"key": "BAC009S0912W0198", "wav": "./aishell/wav/test/S0912/BAC009S0912W0198.wav", "txt": "力争渤海近岸海域水质总体改善"} -{"key": "BAC009S0912W0199", "wav": "./aishell/wav/test/S0912/BAC009S0912W0199.wav", "txt": "优化产业结构与布局"} -{"key": "BAC009S0912W0200", "wav": "./aishell/wav/test/S0912/BAC009S0912W0200.wav", "txt": "切实改变沿海地区重化工比重过大过于集中的状况"} -{"key": "BAC009S0912W0201", "wav": "./aishell/wav/test/S0912/BAC009S0912W0201.wav", "txt": "严格控制新上石化项目"} -{"key": "BAC009S0912W0202", "wav": "./aishell/wav/test/S0912/BAC009S0912W0202.wav", "txt": "禁止在可能造成生态严重失衡的地方进行围填海活动"} -{"key": "BAC009S0912W0203", "wav": "./aishell/wav/test/S0912/BAC009S0912W0203.wav", "txt": "有效控制陆海污染源"} -{"key": "BAC009S0912W0204", "wav": "./aishell/wav/test/S0912/BAC009S0912W0204.wav", "txt": "坚持海陆统筹河海兼顾"} -{"key": "BAC009S0912W0205", "wav": "./aishell/wav/test/S0912/BAC009S0912W0205.wav", "txt": "加强入海河流综合治理"} -{"key": "BAC009S0912W0206", "wav": "./aishell/wav/test/S0912/BAC009S0912W0206.wav", "txt": "合理布局入海排污口"} -{"key": "BAC009S0912W0207", "wav": "./aishell/wav/test/S0912/BAC009S0912W0207.wav", "txt": "制定更加严格的地方水污染排放标准"} -{"key": "BAC009S0912W0208", "wav": "./aishell/wav/test/S0912/BAC009S0912W0208.wav", "txt": "努力保护和修复渤海生态系统"} -{"key": "BAC009S0912W0209", "wav": "./aishell/wav/test/S0912/BAC009S0912W0209.wav", "txt": "加强用水总量控制与调度管理"} -{"key": "BAC009S0912W0210", "wav": "./aishell/wav/test/S0912/BAC009S0912W0210.wav", "txt": "改善河口和近岸海域生态环境"} -{"key": "BAC009S0912W0211", "wav": "./aishell/wav/test/S0912/BAC009S0912W0211.wav", "txt": "加强海陆过渡区生态建设"} -{"key": "BAC009S0912W0212", "wav": "./aishell/wav/test/S0912/BAC009S0912W0212.wav", "txt": "逐步恢复湿地生态功能"} -{"key": "BAC009S0912W0213", "wav": "./aishell/wav/test/S0912/BAC009S0912W0213.wav", "txt": "在海洋环境敏感区关键区等划定生态红线"} -{"key": "BAC009S0912W0214", "wav": "./aishell/wav/test/S0912/BAC009S0912W0214.wav", "txt": "有效防范海洋环境灾害"} -{"key": "BAC009S0912W0215", "wav": "./aishell/wav/test/S0912/BAC009S0912W0215.wav", "txt": "建立渤海海洋环境预警机制和突发事件应对机制"} -{"key": "BAC009S0912W0216", "wav": "./aishell/wav/test/S0912/BAC009S0912W0216.wav", "txt": "修订完善相关应急预案"} -{"key": "BAC009S0912W0217", "wav": "./aishell/wav/test/S0912/BAC009S0912W0217.wav", "txt": "强化地方政府和企业的主体意识法制意识"} -{"key": "BAC009S0912W0218", "wav": "./aishell/wav/test/S0912/BAC009S0912W0218.wav", "txt": "落实海洋环境保护责任"} -{"key": "BAC009S0912W0219", "wav": "./aishell/wav/test/S0912/BAC009S0912W0219.wav", "txt": "提高公众参与渤海环境保护的积极性和主动性"} -{"key": "BAC009S0912W0220", "wav": "./aishell/wav/test/S0912/BAC009S0912W0220.wav", "txt": "建立公开透明的信息发布机制"} -{"key": "BAC009S0912W0221", "wav": 
"./aishell/wav/test/S0912/BAC009S0912W0221.wav", "txt": "会议讨论进一步加强环境保护工作的意见"} -{"key": "BAC009S0912W0222", "wav": "./aishell/wav/test/S0912/BAC009S0912W0222.wav", "txt": "强调必须把污染治理和生态保护摆在更加重要的位置"} -{"key": "BAC009S0912W0223", "wav": "./aishell/wav/test/S0912/BAC009S0912W0223.wav", "txt": "切实解决损害公众健康影响科学发展的突发环境问题"} -{"key": "BAC009S0912W0224", "wav": "./aishell/wav/test/S0912/BAC009S0912W0224.wav", "txt": "落实节能减排各项任务"} -{"key": "BAC009S0912W0225", "wav": "./aishell/wav/test/S0912/BAC009S0912W0225.wav", "txt": "凡依法应当进行环评的建设规划和项目"} -{"key": "BAC009S0912W0226", "wav": "./aishell/wav/test/S0912/BAC009S0912W0226.wav", "txt": "都要严格履行环评程序"} -{"key": "BAC009S0912W0227", "wav": "./aishell/wav/test/S0912/BAC009S0912W0227.wav", "txt": "环评过程要公开透明"} -{"key": "BAC009S0912W0228", "wav": "./aishell/wav/test/S0912/BAC009S0912W0228.wav", "txt": "充分征求专家和社会公众意见"} -{"key": "BAC009S0912W0229", "wav": "./aishell/wav/test/S0912/BAC009S0912W0229.wav", "txt": "要依法追究管理部门责任企业及有关人员的责任"} -{"key": "BAC009S0912W0230", "wav": "./aishell/wav/test/S0912/BAC009S0912W0230.wav", "txt": "切实加强重金属污染防治"} -{"key": "BAC009S0912W0231", "wav": "./aishell/wav/test/S0912/BAC009S0912W0231.wav", "txt": "对重点地区行业和企业"} -{"key": "BAC009S0912W0232", "wav": "./aishell/wav/test/S0912/BAC009S0912W0232.wav", "txt": "妥善处理重金属污染历史遗留问题和突发污染事件"} -{"key": "BAC009S0912W0233", "wav": "./aishell/wav/test/S0912/BAC009S0912W0233.wav", "txt": "保障人民群众生命健康安全"} -{"key": "BAC009S0912W0234", "wav": "./aishell/wav/test/S0912/BAC009S0912W0234.wav", "txt": "严格化学品环境管理"} -{"key": "BAC009S0912W0235", "wav": "./aishell/wav/test/S0912/BAC009S0912W0235.wav", "txt": "对化学品项目布局进行梳理评估"} -{"key": "BAC009S0912W0236", "wav": "./aishell/wav/test/S0912/BAC009S0912W0236.wav", "txt": "对化学品生产经营企业进行环境隐患排查"} -{"key": "BAC009S0912W0237", "wav": "./aishell/wav/test/S0912/BAC009S0912W0237.wav", "txt": "对海洋江河湖泊沿岸化工企业进行集中综合整治"} -{"key": "BAC009S0912W0238", "wav": "./aishell/wav/test/S0912/BAC009S0912W0238.wav", "txt": "落实环境监管责任和安全保障措施"} -{"key": "BAC009S0912W0239", "wav": "./aishell/wav/test/S0912/BAC009S0912W0239.wav", "txt": "提高化学品生产的环境准入门槛"} -{"key": "BAC009S0912W0240", "wav": "./aishell/wav/test/S0912/BAC009S0912W0240.wav", "txt": "加强农村环境保护"} -{"key": "BAC009S0912W0241", "wav": "./aishell/wav/test/S0912/BAC009S0912W0241.wav", "txt": "集中整治存在突出环境问题的村庄和集镇"} -{"key": "BAC009S0912W0242", "wav": "./aishell/wav/test/S0912/BAC009S0912W0242.wav", "txt": "重点治理农村土壤饮用水水源地污染"} -{"key": "BAC009S0912W0243", "wav": "./aishell/wav/test/S0912/BAC009S0912W0243.wav", "txt": "推动环保基础设施和服务向农村延伸"} -{"key": "BAC009S0912W0244", "wav": "./aishell/wav/test/S0912/BAC009S0912W0244.wav", "txt": "引导和帮助农民科学处理垃圾和污水"} -{"key": "BAC009S0912W0245", "wav": "./aishell/wav/test/S0912/BAC009S0912W0245.wav", "txt": "科学使用农药化肥和农膜"} -{"key": "BAC009S0912W0246", "wav": "./aishell/wav/test/S0912/BAC009S0912W0246.wav", "txt": "严格农村工矿企业环境监管"} -{"key": "BAC009S0912W0247", "wav": "./aishell/wav/test/S0912/BAC009S0912W0247.wav", "txt": "坚决防止污染向农村转移"} -{"key": "BAC009S0912W0248", "wav": "./aishell/wav/test/S0912/BAC009S0912W0248.wav", "txt": "加快建设环境监测预警体系"} -{"key": "BAC009S0912W0249", "wav": "./aishell/wav/test/S0912/BAC009S0912W0249.wav", "txt": "完善环境事件应急机制"} -{"key": "BAC009S0912W0250", "wav": "./aishell/wav/test/S0912/BAC009S0912W0250.wav", "txt": "完善环境法律政策体系"} -{"key": "BAC009S0912W0251", "wav": "./aishell/wav/test/S0912/BAC009S0912W0251.wav", "txt": "针对近期各种环境事件暴露出的问题"} -{"key": "BAC009S0912W0252", "wav": "./aishell/wav/test/S0912/BAC009S0912W0252.wav", "txt": "抓紧制定和修订相关法律法规"} -{"key": "BAC009S0912W0253", "wav": 
"./aishell/wav/test/S0912/BAC009S0912W0253.wav", "txt": "毛利率也只有百分之十四"} -{"key": "BAC009S0912W0254", "wav": "./aishell/wav/test/S0912/BAC009S0912W0254.wav", "txt": "由此可见苹果现在的业务确实比汽车行业更加赚钱"} -{"key": "BAC009S0912W0256", "wav": "./aishell/wav/test/S0912/BAC009S0912W0256.wav", "txt": "他表示他肯定会与苹果展开合作"} -{"key": "BAC009S0912W0257", "wav": "./aishell/wav/test/S0912/BAC009S0912W0257.wav", "txt": "苹果公司一直在秘密从事电汽汽车的研发"} -{"key": "BAC009S0912W0258", "wav": "./aishell/wav/test/S0912/BAC009S0912W0258.wav", "txt": "并且计划最早在二零二零年推出生产首款车型"} -{"key": "BAC009S0912W0259", "wav": "./aishell/wav/test/S0912/BAC009S0912W0259.wav", "txt": "苹果已为汽车项目招募了数百名员工"} -{"key": "BAC009S0912W0260", "wav": "./aishell/wav/test/S0912/BAC009S0912W0260.wav", "txt": "包括电池和机器人技术领域的专家"} -{"key": "BAC009S0912W0261", "wav": "./aishell/wav/test/S0912/BAC009S0912W0261.wav", "txt": "苹果涉足汽车行业并不是一个好主意"} -{"key": "BAC009S0912W0264", "wav": "./aishell/wav/test/S0912/BAC009S0912W0264.wav", "txt": "除了苹果上周公布的选定合作伙伴"} -{"key": "BAC009S0912W0269", "wav": "./aishell/wav/test/S0912/BAC009S0912W0269.wav", "txt": "将会在设备发售稳定的推出与更新"} -{"key": "BAC009S0912W0278", "wav": "./aishell/wav/test/S0912/BAC009S0912W0278.wav", "txt": "此款健康设备将延迟到明年推出"} -{"key": "BAC009S0912W0279", "wav": "./aishell/wav/test/S0912/BAC009S0912W0279.wav", "txt": "根据知情人士获得的安吉拉录音手稿"} -{"key": "BAC009S0912W0280", "wav": "./aishell/wav/test/S0912/BAC009S0912W0280.wav", "txt": "安吉拉要求零售店员工养精蓄锐"} -{"key": "BAC009S0912W0281", "wav": "./aishell/wav/test/S0912/BAC009S0912W0281.wav", "txt": "为即将到来的购物季"} -{"key": "BAC009S0912W0282", "wav": "./aishell/wav/test/S0912/BAC009S0912W0282.wav", "txt": "以及中国的春节做准备"} -{"key": "BAC009S0912W0285", "wav": "./aishell/wav/test/S0912/BAC009S0912W0285.wav", "txt": "后有消息称该款产品将于今年的情人节推出"} -{"key": "BAC009S0912W0286", "wav": "./aishell/wav/test/S0912/BAC009S0912W0286.wav", "txt": "不过目前看来不大可能"} -{"key": "BAC009S0912W0287", "wav": "./aishell/wav/test/S0912/BAC009S0912W0287.wav", "txt": "因为春季的计算方式是从三月二十日到六月三十日"} -{"key": "BAC009S0912W0290", "wav": "./aishell/wav/test/S0912/BAC009S0912W0290.wav", "txt": "有报道称苹果计划在二零一四年秋季推出其可穿戴设备"} -{"key": "BAC009S0912W0291", "wav": "./aishell/wav/test/S0912/BAC009S0912W0291.wav", "txt": "该产品将延迟到二零一五年发布"} -{"key": "BAC009S0912W0292", "wav": "./aishell/wav/test/S0912/BAC009S0912W0292.wav", "txt": "纷至沓来的报道显示"} -{"key": "BAC009S0912W0293", "wav": "./aishell/wav/test/S0912/BAC009S0912W0293.wav", "txt": "电池的技术难题最终导致了它的延迟推出"} -{"key": "BAC009S0912W0299", "wav": "./aishell/wav/test/S0912/BAC009S0912W0299.wav", "txt": "包括更换不同尺寸型号和不同的表带"} -{"key": "BAC009S0912W0302", "wav": "./aishell/wav/test/S0912/BAC009S0912W0302.wav", "txt": "这将是苹果零售店采用的全新模式"} -{"key": "BAC009S0912W0303", "wav": "./aishell/wav/test/S0912/BAC009S0912W0303.wav", "txt": "如果融入移动互联的新时代"} -{"key": "BAC009S0912W0304", "wav": "./aishell/wav/test/S0912/BAC009S0912W0304.wav", "txt": "我们凭什么征战全世界"} -{"key": "BAC009S0912W0305", "wav": "./aishell/wav/test/S0912/BAC009S0912W0305.wav", "txt": "在前段时间的上海家电展上"} -{"key": "BAC009S0912W0306", "wav": "./aishell/wav/test/S0912/BAC009S0912W0306.wav", "txt": "美的集团总裁方洪波提出了上述三个问题"} -{"key": "BAC009S0912W0307", "wav": "./aishell/wav/test/S0912/BAC009S0912W0307.wav", "txt": "这是当前所有中国家电企业"} -{"key": "BAC009S0912W0308", "wav": "./aishell/wav/test/S0912/BAC009S0912W0308.wav", "txt": "都必须要回答的问题"} -{"key": "BAC009S0912W0309", "wav": "./aishell/wav/test/S0912/BAC009S0912W0309.wav", "txt": "如果不回答这三个课题"} -{"key": "BAC009S0912W0310", "wav": "./aishell/wav/test/S0912/BAC009S0912W0310.wav", "txt": "企业所有的目标都是空洞的"} -{"key": "BAC009S0912W0311", "wav": 
"./aishell/wav/test/S0912/BAC009S0912W0311.wav", "txt": "得出这个结论来自于方洪波对当前形势的判断"} -{"key": "BAC009S0912W0312", "wav": "./aishell/wav/test/S0912/BAC009S0912W0312.wav", "txt": "中国家电企业现在正面临前所未有的挑战"} -{"key": "BAC009S0912W0313", "wav": "./aishell/wav/test/S0912/BAC009S0912W0313.wav", "txt": "过去三十年高速发展的前提条件没有了"} -{"key": "BAC009S0912W0316", "wav": "./aishell/wav/test/S0912/BAC009S0912W0316.wav", "txt": "在全世界的产业格局看"} -{"key": "BAC009S0912W0317", "wav": "./aishell/wav/test/S0912/BAC009S0912W0317.wav", "txt": "全世界排列的二加三格局"} -{"key": "BAC009S0912W0319", "wav": "./aishell/wav/test/S0912/BAC009S0912W0319.wav", "txt": "这样一个全球白电的格局短期内是难以撬动的"} -{"key": "BAC009S0912W0320", "wav": "./aishell/wav/test/S0912/BAC009S0912W0320.wav", "txt": "时代力量正在颠复着家电行业"} -{"key": "BAC009S0912W0321", "wav": "./aishell/wav/test/S0912/BAC009S0912W0321.wav", "txt": "移动互联以前改变的是软的层面"} -{"key": "BAC009S0912W0322", "wav": "./aishell/wav/test/S0912/BAC009S0912W0322.wav", "txt": "比如流程的缩短平台化的应用"} -{"key": "BAC009S0912W0323", "wav": "./aishell/wav/test/S0912/BAC009S0912W0323.wav", "txt": "转型升级应该在十年前就开始了"} -{"key": "BAC009S0912W0324", "wav": "./aishell/wav/test/S0912/BAC009S0912W0324.wav", "txt": "中国家电企业在世界产业链地位弱小"} -{"key": "BAC009S0912W0325", "wav": "./aishell/wav/test/S0912/BAC009S0912W0325.wav", "txt": "跟世界产业的差距不是在缩小"} -{"key": "BAC009S0912W0326", "wav": "./aishell/wav/test/S0912/BAC009S0912W0326.wav", "txt": "这是目前我们中国家电企业面临的具体挑战"} -{"key": "BAC009S0912W0327", "wav": "./aishell/wav/test/S0912/BAC009S0912W0327.wav", "txt": "这些挑战来自于四面八方"} -{"key": "BAC009S0912W0328", "wav": "./aishell/wav/test/S0912/BAC009S0912W0328.wav", "txt": "未来给我们的机会和空间是有限的"} -{"key": "BAC009S0912W0330", "wav": "./aishell/wav/test/S0912/BAC009S0912W0330.wav", "txt": "白电行业将进入最惨烈的一年"} -{"key": "BAC009S0912W0331", "wav": "./aishell/wav/test/S0912/BAC009S0912W0331.wav", "txt": "昔日巨头格力美的海尔也将沉浮于其中"} -{"key": "BAC009S0912W0332", "wav": "./aishell/wav/test/S0912/BAC009S0912W0332.wav", "txt": "从本年度第一份季报来看"} -{"key": "BAC009S0912W0333", "wav": "./aishell/wav/test/S0912/BAC009S0912W0333.wav", "txt": "三巨头中的格力海尔均出现不同程度"} -{"key": "BAC009S0912W0335", "wav": "./aishell/wav/test/S0912/BAC009S0912W0335.wav", "txt": "下称美的内部的组织架构二点一五年加大了调整力度"} -{"key": "BAC009S0912W0336", "wav": "./aishell/wav/test/S0912/BAC009S0912W0336.wav", "txt": "七月成立了美的部品事业部"} -{"key": "BAC009S0912W0337", "wav": "./aishell/wav/test/S0912/BAC009S0912W0337.wav", "txt": "威灵电机将有可能兼并美芝压缩机"} -{"key": "BAC009S0912W0338", "wav": "./aishell/wav/test/S0912/BAC009S0912W0338.wav", "txt": "这一切并不是说说而已"} -{"key": "BAC009S0912W0339", "wav": "./aishell/wav/test/S0912/BAC009S0912W0339.wav", "txt": "而是要明确落实在数字上"} -{"key": "BAC009S0912W0340", "wav": "./aishell/wav/test/S0912/BAC009S0912W0340.wav", "txt": "控制在六零微克立方米左右"} -{"key": "BAC009S0912W0341", "wav": "./aishell/wav/test/S0912/BAC009S0912W0341.wav", "txt": "这与市民的期望和城市发展的愿景也是一致的"} -{"key": "BAC009S0912W0342", "wav": "./aishell/wav/test/S0912/BAC009S0912W0342.wav", "txt": "二零一七年二零二二年"} -{"key": "BAC009S0912W0343", "wav": "./aishell/wav/test/S0912/BAC009S0912W0343.wav", "txt": "我们还将继续加大污染防治力度"} -{"key": "BAC009S0912W0344", "wav": "./aishell/wav/test/S0912/BAC009S0912W0344.wav", "txt": "这一点对于京津冀一带的居民来说是才最重要的"} -{"key": "BAC009S0912W0345", "wav": "./aishell/wav/test/S0912/BAC009S0912W0345.wav", "txt": "因为每个人都需要呼吸"} -{"key": "BAC009S0912W0346", "wav": "./aishell/wav/test/S0912/BAC009S0912W0346.wav", "txt": "场馆建设一简约而不简单"} -{"key": "BAC009S0912W0347", "wav": "./aishell/wav/test/S0912/BAC009S0912W0347.wav", "txt": "二零二二年北京冬奥会计划使用一二个比赛场馆"} -{"key": "BAC009S0912W0348", "wav": 
"./aishell/wav/test/S0912/BAC009S0912W0348.wav", "txt": "总体上以节俭办赛为原则进行规划建设和改造使用"} -{"key": "BAC009S0912W0349", "wav": "./aishell/wav/test/S0912/BAC009S0912W0349.wav", "txt": "充分利用北京奥运后的丰富遗产"} -{"key": "BAC009S0912W0350", "wav": "./aishell/wav/test/S0912/BAC009S0912W0350.wav", "txt": "仅有三个场馆需要新建"} -{"key": "BAC009S0912W0351", "wav": "./aishell/wav/test/S0912/BAC009S0912W0351.wav", "txt": "分别是位于北京市区的国家速滑馆和延庆的二个雪场"} -{"key": "BAC009S0912W0352", "wav": "./aishell/wav/test/S0912/BAC009S0912W0352.wav", "txt": "其馀场馆改建后均可满足赛事需要"} -{"key": "BAC009S0912W0353", "wav": "./aishell/wav/test/S0912/BAC009S0912W0353.wav", "txt": "既免去了不必要的花费"} -{"key": "BAC009S0912W0354", "wav": "./aishell/wav/test/S0912/BAC009S0912W0354.wav", "txt": "每个场馆又高端大气上档次"} -{"key": "BAC009S0912W0355", "wav": "./aishell/wav/test/S0912/BAC009S0912W0355.wav", "txt": "真可谓是简约而不简单啊"} -{"key": "BAC009S0912W0356", "wav": "./aishell/wav/test/S0912/BAC009S0912W0356.wav", "txt": "花样滑冰短道速滑项目在首都体育馆进行"} -{"key": "BAC009S0912W0357", "wav": "./aishell/wav/test/S0912/BAC009S0912W0357.wav", "txt": "冰壶项目在水立方进行"} -{"key": "BAC009S0912W0358", "wav": "./aishell/wav/test/S0912/BAC009S0912W0358.wav", "txt": "计划明年就将开始动工"} -{"key": "BAC009S0912W0359", "wav": "./aishell/wav/test/S0912/BAC009S0912W0359.wav", "txt": "速滑馆建成后将设置四百米滑道"} -{"key": "BAC009S0912W0360", "wav": "./aishell/wav/test/S0912/BAC009S0912W0360.wav", "txt": "设有座位一万两千个"} -{"key": "BAC009S0912W0361", "wav": "./aishell/wav/test/S0912/BAC009S0912W0361.wav", "txt": "在冬奥会举办之前这里将为专业队伍训练提供场地"} -{"key": "BAC009S0912W0362", "wav": "./aishell/wav/test/S0912/BAC009S0912W0362.wav", "txt": "我们的奥运健儿将在此努力备战"} -{"key": "BAC009S0912W0363", "wav": "./aishell/wav/test/S0912/BAC009S0912W0363.wav", "txt": "成为市民体验冰上运动的乐园"} -{"key": "BAC009S0912W0364", "wav": "./aishell/wav/test/S0912/BAC009S0912W0364.wav", "txt": "张家口市的崇礼县从每年十一月初到第二年四月初"} -{"key": "BAC009S0912W0365", "wav": "./aishell/wav/test/S0912/BAC009S0912W0365.wav", "txt": "崇礼县发展较成熟的万龙滑雪场和云顶滑雪场"} -{"key": "BAC009S0912W0366", "wav": "./aishell/wav/test/S0912/BAC009S0912W0366.wav", "txt": "加上一个仍在建的太舞四季滑雪场"} -{"key": "BAC009S0912W0367", "wav": "./aishell/wav/test/S0912/BAC009S0912W0367.wav", "txt": "均已被纳入二零二二年冬奥会的规划场馆"} -{"key": "BAC009S0912W0368", "wav": "./aishell/wav/test/S0912/BAC009S0912W0368.wav", "txt": "万龙和云顶滑雪场都将根据赛事要求进行改造和扩建"} -{"key": "BAC009S0912W0369", "wav": "./aishell/wav/test/S0912/BAC009S0912W0369.wav", "txt": "小海坨山是位于延庆境内的海坨山主峰"} -{"key": "BAC009S0912W0370", "wav": "./aishell/wav/test/S0912/BAC009S0912W0370.wav", "txt": "此地春秋冬三季有雪"} -{"key": "BAC009S0912W0371", "wav": "./aishell/wav/test/S0912/BAC009S0912W0371.wav", "txt": "滑雪期从十一月下旬到次年三月中旬"} -{"key": "BAC009S0912W0372", "wav": "./aishell/wav/test/S0912/BAC009S0912W0372.wav", "txt": "这里常年吸引着众多登山探险运动爱好者"} -{"key": "BAC009S0912W0373", "wav": "./aishell/wav/test/S0912/BAC009S0912W0373.wav", "txt": "拥有高山滑雪要求的八百米落差"} -{"key": "BAC009S0912W0374", "wav": "./aishell/wav/test/S0912/BAC009S0912W0374.wav", "txt": "非常适合修建高山雪场"} -{"key": "BAC009S0912W0375", "wav": "./aishell/wav/test/S0912/BAC009S0912W0375.wav", "txt": "将依托现有山体地形修建临时场地设施"} -{"key": "BAC009S0912W0376", "wav": "./aishell/wav/test/S0912/BAC009S0912W0376.wav", "txt": "用作雪车雪橇大项和滑雪大项中的高山滑雪比赛场地"} -{"key": "BAC009S0912W0377", "wav": "./aishell/wav/test/S0912/BAC009S0912W0377.wav", "txt": "全民冰雪季奥运健儿助力"} -{"key": "BAC009S0912W0378", "wav": "./aishell/wav/test/S0912/BAC009S0912W0378.wav", "txt": "早在申办北京冬奥会的时候"} -{"key": "BAC009S0912W0379", "wav": "./aishell/wav/test/S0912/BAC009S0912W0379.wav", "txt": "很多人都看到了新的奥运商机"} -{"key": "BAC009S0912W0380", 
"wav": "./aishell/wav/test/S0912/BAC009S0912W0380.wav", "txt": "会投资建设一些冰雪主题乐园和冬季项目体验场所"} -{"key": "BAC009S0912W0381", "wav": "./aishell/wav/test/S0912/BAC009S0912W0381.wav", "txt": "在全民健身成为国家战略的大背景下"} -{"key": "BAC009S0912W0382", "wav": "./aishell/wav/test/S0912/BAC009S0912W0382.wav", "txt": "观赛便利不出国门看奥运"} -{"key": "BAC009S0912W0383", "wav": "./aishell/wav/test/S0912/BAC009S0912W0383.wav", "txt": "以往想要见识奥运级别的比赛"} -{"key": "BAC009S0912W0384", "wav": "./aishell/wav/test/S0912/BAC009S0912W0384.wav", "txt": "冰雪爱好者不得不选择出国"} -{"key": "BAC009S0912W0385", "wav": "./aishell/wav/test/S0912/BAC009S0912W0385.wav", "txt": "高昂的交通和住宿成本让很多人望而却步"} -{"key": "BAC009S0912W0386", "wav": "./aishell/wav/test/S0912/BAC009S0912W0386.wav", "txt": "如今在家门口就可以实现这个愿望了"} -{"key": "BAC009S0912W0387", "wav": "./aishell/wav/test/S0912/BAC009S0912W0387.wav", "txt": "交通住宿花费大大降低"} -{"key": "BAC009S0912W0388", "wav": "./aishell/wav/test/S0912/BAC009S0912W0388.wav", "txt": "让我们能够来一次说走就走的冬奥之行"} -{"key": "BAC009S0912W0389", "wav": "./aishell/wav/test/S0912/BAC009S0912W0389.wav", "txt": "在主场为中国健儿加油"} -{"key": "BAC009S0912W0390", "wav": "./aishell/wav/test/S0912/BAC009S0912W0390.wav", "txt": "该是一件多幸福的事啊"} -{"key": "BAC009S0912W0392", "wav": "./aishell/wav/test/S0912/BAC009S0912W0392.wav", "txt": "责任编辑冯浩"} -{"key": "BAC009S0912W0393", "wav": "./aishell/wav/test/S0912/BAC009S0912W0393.wav", "txt": "十月十八日早上九点"} -{"key": "BAC009S0912W0394", "wav": "./aishell/wav/test/S0912/BAC009S0912W0394.wav", "txt": "各地跑步爱好者齐聚一堂"} -{"key": "BAC009S0912W0395", "wav": "./aishell/wav/test/S0912/BAC009S0912W0395.wav", "txt": "共同享受奔跑带来的乐趣"} -{"key": "BAC009S0912W0396", "wav": "./aishell/wav/test/S0912/BAC009S0912W0396.wav", "txt": "经历过北京站和上海站两次比赛"} -{"key": "BAC009S0912W0397", "wav": "./aishell/wav/test/S0912/BAC009S0912W0397.wav", "txt": "本次沈阳站赛场迎来了许多熟悉的面孔"} -{"key": "BAC009S0912W0398", "wav": "./aishell/wav/test/S0912/BAC009S0912W0398.wav", "txt": "尤为引人瞩目的莫过于李子成"} -{"key": "BAC009S0912W0399", "wav": "./aishell/wav/test/S0912/BAC009S0912W0399.wav", "txt": "他更是以三十分十七秒一举夺得奔跑中国三连冠"} -{"key": "BAC009S0912W0400", "wav": "./aishell/wav/test/S0912/BAC009S0912W0400.wav", "txt": "而十公里女子组由刘庆红以三十四分十秒夺得冠军"} -{"key": "BAC009S0912W0401", "wav": "./aishell/wav/test/S0912/BAC009S0912W0401.wav", "txt": "海信一汽大众等知名企业和品牌也依旧亮相赛场"} -{"key": "BAC009S0912W0402", "wav": "./aishell/wav/test/S0912/BAC009S0912W0402.wav", "txt": "以不同方式助力本次比赛胜利进行"} -{"key": "BAC009S0912W0403", "wav": "./aishell/wav/test/S0912/BAC009S0912W0403.wav", "txt": "近四千名跑步爱好者和其家人朋友齐聚于此"} -{"key": "BAC009S0912W0404", "wav": "./aishell/wav/test/S0912/BAC009S0912W0404.wav", "txt": "全球范围内的创收达到十一点八亿美元"} -{"key": "BAC009S0912W0405", "wav": "./aishell/wav/test/S0912/BAC009S0912W0405.wav", "txt": "亚当桑德勒成功卫冕"} -{"key": "BAC009S0912W0406", "wav": "./aishell/wav/test/S0912/BAC009S0912W0406.wav", "txt": "约翰尼德普紧随其后"} -{"key": "BAC009S0912W0407", "wav": "./aishell/wav/test/S0912/BAC009S0912W0407.wav", "txt": "但是出于预算考虑"} -{"key": "BAC009S0912W0408", "wav": "./aishell/wav/test/S0912/BAC009S0912W0408.wav", "txt": "陈奕迅隔空发表爱的宣言也是啊"} -{"key": "BAC009S0912W0409", "wav": "./aishell/wav/test/S0912/BAC009S0912W0409.wav", "txt": "例如出入帮忙开门拉椅子"} -{"key": "BAC009S0912W0410", "wav": "./aishell/wav/test/S0912/BAC009S0912W0410.wav", "txt": "新京报报道思维发散表情与肢体语言丰富"} -{"key": "BAC009S0912W0411", "wav": "./aishell/wav/test/S0912/BAC009S0912W0411.wav", "txt": "对于疯癫陈奕迅所长的这些设定歌迷早已习惯了"} -{"key": "BAC009S0912W0412", "wav": "./aishell/wav/test/S0912/BAC009S0912W0412.wav", "txt": "在凭借专辑米闪成为新一轮金曲歌王后"} -{"key": "BAC009S0912W0414", "wav": 
"./aishell/wav/test/S0912/BAC009S0912W0414.wav", "txt": "朱祖儿操刀灰色调封面"} -{"key": "BAC009S0912W0415", "wav": "./aishell/wav/test/S0912/BAC009S0912W0415.wav", "txt": "袁两半一人歌词包办"} -{"key": "BAC009S0912W0416", "wav": "./aishell/wav/test/S0912/BAC009S0912W0416.wav", "txt": "处于寻找状态中的挣扎"} -{"key": "BAC009S0912W0417", "wav": "./aishell/wav/test/S0912/BAC009S0912W0417.wav", "txt": "然而准备中三个字卸掉了他的纠结"} -{"key": "BAC009S0912W0418", "wav": "./aishell/wav/test/S0912/BAC009S0912W0418.wav", "txt": "二十九日晚间举办媒体听歌会"} -{"key": "BAC009S0912W0419", "wav": "./aishell/wav/test/S0912/BAC009S0912W0419.wav", "txt": "现场试听无条件人生马拉松等六首歌曲"} -{"key": "BAC009S0912W0420", "wav": "./aishell/wav/test/S0912/BAC009S0912W0420.wav", "txt": "终站是好友谢霆锋的创作"} -{"key": "BAC009S0912W0422", "wav": "./aishell/wav/test/S0912/BAC009S0912W0422.wav", "txt": "花了三年时间才得到这首歌"} -{"key": "BAC009S0912W0423", "wav": "./aishell/wav/test/S0912/BAC009S0912W0423.wav", "txt": "被问是否感觉到谢霆锋与王菲恋爱的甜蜜"} -{"key": "BAC009S0912W0428", "wav": "./aishell/wav/test/S0912/BAC009S0912W0428.wav", "txt": "十九点二十六分"} -{"key": "BAC009S0912W0429", "wav": "./aishell/wav/test/S0912/BAC009S0912W0429.wav", "txt": "好友陈妍希晒与潘玮柏搞怪合影为他庆生"} -{"key": "BAC009S0912W0430", "wav": "./aishell/wav/test/S0912/BAC009S0912W0430.wav", "txt": "称潘玮柏生日快乐"} -{"key": "BAC009S0912W0431", "wav": "./aishell/wav/test/S0912/BAC009S0912W0431.wav", "txt": "港富豪被绑涉及两岸三地绑匪要求赎金用比特币"} -{"key": "BAC009S0912W0432", "wav": "./aishell/wav/test/S0912/BAC009S0912W0432.wav", "txt": "日前遭人绑架并勒索七零零零万港元"} -{"key": "BAC009S0912W0433", "wav": "./aishell/wav/test/S0912/BAC009S0912W0433.wav", "txt": "台港警方追查一个月"} -{"key": "BAC009S0912W0434", "wav": "./aishell/wav/test/S0912/BAC009S0912W0434.wav", "txt": "二十七日深夜终于在云林县一家废弃空屋中救出了黄立坤"} -{"key": "BAC009S0912W0435", "wav": "./aishell/wav/test/S0912/BAC009S0912W0435.wav", "txt": "获救第一句话就是我以为我活不了了"} -{"key": "BAC009S0912W0436", "wav": "./aishell/wav/test/S0912/BAC009S0912W0436.wav", "txt": "港报评上海迪尼士不意味着香港迪尼士的没落"} -{"key": "BAC009S0912W0437", "wav": "./aishell/wav/test/S0912/BAC009S0912W0437.wav", "txt": "参考消息网七月二八日报道"} -{"key": "BAC009S0912W0438", "wav": "./aishell/wav/test/S0912/BAC009S0912W0438.wav", "txt": "港报内地医院仍控制处方药销售电商盼网售解禁"} -{"key": "BAC009S0912W0439", "wav": "./aishell/wav/test/S0912/BAC009S0912W0439.wav", "txt": "参考消息网九月一七日报道港媒称"} -{"key": "BAC009S0912W0440", "wav": "./aishell/wav/test/S0912/BAC009S0912W0440.wav", "txt": "自从中国内地的第一家网上药店一零年前开张以来"} -{"key": "BAC009S0912W0441", "wav": "./aishell/wav/test/S0912/BAC009S0912W0441.wav", "txt": "大量资本已投入医药企业中"} -{"key": "BAC009S0912W0442", "wav": "./aishell/wav/test/S0912/BAC009S0912W0442.wav", "txt": "希望能从中国内地日益老龄化的一三亿人口中受益"} -{"key": "BAC009S0912W0443", "wav": "./aishell/wav/test/S0912/BAC009S0912W0443.wav", "txt": "港校两名内地生酒后街头野战当事人被起底"} -{"key": "BAC009S0912W0444", "wav": "./aishell/wav/test/S0912/BAC009S0912W0444.wav", "txt": "南都讯记者王睦广发自香港今年四月初"} -{"key": "BAC009S0912W0445", "wav": "./aishell/wav/test/S0912/BAC009S0912W0445.wav", "txt": "被拍下短片冠以野战之名在网上疯传"} -{"key": "BAC009S0912W0446", "wav": "./aishell/wav/test/S0912/BAC009S0912W0446.wav", "txt": "二人早前被香港警方以有违公德罪落案起诉"} -{"key": "BAC009S0912W0447", "wav": "./aishell/wav/test/S0912/BAC009S0912W0447.wav", "txt": "事件中的女方昨日被判一二个月感化令"} -{"key": "BAC009S0912W0448", "wav": "./aishell/wav/test/S0912/BAC009S0912W0448.wav", "txt": "男方则将于下月庭审"} -{"key": "BAC009S0912W0449", "wav": "./aishell/wav/test/S0912/BAC009S0912W0449.wav", "txt": "港珠澳大桥又起漂移风波可能进一步影响工期"} -{"key": "BAC009S0912W0450", "wav": "./aishell/wav/test/S0912/BAC009S0912W0450.wav", "txt": "其人工岛被指移动六七米"} -{"key": "BAC009S0912W0451", 
"wav": "./aishell/wav/test/S0912/BAC009S0912W0451.wav", "txt": "这个意外可能进一步影响工期"} -{"key": "BAC009S0912W0452", "wav": "./aishell/wav/test/S0912/BAC009S0912W0452.wav", "txt": "游乐场大章鱼甩飞游客母亲落地时紧抱儿子"} -{"key": "BAC009S0912W0453", "wav": "./aishell/wav/test/S0912/BAC009S0912W0453.wav", "txt": "在空中以高速自转带给游客惊险刺激的体验"} -{"key": "BAC009S0912W0454", "wav": "./aishell/wav/test/S0912/BAC009S0912W0454.wav", "txt": "背部撞断了游乐场场边的三根不锈钢护栏"} -{"key": "BAC009S0912W0455", "wav": "./aishell/wav/test/S0912/BAC009S0912W0455.wav", "txt": "游学夏令营的无奈花豪华团价格吃喝难保"} -{"key": "BAC009S0912W0457", "wav": "./aishell/wav/test/S0912/BAC009S0912W0457.wav", "txt": "游客三亚海滩赏月后留二九吨垃圾三百人连夜清理"} -{"key": "BAC009S0912W0458", "wav": "./aishell/wav/test/S0912/BAC009S0912W0458.wav", "txt": "当海滩上如潮的人群散去"} -{"key": "BAC009S0912W0459", "wav": "./aishell/wav/test/S0912/BAC009S0912W0459.wav", "txt": "留下的却是被随手丢弃的垃圾"} -{"key": "BAC009S0912W0460", "wav": "./aishell/wav/test/S0912/BAC009S0912W0460.wav", "txt": "虽然海滩上设置了众多垃圾桶"} -{"key": "BAC009S0912W0461", "wav": "./aishell/wav/test/S0912/BAC009S0912W0461.wav", "txt": "但赏月人群还是乱扔垃圾"} -{"key": "BAC009S0912W0462", "wav": "./aishell/wav/test/S0912/BAC009S0912W0462.wav", "txt": "从二八日凌晨四点半至六点半这整整二个小时里"} -{"key": "BAC009S0912W0463", "wav": "./aishell/wav/test/S0912/BAC009S0912W0463.wav", "txt": "游客三亚游泳致终身残疾向旅行社索赔一九六万"} -{"key": "BAC009S0912W0464", "wav": "./aishell/wav/test/S0912/BAC009S0912W0464.wav", "txt": "成都男子张呈亮化名旅行时到三亚海滩游泳"} -{"key": "BAC009S0912W0465", "wav": "./aishell/wav/test/S0912/BAC009S0912W0465.wav", "txt": "下海后却突然失去意识"} -{"key": "BAC009S0912W0466", "wav": "./aishell/wav/test/S0912/BAC009S0912W0466.wav", "txt": "送医后被查出颈部脊髓损伤"} -{"key": "BAC009S0912W0467", "wav": "./aishell/wav/test/S0912/BAC009S0912W0467.wav", "txt": "张先生在青羊法院提起诉讼"} -{"key": "BAC009S0912W0468", "wav": "./aishell/wav/test/S0912/BAC009S0912W0468.wav", "txt": "此案正在进一步审理之中"} -{"key": "BAC009S0912W0469", "wav": "./aishell/wav/test/S0912/BAC009S0912W0469.wav", "txt": "游客下桥拍照踩死植物水杉栈道仙境拉铁丝网"} -{"key": "BAC009S0912W0470", "wav": "./aishell/wav/test/S0912/BAC009S0912W0470.wav", "txt": "当植物恢复正常生长后铁丝网将拆除"} -{"key": "BAC009S0912W0471", "wav": "./aishell/wav/test/S0912/BAC009S0912W0471.wav", "txt": "游客不满小孩超高补票与景区工作人员群殴"} -{"key": "BAC009S0912W0472", "wav": "./aishell/wav/test/S0912/BAC009S0912W0472.wav", "txt": "一段游客暴打景区员工的视频开始在网上发酵"} -{"key": "BAC009S0912W0473", "wav": "./aishell/wav/test/S0912/BAC009S0912W0473.wav", "txt": "某景点大门处多名游客与身着穿服的工作人员大打出手"} -{"key": "BAC009S0912W0474", "wav": "./aishell/wav/test/S0912/BAC009S0912W0474.wav", "txt": "游客乌鲁木齐吃自助被罚二四零零元工商部门介入"} -{"key": "BAC009S0912W0475", "wav": "./aishell/wav/test/S0912/BAC009S0912W0475.wav", "txt": "剩下了一二零零克食物"} -{"key": "BAC009S0912W0476", "wav": "./aishell/wav/test/S0912/BAC009S0912W0476.wav", "txt": "被餐厅罚款二四零零元"} -{"key": "BAC009S0912W0477", "wav": "./aishell/wav/test/S0912/BAC009S0912W0477.wav", "txt": "餐厅返还了游客的二四零零元"} -{"key": "BAC009S0912W0478", "wav": "./aishell/wav/test/S0912/BAC009S0912W0478.wav", "txt": "物价部门工商部门已介入调查"} -{"key": "BAC009S0912W0479", "wav": "./aishell/wav/test/S0912/BAC009S0912W0479.wav", "txt": "游客偷走雷峰塔砖块想供奉起来做药给老人喝"} -{"key": "BAC009S0912W0480", "wav": "./aishell/wav/test/S0912/BAC009S0912W0480.wav", "txt": "游客入住药店被收二零元马桶使用费消协可举报"} -{"key": "BAC009S0912W0481", "wav": "./aishell/wav/test/S0912/BAC009S0912W0481.wav", "txt": "住酒店还要交二零元马桶费"} -{"key": "BAC009S0912W0482", "wav": "./aishell/wav/test/S0912/BAC009S0912W0482.wav", "txt": "南京市民张女士化姓去无锡旅游时"} -{"key": "BAC009S0912W0483", "wav": "./aishell/wav/test/S0912/BAC009S0912W0483.wav", "txt": 
"通过网站团购了无锡江南丹青度假酒店一间套房"} -{"key": "BAC009S0912W0484", "wav": "./aishell/wav/test/S0912/BAC009S0912W0484.wav", "txt": "退房结账时却被告知扣了二零元马桶使用费"} -{"key": "BAC009S0912W0485", "wav": "./aishell/wav/test/S0912/BAC009S0912W0485.wav", "txt": "这让张女士哭笑不得"} -{"key": "BAC009S0912W0486", "wav": "./aishell/wav/test/S0912/BAC009S0912W0486.wav", "txt": "酒店方承诺退还二零元马桶使用费"} -{"key": "BAC009S0912W0487", "wav": "./aishell/wav/test/S0912/BAC009S0912W0487.wav", "txt": "酒店行为属于乱收费"} -{"key": "BAC009S0912W0488", "wav": "./aishell/wav/test/S0912/BAC009S0912W0488.wav", "txt": "消费者可以直接向物价部门和旅游部门举报"} -{"key": "BAC009S0912W0489", "wav": "./aishell/wav/test/S0912/BAC009S0912W0489.wav", "txt": "现代快报记者赵书伶"} -{"key": "BAC009S0912W0490", "wav": "./aishell/wav/test/S0912/BAC009S0912W0490.wav", "txt": "游客再曝日照点海鲜太少被围殴当地警方证实"} -{"key": "BAC009S0912W0491", "wav": "./aishell/wav/test/S0912/BAC009S0912W0491.wav", "txt": "网友先在微博中陈述了悲惨遭遇"} -{"key": "BAC009S0912W0492", "wav": "./aishell/wav/test/S0912/BAC009S0912W0492.wav", "txt": "据称是当事人之一在派出所通过一个亲戚的微博发的"} -{"key": "BAC009S0912W0493", "wav": "./aishell/wav/test/S0912/BAC009S0912W0493.wav", "txt": "游客北京游两天遭引导消费近二万元"} -{"key": "BAC009S0912W0494", "wav": "./aishell/wav/test/S0912/BAC009S0912W0494.wav", "txt": "京华时报讯记者武红利与家人来京旅游"} -{"key": "BAC009S0912W0495", "wav": "./aishell/wav/test/S0912/BAC009S0912W0495.wav", "txt": "王女士与旅行社签订四天五晚的旅行合同"} -{"key": "BAC009S0913W0121", "wav": "./aishell/wav/test/S0913/BAC009S0913W0121.wav", "txt": "在最希望国家实施的调控政策这一问题上"} -{"key": "BAC009S0913W0122", "wav": "./aishell/wav/test/S0913/BAC009S0913W0122.wav", "txt": "有百分之五的网友选择了提高公积金贷款额度"} -{"key": "BAC009S0913W0123", "wav": "./aishell/wav/test/S0913/BAC009S0913W0123.wav", "txt": "百分之五的网友选择了房贷利率打折优惠"} -{"key": "BAC009S0913W0124", "wav": "./aishell/wav/test/S0913/BAC009S0913W0124.wav", "txt": "百分之五的网友倾向于房产税的开征"} -{"key": "BAC009S0913W0125", "wav": "./aishell/wav/test/S0913/BAC009S0913W0125.wav", "txt": "百分之五的网友希望放开一线城市的限购政策"} -{"key": "BAC009S0913W0126", "wav": "./aishell/wav/test/S0913/BAC009S0913W0126.wav", "txt": "网友的置业目的为首套房自住的占到了百分之六十"} -{"key": "BAC009S0913W0127", "wav": "./aishell/wav/test/S0913/BAC009S0913W0127.wav", "txt": "改善型二套房比例比约为百分之五"} -{"key": "BAC009S0913W0128", "wav": "./aishell/wav/test/S0913/BAC009S0913W0128.wav", "txt": "三套以上投资性购房占百分之五"} -{"key": "BAC009S0913W0129", "wav": "./aishell/wav/test/S0913/BAC009S0913W0129.wav", "txt": "其他目的的占比为百分之五"} -{"key": "BAC009S0913W0130", "wav": "./aishell/wav/test/S0913/BAC009S0913W0130.wav", "txt": "在商品房性价比的选择上"} -{"key": "BAC009S0913W0131", "wav": "./aishell/wav/test/S0913/BAC009S0913W0131.wav", "txt": "万科以百分之五的票数获选性价比最高的房企"} -{"key": "BAC009S0913W0132", "wav": "./aishell/wav/test/S0913/BAC009S0913W0132.wav", "txt": "绿地保利万达分列性价比最高房企的二三四名"} -{"key": "BAC009S0913W0133", "wav": "./aishell/wav/test/S0913/BAC009S0913W0133.wav", "txt": "选择恒大世茂富力的网友均不足百分之十"} -{"key": "BAC009S0913W0134", "wav": "./aishell/wav/test/S0913/BAC009S0913W0134.wav", "txt": "有百分之五的网友选择了其他房企"} -{"key": "BAC009S0913W0135", "wav": "./aishell/wav/test/S0913/BAC009S0913W0135.wav", "txt": "二初楼市迎来多项利好政策"} -{"key": "BAC009S0913W0136", "wav": "./aishell/wav/test/S0913/BAC009S0913W0136.wav", "txt": "在多项政策的支持下"} -{"key": "BAC009S0913W0137", "wav": "./aishell/wav/test/S0913/BAC009S0913W0137.wav", "txt": "支持新产业新业态"} -{"key": "BAC009S0913W0138", "wav": "./aishell/wav/test/S0913/BAC009S0913W0138.wav", "txt": "集中释放用地政策红利"} -{"key": "BAC009S0913W0139", "wav": "./aishell/wav/test/S0913/BAC009S0913W0139.wav", "txt": "在加大新供用地保障力度方面"} -{"key": "BAC009S0913W0140", "wav": 
"./aishell/wav/test/S0913/BAC009S0913W0140.wav", "txt": "新产业发展快地用地集约求且需求大的地区"} -{"key": "BAC009S0913W0141", "wav": "./aishell/wav/test/S0913/BAC009S0913W0141.wav", "txt": "在鼓励盘活利用现有用地方面"} -{"key": "BAC009S0913W0142", "wav": "./aishell/wav/test/S0913/BAC009S0913W0142.wav", "txt": "意见提出对制造业迈向中高端的企业用地"} -{"key": "BAC009S0913W0143", "wav": "./aishell/wav/test/S0913/BAC009S0913W0143.wav", "txt": "生产性科技及高技术服务业发展用地"} -{"key": "BAC009S0913W0144", "wav": "./aishell/wav/test/S0913/BAC009S0913W0144.wav", "txt": "建设创业创新平台用地"} -{"key": "BAC009S0913W0145", "wav": "./aishell/wav/test/S0913/BAC009S0913W0145.wav", "txt": "互联网行动计划实实施用地实行过渡期政策"} -{"key": "BAC009S0913W0146", "wav": "./aishell/wav/test/S0913/BAC009S0913W0146.wav", "txt": "按新用途新权利类型市场价办理用地手续"} -{"key": "BAC009S0913W0147", "wav": "./aishell/wav/test/S0913/BAC009S0913W0147.wav", "txt": "支持新产业新业态"} -{"key": "BAC009S0913W0148", "wav": "./aishell/wav/test/S0913/BAC009S0913W0148.wav", "txt": "由国土资源部联合国家"} -{"key": "BAC009S0913W0149", "wav": "./aishell/wav/test/S0913/BAC009S0913W0149.wav", "txt": "正式放松外贸外资投资我国房地产相关规定"} -{"key": "BAC009S0913W0150", "wav": "./aishell/wav/test/S0913/BAC009S0913W0150.wav", "txt": "我国对房地产的行政干预政策陆续退出"} -{"key": "BAC009S0913W0151", "wav": "./aishell/wav/test/S0913/BAC009S0913W0151.wav", "txt": "放松限外是必然趋势"} -{"key": "BAC009S0913W0152", "wav": "./aishell/wav/test/S0913/BAC009S0913W0152.wav", "txt": "此举将有利于市场信心的培养"} -{"key": "BAC009S0913W0153", "wav": "./aishell/wav/test/S0913/BAC009S0913W0153.wav", "txt": "并利好一二线城市的中高端住宅"} -{"key": "BAC009S0913W0154", "wav": "./aishell/wav/test/S0913/BAC009S0913W0154.wav", "txt": "对于外商投资房地产企业注册资本与投资总额比例"} -{"key": "BAC009S0913W0155", "wav": "./aishell/wav/test/S0913/BAC009S0913W0155.wav", "txt": "对于实施住房限购政策的城市"} -{"key": "BAC009S0913W0156", "wav": "./aishell/wav/test/S0913/BAC009S0913W0156.wav", "txt": "境外个人购房应当符合当地政策规定"} -{"key": "BAC009S0913W0157", "wav": "./aishell/wav/test/S0913/BAC009S0913W0157.wav", "txt": "上海易居研究所副院长杨红旭表示"} -{"key": "BAC009S0913W0158", "wav": "./aishell/wav/test/S0913/BAC009S0913W0158.wav", "txt": "外资管制放松是大势所趋"} -{"key": "BAC009S0913W0159", "wav": "./aishell/wav/test/S0913/BAC009S0913W0159.wav", "txt": "随着我国行政干预政策的陆续退出"} -{"key": "BAC009S0913W0160", "wav": "./aishell/wav/test/S0913/BAC009S0913W0160.wav", "txt": "此前为限制外资炒房"} -{"key": "BAC009S0913W0161", "wav": "./aishell/wav/test/S0913/BAC009S0913W0161.wav", "txt": "我国出台了一系列限外令"} -{"key": "BAC009S0913W0162", "wav": "./aishell/wav/test/S0913/BAC009S0913W0162.wav", "txt": "二的向境外投资方出售国内资产征税规定"} -{"key": "BAC009S0913W0163", "wav": "./aishell/wav/test/S0913/BAC009S0913W0163.wav", "txt": "国家外汇局出台过规定"} -{"key": "BAC009S0913W0164", "wav": "./aishell/wav/test/S0913/BAC009S0913W0164.wav", "txt": "国家发改委也发出过通知"} -{"key": "BAC009S0913W0165", "wav": "./aishell/wav/test/S0913/BAC009S0913W0165.wav", "txt": "对于提供给外籍人士的个人住房按揭贷款的外债需求"} -{"key": "BAC009S0913W0166", "wav": "./aishell/wav/test/S0913/BAC009S0913W0166.wav", "txt": "不予安排中长期外债额度"} -{"key": "BAC009S0913W0167", "wav": "./aishell/wav/test/S0913/BAC009S0913W0167.wav", "txt": "房地产被视作保值升值的投资标的被炒作"} -{"key": "BAC009S0913W0168", "wav": "./aishell/wav/test/S0913/BAC009S0913W0168.wav", "txt": "但目前的形势早已改变"} -{"key": "BAC009S0913W0169", "wav": "./aishell/wav/test/S0913/BAC009S0913W0169.wav", "txt": "此前外资购房主要集中在一线城市和几个热点二线城市"} -{"key": "BAC009S0913W0170", "wav": "./aishell/wav/test/S0913/BAC009S0913W0170.wav", "txt": "而现在这类城市房价已经很高"} -{"key": "BAC009S0913W0171", "wav": "./aishell/wav/test/S0913/BAC009S0913W0171.wav", "txt": "即使限外令放开也不会出现外资大规模买房"} -{"key": "BAC009S0913W0172", "wav": 
"./aishell/wav/test/S0913/BAC009S0913W0172.wav", "txt": "中原地产市场总监张大伟认为"} -{"key": "BAC009S0913W0173", "wav": "./aishell/wav/test/S0913/BAC009S0913W0173.wav", "txt": "放松对外限制利用利好一二线城市中高端物业"} -{"key": "BAC009S0913W0174", "wav": "./aishell/wav/test/S0913/BAC009S0913W0174.wav", "txt": "对于外商房企的注册资本金降低要求"} -{"key": "BAC009S0913W0175", "wav": "./aishell/wav/test/S0913/BAC009S0913W0175.wav", "txt": "也有利于部分企业的资金周转"} -{"key": "BAC009S0913W0176", "wav": "./aishell/wav/test/S0913/BAC009S0913W0176.wav", "txt": "正式放松外资投资我国房地产相关规定"} -{"key": "BAC009S0913W0177", "wav": "./aishell/wav/test/S0913/BAC009S0913W0177.wav", "txt": "允许境外机构在境内设"} -{"key": "BAC009S0913W0178", "wav": "./aishell/wav/test/S0913/BAC009S0913W0178.wav", "txt": "六部委松绑楼市限外令"} -{"key": "BAC009S0913W0179", "wav": "./aishell/wav/test/S0913/BAC009S0913W0179.wav", "txt": "外资在华房地产投资购房限制被松绑"} -{"key": "BAC009S0913W0180", "wav": "./aishell/wav/test/S0913/BAC009S0913W0180.wav", "txt": "允许机构和个人在中国购房"} -{"key": "BAC009S0913W0181", "wav": "./aishell/wav/test/S0913/BAC009S0913W0181.wav", "txt": "中房指数研究所院长陈晟表示"} -{"key": "BAC009S0913W0182", "wav": "./aishell/wav/test/S0913/BAC009S0913W0182.wav", "txt": "此举对促进外企在华投资房地产有积极作用"} -{"key": "BAC009S0913W0183", "wav": "./aishell/wav/test/S0913/BAC009S0913W0183.wav", "txt": "相关公司股票走势鄂尔多斯"} -{"key": "BAC009S0913W0184", "wav": "./aishell/wav/test/S0913/BAC009S0913W0184.wav", "txt": "内地产投资比例有限"} -{"key": "BAC009S0913W0185", "wav": "./aishell/wav/test/S0913/BAC009S0913W0185.wav", "txt": "此项政策对中国楼市影响有限"} -{"key": "BAC009S0913W0186", "wav": "./aishell/wav/test/S0913/BAC009S0913W0186.wav", "txt": "对于实施住房限购政策的城市"} -{"key": "BAC009S0913W0187", "wav": "./aishell/wav/test/S0913/BAC009S0913W0187.wav", "txt": "为环境保护提供更加完备有效的法制保障"} -{"key": "BAC009S0913W0188", "wav": "./aishell/wav/test/S0913/BAC009S0913W0188.wav", "txt": "进一步完善环境政策"} -{"key": "BAC009S0913W0189", "wav": "./aishell/wav/test/S0913/BAC009S0913W0189.wav", "txt": "健全环境执法调协调机制"} -{"key": "BAC009S0913W0190", "wav": "./aishell/wav/test/S0913/BAC009S0913W0190.wav", "txt": "国务院国资委力挺国企"} -{"key": "BAC009S0913W0191", "wav": "./aishell/wav/test/S0913/BAC009S0913W0191.wav", "txt": "具备条件的要积极引进战略投资者"} -{"key": "BAC009S0913W0192", "wav": "./aishell/wav/test/S0913/BAC009S0913W0192.wav", "txt": "推进主营业务整体上市"} -{"key": "BAC009S0913W0193", "wav": "./aishell/wav/test/S0913/BAC009S0913W0193.wav", "txt": "国资委接二连三对此表态或意味着国企将迎来上市高峰"} -{"key": "BAC009S0913W0194", "wav": "./aishell/wav/test/S0913/BAC009S0913W0194.wav", "txt": "使国有资本更多地向重要行业和关键领域集中"} -{"key": "BAC009S0913W0195", "wav": "./aishell/wav/test/S0913/BAC009S0913W0195.wav", "txt": "向具有优势的行业集中"} -{"key": "BAC009S0913W0196", "wav": "./aishell/wav/test/S0913/BAC009S0913W0196.wav", "txt": "向大企业大集团集中"} -{"key": "BAC009S0913W0197", "wav": "./aishell/wav/test/S0913/BAC009S0913W0197.wav", "txt": "要吸收民间资本参与国有企业改制重组"} -{"key": "BAC009S0913W0198", "wav": "./aishell/wav/test/S0913/BAC009S0913W0198.wav", "txt": "发展混合所有制经济"} -{"key": "BAC009S0913W0199", "wav": "./aishell/wav/test/S0913/BAC009S0913W0199.wav", "txt": "发挥国有大企业引领带动作用"} -{"key": "BAC009S0913W0200", "wav": "./aishell/wav/test/S0913/BAC009S0913W0200.wav", "txt": "促进各种所有制企业共同发展"} -{"key": "BAC009S0913W0201", "wav": "./aishell/wav/test/S0913/BAC009S0913W0201.wav", "txt": "至二零一一年六月底"} -{"key": "BAC009S0913W0202", "wav": "./aishell/wav/test/S0913/BAC009S0913W0202.wav", "txt": "中央企业控股境外上市公司"} -{"key": "BAC009S0913W0203", "wav": "./aishell/wav/test/S0913/BAC009S0913W0203.wav", "txt": "国资委还将支持企业走出去"} -{"key": "BAC009S0913W0204", "wav": "./aishell/wav/test/S0913/BAC009S0913W0204.wav", "txt": 
"逐步实现战略运营管管理全球化"} -{"key": "BAC009S0913W0205", "wav": "./aishell/wav/test/S0913/BAC009S0913W0205.wav", "txt": "应当经国务院国资委核准"} -{"key": "BAC009S0913W0206", "wav": "./aishell/wav/test/S0913/BAC009S0913W0206.wav", "txt": "办法五月一日起实施"} -{"key": "BAC009S0913W0207", "wav": "./aishell/wav/test/S0913/BAC009S0913W0207.wav", "txt": "国务院国资委对央企境外投资的管理法规在逐渐完善"} -{"key": "BAC009S0913W0208", "wav": "./aishell/wav/test/S0913/BAC009S0913W0208.wav", "txt": "中央企业在境外从事非主业投资"} -{"key": "BAC009S0913W0209", "wav": "./aishell/wav/test/S0913/BAC009S0913W0209.wav", "txt": "需要向国务院国资委报送申请核准非主业投资的请示"} -{"key": "BAC009S0913W0210", "wav": "./aishell/wav/test/S0913/BAC009S0913W0210.wav", "txt": "对非主业投资项目的有关决策文件"} -{"key": "BAC009S0913W0211", "wav": "./aishell/wav/test/S0913/BAC009S0913W0211.wav", "txt": "项目可行性研究报告尽职调查等相关文件"} -{"key": "BAC009S0913W0212", "wav": "./aishell/wav/test/S0913/BAC009S0913W0212.wav", "txt": "办法还特别提出一些建议"} -{"key": "BAC009S0913W0213", "wav": "./aishell/wav/test/S0913/BAC009S0913W0213.wav", "txt": "国务院国资委将指导中央企业之间加强境外投资合作"} -{"key": "BAC009S0913W0214", "wav": "./aishell/wav/test/S0913/BAC009S0913W0214.wav", "txt": "中央走出去的步伐正趋加快"} -{"key": "BAC009S0913W0215", "wav": "./aishell/wav/test/S0913/BAC009S0913W0215.wav", "txt": "央企在境外含港澳地区营收"} -{"key": "BAC009S0913W0216", "wav": "./aishell/wav/test/S0913/BAC009S0913W0216.wav", "txt": "利润总额较大"} -{"key": "BAC009S0913W0217", "wav": "./aishell/wav/test/S0913/BAC009S0913W0217.wav", "txt": "同比较上年同期分别增长百分之三十和百分之二十八"} -{"key": "BAC009S0913W0218", "wav": "./aishell/wav/test/S0913/BAC009S0913W0218.wav", "txt": "涨幅远超央企整体水平"} -{"key": "BAC009S0913W0219", "wav": "./aishell/wav/test/S0913/BAC009S0913W0219.wav", "txt": "一方面很多央企已制定了海外战略"} -{"key": "BAC009S0913W0220", "wav": "./aishell/wav/test/S0913/BAC009S0913W0220.wav", "txt": "一些国家经济出现大的波动"} -{"key": "BAC009S0913W0221", "wav": "./aishell/wav/test/S0913/BAC009S0913W0221.wav", "txt": "而社会罢工劳资纠纷也时有发生"} -{"key": "BAC009S0913W0222", "wav": "./aishell/wav/test/S0913/BAC009S0913W0222.wav", "txt": "央企在境外投资面临的问题很多"} -{"key": "BAC009S0913W0223", "wav": "./aishell/wav/test/S0913/BAC009S0913W0223.wav", "txt": "目前央企境外投资仍处在初级阶段"} -{"key": "BAC009S0913W0224", "wav": "./aishell/wav/test/S0913/BAC009S0913W0224.wav", "txt": "制定和发布办法"} -{"key": "BAC009S0913W0225", "wav": "./aishell/wav/test/S0913/BAC009S0913W0225.wav", "txt": "是为了进一步建立健全境外国有资产管理制度"} -{"key": "BAC009S0913W0226", "wav": "./aishell/wav/test/S0913/BAC009S0913W0226.wav", "txt": "切实加强央企境外投资监管"} -{"key": "BAC009S0913W0227", "wav": "./aishell/wav/test/S0913/BAC009S0913W0227.wav", "txt": "确保境外国有资产保值增值"} -{"key": "BAC009S0913W0228", "wav": "./aishell/wav/test/S0913/BAC009S0913W0228.wav", "txt": "更好地适应了新形势的需要"} -{"key": "BAC009S0913W0229", "wav": "./aishell/wav/test/S0913/BAC009S0913W0229.wav", "txt": "国务院多举措力挺农产品流通"} -{"key": "BAC009S0913W0230", "wav": "./aishell/wav/test/S0913/BAC009S0913W0230.wav", "txt": "免征蔬菜流通环节的增值税"} -{"key": "BAC009S0913W0231", "wav": "./aishell/wav/test/S0913/BAC009S0913W0231.wav", "txt": "提出完善农产品流通税收政策"} -{"key": "BAC009S0913W0232", "wav": "./aishell/wav/test/S0913/BAC009S0913W0232.wav", "txt": "免征蔬菜流通环节增值税加强金融支持"} -{"key": "BAC009S0913W0233", "wav": "./aishell/wav/test/S0913/BAC009S0913W0233.wav", "txt": "相关公司股票走势农产品"} -{"key": "BAC009S0913W0234", "wav": "./aishell/wav/test/S0913/BAC009S0913W0234.wav", "txt": "各地要鼓励流通企业跨地区兼并重组和投资合作"} -{"key": "BAC009S0913W0235", "wav": "./aishell/wav/test/S0913/BAC009S0913W0235.wav", "txt": "以加强产销衔接为重点"} -{"key": "BAC009S0913W0236", "wav": "./aishell/wav/test/S0913/BAC009S0913W0236.wav", "txt": "加强鲜活农产品流通基础设施建设"} -{"key": 
"BAC009S0913W0237", "wav": "./aishell/wav/test/S0913/BAC009S0913W0237.wav", "txt": "创新鲜活农产品的流通模式"} -{"key": "BAC009S0913W0238", "wav": "./aishell/wav/test/S0913/BAC009S0913W0238.wav", "txt": "提高流通组织化程度"} -{"key": "BAC009S0913W0239", "wav": "./aishell/wav/test/S0913/BAC009S0913W0239.wav", "txt": "完善流通链条和市场布局"} -{"key": "BAC009S0913W0240", "wav": "./aishell/wav/test/S0913/BAC009S0913W0240.wav", "txt": "进一步减少流通环节"} -{"key": "BAC009S0913W0241", "wav": "./aishell/wav/test/S0913/BAC009S0913W0241.wav", "txt": "保障鲜活农产品市场供应和价格稳定"} -{"key": "BAC009S0913W0242", "wav": "./aishell/wav/test/S0913/BAC009S0913W0242.wav", "txt": "各地要依据城市总体规划和城市网点商业规划"} -{"key": "BAC009S0913W0243", "wav": "./aishell/wav/test/S0913/BAC009S0913W0243.wav", "txt": "鼓励流通企业跨地区兼并重组和投资合作"} -{"key": "BAC009S0913W0244", "wav": "./aishell/wav/test/S0913/BAC009S0913W0244.wav", "txt": "要大力推进产销衔接"} -{"key": "BAC009S0913W0245", "wav": "./aishell/wav/test/S0913/BAC009S0913W0245.wav", "txt": "完善市场监测预警和信息发布机制"} -{"key": "BAC009S0913W0246", "wav": "./aishell/wav/test/S0913/BAC009S0913W0246.wav", "txt": "建立健全重要农产品储备制度"} -{"key": "BAC009S0913W0247", "wav": "./aishell/wav/test/S0913/BAC009S0913W0247.wav", "txt": "完善农产品跨区调运调剂机制"} -{"key": "BAC009S0913W0248", "wav": "./aishell/wav/test/S0913/BAC009S0913W0248.wav", "txt": "各城市要根据消费需求和季节变化"} -{"key": "BAC009S0913W0249", "wav": "./aishell/wav/test/S0913/BAC009S0913W0249.wav", "txt": "合理确定耐贮蔬菜的流通动态库存数量"} -{"key": "BAC009S0913W0250", "wav": "./aishell/wav/test/S0913/BAC009S0913W0250.wav", "txt": "加快鲜活农产品质量安全追溯体系建设"} -{"key": "BAC009S0913W0251", "wav": "./aishell/wav/test/S0913/BAC009S0913W0251.wav", "txt": "通过投资入股产权回购回租建公建配套等方式"} -{"key": "BAC009S0913W0252", "wav": "./aishell/wav/test/S0913/BAC009S0913W0252.wav", "txt": "发挥财政资金引导示范作用"} -{"key": "BAC009S0913W0254", "wav": "./aishell/wav/test/S0913/BAC009S0913W0254.wav", "txt": "这样可以加深苹果和消费者之间的关系"} -{"key": "BAC009S0913W0255", "wav": "./aishell/wav/test/S0913/BAC009S0913W0255.wav", "txt": "对未来的销量至关重要"} -{"key": "BAC009S0913W0258", "wav": "./aishell/wav/test/S0913/BAC009S0913W0258.wav", "txt": "为提高苹果零售商店的服务质量"} -{"key": "BAC009S0913W0259", "wav": "./aishell/wav/test/S0913/BAC009S0913W0259.wav", "txt": "该系统包含一套算法"} -{"key": "BAC009S0913W0260", "wav": "./aishell/wav/test/S0913/BAC009S0913W0260.wav", "txt": "有媒体援引知情人士消息称"} -{"key": "BAC009S0913W0261", "wav": "./aishell/wav/test/S0913/BAC009S0913W0261.wav", "txt": "苹果将引入这样一套顾客接待系统"} -{"key": "BAC009S0913W0262", "wav": "./aishell/wav/test/S0913/BAC009S0913W0262.wav", "txt": "前往苹果零售店的顾客将比餐厅订餐叫号还方便"} -{"key": "BAC009S0913W0263", "wav": "./aishell/wav/test/S0913/BAC009S0913W0263.wav", "txt": "苹果零售店实行先到先服务的原则"} -{"key": "BAC009S0913W0264", "wav": "./aishell/wav/test/S0913/BAC009S0913W0264.wav", "txt": "这样难免会出现某个客户的维修问题特别复杂"} -{"key": "BAC009S0913W0265", "wav": "./aishell/wav/test/S0913/BAC009S0913W0265.wav", "txt": "导致技术支持时间超过了预期分配时间"} -{"key": "BAC009S0913W0266", "wav": "./aishell/wav/test/S0913/BAC009S0913W0266.wav", "txt": "从而影响接下来的客户无法在指定时间点获得服务"} -{"key": "BAC009S0913W0267", "wav": "./aishell/wav/test/S0913/BAC009S0913W0267.wav", "txt": "新系统可根据难易程度进行排序"} -{"key": "BAC009S0913W0268", "wav": "./aishell/wav/test/S0913/BAC009S0913W0268.wav", "txt": "与现在的接待原则不同的是"} -{"key": "BAC009S0913W0269", "wav": "./aishell/wav/test/S0913/BAC009S0913W0269.wav", "txt": "此时客户可以选择离开苹果零售店"} -{"key": "BAC009S0913W0270", "wav": "./aishell/wav/test/S0913/BAC009S0913W0270.wav", "txt": "而当预订时间接近时"} -{"key": "BAC009S0913W0271", "wav": "./aishell/wav/test/S0913/BAC009S0913W0271.wav", "txt": "客户会再次收到短信提醒"} -{"key": "BAC009S0913W0272", 
"wav": "./aishell/wav/test/S0913/BAC009S0913W0272.wav", "txt": "客户回到苹果零售店后"} -{"key": "BAC009S0913W0274", "wav": "./aishell/wav/test/S0913/BAC009S0913W0274.wav", "txt": "以告知客户相关技术人员确切的空闲时间"} -{"key": "BAC009S0913W0275", "wav": "./aishell/wav/test/S0913/BAC009S0913W0275.wav", "txt": "以及在店内的具体位置"} -{"key": "BAC009S0913W0277", "wav": "./aishell/wav/test/S0913/BAC009S0913W0277.wav", "txt": "为提高苹果零售商店的服务质量"} -{"key": "BAC009S0913W0278", "wav": "./aishell/wav/test/S0913/BAC009S0913W0278.wav", "txt": "苹果靠什么颠复移动支付市场"} -{"key": "BAC009S0913W0279", "wav": "./aishell/wav/test/S0913/BAC009S0913W0279.wav", "txt": "苹果一口气召开了两次新品发布会"} -{"key": "BAC009S0913W0280", "wav": "./aishell/wav/test/S0913/BAC009S0913W0280.wav", "txt": "就在会场的凳子和垃圾尚未收拾干净的时候"} -{"key": "BAC009S0913W0281", "wav": "./aishell/wav/test/S0913/BAC009S0913W0281.wav", "txt": "全世界的报道已经蜂拥而至"} -{"key": "BAC009S0913W0282", "wav": "./aishell/wav/test/S0913/BAC009S0913W0282.wav", "txt": "失望中夹杂着嘲讽的情绪霸占了各模块的头条"} -{"key": "BAC009S0913W0283", "wav": "./aishell/wav/test/S0913/BAC009S0913W0283.wav", "txt": "科技经济社会金融全都是苹果的消息"} -{"key": "BAC009S0913W0284", "wav": "./aishell/wav/test/S0913/BAC009S0913W0284.wav", "txt": "害得汪峰也不敢随便表白了"} -{"key": "BAC009S0913W0285", "wav": "./aishell/wav/test/S0913/BAC009S0913W0285.wav", "txt": "而是统一地认为苹果开了有史以来最烂的发布会"} -{"key": "BAC009S0913W0286", "wav": "./aishell/wav/test/S0913/BAC009S0913W0286.wav", "txt": "他们推出的产品不仅非常鸡肋"} -{"key": "BAC009S0913W0288", "wav": "./aishell/wav/test/S0913/BAC009S0913W0288.wav", "txt": "就足以让专家们恶心七七四十九天了"} -{"key": "BAC009S0913W0289", "wav": "./aishell/wav/test/S0913/BAC009S0913W0289.wav", "txt": "但这些口水式的讨伐并没有影响苹果前进的脚步"} -{"key": "BAC009S0913W0291", "wav": "./aishell/wav/test/S0913/BAC009S0913W0291.wav", "txt": "证明了其向主流妥协的姿态"} -{"key": "BAC009S0913W0293", "wav": "./aishell/wav/test/S0913/BAC009S0913W0293.wav", "txt": "自二零零七年乔布斯重新发明手机开始"} -{"key": "BAC009S0913W0294", "wav": "./aishell/wav/test/S0913/BAC009S0913W0294.wav", "txt": "把它升级成为一款综合性智能终端之后"} -{"key": "BAC009S0913W0295", "wav": "./aishell/wav/test/S0913/BAC009S0913W0295.wav", "txt": "就开始潜移默化地渗透人类的生活"} -{"key": "BAC009S0913W0296", "wav": "./aishell/wav/test/S0913/BAC009S0913W0296.wav", "txt": "这种渗透犹如蜘蛛结网细菌繁殖病毒传播"} -{"key": "BAC009S0913W0297", "wav": "./aishell/wav/test/S0913/BAC009S0913W0297.wav", "txt": "悄无声息又经年累月"} -{"key": "BAC009S0913W0298", "wav": "./aishell/wav/test/S0913/BAC009S0913W0298.wav", "txt": "我们甚至都没有来得及反抗就被完全征服了"} -{"key": "BAC009S0913W0299", "wav": "./aishell/wav/test/S0913/BAC009S0913W0299.wav", "txt": "我根本无法想象每天在朋友圈上花两个小时的情景"} -{"key": "BAC009S0913W0300", "wav": "./aishell/wav/test/S0913/BAC009S0913W0300.wav", "txt": "但现在已经成为了习生活习惯"} -{"key": "BAC009S0913W0301", "wav": "./aishell/wav/test/S0913/BAC009S0913W0301.wav", "txt": "但新贵移动支付具绝对能更深层次地改变用户的生活"} -{"key": "BAC009S0913W0302", "wav": "./aishell/wav/test/S0913/BAC009S0913W0302.wav", "txt": "乃至颠复现有的经济形态和支付格局"} -{"key": "BAC009S0913W0303", "wav": "./aishell/wav/test/S0913/BAC009S0913W0303.wav", "txt": "第一财经日报记者七月十三日从美的内部获悉"} -{"key": "BAC009S0913W0304", "wav": "./aishell/wav/test/S0913/BAC009S0913W0304.wav", "txt": "已获任美的部品事业部的总裁"} -{"key": "BAC009S0913W0305", "wav": "./aishell/wav/test/S0913/BAC009S0913W0305.wav", "txt": "而威灵电器七月九日下午也公告透露"} -{"key": "BAC009S0913W0306", "wav": "./aishell/wav/test/S0913/BAC009S0913W0306.wav", "txt": "于一九九一年加盟美的集团"} -{"key": "BAC009S0913W0307", "wav": "./aishell/wav/test/S0913/BAC009S0913W0307.wav", "txt": "美芝压缩机已是全球最大空调压缩机企业"} -{"key": "BAC009S0913W0308", "wav": "./aishell/wav/test/S0913/BAC009S0913W0308.wav", "txt": "占全球空调压缩机市场三分之一的份额"} 
-{"key": "BAC009S0913W0309", "wav": "./aishell/wav/test/S0913/BAC009S0913W0309.wav", "txt": "美的将美芝压缩机威灵电机合并"} -{"key": "BAC009S0913W0310", "wav": "./aishell/wav/test/S0913/BAC009S0913W0310.wav", "txt": "将有助于两大部品业务的研发资源销售渠道共享"} -{"key": "BAC009S0913W0311", "wav": "./aishell/wav/test/S0913/BAC009S0913W0311.wav", "txt": "美的部品事业部建立后"} -{"key": "BAC009S0913W0312", "wav": "./aishell/wav/test/S0913/BAC009S0913W0312.wav", "txt": "将成立压缩机开发研究院和微电机开发研究院"} -{"key": "BAC009S0913W0313", "wav": "./aishell/wav/test/S0913/BAC009S0913W0313.wav", "txt": "以区域为中心建立客户经理负责制制造方面"} -{"key": "BAC009S0913W0314", "wav": "./aishell/wav/test/S0913/BAC009S0913W0314.wav", "txt": "负责统一管理原电机事业部的各工厂制造系统"} -{"key": "BAC009S0913W0315", "wav": "./aishell/wav/test/S0913/BAC009S0913W0315.wav", "txt": "原压缩机事业部各工厂保持不变"} -{"key": "BAC009S0913W0316", "wav": "./aishell/wav/test/S0913/BAC009S0913W0316.wav", "txt": "美的集团公关部的相关人士告诉第一财经日报记者"} -{"key": "BAC009S0913W0317", "wav": "./aishell/wav/test/S0913/BAC009S0913W0317.wav", "txt": "目前美芝与威灵的合并"} -{"key": "BAC009S0913W0318", "wav": "./aishell/wav/test/S0913/BAC009S0913W0318.wav", "txt": "仅处于美的集团内部管理架构调整的阶段"} -{"key": "BAC009S0913W0319", "wav": "./aishell/wav/test/S0913/BAC009S0913W0319.wav", "txt": "还没体现在香港上市公司威灵电器的业务层面"} -{"key": "BAC009S0913W0320", "wav": "./aishell/wav/test/S0913/BAC009S0913W0320.wav", "txt": "由于向为民已获任威灵电机的董事会主席"} -{"key": "BAC009S0913W0321", "wav": "./aishell/wav/test/S0913/BAC009S0913W0321.wav", "txt": "威灵电机今后兼并美芝压缩机"} -{"key": "BAC009S0913W0322", "wav": "./aishell/wav/test/S0913/BAC009S0913W0322.wav", "txt": "美芝压缩机是隶属于美的集团旗下的业务"} -{"key": "BAC009S0913W0323", "wav": "./aishell/wav/test/S0913/BAC009S0913W0323.wav", "txt": "由于美的集团本身就是威灵电机的大股东"} -{"key": "BAC009S0913W0324", "wav": "./aishell/wav/test/S0913/BAC009S0913W0324.wav", "txt": "即使今后美芝压缩机被威灵电器兼并"} -{"key": "BAC009S0913W0325", "wav": "./aishell/wav/test/S0913/BAC009S0913W0325.wav", "txt": "也对美的集团的总体业绩影响不大"} -{"key": "BAC009S0913W0326", "wav": "./aishell/wav/test/S0913/BAC009S0913W0326.wav", "txt": "而威灵电器二零一四年的营业额约九十二点七三亿港元"} -{"key": "BAC009S0913W0327", "wav": "./aishell/wav/test/S0913/BAC009S0913W0327.wav", "txt": "同比增长百分之四净利润六点七八亿港元"} -{"key": "BAC009S0913W0328", "wav": "./aishell/wav/test/S0913/BAC009S0913W0328.wav", "txt": "同比下跌十三六点百分之六"} -{"key": "BAC009S0913W0329", "wav": "./aishell/wav/test/S0913/BAC009S0913W0329.wav", "txt": "如果威灵电器兼并美芝压缩机"} -{"key": "BAC009S0913W0330", "wav": "./aishell/wav/test/S0913/BAC009S0913W0330.wav", "txt": "将有利于增加威灵电器的收入和利润"} -{"key": "BAC009S0913W0331", "wav": "./aishell/wav/test/S0913/BAC009S0913W0331.wav", "txt": "除了威灵电器与美芝压缩机合并成为美的部品事业部之外"} -{"key": "BAC009S0913W0332", "wav": "./aishell/wav/test/S0913/BAC009S0913W0332.wav", "txt": "美的最近还把洗碗机事业部合并到美的的厨房电器事业部"} -{"key": "BAC009S0913W0333", "wav": "./aishell/wav/test/S0913/BAC009S0913W0333.wav", "txt": "美的的洗碗机业务以外销为主"} -{"key": "BAC009S0913W0334", "wav": "./aishell/wav/test/S0913/BAC009S0913W0334.wav", "txt": "业物内士向记者分析说"} -{"key": "BAC009S0913W0335", "wav": "./aishell/wav/test/S0913/BAC009S0913W0335.wav", "txt": "被合并到美的的厨房电器事业部后"} -{"key": "BAC009S0913W0336", "wav": "./aishell/wav/test/S0913/BAC009S0913W0336.wav", "txt": "将有助于美的洗碗机开拓国内市场"} -{"key": "BAC009S0913W0337", "wav": "./aishell/wav/test/S0913/BAC009S0913W0337.wav", "txt": "破坏和颠复是互联网时代的特征"} -{"key": "BAC009S0913W0338", "wav": "./aishell/wav/test/S0913/BAC009S0913W0338.wav", "txt": "美丽的丁香湖公园成为跑步爱好者的狂欢圣地"} -{"key": "BAC009S0913W0339", "wav": "./aishell/wav/test/S0913/BAC009S0913W0339.wav", "txt": "剪纸皮影戏等特色节目更是吸引了一批批观众围观"} -{"key": "BAC009S0913W0340", "wav": 
"./aishell/wav/test/S0913/BAC009S0913W0340.wav", "txt": "跑友们积极的参与剪纸活动"} -{"key": "BAC009S0913W0341", "wav": "./aishell/wav/test/S0913/BAC009S0913W0341.wav", "txt": "亲身感受沈阳当地浓郁的民俗文化内蕴"} -{"key": "BAC009S0913W0342", "wav": "./aishell/wav/test/S0913/BAC009S0913W0342.wav", "txt": "许多跑友争先恐后穿上沈阳花棉袄拍照"} -{"key": "BAC009S0913W0343", "wav": "./aishell/wav/test/S0913/BAC009S0913W0343.wav", "txt": "并与亲朋好友分享这份快乐"} -{"key": "BAC009S0913W0344", "wav": "./aishell/wav/test/S0913/BAC009S0913W0344.wav", "txt": "而涂鸦墙上写满了跑友们的目标和愿望"} -{"key": "BAC009S0913W0345", "wav": "./aishell/wav/test/S0913/BAC009S0913W0345.wav", "txt": "伴随着专业啦啦队的加油声"} -{"key": "BAC009S0913W0346", "wav": "./aishell/wav/test/S0913/BAC009S0913W0346.wav", "txt": "跑友们在奔跑中国沈阳站的赛道上尽情的展示自己"} -{"key": "BAC009S0913W0347", "wav": "./aishell/wav/test/S0913/BAC009S0913W0347.wav", "txt": "赛道两边设置了许多专业摄像头"} -{"key": "BAC009S0913W0348", "wav": "./aishell/wav/test/S0913/BAC009S0913W0348.wav", "txt": "主办方试图记录每一个跑友挥洒激情的每一个瞬间"} -{"key": "BAC009S0913W0349", "wav": "./aishell/wav/test/S0913/BAC009S0913W0349.wav", "txt": "将这份快乐与跑对跑步的执着传递给身边的好友"} -{"key": "BAC009S0913W0351", "wav": "./aishell/wav/test/S0913/BAC009S0913W0351.wav", "txt": "同时带动当地人民的奔跑热情"} -{"key": "BAC009S0913W0354", "wav": "./aishell/wav/test/S0913/BAC009S0913W0354.wav", "txt": "更加多维度的助推跑步事业在中国的发展"} -{"key": "BAC009S0913W0355", "wav": "./aishell/wav/test/S0913/BAC009S0913W0355.wav", "txt": "服务广大跑步爱好者"} -{"key": "BAC009S0913W0356", "wav": "./aishell/wav/test/S0913/BAC009S0913W0356.wav", "txt": "奔跑中国系列竞跑赛事将转战广州"} -{"key": "BAC009S0913W0357", "wav": "./aishell/wav/test/S0913/BAC009S0913W0357.wav", "txt": "中新网成都九月十五日电付敬懿十五日"} -{"key": "BAC009S0913W0358", "wav": "./aishell/wav/test/S0913/BAC009S0913W0358.wav", "txt": "服务时间约为五十三万小时"} -{"key": "BAC009S0913W0359", "wav": "./aishell/wav/test/S0913/BAC009S0913W0359.wav", "txt": "自二零一四年十二月五日正式启动志愿者招募工作以来"} -{"key": "BAC009S0913W0360", "wav": "./aishell/wav/test/S0913/BAC009S0913W0360.wav", "txt": "因为本次赛事比赛周期长赛区跨度大"} -{"key": "BAC009S0913W0361", "wav": "./aishell/wav/test/S0913/BAC009S0913W0361.wav", "txt": "经过网络测试综合面试专业技能体能测试等环节"} -{"key": "BAC009S0913W0362", "wav": "./aishell/wav/test/S0913/BAC009S0913W0362.wav", "txt": "机关企事业单位等社会志愿者三千一百名"} -{"key": "BAC009S0913W0363", "wav": "./aishell/wav/test/S0913/BAC009S0913W0363.wav", "txt": "为做好志愿者服务工作"} -{"key": "BAC009S0913W0364", "wav": "./aishell/wav/test/S0913/BAC009S0913W0364.wav", "txt": "邀请专家学者等三十馀人组成志愿者培训导师库"} -{"key": "BAC009S0913W0365", "wav": "./aishell/wav/test/S0913/BAC009S0913W0365.wav", "txt": "指导各赛区开展志愿服务培训"} -{"key": "BAC009S0913W0366", "wav": "./aishell/wav/test/S0913/BAC009S0913W0366.wav", "txt": "组委会设计了具有四川特色的志愿者服装"} -{"key": "BAC009S0913W0367", "wav": "./aishell/wav/test/S0913/BAC009S0913W0367.wav", "txt": "志愿者的那一抹绿并大家亲切地称呼为小青椒"} -{"key": "BAC009S0913W0368", "wav": "./aishell/wav/test/S0913/BAC009S0913W0368.wav", "txt": "随着赛会推进被越来越多的人所熟知"} -{"key": "BAC009S0913W0369", "wav": "./aishell/wav/test/S0913/BAC009S0913W0369.wav", "txt": "成为本届残运会志愿服务文化的重要部分"} -{"key": "BAC009S0913W0370", "wav": "./aishell/wav/test/S0913/BAC009S0913W0370.wav", "txt": "电子科大的小青椒早上六点起床"} -{"key": "BAC009S0913W0371", "wav": "./aishell/wav/test/S0913/BAC009S0913W0371.wav", "txt": "每天忙碌十三个小时"} -{"key": "BAC009S0913W0372", "wav": "./aishell/wav/test/S0913/BAC009S0913W0372.wav", "txt": "用他们真挚的微笑和运动员建立起心与心的连接"} -{"key": "BAC009S0913W0373", "wav": "./aishell/wav/test/S0913/BAC009S0913W0373.wav", "txt": "四川大学的手语志愿者要学习四千个手语动作"} -{"key": "BAC009S0913W0374", "wav": "./aishell/wav/test/S0913/BAC009S0913W0374.wav", "txt": 
"而他们熟练掌握的秘笈是一次又一次反复的训练和排练"} -{"key": "BAC009S0913W0375", "wav": "./aishell/wav/test/S0913/BAC009S0913W0375.wav", "txt": "小青椒用热情和真诚打动了每位运动员"} -{"key": "BAC009S0913W0376", "wav": "./aishell/wav/test/S0913/BAC009S0913W0376.wav", "txt": "他们每天手牵手肩并肩出入赛场"} -{"key": "BAC009S0913W0377", "wav": "./aishell/wav/test/S0913/BAC009S0913W0377.wav", "txt": "就像认识多年的朋友和兄弟姐妹一样"} -{"key": "BAC009S0913W0378", "wav": "./aishell/wav/test/S0913/BAC009S0913W0378.wav", "txt": "湖北运动员的家长给小青椒写来致谢信"} -{"key": "BAC009S0913W0379", "wav": "./aishell/wav/test/S0913/BAC009S0913W0379.wav", "txt": "也温暖和感动着志愿者"} -{"key": "BAC009S0913W0380", "wav": "./aishell/wav/test/S0913/BAC009S0913W0380.wav", "txt": "北京时间十月十日"} -{"key": "BAC009S0913W0381", "wav": "./aishell/wav/test/S0913/BAC009S0913W0381.wav", "txt": "根据韩国乒乓球协会的相关规定"} -{"key": "BAC009S0913W0382", "wav": "./aishell/wav/test/S0913/BAC009S0913W0382.wav", "txt": "根据国际乒联刚刚公布的最新一期世界排名"} -{"key": "BAC009S0913W0383", "wav": "./aishell/wav/test/S0913/BAC009S0913W0383.wav", "txt": "而排名第三位的李尚洙"} -{"key": "BAC009S0913W0384", "wav": "./aishell/wav/test/S0913/BAC009S0913W0384.wav", "txt": "将只参加奥运会团体赛的比赛"} -{"key": "BAC009S0913W0385", "wav": "./aishell/wav/test/S0913/BAC009S0913W0385.wav", "txt": "此次韩国男团派出了一老带二新的阵容"} -{"key": "BAC009S0913W0386", "wav": "./aishell/wav/test/S0913/BAC009S0913W0386.wav", "txt": "此次里约奥运会也将会是其第三次征战奥运会比赛"} -{"key": "BAC009S0913W0387", "wav": "./aishell/wav/test/S0913/BAC009S0913W0387.wav", "txt": "作为经验最为丰富的老大哥"} -{"key": "BAC009S0913W0388", "wav": "./aishell/wav/test/S0913/BAC009S0913W0388.wav", "txt": "他将尽全力带领队伍取得好成绩"} -{"key": "BAC009S0913W0389", "wav": "./aishell/wav/test/S0913/BAC009S0913W0389.wav", "txt": "在韩国男队中排名第四"} -{"key": "BAC009S0913W0390", "wav": "./aishell/wav/test/S0913/BAC009S0913W0390.wav", "txt": "女排三零阿根廷朱婷复出扣杀状态神勇"} -{"key": "BAC009S0913W0391", "wav": "./aishell/wav/test/S0913/BAC009S0913W0391.wav", "txt": "二零一五年第十二届女排世界杯战至第八轮"} -{"key": "BAC009S0913W0392", "wav": "./aishell/wav/test/S0913/BAC009S0913W0392.wav", "txt": "中国女排直落三周以三零取胜阿根廷拿到第七胜"} -{"key": "BAC009S0913W0393", "wav": "./aishell/wav/test/S0913/BAC009S0913W0393.wav", "txt": "早前意外崴伤脚踝的朱婷强势复出"} -{"key": "BAC009S0913W0394", "wav": "./aishell/wav/test/S0913/BAC009S0913W0394.wav", "txt": "斩获十五分冠全场并且拦网独得四分"} -{"key": "BAC009S0913W0395", "wav": "./aishell/wav/test/S0913/BAC009S0913W0395.wav", "txt": "伤愈复出找手感一传防守遇考验"} -{"key": "BAC009S0913W0396", "wav": "./aishell/wav/test/S0913/BAC009S0913W0396.wav", "txt": "本报讯记者李晖经过两天转场"} -{"key": "BAC009S0913W0397", "wav": "./aishell/wav/test/S0913/BAC009S0913W0397.wav", "txt": "中国女排昨天下午在冈山迎战古巴队"} -{"key": "BAC009S0913W0398", "wav": "./aishell/wav/test/S0913/BAC009S0913W0398.wav", "txt": "三局比分是二五比一九二五比十和二五比一四"} -{"key": "BAC009S0913W0399", "wav": "./aishell/wav/test/S0913/BAC009S0913W0399.wav", "txt": "中国女排从第三轮开始便被挤出了三甲"} -{"key": "BAC009S0913W0400", "wav": "./aishell/wav/test/S0913/BAC009S0913W0400.wav", "txt": "而东道主日本队紧追在中国队之后"} -{"key": "BAC009S0913W0401", "wav": "./aishell/wav/test/S0913/BAC009S0913W0401.wav", "txt": "若想保住进入前两名的资格"} -{"key": "BAC009S0913W0402", "wav": "./aishell/wav/test/S0913/BAC009S0913W0402.wav", "txt": "中国队在第二阶段的第三场比赛不仅要保全取九个积分"} -{"key": "BAC009S0913W0403", "wav": "./aishell/wav/test/S0913/BAC009S0913W0403.wav", "txt": "而且还要尽量在小分上取得优势"} -{"key": "BAC009S0913W0404", "wav": "./aishell/wav/test/S0913/BAC009S0913W0404.wav", "txt": "福斯只允许先拍一部"} -{"key": "BAC009S0913W0405", "wav": "./aishell/wav/test/S0913/BAC009S0913W0405.wav", "txt": "另一部要视独立日二的票房而定"} -{"key": "BAC009S0913W0406", "wav": 
"./aishell/wav/test/S0913/BAC009S0913W0406.wav", "txt": "影片的上映日期"} -{"key": "BAC009S0913W0407", "wav": "./aishell/wav/test/S0913/BAC009S0913W0407.wav", "txt": "也从原计划的二零一六年七月一日"} -{"key": "BAC009S0913W0408", "wav": "./aishell/wav/test/S0913/BAC009S0913W0408.wav", "txt": "潘玮柏以侧颜出镜"} -{"key": "BAC009S0913W0409", "wav": "./aishell/wav/test/S0913/BAC009S0913W0409.wav", "txt": "与陈妍希分别看向对方"} -{"key": "BAC009S0913W0410", "wav": "./aishell/wav/test/S0913/BAC009S0913W0410.wav", "txt": "可见两人友谊非同一般"} -{"key": "BAC009S0913W0411", "wav": "./aishell/wav/test/S0913/BAC009S0913W0411.wav", "txt": "搜狐娱乐据讯据香港媒体报道"} -{"key": "BAC009S0913W0412", "wav": "./aishell/wav/test/S0913/BAC009S0913W0412.wav", "txt": "陈妍希今天五月二日下午出席公益活动"} -{"key": "BAC009S0913W0413", "wav": "./aishell/wav/test/S0913/BAC009S0913W0413.wav", "txt": "小洋装更衬托出她的纤细身材"} -{"key": "BAC009S0913W0414", "wav": "./aishell/wav/test/S0913/BAC009S0913W0414.wav", "txt": "不过她出道以来身材一直是外界关注的焦点"} -{"key": "BAC009S0913W0415", "wav": "./aishell/wav/test/S0913/BAC009S0913W0415.wav", "txt": "陈妍希一直努力让自己的脸圆圆脸变瘦"} -{"key": "BAC009S0913W0416", "wav": "./aishell/wav/test/S0913/BAC009S0913W0416.wav", "txt": "今天她出席活动"} -{"key": "BAC009S0913W0417", "wav": "./aishell/wav/test/S0913/BAC009S0913W0417.wav", "txt": "对着镜头嘟嘴吐舌"} -{"key": "BAC009S0913W0418", "wav": "./aishell/wav/test/S0913/BAC009S0913W0418.wav", "txt": "当被问到对于被选为棉花糖女孩比较肉感的女生"} -{"key": "BAC009S0913W0419", "wav": "./aishell/wav/test/S0913/BAC009S0913W0419.wav", "txt": "她笑说我觉得蛮好的啊"} -{"key": "BAC009S0913W0420", "wav": "./aishell/wav/test/S0913/BAC009S0913W0420.wav", "txt": "搜狐娱乐讯日前"} -{"key": "BAC009S0913W0421", "wav": "./aishell/wav/test/S0913/BAC009S0913W0421.wav", "txt": "名为娱乐圈八卦的自然自媒体"} -{"key": "BAC009S0913W0422", "wav": "./aishell/wav/test/S0913/BAC009S0913W0422.wav", "txt": "曝出陈妍希拍戏时突然干呕"} -{"key": "BAC009S0913W0423", "wav": "./aishell/wav/test/S0913/BAC009S0913W0423.wav", "txt": "并推断其已怀孕"} -{"key": "BAC009S0913W0424", "wav": "./aishell/wav/test/S0913/BAC009S0913W0424.wav", "txt": "陈妍希公司官方账号发表微博辟谣"} -{"key": "BAC009S0913W0425", "wav": "./aishell/wav/test/S0913/BAC009S0913W0425.wav", "txt": "否认了陈妍希疑似怀孕的传闻"} -{"key": "BAC009S0913W0426", "wav": "./aishell/wav/test/S0913/BAC009S0913W0426.wav", "txt": "称陈妍希目前还在剧组拍戏"} -{"key": "BAC009S0913W0427", "wav": "./aishell/wav/test/S0913/BAC009S0913W0427.wav", "txt": "并感谢了各界对于陈妍希公开与陈晓恋情的祝福"} -{"key": "BAC009S0913W0428", "wav": "./aishell/wav/test/S0913/BAC009S0913W0428.wav", "txt": "陈妍希在微博发布跳绳视频"} -{"key": "BAC009S0913W0429", "wav": "./aishell/wav/test/S0913/BAC009S0913W0429.wav", "txt": "并写道每天早上二十零下"} -{"key": "BAC009S0913W0430", "wav": "./aishell/wav/test/S0913/BAC009S0913W0430.wav", "txt": "中午二十零下"} -{"key": "BAC009S0913W0431", "wav": "./aishell/wav/test/S0913/BAC009S0913W0431.wav", "txt": "北京地接旅行社有限公司负责人称"} -{"key": "BAC009S0913W0432", "wav": "./aishell/wav/test/S0913/BAC009S0913W0432.wav", "txt": "向乘客收取的船费属于应收项目"} -{"key": "BAC009S0913W0433", "wav": "./aishell/wav/test/S0913/BAC009S0913W0433.wav", "txt": "旅行社为了盈利设置购物环节"} -{"key": "BAC009S0913W0434", "wav": "./aishell/wav/test/S0913/BAC009S0913W0434.wav", "txt": "北京旅游服务热线反馈称"} -{"key": "BAC009S0913W0435", "wav": "./aishell/wav/test/S0913/BAC009S0913W0435.wav", "txt": "还有待职能部门进一步调查"} -{"key": "BAC009S0913W0436", "wav": "./aishell/wav/test/S0913/BAC009S0913W0436.wav", "txt": "游客参与不合理低价游将被罚专家怎么判断"} -{"key": "BAC009S0913W0437", "wav": "./aishell/wav/test/S0913/BAC009S0913W0437.wav", "txt": "关于低价游旅行团因强制购物产生的纠纷事件频出"} -{"key": "BAC009S0913W0438", "wav": "./aishell/wav/test/S0913/BAC009S0913W0438.wav", "txt": 
"甚至还出现了一些造成游人身伤害的悲剧"} -{"key": "BAC009S0913W0439", "wav": "./aishell/wav/test/S0913/BAC009S0913W0439.wav", "txt": "旅游法早已明令禁止"} -{"key": "BAC009S0913W0440", "wav": "./aishell/wav/test/S0913/BAC009S0913W0440.wav", "txt": "游客参与不合理低价游也将受到受处难执行"} -{"key": "BAC009S0913W0441", "wav": "./aishell/wav/test/S0913/BAC009S0913W0441.wav", "txt": "京汇佳律师事务所律师邱宝昌表示"} -{"key": "BAC009S0913W0442", "wav": "./aishell/wav/test/S0913/BAC009S0913W0442.wav", "txt": "消费者根本很难判断什么叫做不合理低价"} -{"key": "BAC009S0913W0444", "wav": "./aishell/wav/test/S0913/BAC009S0913W0444.wav", "txt": "园中园收费超景区大门票"} -{"key": "BAC009S0913W0445", "wav": "./aishell/wav/test/S0913/BAC009S0913W0445.wav", "txt": "游客在乌鲁木齐市吃自助餐浪费食物被罚二四零零元"} -{"key": "BAC009S0913W0446", "wav": "./aishell/wav/test/S0913/BAC009S0913W0446.wav", "txt": "剩下了一二零零克食物"} -{"key": "BAC009S0913W0447", "wav": "./aishell/wav/test/S0913/BAC009S0913W0447.wav", "txt": "被餐厅罚款二四零零元"} -{"key": "BAC009S0913W0448", "wav": "./aishell/wav/test/S0913/BAC009S0913W0448.wav", "txt": "此事昨日经网络曝光后"} -{"key": "BAC009S0913W0449", "wav": "./aishell/wav/test/S0913/BAC009S0913W0449.wav", "txt": "食客该不该如此浪费"} -{"key": "BAC009S0913W0450", "wav": "./aishell/wav/test/S0913/BAC009S0913W0450.wav", "txt": "餐厅有没有权力罚款"} -{"key": "BAC009S0913W0451", "wav": "./aishell/wav/test/S0913/BAC009S0913W0451.wav", "txt": "成为了网民争相讨论的话题"} -{"key": "BAC009S0913W0452", "wav": "./aishell/wav/test/S0913/BAC009S0913W0452.wav", "txt": "游客在公园躲雨遭雷击已脑死亡至今无人负责"} -{"key": "BAC009S0913W0453", "wav": "./aishell/wav/test/S0913/BAC009S0913W0453.wav", "txt": "信息时报讯记者周伟龙八月一零日下午"} -{"key": "BAC009S0913W0454", "wav": "./aishell/wav/test/S0913/BAC009S0913W0454.wav", "txt": "六名游客在海珠湖公园凉亭内躲雨"} -{"key": "BAC009S0913W0455", "wav": "./aishell/wav/test/S0913/BAC009S0913W0455.wav", "txt": "昨日记者从医院了解到"} -{"key": "BAC009S0913W0456", "wav": "./aishell/wav/test/S0913/BAC009S0913W0456.wav", "txt": "目前黄某已被诊断为脑死亡"} -{"key": "BAC009S0913W0457", "wav": "./aishell/wav/test/S0913/BAC009S0913W0457.wav", "txt": "记者回访海珠湖公园发现"} -{"key": "BAC009S0913W0458", "wav": "./aishell/wav/test/S0913/BAC009S0913W0458.wav", "txt": "出事凉亭依然呈现事发时的状态"} -{"key": "BAC009S0913W0459", "wav": "./aishell/wav/test/S0913/BAC009S0913W0459.wav", "txt": "一旦雷雨天游客在亭内出事"} -{"key": "BAC009S0913W0460", "wav": "./aishell/wav/test/S0913/BAC009S0913W0460.wav", "txt": "该告示不能成为园方免责的理由"} -{"key": "BAC009S0913W0461", "wav": "./aishell/wav/test/S0913/BAC009S0913W0461.wav", "txt": "游客在北京动物园内小树间拉吊床摇荡"} -{"key": "BAC009S0913W0462", "wav": "./aishell/wav/test/S0913/BAC009S0913W0462.wav", "txt": "却要承载一个成年人的体重"} -{"key": "BAC009S0913W0463", "wav": "./aishell/wav/test/S0913/BAC009S0913W0463.wav", "txt": "一家三口在两棵树间拉起了一张吊床"} -{"key": "BAC009S0913W0464", "wav": "./aishell/wav/test/S0913/BAC009S0913W0464.wav", "txt": "父亲和孩子轮流上去躺"} -{"key": "BAC009S0913W0465", "wav": "./aishell/wav/test/S0913/BAC009S0913W0465.wav", "txt": "躺进吊床的父亲还荡起吊床"} -{"key": "BAC009S0913W0466", "wav": "./aishell/wav/test/S0913/BAC009S0913W0466.wav", "txt": "游客在新加坡买祖母绿回国发现非纯天然"} -{"key": "BAC009S0913W0467", "wav": "./aishell/wav/test/S0913/BAC009S0913W0467.wav", "txt": "夏先生带太太跟团去新马泰旅游"} -{"key": "BAC009S0913W0468", "wav": "./aishell/wav/test/S0913/BAC009S0913W0468.wav", "txt": "在新加坡花费三万元购买了纯天然的祖母绿吊坠"} -{"key": "BAC009S0913W0469", "wav": "./aishell/wav/test/S0913/BAC009S0913W0469.wav", "txt": "回国后经鉴定发现不是天纯天然的"} -{"key": "BAC009S0913W0470", "wav": "./aishell/wav/test/S0913/BAC009S0913W0470.wav", "txt": "游客在日照旅游吃海鲜太少被围殴警方都有错"} -{"key": "BAC009S0913W0471", "wav": "./aishell/wav/test/S0913/BAC009S0913W0471.wav", "txt": "大众网河南游客爆料在日照旅游团因吃海鲜太少被围殴"} 
-{"key": "BAC009S0913W0472", "wav": "./aishell/wav/test/S0913/BAC009S0913W0472.wav", "txt": "警方回应都有过错经警方调查"} -{"key": "BAC009S0913W0473", "wav": "./aishell/wav/test/S0913/BAC009S0913W0473.wav", "txt": "双方均有不同程度受伤"} -{"key": "BAC009S0913W0474", "wav": "./aishell/wav/test/S0913/BAC009S0913W0474.wav", "txt": "河南籍游客张某某手部受伤及表皮损伤"} -{"key": "BAC009S0913W0475", "wav": "./aishell/wav/test/S0913/BAC009S0913W0475.wav", "txt": "店主陈某头皮裂创二处"} -{"key": "BAC009S0913W0476", "wav": "./aishell/wav/test/S0913/BAC009S0913W0476.wav", "txt": "游客在济南景区水池许愿观赏莲被砸成马蜂窝"} -{"key": "BAC009S0913W0477", "wav": "./aishell/wav/test/S0913/BAC009S0913W0477.wav", "txt": "游客扔硬币许愿观赏莲被砸成马蜂窝"} -{"key": "BAC009S0913W0478", "wav": "./aishell/wav/test/S0913/BAC009S0913W0478.wav", "txt": "游客在百年老店买到发霉盐水鸭商家主动退款"} -{"key": "BAC009S0913W0479", "wav": "./aishell/wav/test/S0913/BAC009S0913W0479.wav", "txt": "谢女士购买的盐水鸭外包装"} -{"key": "BAC009S0913W0480", "wav": "./aishell/wav/test/S0913/BAC009S0913W0480.wav", "txt": "华商报讯记者杨德合买了两个肉夹馍"} -{"key": "BAC009S0913W0481", "wav": "./aishell/wav/test/S0913/BAC009S0913W0481.wav", "txt": "结果被店员搓走了二零零零元"} -{"key": "BAC009S0913W0482", "wav": "./aishell/wav/test/S0913/BAC009S0913W0482.wav", "txt": "尽管在民警的协助下"} -{"key": "BAC009S0913W0483", "wav": "./aishell/wav/test/S0913/BAC009S0913W0483.wav", "txt": "但这也让首次来到陕西游玩的孙女士感到憋屈"} -{"key": "BAC009S0913W0484", "wav": "./aishell/wav/test/S0913/BAC009S0913W0484.wav", "txt": "游客大铜缸刻字警察喊话故宫刻字者请自首"} -{"key": "BAC009S0913W0485", "wav": "./aishell/wav/test/S0913/BAC009S0913W0485.wav", "txt": "北京警方已介入调查"} -{"key": "BAC009S0913W0486", "wav": "./aishell/wav/test/S0913/BAC009S0913W0486.wav", "txt": "目前正在进行一步工作中"} -{"key": "BAC009S0913W0487", "wav": "./aishell/wav/test/S0913/BAC009S0913W0487.wav", "txt": "游客成都遇连环车祸近千人隧道里死里逃亡"} -{"key": "BAC009S0913W0488", "wav": "./aishell/wav/test/S0913/BAC009S0913W0488.wav", "txt": "图片由胡先生提供本报讯记者喻莉出门旅游"} -{"key": "BAC009S0913W0489", "wav": "./aishell/wav/test/S0913/BAC009S0913W0489.wav", "txt": "近千人在隧道里上演生死时速"} -{"key": "BAC009S0913W0490", "wav": "./aishell/wav/test/S0913/BAC009S0913W0490.wav", "txt": "武汉网友胡琦的一条短信微博引起众人关注"} -{"key": "BAC009S0913W0491", "wav": "./aishell/wav/test/S0913/BAC009S0913W0491.wav", "txt": "记者联系上胡先生才知虚惊一场"} -{"key": "BAC009S0913W0492", "wav": "./aishell/wav/test/S0913/BAC009S0913W0492.wav", "txt": "现场有人喊有车要爆炸"} -{"key": "BAC009S0913W0493", "wav": "./aishell/wav/test/S0913/BAC009S0913W0493.wav", "txt": "事后才了解他们遭遇的只是普通连环车祸"} -{"key": "BAC009S0913W0494", "wav": "./aishell/wav/test/S0913/BAC009S0913W0494.wav", "txt": "游客打车被找四张同号假钞官方疑遇克隆车"} -{"key": "BAC009S0913W0495", "wav": "./aishell/wav/test/S0913/BAC009S0913W0495.wav", "txt": "其在出行成都打车时被司机找了四张同号的二元零元假币"} -{"key": "BAC009S0914W0121", "wav": "./aishell/wav/test/S0914/BAC009S0914W0121.wav", "txt": "境外个人购买应当符合当地政策规定"} -{"key": "BAC009S0914W0122", "wav": "./aishell/wav/test/S0914/BAC009S0914W0122.wav", "txt": "外资在华房地产投资限制松绑已成大势"} -{"key": "BAC009S0914W0123", "wav": "./aishell/wav/test/S0914/BAC009S0914W0123.wav", "txt": "在限限制外商投资产业目录中"} -{"key": "BAC009S0914W0124", "wav": "./aishell/wav/test/S0914/BAC009S0914W0124.wav", "txt": "已经删除了此前针对外商投资房地产的全部限制类条款"} -{"key": "BAC009S0914W0125", "wav": "./aishell/wav/test/S0914/BAC009S0914W0125.wav", "txt": "放开外资购买房产限制"} -{"key": "BAC009S0914W0126", "wav": "./aishell/wav/test/S0914/BAC009S0914W0126.wav", "txt": "外资只可以购买商铺写字楼等物业"} -{"key": "BAC009S0914W0127", "wav": "./aishell/wav/test/S0914/BAC009S0914W0127.wav", "txt": "普通住宅很可能仍将限购"} -{"key": "BAC009S0914W0128", "wav": "./aishell/wav/test/S0914/BAC009S0914W0128.wav", "txt": 
"而就昨日六部委松绑楼市限外令的情况来看"} -{"key": "BAC009S0914W0129", "wav": "./aishell/wav/test/S0914/BAC009S0914W0129.wav", "txt": "对于中国楼市的影响不必过于乐观"} -{"key": "BAC009S0914W0130", "wav": "./aishell/wav/test/S0914/BAC009S0914W0130.wav", "txt": "取消限外令将促进外企在华投资房地产"} -{"key": "BAC009S0914W0131", "wav": "./aishell/wav/test/S0914/BAC009S0914W0131.wav", "txt": "对于中国楼市有一定积极作用"} -{"key": "BAC009S0914W0132", "wav": "./aishell/wav/test/S0914/BAC009S0914W0132.wav", "txt": "特别是在海外热钱有外流预期的情况下"} -{"key": "BAC009S0914W0133", "wav": "./aishell/wav/test/S0914/BAC009S0914W0133.wav", "txt": "继续限制外资投资中国房地产已经不合时宜"} -{"key": "BAC009S0914W0134", "wav": "./aishell/wav/test/S0914/BAC009S0914W0134.wav", "txt": "外资占国内地产投资比例有限"} -{"key": "BAC009S0914W0135", "wav": "./aishell/wav/test/S0914/BAC009S0914W0135.wav", "txt": "此项政策对中国楼市影响有限"} -{"key": "BAC009S0914W0136", "wav": "./aishell/wav/test/S0914/BAC009S0914W0136.wav", "txt": "中国房地产学会副会长陈国强也认为"} -{"key": "BAC009S0914W0137", "wav": "./aishell/wav/test/S0914/BAC009S0914W0137.wav", "txt": "外资购房主要集中在一线城市和几个热点二线城市"} -{"key": "BAC009S0914W0138", "wav": "./aishell/wav/test/S0914/BAC009S0914W0138.wav", "txt": "而现在这类城市的房价已经很高"} -{"key": "BAC009S0914W0139", "wav": "./aishell/wav/test/S0914/BAC009S0914W0139.wav", "txt": "即使限外令放开也不会出现大规模外资买房"} -{"key": "BAC009S0914W0140", "wav": "./aishell/wav/test/S0914/BAC009S0914W0140.wav", "txt": "正处于筑底回暖阶段"} -{"key": "BAC009S0914W0141", "wav": "./aishell/wav/test/S0914/BAC009S0914W0141.wav", "txt": "主要还是依靠中国国内企业投资"} -{"key": "BAC009S0914W0142", "wav": "./aishell/wav/test/S0914/BAC009S0914W0142.wav", "txt": "虽然一线城市房价已出现反弹"} -{"key": "BAC009S0914W0143", "wav": "./aishell/wav/test/S0914/BAC009S0914W0143.wav", "txt": "但包括鄂尔多斯温州等地的去库存还是非常困难"} -{"key": "BAC009S0914W0144", "wav": "./aishell/wav/test/S0914/BAC009S0914W0144.wav", "txt": "七月份各线城市房价分化仍然明显"} -{"key": "BAC009S0914W0145", "wav": "./aishell/wav/test/S0914/BAC009S0914W0145.wav", "txt": "目前整体的宏观经济还是比较困难的"} -{"key": "BAC009S0914W0146", "wav": "./aishell/wav/test/S0914/BAC009S0914W0146.wav", "txt": "房地产的投资增速目前不到五百分之"} -{"key": "BAC009S0914W0148", "wav": "./aishell/wav/test/S0914/BAC009S0914W0148.wav", "txt": "因此开发还要继续坚定的开工和拿地的信心"} -{"key": "BAC009S0914W0149", "wav": "./aishell/wav/test/S0914/BAC009S0914W0149.wav", "txt": "这种分化情况会更剧烈"} -{"key": "BAC009S0914W0150", "wav": "./aishell/wav/test/S0914/BAC009S0914W0150.wav", "txt": "但是整体回暖和好转态势已经确定"} -{"key": "BAC009S0914W0151", "wav": "./aishell/wav/test/S0914/BAC009S0914W0151.wav", "txt": "与前年差不多这种状态"} -{"key": "BAC009S0914W0152", "wav": "./aishell/wav/test/S0914/BAC009S0914W0152.wav", "txt": "外资在华房地产投资购房限制被松绑"} -{"key": "BAC009S0914W0153", "wav": "./aishell/wav/test/S0914/BAC009S0914W0153.wav", "txt": "允许机构和个人在中国购"} -{"key": "BAC009S0914W0154", "wav": "./aishell/wav/test/S0914/BAC009S0914W0154.wav", "txt": "六部委调整房地产市场外资准入和管理政策"} -{"key": "BAC009S0914W0155", "wav": "./aishell/wav/test/S0914/BAC009S0914W0155.wav", "txt": "为促进房地产市场平稳健康发展"} -{"key": "BAC009S0914W0156", "wav": "./aishell/wav/test/S0914/BAC009S0914W0156.wav", "txt": "一外商投资房地产企业注册资本与投资总额比例"} -{"key": "BAC009S0914W0157", "wav": "./aishell/wav/test/S0914/BAC009S0914W0157.wav", "txt": "对于实施住房限购政策的城市"} -{"key": "BAC009S0914W0158", "wav": "./aishell/wav/test/S0914/BAC009S0914W0158.wav", "txt": "境外个人购房应当符合当地政策规定"} -{"key": "BAC009S0914W0159", "wav": "./aishell/wav/test/S0914/BAC009S0914W0159.wav", "txt": "优化和改进外商投资房地产管理"} -{"key": "BAC009S0914W0160", "wav": "./aishell/wav/test/S0914/BAC009S0914W0160.wav", "txt": "除上述政策调整以外"} -{"key": "BAC009S0914W0161", "wav": "./aishell/wav/test/S0914/BAC009S0914W0161.wav", 
"txt": "为促进房地产市场平稳健康发展"} -{"key": "BAC009S0914W0162", "wav": "./aishell/wav/test/S0914/BAC009S0914W0162.wav", "txt": "以及在中国境内工作学习的境外个人"} -{"key": "BAC009S0914W0163", "wav": "./aishell/wav/test/S0914/BAC009S0914W0163.wav", "txt": "可以购买符合实际需要的自用自住商品房"} -{"key": "BAC009S0914W0164", "wav": "./aishell/wav/test/S0914/BAC009S0914W0164.wav", "txt": "外商投资房地产企业注册资本与投资总额比例"} -{"key": "BAC009S0914W0165", "wav": "./aishell/wav/test/S0914/BAC009S0914W0165.wav", "txt": "将依照中外合资经营企业的相关暂行规定"} -{"key": "BAC009S0914W0166", "wav": "./aishell/wav/test/S0914/BAC009S0914W0166.wav", "txt": "中新网八月二十七日电据商务部官网公布的文件显示"} -{"key": "BAC009S0914W0167", "wav": "./aishell/wav/test/S0914/BAC009S0914W0167.wav", "txt": "取消外商投资房地产企业"} -{"key": "BAC009S0914W0168", "wav": "./aishell/wav/test/S0914/BAC009S0914W0168.wav", "txt": "六部门出台新政楼市限外政策放松"} -{"key": "BAC009S0914W0169", "wav": "./aishell/wav/test/S0914/BAC009S0914W0169.wav", "txt": "这来外资进入我国房地产市场最宽松的政策"} -{"key": "BAC009S0914W0170", "wav": "./aishell/wav/test/S0914/BAC009S0914W0170.wav", "txt": "这份只有五百多字的通知印发于八月十九日"} -{"key": "BAC009S0914W0171", "wav": "./aishell/wav/test/S0914/BAC009S0914W0171.wav", "txt": "规定外商投资建立房地产企业"} -{"key": "BAC009S0914W0172", "wav": "./aishell/wav/test/S0914/BAC009S0914W0172.wav", "txt": "投资总额超过一千万美元含一千万美元的"} -{"key": "BAC009S0914W0173", "wav": "./aishell/wav/test/S0914/BAC009S0914W0173.wav", "txt": "注册资本金不得低于投资总额的百分之五十"} -{"key": "BAC009S0914W0174", "wav": "./aishell/wav/test/S0914/BAC009S0914W0174.wav", "txt": "外商投资房地产企业注册资本金未全部缴付的"} -{"key": "BAC009S0914W0175", "wav": "./aishell/wav/test/S0914/BAC009S0914W0175.wav", "txt": "未取得国有土地使用证的"} -{"key": "BAC009S0914W0176", "wav": "./aishell/wav/test/S0914/BAC009S0914W0176.wav", "txt": "或开发项目资本金未达到项目投资总额百分之五"} -{"key": "BAC009S0914W0177", "wav": "./aishell/wav/test/S0914/BAC009S0914W0177.wav", "txt": "不得办理境内境外贷款"} -{"key": "BAC009S0914W0178", "wav": "./aishell/wav/test/S0914/BAC009S0914W0178.wav", "txt": "外汇管理部门不予批准该企业的外汇借款结汇"} -{"key": "BAC009S0914W0179", "wav": "./aishell/wav/test/S0914/BAC009S0914W0179.wav", "txt": "不得购买非自用非自住商品房"} -{"key": "BAC009S0914W0180", "wav": "./aishell/wav/test/S0914/BAC009S0914W0180.wav", "txt": "港澳台地区居民和华侨因生活需要"} -{"key": "BAC009S0914W0181", "wav": "./aishell/wav/test/S0914/BAC009S0914W0181.wav", "txt": "可在境内限购一定面积的自住商品房"} -{"key": "BAC009S0914W0182", "wav": "./aishell/wav/test/S0914/BAC009S0914W0182.wav", "txt": "二到二"} -{"key": "BAC009S0914W0183", "wav": "./aishell/wav/test/S0914/BAC009S0914W0183.wav", "txt": "我国楼市正处在急速上升通道"} -{"key": "BAC009S0914W0184", "wav": "./aishell/wav/test/S0914/BAC009S0914W0184.wav", "txt": "大量外资希望进入我国市场"} -{"key": "BAC009S0914W0185", "wav": "./aishell/wav/test/S0914/BAC009S0914W0185.wav", "txt": "面对楼市中急剧增长的投资热情"} -{"key": "BAC009S0914W0186", "wav": "./aishell/wav/test/S0914/BAC009S0914W0186.wav", "txt": "对购买住房的数量也未做要求"} -{"key": "BAC009S0914W0187", "wav": "./aishell/wav/test/S0914/BAC009S0914W0187.wav", "txt": "带动和规范民间资本进入农产品流通领域"} -{"key": "BAC009S0914W0188", "wav": "./aishell/wav/test/S0914/BAC009S0914W0188.wav", "txt": "完善农产品流通税收政策"} -{"key": "BAC009S0914W0189", "wav": "./aishell/wav/test/S0914/BAC009S0914W0189.wav", "txt": "免征蔬菜流通环节增值税"} -{"key": "BAC009S0914W0190", "wav": "./aishell/wav/test/S0914/BAC009S0914W0190.wav", "txt": "加大涉农贷款投放力度"} -{"key": "BAC009S0914W0191", "wav": "./aishell/wav/test/S0914/BAC009S0914W0191.wav", "txt": "可按作价出资入股方式办理理用地手续"} -{"key": "BAC009S0914W0192", "wav": "./aishell/wav/test/S0914/BAC009S0914W0192.wav", "txt": "但禁止改变用途和性质"} -{"key": "BAC009S0914W0193", "wav": "./aishell/wav/test/S0914/BAC009S0914W0193.wav", 
"txt": "严厉打击农产品投机炒作"} -{"key": "BAC009S0914W0194", "wav": "./aishell/wav/test/S0914/BAC009S0914W0194.wav", "txt": "做好外资并购大型农产品批发市场的安全审查"} -{"key": "BAC009S0914W0195", "wav": "./aishell/wav/test/S0914/BAC009S0914W0195.wav", "txt": "严格执行鲜活农产品运输绿色通道政策"} -{"key": "BAC009S0914W0196", "wav": "./aishell/wav/test/S0914/BAC009S0914W0196.wav", "txt": "加快农产品流通标准体系建设"} -{"key": "BAC009S0914W0197", "wav": "./aishell/wav/test/S0914/BAC009S0914W0197.wav", "txt": "各地各部门加强组织领导"} -{"key": "BAC009S0914W0198", "wav": "./aishell/wav/test/S0914/BAC009S0914W0198.wav", "txt": "农产品产销对接的经验介绍"} -{"key": "BAC009S0914W0199", "wav": "./aishell/wav/test/S0914/BAC009S0914W0199.wav", "txt": "农产品产销合作社简介"} -{"key": "BAC009S0914W0200", "wav": "./aishell/wav/test/S0914/BAC009S0914W0200.wav", "txt": "海南农产品流通现状"} -{"key": "BAC009S0914W0201", "wav": "./aishell/wav/test/S0914/BAC009S0914W0201.wav", "txt": "农产品流通加工标准化"} -{"key": "BAC009S0914W0202", "wav": "./aishell/wav/test/S0914/BAC009S0914W0202.wav", "txt": "中国对农产品流通政策"} -{"key": "BAC009S0914W0203", "wav": "./aishell/wav/test/S0914/BAC009S0914W0203.wav", "txt": "温家宝主持召开国务院常务会议"} -{"key": "BAC009S0914W0204", "wav": "./aishell/wav/test/S0914/BAC009S0914W0204.wav", "txt": "研究部署在城市优先发展公共交通"} -{"key": "BAC009S0914W0205", "wav": "./aishell/wav/test/S0914/BAC009S0914W0205.wav", "txt": "审议通过缺陷汽车产品召回管理条例草案"} -{"key": "BAC009S0914W0206", "wav": "./aishell/wav/test/S0914/BAC009S0914W0206.wav", "txt": "国务院总理温家宝主持召开国务院常务会议"} -{"key": "BAC009S0914W0207", "wav": "./aishell/wav/test/S0914/BAC009S0914W0207.wav", "txt": "研究部署在城市优先发展公共交通"} -{"key": "BAC009S0914W0208", "wav": "./aishell/wav/test/S0914/BAC009S0914W0208.wav", "txt": "审议通过缺陷汽车产品召回管理条例草案"} -{"key": "BAC009S0914W0209", "wav": "./aishell/wav/test/S0914/BAC009S0914W0209.wav", "txt": "为加快发展中等职业教育"} -{"key": "BAC009S0914W0210", "wav": "./aishell/wav/test/S0914/BAC009S0914W0210.wav", "txt": "自秋季学期起"} -{"key": "BAC009S0914W0211", "wav": "./aishell/wav/test/S0914/BAC009S0914W0211.wav", "txt": "多数城市公共交通出行比例偏低"} -{"key": "BAC009S0914W0212", "wav": "./aishell/wav/test/S0914/BAC009S0914W0212.wav", "txt": "为从根本上缓解交通拥堵出行不便环境污染等矛盾"} -{"key": "BAC009S0914W0213", "wav": "./aishell/wav/test/S0914/BAC009S0914W0213.wav", "txt": "必须树立公共交通优先发展理念"} -{"key": "BAC009S0914W0214", "wav": "./aishell/wav/test/S0914/BAC009S0914W0214.wav", "txt": "将公共交通放在城市交通发展的首要位置"} -{"key": "BAC009S0914W0215", "wav": "./aishell/wav/test/S0914/BAC009S0914W0215.wav", "txt": "加快构建以公共交通为主"} -{"key": "BAC009S0914W0216", "wav": "./aishell/wav/test/S0914/BAC009S0914W0216.wav", "txt": "同时改善步行自行车出行条件"} -{"key": "BAC009S0914W0217", "wav": "./aishell/wav/test/S0914/BAC009S0914W0217.wav", "txt": "城市综合交通体系规划应明确公共交通优先发展原则"} -{"key": "BAC009S0914W0218", "wav": "./aishell/wav/test/S0914/BAC009S0914W0218.wav", "txt": "城市公共交通规划要科学布局线线网"} -{"key": "BAC009S0914W0219", "wav": "./aishell/wav/test/S0914/BAC009S0914W0219.wav", "txt": "促进城市内外交通便利衔接和城乡公共交通一体化发展"} -{"key": "BAC009S0914W0220", "wav": "./aishell/wav/test/S0914/BAC009S0914W0220.wav", "txt": "加快基础设施建设"} -{"key": "BAC009S0914W0221", "wav": "./aishell/wav/test/S0914/BAC009S0914W0221.wav", "txt": "提升公共交通设施装备水平"} -{"key": "BAC009S0914W0222", "wav": "./aishell/wav/test/S0914/BAC009S0914W0222.wav", "txt": "提高公共交通舒适性"} -{"key": "BAC009S0914W0223", "wav": "./aishell/wav/test/S0914/BAC009S0914W0223.wav", "txt": "将其纳入旧城改造和新城建设规划"} -{"key": "BAC009S0914W0224", "wav": "./aishell/wav/test/S0914/BAC009S0914W0224.wav", "txt": "加强公共交通用地综合开开发"} -{"key": "BAC009S0914W0225", "wav": "./aishell/wav/test/S0914/BAC009S0914W0225.wav", "txt": "对新建公共交通设施用地的地上地下空间"} 
-{"key": "BAC009S0914W0226", "wav": "./aishell/wav/test/S0914/BAC009S0914W0226.wav", "txt": "按照市场化原则实施土地综合开发"} -{"key": "BAC009S0914W0227", "wav": "./aishell/wav/test/S0914/BAC009S0914W0227.wav", "txt": "收益用于公共交通基础设施建设和弥补运营亏损"} -{"key": "BAC009S0914W0228", "wav": "./aishell/wav/test/S0914/BAC009S0914W0228.wav", "txt": "加大政府投入"} -{"key": "BAC009S0914W0229", "wav": "./aishell/wav/test/S0914/BAC009S0914W0229.wav", "txt": "城市政府要将公共交通发展资金纳入公共财政体系"} -{"key": "BAC009S0914W0230", "wav": "./aishell/wav/test/S0914/BAC009S0914W0230.wav", "txt": "对城市公共交通企业实行税收优惠政策"} -{"key": "BAC009S0914W0231", "wav": "./aishell/wav/test/S0914/BAC009S0914W0231.wav", "txt": "落实对城市公共交通行业的成品油价格补贴政策"} -{"key": "BAC009S0914W0232", "wav": "./aishell/wav/test/S0914/BAC009S0914W0232.wav", "txt": "对城市轨道交通运营企业实行电价优惠"} -{"key": "BAC009S0914W0233", "wav": "./aishell/wav/test/S0914/BAC009S0914W0233.wav", "txt": "拓宽投资渠道"} -{"key": "BAC009S0914W0234", "wav": "./aishell/wav/test/S0914/BAC009S0914W0234.wav", "txt": "吸引和鼓励社会资金参与公共交通基础设施建设和运营"} -{"key": "BAC009S0914W0235", "wav": "./aishell/wav/test/S0914/BAC009S0914W0235.wav", "txt": "保障公交路权优先"} -{"key": "BAC009S0914W0236", "wav": "./aishell/wav/test/S0914/BAC009S0914W0236.wav", "txt": "增加划设城市公共交通优先车道"} -{"key": "BAC009S0914W0237", "wav": "./aishell/wav/test/S0914/BAC009S0914W0237.wav", "txt": "允许机场巴士校车班车使用公共交通优先车道"} -{"key": "BAC009S0914W0238", "wav": "./aishell/wav/test/S0914/BAC009S0914W0238.wav", "txt": "加强公共交通优先车道的监控和管理"} -{"key": "BAC009S0914W0239", "wav": "./aishell/wav/test/S0914/BAC009S0914W0239.wav", "txt": "健全安全管理制度"} -{"key": "BAC009S0914W0240", "wav": "./aishell/wav/test/S0914/BAC009S0914W0240.wav", "txt": "规范技术和产品标准"} -{"key": "BAC009S0914W0241", "wav": "./aishell/wav/test/S0914/BAC009S0914W0241.wav", "txt": "构建服务质量评价指标体系"} -{"key": "BAC009S0914W0242", "wav": "./aishell/wav/test/S0914/BAC009S0914W0242.wav", "txt": "规范公共交通重大决策程序"} -{"key": "BAC009S0914W0243", "wav": "./aishell/wav/test/S0914/BAC009S0914W0243.wav", "txt": "实行线网规划编制公示制度和运营价格听证制度"} -{"key": "BAC009S0914W0244", "wav": "./aishell/wav/test/S0914/BAC009S0914W0244.wav", "txt": "建立城市公共交通运营成本和服务质量信息公开制度"} -{"key": "BAC009S0914W0245", "wav": "./aishell/wav/test/S0914/BAC009S0914W0245.wav", "txt": "应当立即停止生产销售进口"} -{"key": "BAC009S0914W0246", "wav": "./aishell/wav/test/S0914/BAC009S0914W0246.wav", "txt": "由其生产者实施召回"} -{"key": "BAC009S0914W0247", "wav": "./aishell/wav/test/S0914/BAC009S0914W0247.wav", "txt": "并及时发布产品缺陷及信息"} -{"key": "BAC009S0914W0248", "wav": "./aishell/wav/test/S0914/BAC009S0914W0248.wav", "txt": "对实施召回的缺陷汽车产品"} -{"key": "BAC009S0914W0249", "wav": "./aishell/wav/test/S0914/BAC009S0914W0249.wav", "txt": "生产者应当及时采取措施消除缺陷"} -{"key": "BAC009S0914W0250", "wav": "./aishell/wav/test/S0914/BAC009S0914W0250.wav", "txt": "会议还研究了其他事项"} -{"key": "BAC009S0914W0251", "wav": "./aishell/wav/test/S0914/BAC009S0914W0251.wav", "txt": "国务院将对各类交易场所清理整顿"} -{"key": "BAC009S0914W0252", "wav": "./aishell/wav/test/S0914/BAC009S0914W0252.wav", "txt": "国务院近期将开展对各类交易场所的清理整顿工作"} -{"key": "BAC009S0914W0254", "wav": "./aishell/wav/test/S0914/BAC009S0914W0254.wav", "txt": "而且这也可以看作是苹果利用硬件优势"} -{"key": "BAC009S0914W0255", "wav": "./aishell/wav/test/S0914/BAC009S0914W0255.wav", "txt": "衍生出软件服务的又一重要举措"} -{"key": "BAC009S0914W0256", "wav": "./aishell/wav/test/S0914/BAC009S0914W0256.wav", "txt": "又如何和政府银行搞好关系"} -{"key": "BAC009S0914W0257", "wav": "./aishell/wav/test/S0914/BAC009S0914W0257.wav", "txt": "证明他们真得没有手机用户信息"} -{"key": "BAC009S0914W0258", "wav": "./aishell/wav/test/S0914/BAC009S0914W0258.wav", "txt": "苹果靠什么颠复移动支付"} -{"key": 
"BAC009S0914W0259", "wav": "./aishell/wav/test/S0914/BAC009S0914W0259.wav", "txt": "苹果推出的每一款新产品都不免要引发大讨论"} -{"key": "BAC009S0914W0260", "wav": "./aishell/wav/test/S0914/BAC009S0914W0260.wav", "txt": "才能显得像个知识分子"} -{"key": "BAC009S0914W0261", "wav": "./aishell/wav/test/S0914/BAC009S0914W0261.wav", "txt": "不仅树立了良好的品牌形象"} -{"key": "BAC009S0914W0262", "wav": "./aishell/wav/test/S0914/BAC009S0914W0262.wav", "txt": "也向全世界推广了一种趋之若鹜的文化"} -{"key": "BAC009S0914W0263", "wav": "./aishell/wav/test/S0914/BAC009S0914W0263.wav", "txt": "他们真得赚了很多钱"} -{"key": "BAC009S0914W0264", "wav": "./aishell/wav/test/S0914/BAC009S0914W0264.wav", "txt": "这些特质让库克基本上实现了财务自由"} -{"key": "BAC009S0914W0265", "wav": "./aishell/wav/test/S0914/BAC009S0914W0265.wav", "txt": "这对于一家巨型企业是非常难能可贵的"} -{"key": "BAC009S0914W0266", "wav": "./aishell/wav/test/S0914/BAC009S0914W0266.wav", "txt": "而土豪和穷鬼做生意的最大区别就是"} -{"key": "BAC009S0914W0267", "wav": "./aishell/wav/test/S0914/BAC009S0914W0267.wav", "txt": "而是会更加关注产品本身"} -{"key": "BAC009S0914W0268", "wav": "./aishell/wav/test/S0914/BAC009S0914W0268.wav", "txt": "以及是否能提升他们的历史地位"} -{"key": "BAC009S0914W0270", "wav": "./aishell/wav/test/S0914/BAC009S0914W0270.wav", "txt": "他们没有必要着急回本"} -{"key": "BAC009S0914W0271", "wav": "./aishell/wav/test/S0914/BAC009S0914W0271.wav", "txt": "更大的野心在于深刻变革人类的支付习惯"} -{"key": "BAC009S0914W0272", "wav": "./aishell/wav/test/S0914/BAC009S0914W0272.wav", "txt": "这种状态是苹果颠复现有市场格局的根基"} -{"key": "BAC009S0914W0273", "wav": "./aishell/wav/test/S0914/BAC009S0914W0273.wav", "txt": "除却土豪式的生意属性之外"} -{"key": "BAC009S0914W0276", "wav": "./aishell/wav/test/S0914/BAC009S0914W0276.wav", "txt": "系统会使用不同编码来转移用户凭据和支付数据"} -{"key": "BAC009S0914W0277", "wav": "./aishell/wav/test/S0914/BAC009S0914W0277.wav", "txt": "整个过程基于安全元素芯片"} -{"key": "BAC009S0914W0278", "wav": "./aishell/wav/test/S0914/BAC009S0914W0278.wav", "txt": "这种芯片不会直接发送用户敏感信息"} -{"key": "BAC009S0914W0279", "wav": "./aishell/wav/test/S0914/BAC009S0914W0279.wav", "txt": "而是将其转化成唯一的临时编码"} -{"key": "BAC009S0914W0280", "wav": "./aishell/wav/test/S0914/BAC009S0914W0280.wav", "txt": "可有效降低信息泄漏的风险其次"} -{"key": "BAC009S0914W0284", "wav": "./aishell/wav/test/S0914/BAC009S0914W0284.wav", "txt": "苹果积累了海量的绑定信息卡用户"} -{"key": "BAC009S0914W0285", "wav": "./aishell/wav/test/S0914/BAC009S0914W0285.wav", "txt": "这些资源的特点不单单是数目庞大"} -{"key": "BAC009S0914W0286", "wav": "./aishell/wav/test/S0914/BAC009S0914W0286.wav", "txt": "而且苹果最早一批的用户积累"} -{"key": "BAC009S0914W0287", "wav": "./aishell/wav/test/S0914/BAC009S0914W0287.wav", "txt": "囊括了大量的优质资源"} -{"key": "BAC009S0914W0288", "wav": "./aishell/wav/test/S0914/BAC009S0914W0288.wav", "txt": "甚至包括了一些明星意见领袖和政府官员"} -{"key": "BAC009S0914W0290", "wav": "./aishell/wav/test/S0914/BAC009S0914W0290.wav", "txt": "更是一种文化和习惯的推广者"} -{"key": "BAC009S0914W0291", "wav": "./aishell/wav/test/S0914/BAC009S0914W0291.wav", "txt": "柯振东入狱期间的囚服都能在淘宝上热卖"} -{"key": "BAC009S0914W0292", "wav": "./aishell/wav/test/S0914/BAC009S0914W0292.wav", "txt": "要是詹妮弗劳伦斯也能在微博上说这个应用不错"} -{"key": "BAC009S0914W0293", "wav": "./aishell/wav/test/S0914/BAC009S0914W0293.wav", "txt": "一定会有立竿见影的推广效果"} -{"key": "BAC009S0914W0294", "wav": "./aishell/wav/test/S0914/BAC009S0914W0294.wav", "txt": "也在所不惜的最后"} -{"key": "BAC009S0914W0296", "wav": "./aishell/wav/test/S0914/BAC009S0914W0296.wav", "txt": "早在九月九日发布会上"} -{"key": "BAC009S0914W0297", "wav": "./aishell/wav/test/S0914/BAC009S0914W0297.wav", "txt": "苹果就公布了合作伙伴"} -{"key": "BAC009S0914W0298", "wav": "./aishell/wav/test/S0914/BAC009S0914W0298.wav", "txt": "包括迪斯尼耐克麦当劳梅西百货公司等巨头企业"} -{"key": "BAC009S0914W0300", 
"wav": "./aishell/wav/test/S0914/BAC009S0914W0300.wav", "txt": "从这些零售商的等级来看"} -{"key": "BAC009S0914W0301", "wav": "./aishell/wav/test/S0914/BAC009S0914W0301.wav", "txt": "库克团队应当是花费了大量精力"} -{"key": "BAC009S0914W0302", "wav": "./aishell/wav/test/S0914/BAC009S0914W0302.wav", "txt": "移动支付肯定会有井喷式的发展"} -{"key": "BAC009S0914W0303", "wav": "./aishell/wav/test/S0914/BAC009S0914W0303.wav", "txt": "现阶段管理创新和组织再造比任何的创新都重要"} -{"key": "BAC009S0914W0304", "wav": "./aishell/wav/test/S0914/BAC009S0914W0304.wav", "txt": "美的美的在二零一四年三月正式发布智慧家庭战略"} -{"key": "BAC009S0914W0305", "wav": "./aishell/wav/test/S0914/BAC009S0914W0305.wav", "txt": "未来将搭建空气水营养等智能管家平台"} -{"key": "BAC009S0914W0306", "wav": "./aishell/wav/test/S0914/BAC009S0914W0306.wav", "txt": "事业部制一直是美的快速成长的法宝"} -{"key": "BAC009S0914W0307", "wav": "./aishell/wav/test/S0914/BAC009S0914W0307.wav", "txt": "一定程度上影响了资源整合的效率"} -{"key": "BAC009S0914W0308", "wav": "./aishell/wav/test/S0914/BAC009S0914W0308.wav", "txt": "美的已将风扇加湿器等空气类产品"} -{"key": "BAC009S0914W0309", "wav": "./aishell/wav/test/S0914/BAC009S0914W0309.wav", "txt": "归到家用空调事业部旗下"} -{"key": "BAC009S0914W0310", "wav": "./aishell/wav/test/S0914/BAC009S0914W0310.wav", "txt": "围绕几大智能管家平台"} -{"key": "BAC009S0914W0311", "wav": "./aishell/wav/test/S0914/BAC009S0914W0311.wav", "txt": "美的整合事业部精简组织架构"} -{"key": "BAC009S0914W0312", "wav": "./aishell/wav/test/S0914/BAC009S0914W0312.wav", "txt": "也是顺应互联网时代管理扁平化的趋势"} -{"key": "BAC009S0914W0314", "wav": "./aishell/wav/test/S0914/BAC009S0914W0314.wav", "txt": "下称美的内部的组织架构二零一五年加大了调整力度"} -{"key": "BAC009S0914W0316", "wav": "./aishell/wav/test/S0914/BAC009S0914W0316.wav", "txt": "每日经济新闻记者从美的家用空调事业部了解到"} -{"key": "BAC009S0914W0317", "wav": "./aishell/wav/test/S0914/BAC009S0914W0317.wav", "txt": "自二零一一年事业部启动自动化升级至今的四年里"} -{"key": "BAC009S0914W0318", "wav": "./aishell/wav/test/S0914/BAC009S0914W0318.wav", "txt": "工人数量减少近一半"} -{"key": "BAC009S0914W0319", "wav": "./aishell/wav/test/S0914/BAC009S0914W0319.wav", "txt": "美的家用空调事业部制造副总裁乌守保对记者表示"} -{"key": "BAC009S0914W0320", "wav": "./aishell/wav/test/S0914/BAC009S0914W0320.wav", "txt": "到二零一八年美的空调营收到达一千亿元规划时"} -{"key": "BAC009S0914W0321", "wav": "./aishell/wav/test/S0914/BAC009S0914W0321.wav", "txt": "员工数量将减至两万人"} -{"key": "BAC009S0914W0322", "wav": "./aishell/wav/test/S0914/BAC009S0914W0322.wav", "txt": "虽然投入产生问题以及机器人后期运行维护等"} -{"key": "BAC009S0914W0323", "wav": "./aishell/wav/test/S0914/BAC009S0914W0323.wav", "txt": "都是家电企业自动化升级需要面临的挑战"} -{"key": "BAC009S0914W0324", "wav": "./aishell/wav/test/S0914/BAC009S0914W0324.wav", "txt": "自动化是未来唯一出路"} -{"key": "BAC009S0914W0325", "wav": "./aishell/wav/test/S0914/BAC009S0914W0325.wav", "txt": "四年来机器人代替人工近半"} -{"key": "BAC009S0914W0326", "wav": "./aishell/wav/test/S0914/BAC009S0914W0326.wav", "txt": "美的家用空调事业提出精品战略"} -{"key": "BAC009S0914W0327", "wav": "./aishell/wav/test/S0914/BAC009S0914W0327.wav", "txt": "机器人应用也进一步提速"} -{"key": "BAC009S0914W0328", "wav": "./aishell/wav/test/S0914/BAC009S0914W0328.wav", "txt": "二零一一年美的空调达到五百亿元营收规模时"} -{"key": "BAC009S0914W0329", "wav": "./aishell/wav/test/S0914/BAC009S0914W0329.wav", "txt": "工人数量超过五万以上"} -{"key": "BAC009S0914W0330", "wav": "./aishell/wav/test/S0914/BAC009S0914W0330.wav", "txt": "空调业务总营收接近七百亿元"} -{"key": "BAC009S0914W0331", "wav": "./aishell/wav/test/S0914/BAC009S0914W0331.wav", "txt": "工人数量已经缩减至二点六万人"} -{"key": "BAC009S0914W0332", "wav": "./aishell/wav/test/S0914/BAC009S0914W0332.wav", "txt": "除了在顺德工厂建成全自动遥控器生产线外"} -{"key": "BAC009S0914W0333", "wav": "./aishell/wav/test/S0914/BAC009S0914W0333.wav", "txt": "美的空调还在其他地区工厂建有三条全自动生产线"} 
-{"key": "BAC009S0914W0334", "wav": "./aishell/wav/test/S0914/BAC009S0914W0334.wav", "txt": "经过前几年自动化生产线升级改造"} -{"key": "BAC009S0914W0335", "wav": "./aishell/wav/test/S0914/BAC009S0914W0335.wav", "txt": "美的空调工厂的注塑车间"} -{"key": "BAC009S0914W0336", "wav": "./aishell/wav/test/S0914/BAC009S0914W0336.wav", "txt": "在无开灯照明的情况下也能正常稳定运行"} -{"key": "BAC009S0914W0337", "wav": "./aishell/wav/test/S0914/BAC009S0914W0337.wav", "txt": "钣金冲压已实现无人运行"} -{"key": "BAC009S0914W0338", "wav": "./aishell/wav/test/S0914/BAC009S0914W0338.wav", "txt": "而在昨天对阵古巴队的比赛中"} -{"key": "BAC009S0914W0339", "wav": "./aishell/wav/test/S0914/BAC009S0914W0339.wav", "txt": "中国队教练组还是做出了让朱婷继续休战的抉择"} -{"key": "BAC009S0914W0340", "wav": "./aishell/wav/test/S0914/BAC009S0914W0340.wav", "txt": "来自北汽女排的主攻手刘晓彤取代朱婷的位置首发出场"} -{"key": "BAC009S0914W0341", "wav": "./aishell/wav/test/S0914/BAC009S0914W0341.wav", "txt": "除了第一局在开局阶段古巴队一度领先外"} -{"key": "BAC009S0914W0342", "wav": "./aishell/wav/test/S0914/BAC009S0914W0342.wav", "txt": "比赛的节奏始终被中国队控制在手中"} -{"key": "BAC009S0914W0343", "wav": "./aishell/wav/test/S0914/BAC009S0914W0343.wav", "txt": "中国队直落三局零封对手"} -{"key": "BAC009S0914W0344", "wav": "./aishell/wav/test/S0914/BAC009S0914W0344.wav", "txt": "曾春蕾和张常宁均拿到十六分"} -{"key": "BAC009S0914W0345", "wav": "./aishell/wav/test/S0914/BAC009S0914W0345.wav", "txt": "俄罗斯美国和日本三队均零封对手"} -{"key": "BAC009S0914W0346", "wav": "./aishell/wav/test/S0914/BAC009S0914W0346.wav", "txt": "此轮战罢后积分榜前四名排位没有任何变化"} -{"key": "BAC009S0914W0347", "wav": "./aishell/wav/test/S0914/BAC009S0914W0347.wav", "txt": "俄罗斯队十七分居榜首"} -{"key": "BAC009S0914W0348", "wav": "./aishell/wav/test/S0914/BAC009S0914W0348.wav", "txt": "美国队十六分排第二"} -{"key": "BAC009S0914W0349", "wav": "./aishell/wav/test/S0914/BAC009S0914W0349.wav", "txt": "日本和中国同积十五分"} -{"key": "BAC009S0914W0350", "wav": "./aishell/wav/test/S0914/BAC009S0914W0350.wav", "txt": "日本以小分优势暂列第三位"} -{"key": "BAC009S0914W0351", "wav": "./aishell/wav/test/S0914/BAC009S0914W0351.wav", "txt": "中国队将迎战冈山赛区的第二个对手肯尼亚队"} -{"key": "BAC009S0914W0352", "wav": "./aishell/wav/test/S0914/BAC009S0914W0352.wav", "txt": "中国女排昨天下午在松本迎战韩国队"} -{"key": "BAC009S0914W0353", "wav": "./aishell/wav/test/S0914/BAC009S0914W0353.wav", "txt": "主攻手朱婷不慎扭伤脚踝后依然带伤奋战"} -{"key": "BAC009S0914W0354", "wav": "./aishell/wav/test/S0914/BAC009S0914W0354.wav", "txt": "最终中国队以三比一力战韩国队全取三分"} -{"key": "BAC009S0914W0355", "wav": "./aishell/wav/test/S0914/BAC009S0914W0355.wav", "txt": "中韩之战中国队首发再次变阵"} -{"key": "BAC009S0914W0356", "wav": "./aishell/wav/test/S0914/BAC009S0914W0356.wav", "txt": "二传丁霞和主攻刘晏含取代了沈静思和张常宁的位置"} -{"key": "BAC009S0914W0357", "wav": "./aishell/wav/test/S0914/BAC009S0914W0357.wav", "txt": "张常宁则取代曾春蕾站在接应的位置上"} -{"key": "BAC009S0914W0358", "wav": "./aishell/wav/test/S0914/BAC009S0914W0358.wav", "txt": "中国队迅速调整阵容"} -{"key": "BAC009S0914W0359", "wav": "./aishell/wav/test/S0914/BAC009S0914W0359.wav", "txt": "逐渐控制住了局面并连扳两局以二比一优先"} -{"key": "BAC009S0914W0360", "wav": "./aishell/wav/test/S0914/BAC009S0914W0360.wav", "txt": "关键的第四局一开始中国队便发生了意外"} -{"key": "BAC009S0914W0361", "wav": "./aishell/wav/test/S0914/BAC009S0914W0361.wav", "txt": "一脸痛苦的朱婷当即被换下场"} -{"key": "BAC009S0914W0362", "wav": "./aishell/wav/test/S0914/BAC009S0914W0362.wav", "txt": "失去了最稳定的得分手之后"} -{"key": "BAC009S0914W0363", "wav": "./aishell/wav/test/S0914/BAC009S0914W0363.wav", "txt": "中国队进攻火力明显减弱"} -{"key": "BAC009S0914W0364", "wav": "./aishell/wav/test/S0914/BAC009S0914W0364.wav", "txt": "而看到了希望的韩国队也趁机拼命反击"} -{"key": "BAC009S0914W0365", "wav": "./aishell/wav/test/S0914/BAC009S0914W0365.wav", "txt": 
"当打到一三比一四中国队落后一分时"} -{"key": "BAC009S0914W0366", "wav": "./aishell/wav/test/S0914/BAC009S0914W0366.wav", "txt": "在场下接受完队医高压包扎后的朱婷请命上场"} -{"key": "BAC009S0914W0367", "wav": "./aishell/wav/test/S0914/BAC009S0914W0367.wav", "txt": "虽然扣球落地后朱婷依然一瘸一拐"} -{"key": "BAC009S0914W0368", "wav": "./aishell/wav/test/S0914/BAC009S0914W0368.wav", "txt": "见此情景韩国队的信心受到了打击"} -{"key": "BAC009S0914W0369", "wav": "./aishell/wav/test/S0914/BAC009S0914W0369.wav", "txt": "尽管也一度以二一比一七领先四分之多"} -{"key": "BAC009S0914W0370", "wav": "./aishell/wav/test/S0914/BAC009S0914W0370.wav", "txt": "但朱婷与队友们合力打出了一波八比二的高潮"} -{"key": "BAC009S0914W0371", "wav": "./aishell/wav/test/S0914/BAC009S0914W0371.wav", "txt": "最终中国队以二五比二三拿下第四局"} -{"key": "BAC009S0914W0372", "wav": "./aishell/wav/test/S0914/BAC009S0914W0372.wav", "txt": "以三比一胜出拿到了宝贵的三个积分"} -{"key": "BAC009S0914W0373", "wav": "./aishell/wav/test/S0914/BAC009S0914W0373.wav", "txt": "俄俄罗斯队以全胜战绩列积分榜首位"} -{"key": "BAC009S0914W0374", "wav": "./aishell/wav/test/S0914/BAC009S0914W0374.wav", "txt": "日本与美国同积十十分暂列二三两位"} -{"key": "BAC009S0914W0375", "wav": "./aishell/wav/test/S0914/BAC009S0914W0375.wav", "txt": "中国和多米尼加同积九分排在第四和第五位"} -{"key": "BAC009S0914W0376", "wav": "./aishell/wav/test/S0914/BAC009S0914W0376.wav", "txt": "今天中国队将迎战第一阶段的最后一个对手秘鲁队"} -{"key": "BAC009S0914W0377", "wav": "./aishell/wav/test/S0914/BAC009S0914W0377.wav", "txt": "中国女排三十一日本四夺世界杯冠军直通里约奥运"} -{"key": "BAC009S0914W0378", "wav": "./aishell/wav/test/S0914/BAC009S0914W0378.wav", "txt": "女排三十一日本进军里约众将欢度欢庆"} -{"key": "BAC009S0914W0379", "wav": "./aishell/wav/test/S0914/BAC009S0914W0379.wav", "txt": "夺冠的同时摘得本次世界杯的冠军"} -{"key": "BAC009S0914W0380", "wav": "./aishell/wav/test/S0914/BAC009S0914W0380.wav", "txt": "同时拿到了明年里约奥运会的入场券"} -{"key": "BAC009S0914W0381", "wav": "./aishell/wav/test/S0914/BAC009S0914W0381.wav", "txt": "是全场得分最高的运动员"} -{"key": "BAC009S0914W0382", "wav": "./aishell/wav/test/S0914/BAC009S0914W0382.wav", "txt": "也让这位一九九四年出生的河南妹子"} -{"key": "BAC009S0914W0383", "wav": "./aishell/wav/test/S0914/BAC009S0914W0383.wav", "txt": "逐步成长为中国女排的新核心"} -{"key": "BAC009S0914W0385", "wav": "./aishell/wav/test/S0914/BAC009S0914W0385.wav", "txt": "在今年的亚锦赛夺冠后"} -{"key": "BAC009S0914W0386", "wav": "./aishell/wav/test/S0914/BAC009S0914W0386.wav", "txt": "关于中国队过于依赖朱婷的言论不少"} -{"key": "BAC009S0914W0387", "wav": "./aishell/wav/test/S0914/BAC009S0914W0387.wav", "txt": "本赛季调进张常宁就是郎平为朱婷解压的一个表现"} -{"key": "BAC009S0914W0388", "wav": "./aishell/wav/test/S0914/BAC009S0914W0388.wav", "txt": "加上惠若琪因伤缺席本届世界杯"} -{"key": "BAC009S0914W0389", "wav": "./aishell/wav/test/S0914/BAC009S0914W0389.wav", "txt": "张常宁的幼稚嫩显然还不能立即挑起大梁"} -{"key": "BAC009S0914W0390", "wav": "./aishell/wav/test/S0914/BAC009S0914W0390.wav", "txt": "这支女排的暴露性强攻基本上都是靠朱婷打"} -{"key": "BAC009S0914W0391", "wav": "./aishell/wav/test/S0914/BAC009S0914W0391.wav", "txt": "郎平也认为这样去打世界高水平的球队是不够的"} -{"key": "BAC009S0914W0392", "wav": "./aishell/wav/test/S0914/BAC009S0914W0392.wav", "txt": "在目前中国队的阵容中"} -{"key": "BAC009S0914W0393", "wav": "./aishell/wav/test/S0914/BAC009S0914W0393.wav", "txt": "霸气外露的朱婷是不可或缺的绝对核心"} -{"key": "BAC009S0914W0394", "wav": "./aishell/wav/test/S0914/BAC009S0914W0394.wav", "txt": "在队长惠若琪缺阵的情况下"} -{"key": "BAC009S0914W0395", "wav": "./aishell/wav/test/S0914/BAC009S0914W0395.wav", "txt": "她几乎担当起了场上进攻加振奋士气的主力作用"} -{"key": "BAC009S0914W0396", "wav": "./aishell/wav/test/S0914/BAC009S0914W0396.wav", "txt": "半决赛对阵俄罗斯的比赛中"} -{"key": "BAC009S0914W0397", "wav": "./aishell/wav/test/S0914/BAC009S0914W0397.wav", "txt": "朱婷全场夺得二十九分"} -{"key": "BAC009S0914W0398", "wav": 
"./aishell/wav/test/S0914/BAC009S0914W0398.wav", "txt": "在俄罗斯队的严密拦防下"} -{"key": "BAC009S0914W0399", "wav": "./aishell/wav/test/S0914/BAC009S0914W0399.wav", "txt": "进攻成功率达到百分之五十六点七六拦网"} -{"key": "BAC009S0914W0400", "wav": "./aishell/wav/test/S0914/BAC009S0914W0400.wav", "txt": "朱婷得到七分同样全队最高"} -{"key": "BAC009S0914W0401", "wav": "./aishell/wav/test/S0914/BAC009S0914W0401.wav", "txt": "作为一个主攻手非常不易"} -{"key": "BAC009S0914W0402", "wav": "./aishell/wav/test/S0914/BAC009S0914W0402.wav", "txt": "与几乎不接一传的科舍列娃相比"} -{"key": "BAC009S0914W0403", "wav": "./aishell/wav/test/S0914/BAC009S0914W0403.wav", "txt": "朱婷的任务更重效率更高"} -{"key": "BAC009S0914W0404", "wav": "./aishell/wav/test/S0914/BAC009S0914W0404.wav", "txt": "提前一周至二零一六年六月二十四日"} -{"key": "BAC009S0914W0405", "wav": "./aishell/wav/test/S0914/BAC009S0914W0405.wav", "txt": "避免和新木乃伊正面较量"} -{"key": "BAC009S0914W0406", "wav": "./aishell/wav/test/S0914/BAC009S0914W0406.wav", "txt": "来源时光网美国时间本周一"} -{"key": "BAC009S0914W0407", "wav": "./aishell/wav/test/S0914/BAC009S0914W0407.wav", "txt": "二十世纪福斯影业公布一批新片的档期"} -{"key": "BAC009S0914W0408", "wav": "./aishell/wav/test/S0914/BAC009S0914W0408.wav", "txt": "晚上二十零下"} -{"key": "BAC009S0914W0409", "wav": "./aishell/wav/test/S0914/BAC009S0914W0409.wav", "txt": "马甲线啊马甲线"} -{"key": "BAC009S0914W0410", "wav": "./aishell/wav/test/S0914/BAC009S0914W0410.wav", "txt": "力证自己没有怀孕"} -{"key": "BAC009S0914W0411", "wav": "./aishell/wav/test/S0914/BAC009S0914W0411.wav", "txt": "网友纷纷调侃道为了辟谣怀孕也是蛮拼的"} -{"key": "BAC009S0914W0412", "wav": "./aishell/wav/test/S0914/BAC009S0914W0412.wav", "txt": "哈哈哈第一次见人用这种方式证明自己没怀孕"} -{"key": "BAC009S0914W0413", "wav": "./aishell/wav/test/S0914/BAC009S0914W0413.wav", "txt": "搜狐娱乐讯九月六日"} -{"key": "BAC009S0914W0414", "wav": "./aishell/wav/test/S0914/BAC009S0914W0414.wav", "txt": "陈妍希晒出一组攀岩照"} -{"key": "BAC009S0914W0415", "wav": "./aishell/wav/test/S0914/BAC009S0914W0415.wav", "txt": "并称攀岩太难会晃"} -{"key": "BAC009S0914W0416", "wav": "./aishell/wav/test/S0914/BAC009S0914W0416.wav", "txt": "不抓紧会被撞到地上"} -{"key": "BAC009S0914W0417", "wav": "./aishell/wav/test/S0914/BAC009S0914W0417.wav", "txt": "希饭快来接住我"} -{"key": "BAC009S0914W0418", "wav": "./aishell/wav/test/S0914/BAC009S0914W0418.wav", "txt": "陈妍希穿着粉色上衣"} -{"key": "BAC009S0914W0419", "wav": "./aishell/wav/test/S0914/BAC009S0914W0419.wav", "txt": "头发随意披在脑后"} -{"key": "BAC009S0914W0420", "wav": "./aishell/wav/test/S0914/BAC009S0914W0420.wav", "txt": "手脚并用努力向往上爬"} -{"key": "BAC009S0914W0421", "wav": "./aishell/wav/test/S0914/BAC009S0914W0421.wav", "txt": "似乎已过了第三关"} -{"key": "BAC009S0914W0422", "wav": "./aishell/wav/test/S0914/BAC009S0914W0422.wav", "txt": "如此高难度的动作"} -{"key": "BAC009S0914W0423", "wav": "./aishell/wav/test/S0914/BAC009S0914W0423.wav", "txt": "再次身体力行地辟谣怀孕传闻"} -{"key": "BAC009S0914W0424", "wav": "./aishell/wav/test/S0914/BAC009S0914W0424.wav", "txt": "搜狐娱乐讯近日频频传出陈晓向陈妍希求婚成功的消息"} -{"key": "BAC009S0914W0425", "wav": "./aishell/wav/test/S0914/BAC009S0914W0425.wav", "txt": "陈妍希回应现在真的很享受快乐恋爱的喜悦"} -{"key": "BAC009S0914W0426", "wav": "./aishell/wav/test/S0914/BAC009S0914W0426.wav", "txt": "有进一步消息一定会通知大家"} -{"key": "BAC009S0914W0427", "wav": "./aishell/wav/test/S0914/BAC009S0914W0427.wav", "txt": "中新网七月二十二日电据台湾东森新闻消息"} -{"key": "BAC009S0914W0428", "wav": "./aishell/wav/test/S0914/BAC009S0914W0428.wav", "txt": "陈妍希曾在新版神鵰侠侣中演小龙女"} -{"key": "BAC009S0914W0429", "wav": "./aishell/wav/test/S0914/BAC009S0914W0429.wav", "txt": "被网友调侃是小笼包"} -{"key": "BAC009S0914W0430", "wav": "./aishell/wav/test/S0914/BAC009S0914W0430.wav", "txt": "尽管她努力瘦身"} -{"key": 
"BAC009S0914W0431", "wav": "./aishell/wav/test/S0914/BAC009S0914W0431.wav", "txt": "当事网友疑遭遇克隆车"} -{"key": "BAC009S0914W0432", "wav": "./aishell/wav/test/S0914/BAC009S0914W0432.wav", "txt": "经调查核实相关情况"} -{"key": "BAC009S0914W0433", "wav": "./aishell/wav/test/S0914/BAC009S0914W0433.wav", "txt": "游客抢订冬奥运旅游团因遭遇订票难住房等"} -{"key": "BAC009S0914W0434", "wav": "./aishell/wav/test/S0914/BAC009S0914W0434.wav", "txt": "北京冬奥会刚刚申办成功"} -{"key": "BAC009S0914W0435", "wav": "./aishell/wav/test/S0914/BAC009S0914W0435.wav", "txt": "已经有游客迫不及待想去张家口看看了"} -{"key": "BAC009S0914W0436", "wav": "./aishell/wav/test/S0914/BAC009S0914W0436.wav", "txt": "游客摔断腿旅游社赔三成因旅游时未尽提示义务"} -{"key": "BAC009S0914W0437", "wav": "./aishell/wav/test/S0914/BAC009S0914W0437.wav", "txt": "游客日照海鲜店被打受伤警方称言语冲突引发互殴"} -{"key": "BAC009S0914W0438", "wav": "./aishell/wav/test/S0914/BAC009S0914W0438.wav", "txt": "京华时报讯记者卫张宁昨天上午"} -{"key": "BAC009S0914W0439", "wav": "./aishell/wav/test/S0914/BAC009S0914W0439.wav", "txt": "自己和家人因点的海鲜较少"} -{"key": "BAC009S0914W0440", "wav": "./aishell/wav/test/S0914/BAC009S0914W0440.wav", "txt": "并被店主及店员辱骂围殴"} -{"key": "BAC009S0914W0441", "wav": "./aishell/wav/test/S0914/BAC009S0914W0441.wav", "txt": "当时游客出言不逊在先"} -{"key": "BAC009S0914W0442", "wav": "./aishell/wav/test/S0914/BAC009S0914W0442.wav", "txt": "并未将游客脱光衣服殴打"} -{"key": "BAC009S0914W0443", "wav": "./aishell/wav/test/S0914/BAC009S0914W0443.wav", "txt": "日照市公安局官方发布消息"} -{"key": "BAC009S0914W0444", "wav": "./aishell/wav/test/S0914/BAC009S0914W0444.wav", "txt": "称事件系点餐过程中"} -{"key": "BAC009S0914W0445", "wav": "./aishell/wav/test/S0914/BAC009S0914W0445.wav", "txt": "双方发生语言冲突后进行互殴"} -{"key": "BAC009S0914W0446", "wav": "./aishell/wav/test/S0914/BAC009S0914W0446.wav", "txt": "已依法对双方进行处罚"} -{"key": "BAC009S0914W0447", "wav": "./aishell/wav/test/S0914/BAC009S0914W0447.wav", "txt": "游客晋吉岛乘船颠骨折诉旅社索赔二零馀万元"} -{"key": "BAC009S0914W0448", "wav": "./aishell/wav/test/S0914/BAC009S0914W0448.wav", "txt": "本来一家人出国旅游挺高兴的"} -{"key": "BAC009S0914W0449", "wav": "./aishell/wav/test/S0914/BAC009S0914W0449.wav", "txt": "可是我遇见这事还不够添堵的呢"} -{"key": "BAC009S0914W0450", "wav": "./aishell/wav/test/S0914/BAC009S0914W0450.wav", "txt": "崔先生带家人随团前往泰国晋吉岛游玩"} -{"key": "BAC009S0914W0451", "wav": "./aishell/wav/test/S0914/BAC009S0914W0451.wav", "txt": "导致崔先生腰部受伤"} -{"key": "BAC009S0914W0452", "wav": "./aishell/wav/test/S0914/BAC009S0914W0452.wav", "txt": "回国后被确诊为腰部骨折"} -{"key": "BAC009S0914W0453", "wav": "./aishell/wav/test/S0914/BAC009S0914W0453.wav", "txt": "将接团的两家旅行社起诉至法院"} -{"key": "BAC009S0914W0454", "wav": "./aishell/wav/test/S0914/BAC009S0914W0454.wav", "txt": "索赔各项损失共计二零馀万元"} -{"key": "BAC009S0914W0455", "wav": "./aishell/wav/test/S0914/BAC009S0914W0455.wav", "txt": "昌平法院开庭审理了这起案件"} -{"key": "BAC009S0914W0456", "wav": "./aishell/wav/test/S0914/BAC009S0914W0456.wav", "txt": "游客景区被忽悠八零零克石斛收费一二六零零元"} -{"key": "BAC009S0914W0457", "wav": "./aishell/wav/test/S0914/BAC009S0914W0457.wav", "txt": "滕女士在云南购买的石斛"} -{"key": "BAC009S0914W0458", "wav": "./aishell/wav/test/S0914/BAC009S0914W0458.wav", "txt": "游客武夷山就餐麝香肉结账要四八元一两"} -{"key": "BAC009S0914W0459", "wav": "./aishell/wav/test/S0914/BAC009S0914W0459.wav", "txt": "旅游点餐时与海鲜店主起争执互殴二人被行政拘留"} -{"key": "BAC009S0914W0460", "wav": "./aishell/wav/test/S0914/BAC009S0914W0460.wav", "txt": "新京报讯记者林斐然近日"} -{"key": "BAC009S0914W0461", "wav": "./aishell/wav/test/S0914/BAC009S0914W0461.wav", "txt": "有网友反映前往山东日照一海排档点海鲜时"} -{"key": "BAC009S0914W0462", "wav": "./aishell/wav/test/S0914/BAC009S0914W0462.wav", "txt": "该事件系游客点餐时嫌大排档太脏而引起口角纷"} -{"key": "BAC009S0914W0463", 
"wav": "./aishell/wav/test/S0914/BAC009S0914W0463.wav", "txt": "日照市公安局官方微博通报了这一事件的调查情况"} -{"key": "BAC009S0914W0464", "wav": "./aishell/wav/test/S0914/BAC009S0914W0464.wav", "txt": "双方因互殴均被行政拘留并处罚款"} -{"key": "BAC009S0914W0465", "wav": "./aishell/wav/test/S0914/BAC009S0914W0465.wav", "txt": "游客爬到峨眉山悬崖边石头上拍照"} -{"key": "BAC009S0914W0466", "wav": "./aishell/wav/test/S0914/BAC009S0914W0466.wav", "txt": "游客称点海鲜太少被当地媒体老板受伤更重"} -{"key": "BAC009S0914W0467", "wav": "./aishell/wav/test/S0914/BAC009S0914W0467.wav", "txt": "事情的真相完全不是这样的"} -{"key": "BAC009S0914W0468", "wav": "./aishell/wav/test/S0914/BAC009S0914W0468.wav", "txt": "大排档老板受伤更严重"} -{"key": "BAC009S0914W0469", "wav": "./aishell/wav/test/S0914/BAC009S0914W0469.wav", "txt": "起因也完全不是河南游客自己说的那样"} -{"key": "BAC009S0914W0470", "wav": "./aishell/wav/test/S0914/BAC009S0914W0470.wav", "txt": "希望警方尽快给出公平调查结果"} -{"key": "BAC009S0914W0471", "wav": "./aishell/wav/test/S0914/BAC009S0914W0471.wav", "txt": "游客称在山东日照只因点海鲜少全家遭殴打恐吓"} -{"key": "BAC009S0914W0472", "wav": "./aishell/wav/test/S0914/BAC009S0914W0472.wav", "txt": "并最新发微博表示当地警方已介入调查"} -{"key": "BAC009S0914W0473", "wav": "./aishell/wav/test/S0914/BAC009S0914W0473.wav", "txt": "游客突破八万人限流大关故宫首次提前禁止售票"} -{"key": "BAC009S0914W0474", "wav": "./aishell/wav/test/S0914/BAC009S0914W0474.wav", "txt": "新京报讯记者黄颖自七月六日进入暑期以来"} -{"key": "BAC009S0914W0475", "wav": "./aishell/wav/test/S0914/BAC009S0914W0475.wav", "txt": "故宫博物院接待的观众量也日益攀升"} -{"key": "BAC009S0914W0476", "wav": "./aishell/wav/test/S0914/BAC009S0914W0476.wav", "txt": "屡屡逼近八万人次的限流大关"} -{"key": "BAC009S0914W0477", "wav": "./aishell/wav/test/S0914/BAC009S0914W0477.wav", "txt": "故宫首次启动了起流起票限流措施"} -{"key": "BAC009S0914W0478", "wav": "./aishell/wav/test/S0914/BAC009S0914W0478.wav", "txt": "在馀票数量为售后现场关闭售票窗口"} -{"key": "BAC009S0914W0479", "wav": "./aishell/wav/test/S0914/BAC009S0914W0479.wav", "txt": "游客美签被废因访美停留太久称从没到过欧洲"} -{"key": "BAC009S0914W0480", "wav": "./aishell/wav/test/S0914/BAC009S0914W0480.wav", "txt": "而被美国海关移民官遣返"} -{"key": "BAC009S0914W0481", "wav": "./aishell/wav/test/S0914/BAC009S0914W0481.wav", "txt": "游客脚踩烈士铜像拍照四名当事人鞠躬道歉"} -{"key": "BAC009S0914W0482", "wav": "./aishell/wav/test/S0914/BAC009S0914W0482.wav", "txt": "四人鞠躬道歉据瓜沥人网"} -{"key": "BAC009S0914W0483", "wav": "./aishell/wav/test/S0914/BAC009S0914W0483.wav", "txt": "游客被黑导游拉进农家宴消费蘑菇炖鸡卖九零零元"} -{"key": "BAC009S0914W0484", "wav": "./aishell/wav/test/S0914/BAC009S0914W0484.wav", "txt": "其中一道蘑菇炖鸡收费近九零零元"} -{"key": "BAC009S0914W0485", "wav": "./aishell/wav/test/S0914/BAC009S0914W0485.wav", "txt": "看到该网友的曝光帖后"} -{"key": "BAC009S0914W0486", "wav": "./aishell/wav/test/S0914/BAC009S0914W0486.wav", "txt": "崂山景区勒令该农家宴停止停止营业"} -{"key": "BAC009S0914W0487", "wav": "./aishell/wav/test/S0914/BAC009S0914W0487.wav", "txt": "并索偿该游客全部损失"} -{"key": "BAC009S0914W0488", "wav": "./aishell/wav/test/S0914/BAC009S0914W0488.wav", "txt": "游客西安遭天价玛卡商家四零零零元一价合理"} -{"key": "BAC009S0914W0489", "wav": "./aishell/wav/test/S0914/BAC009S0914W0489.wav", "txt": "张先生购买的四零零元玛卡"} -{"key": "BAC009S0914W0490", "wav": "./aishell/wav/test/S0914/BAC009S0914W0490.wav", "txt": "内江人张先生在这次国庆期间"} -{"key": "BAC009S0914W0491", "wav": "./aishell/wav/test/S0914/BAC009S0914W0491.wav", "txt": "被导游介绍到一家购物点后"} -{"key": "BAC009S0914W0492", "wav": "./aishell/wav/test/S0914/BAC009S0914W0492.wav", "txt": "他被迫交了四零零元"} -{"key": "BAC009S0914W0493", "wav": "./aishell/wav/test/S0914/BAC009S0914W0493.wav", "txt": "这一斤玛卡其实价格只有一零零多元"} -{"key": "BAC009S0914W0494", "wav": "./aishell/wav/test/S0914/BAC009S0914W0494.wav", "txt": "一捧玛卡磨成粉景区商家要四零零零元"} -{"key": 
"BAC009S0914W0495", "wav": "./aishell/wav/test/S0914/BAC009S0914W0495.wav", "txt": "游客要退团张家界低价团导游称信不信你走走不了"} -{"key": "BAC009S0915W0121", "wav": "./aishell/wav/test/S0915/BAC009S0915W0121.wav", "txt": "从房地产的角度来看"} -{"key": "BAC009S0915W0122", "wav": "./aishell/wav/test/S0915/BAC009S0915W0122.wav", "txt": "这个政策的出台是希望刺激房地产投资"} -{"key": "BAC009S0915W0123", "wav": "./aishell/wav/test/S0915/BAC009S0915W0123.wav", "txt": "则是希望防止外资流出"} -{"key": "BAC009S0915W0124", "wav": "./aishell/wav/test/S0915/BAC009S0915W0124.wav", "txt": "国家统计局公布的数据显示"} -{"key": "BAC009S0915W0125", "wav": "./aishell/wav/test/S0915/BAC009S0915W0125.wav", "txt": "今年一到七月全国房地产开发投资五万亿元"} -{"key": "BAC009S0915W0126", "wav": "./aishell/wav/test/S0915/BAC009S0915W0126.wav", "txt": "增速比一到六月回落一个百分点"} -{"key": "BAC009S0915W0127", "wav": "./aishell/wav/test/S0915/BAC009S0915W0127.wav", "txt": "开发商投资增速处于不断下降的状态"} -{"key": "BAC009S0915W0128", "wav": "./aishell/wav/test/S0915/BAC009S0915W0128.wav", "txt": "市场开发也呈降温态势"} -{"key": "BAC009S0915W0129", "wav": "./aishell/wav/test/S0915/BAC009S0915W0129.wav", "txt": "此次出台的新政虽然放宽了条件"} -{"key": "BAC009S0915W0130", "wav": "./aishell/wav/test/S0915/BAC009S0915W0130.wav", "txt": "但对于实施住房限购政策的城市"} -{"key": "BAC009S0915W0131", "wav": "./aishell/wav/test/S0915/BAC009S0915W0131.wav", "txt": "境外个人购房依然需要符合当地政策规定"} -{"key": "BAC009S0915W0132", "wav": "./aishell/wav/test/S0915/BAC009S0915W0132.wav", "txt": "境外机构和个人在中国投资购买房地产的限制放松"} -{"key": "BAC009S0915W0133", "wav": "./aishell/wav/test/S0915/BAC009S0915W0133.wav", "txt": "兰州房地产市场回暖销量增加价格微涨"} -{"key": "BAC009S0915W0134", "wav": "./aishell/wav/test/S0915/BAC009S0915W0134.wav", "txt": "自二夏季开始"} -{"key": "BAC009S0915W0135", "wav": "./aishell/wav/test/S0915/BAC009S0915W0135.wav", "txt": "得益于一系列稳定房地产市场的措施"} -{"key": "BAC009S0915W0136", "wav": "./aishell/wav/test/S0915/BAC009S0915W0136.wav", "txt": "兰州房地产市场销量增加明显"} -{"key": "BAC009S0915W0137", "wav": "./aishell/wav/test/S0915/BAC009S0915W0137.wav", "txt": "一些楼盘新房价格出现微涨"} -{"key": "BAC009S0915W0138", "wav": "./aishell/wav/test/S0915/BAC009S0915W0138.wav", "txt": "较上月环比上涨百分之五"} -{"key": "BAC009S0915W0139", "wav": "./aishell/wav/test/S0915/BAC009S0915W0139.wav", "txt": "这也是该指数连续三个月出现上涨"} -{"key": "BAC009S0915W0140", "wav": "./aishell/wav/test/S0915/BAC009S0915W0140.wav", "txt": "而在多时间里"} -{"key": "BAC009S0915W0141", "wav": "./aishell/wav/test/S0915/BAC009S0915W0141.wav", "txt": "兰州新建住宅价格均呈现微降的态势"} -{"key": "BAC009S0915W0142", "wav": "./aishell/wav/test/S0915/BAC009S0915W0142.wav", "txt": "兰州楼市出现明显的区域分化"} -{"key": "BAC009S0915W0143", "wav": "./aishell/wav/test/S0915/BAC009S0915W0143.wav", "txt": "兰州市中心城区的一些楼盘"} -{"key": "BAC009S0915W0144", "wav": "./aishell/wav/test/S0915/BAC009S0915W0144.wav", "txt": "自今年初至今上涨幅度超过了十百分之"} -{"key": "BAC009S0915W0145", "wav": "./aishell/wav/test/S0915/BAC009S0915W0145.wav", "txt": "可由于中心城区楼盘数量稀少"} -{"key": "BAC009S0915W0146", "wav": "./aishell/wav/test/S0915/BAC009S0915W0146.wav", "txt": "在兰州雁滩区域的一家楼盘"} -{"key": "BAC009S0915W0147", "wav": "./aishell/wav/test/S0915/BAC009S0915W0147.wav", "txt": "而在兰州市新开楼盘集中的城郊区域"} -{"key": "BAC009S0915W0148", "wav": "./aishell/wav/test/S0915/BAC009S0915W0148.wav", "txt": "但房企调价幅度有限"} -{"key": "BAC009S0915W0149", "wav": "./aishell/wav/test/S0915/BAC009S0915W0149.wav", "txt": "由于商品房供应量充足"} -{"key": "BAC009S0915W0150", "wav": "./aishell/wav/test/S0915/BAC009S0915W0150.wav", "txt": "多个楼盘仍然采取的是低价走量的策略"} -{"key": "BAC009S0915W0151", "wav": "./aishell/wav/test/S0915/BAC009S0915W0151.wav", "txt": "在兰州市北岸由广东房企开发的一个大型楼盘里"} -{"key": "BAC009S0915W0152", "wav": 
"./aishell/wav/test/S0915/BAC009S0915W0152.wav", "txt": "但房价从七月至今上涨幅度仅为百分之二左右"} -{"key": "BAC009S0915W0153", "wav": "./aishell/wav/test/S0915/BAC009S0915W0153.wav", "txt": "今兰州市商品房销售面积同比上涨超过百分之三十"} -{"key": "BAC009S0915W0154", "wav": "./aishell/wav/test/S0915/BAC009S0915W0154.wav", "txt": "商品房销售额同比上涨超过了百分之四十"} -{"key": "BAC009S0915W0155", "wav": "./aishell/wav/test/S0915/BAC009S0915W0155.wav", "txt": "许多刚性住房和改善型住房需求得到释放"} -{"key": "BAC009S0915W0156", "wav": "./aishell/wav/test/S0915/BAC009S0915W0156.wav", "txt": "兰州房地产市场存在持续上涨可能"} -{"key": "BAC009S0915W0157", "wav": "./aishell/wav/test/S0915/BAC009S0915W0157.wav", "txt": "但由于房地产市场供给仍然不仍然充足"} -{"key": "BAC009S0915W0158", "wav": "./aishell/wav/test/S0915/BAC009S0915W0158.wav", "txt": "自二夏季开始"} -{"key": "BAC009S0915W0159", "wav": "./aishell/wav/test/S0915/BAC009S0915W0159.wav", "txt": "得益于一系列稳定房地产市场的措施"} -{"key": "BAC009S0915W0160", "wav": "./aishell/wav/test/S0915/BAC009S0915W0160.wav", "txt": "兰州房地产市场销量增加明显"} -{"key": "BAC009S0915W0161", "wav": "./aishell/wav/test/S0915/BAC009S0915W0161.wav", "txt": "而且提供各项衍生的福利性服务"} -{"key": "BAC009S0915W0162", "wav": "./aishell/wav/test/S0915/BAC009S0915W0162.wav", "txt": "中新网十月二十一日前"} -{"key": "BAC009S0915W0163", "wav": "./aishell/wav/test/S0915/BAC009S0915W0163.wav", "txt": "北京又一家共享创办公平台落地丰台"} -{"key": "BAC009S0915W0164", "wav": "./aishell/wav/test/S0915/BAC009S0915W0164.wav", "txt": "借全国大众创业万众创新活动周启动之势"} -{"key": "BAC009S0915W0166", "wav": "./aishell/wav/test/S0915/BAC009S0915W0166.wav", "txt": "将生活社区与科技园区两种空间组织融合"} -{"key": "BAC009S0915W0167", "wav": "./aishell/wav/test/S0915/BAC009S0915W0167.wav", "txt": "作为美国新型共享式办公与创新环境的运营品牌"} -{"key": "BAC009S0915W0168", "wav": "./aishell/wav/test/S0915/BAC009S0915W0168.wav", "txt": "是国际上合作性办公品牌的代表"} -{"key": "BAC009S0915W0169", "wav": "./aishell/wav/test/S0915/BAC009S0915W0169.wav", "txt": "由此拉开了跨境共享创新生态平台化发展的新时代"} -{"key": "BAC009S0915W0170", "wav": "./aishell/wav/test/S0915/BAC009S0915W0170.wav", "txt": "而且提供各项行生的福利性服务"} -{"key": "BAC009S0915W0171", "wav": "./aishell/wav/test/S0915/BAC009S0915W0171.wav", "txt": "帮助创新创业者聚合各方面资源"} -{"key": "BAC009S0915W0172", "wav": "./aishell/wav/test/S0915/BAC009S0915W0172.wav", "txt": "旨在帮助小型企业降低运运营成本"} -{"key": "BAC009S0915W0174", "wav": "./aishell/wav/test/S0915/BAC009S0915W0174.wav", "txt": "从创业者真正的需求出发"} -{"key": "BAC009S0915W0175", "wav": "./aishell/wav/test/S0915/BAC009S0915W0175.wav", "txt": "石榴中心位于丰台区宋家庄交通枢纽商圈"} -{"key": "BAC009S0915W0176", "wav": "./aishell/wav/test/S0915/BAC009S0915W0176.wav", "txt": "可以北京四环内唯一的国际化共享办公园区"} -{"key": "BAC009S0915W0177", "wav": "./aishell/wav/test/S0915/BAC009S0915W0177.wav", "txt": "园区总建筑面积一万平方米"} -{"key": "BAC009S0915W0178", "wav": "./aishell/wav/test/S0915/BAC009S0915W0178.wav", "txt": "其中地上一万平方米"} -{"key": "BAC009S0915W0179", "wav": "./aishell/wav/test/S0915/BAC009S0915W0179.wav", "txt": "地下一万平方米"} -{"key": "BAC009S0915W0180", "wav": "./aishell/wav/test/S0915/BAC009S0915W0180.wav", "txt": "由二十二栋企业独栋和二栋二十层的五a级写字楼组成"} -{"key": "BAC009S0915W0181", "wav": "./aishell/wav/test/S0915/BAC009S0915W0181.wav", "txt": "而且提供各项行生的福利性服务"} -{"key": "BAC009S0915W0182", "wav": "./aishell/wav/test/S0915/BAC009S0915W0182.wav", "txt": "中新网十月二十日前"} -{"key": "BAC009S0915W0184", "wav": "./aishell/wav/test/S0915/BAC009S0915W0184.wav", "txt": "关于智能家居你必须懂的五件事"} -{"key": "BAC009S0915W0185", "wav": "./aishell/wav/test/S0915/BAC009S0915W0185.wav", "txt": "智能家居概念的炒作"} -{"key": "BAC009S0915W0186", "wav": "./aishell/wav/test/S0915/BAC009S0915W0186.wav", "txt": "这是自媒体时代的胜利"} -{"key": "BAC009S0915W0187", "wav": 
"./aishell/wav/test/S0915/BAC009S0915W0187.wav", "txt": "将明确政策界限和工作机制以知以及部门分工"} -{"key": "BAC009S0915W0188", "wav": "./aishell/wav/test/S0915/BAC009S0915W0188.wav", "txt": "证监会将协同有关部门落实相关工作"} -{"key": "BAC009S0915W0189", "wav": "./aishell/wav/test/S0915/BAC009S0915W0189.wav", "txt": "公共娱乐场所清理整顿"} -{"key": "BAC009S0915W0190", "wav": "./aishell/wav/test/S0915/BAC009S0915W0190.wav", "txt": "燃气经经营市场清理整顿"} -{"key": "BAC009S0915W0191", "wav": "./aishell/wav/test/S0915/BAC009S0915W0191.wav", "txt": "行业协会清理整顿报告"} -{"key": "BAC009S0915W0192", "wav": "./aishell/wav/test/S0915/BAC009S0915W0192.wav", "txt": "国务院已批准信贷资产证券化继续扩大试点"} -{"key": "BAC009S0915W0193", "wav": "./aishell/wav/test/S0915/BAC009S0915W0193.wav", "txt": "多方面原因造成今年上半年部分中小企业生产经营困难"} -{"key": "BAC009S0915W0194", "wav": "./aishell/wav/test/S0915/BAC009S0915W0194.wav", "txt": "但没有出现大范围趋势性的破产倒闭"} -{"key": "BAC009S0915W0195", "wav": "./aishell/wav/test/S0915/BAC009S0915W0195.wav", "txt": "部分中小企业国内生产成本有所提高"} -{"key": "BAC009S0915W0196", "wav": "./aishell/wav/test/S0915/BAC009S0915W0196.wav", "txt": "这主要有四方面原因"} -{"key": "BAC009S0915W0197", "wav": "./aishell/wav/test/S0915/BAC009S0915W0197.wav", "txt": "中小企业经营困难"} -{"key": "BAC009S0915W0198", "wav": "./aishell/wav/test/S0915/BAC009S0915W0198.wav", "txt": "既是信贷投放回归常态的体现"} -{"key": "BAC009S0915W0199", "wav": "./aishell/wav/test/S0915/BAC009S0915W0199.wav", "txt": "也是国家淘汰落后产能加快产业升级宏观政策的体现"} -{"key": "BAC009S0915W0200", "wav": "./aishell/wav/test/S0915/BAC009S0915W0200.wav", "txt": "对于中小企业的支持政策"} -{"key": "BAC009S0915W0201", "wav": "./aishell/wav/test/S0915/BAC009S0915W0201.wav", "txt": "国务院已经批准信贷资产证券化继续扩大试点"} -{"key": "BAC009S0915W0202", "wav": "./aishell/wav/test/S0915/BAC009S0915W0202.wav", "txt": "转化成由资产产生的现金流作担保可自由流通的证券"} -{"key": "BAC009S0915W0203", "wav": "./aishell/wav/test/S0915/BAC009S0915W0203.wav", "txt": "销售给资本市场投资者的一种融资方式"} -{"key": "BAC009S0915W0204", "wav": "./aishell/wav/test/S0915/BAC009S0915W0204.wav", "txt": "目前我国正在稳步开展中小企业信贷资产证券化试点"} -{"key": "BAC009S0915W0205", "wav": "./aishell/wav/test/S0915/BAC009S0915W0205.wav", "txt": "为加快发展银行间债券市场"} -{"key": "BAC009S0915W0206", "wav": "./aishell/wav/test/S0915/BAC009S0915W0206.wav", "txt": "对中小企业发行债务融资工具提供绿色通道"} -{"key": "BAC009S0915W0207", "wav": "./aishell/wav/test/S0915/BAC009S0915W0207.wav", "txt": "占非金融企业直接债务融资总额之比"} -{"key": "BAC009S0915W0208", "wav": "./aishell/wav/test/S0915/BAC009S0915W0208.wav", "txt": "有力地支持了中小企业的发展"} -{"key": "BAC009S0915W0209", "wav": "./aishell/wav/test/S0915/BAC009S0915W0209.wav", "txt": "积极指导支持和鼓励金融机构根据中小企业的特点"} -{"key": "BAC009S0915W0210", "wav": "./aishell/wav/test/S0915/BAC009S0915W0210.wav", "txt": "研发推出不同的金融创新产品和服务方式"} -{"key": "BAC009S0915W0211", "wav": "./aishell/wav/test/S0915/BAC009S0915W0211.wav", "txt": "吴显亭称将加强和证监会等相关部门的配合和协作"} -{"key": "BAC009S0915W0212", "wav": "./aishell/wav/test/S0915/BAC009S0915W0212.wav", "txt": "而针对浙江广东民间借贷丰沛的特点"} -{"key": "BAC009S0915W0213", "wav": "./aishell/wav/test/S0915/BAC009S0915W0213.wav", "txt": "一定程度上缓解了部分中小企业的融资困难"} -{"key": "BAC009S0915W0214", "wav": "./aishell/wav/test/S0915/BAC009S0915W0214.wav", "txt": "将在有效防范民间借贷的潜在风险的前提下"} -{"key": "BAC009S0915W0215", "wav": "./aishell/wav/test/S0915/BAC009S0915W0215.wav", "txt": "发挥好民间借贷在服务中小企业发展中的积极作用"} -{"key": "BAC009S0915W0216", "wav": "./aishell/wav/test/S0915/BAC009S0915W0216.wav", "txt": "要加强对民间借贷的合理引导"} -{"key": "BAC009S0915W0217", "wav": "./aishell/wav/test/S0915/BAC009S0915W0217.wav", "txt": "解决中小企业生产经营困难需靠多方面共同努力"} -{"key": "BAC009S0915W0218", "wav": "./aishell/wav/test/S0915/BAC009S0915W0218.wav", "txt": 
"听取对中央企业监督检查情况的汇报"} -{"key": "BAC009S0915W0219", "wav": "./aishell/wav/test/S0915/BAC009S0915W0219.wav", "txt": "中央企业要进一步深化改革"} -{"key": "BAC009S0915W0220", "wav": "./aishell/wav/test/S0915/BAC009S0915W0220.wav", "txt": "强化企业管理和风险管控"} -{"key": "BAC009S0915W0221", "wav": "./aishell/wav/test/S0915/BAC009S0915W0221.wav", "txt": "加强依法监管和制度建设"} -{"key": "BAC009S0915W0222", "wav": "./aishell/wav/test/S0915/BAC009S0915W0222.wav", "txt": "部分中央企业的结构调整还存在一些困难"} -{"key": "BAC009S0915W0223", "wav": "./aishell/wav/test/S0915/BAC009S0915W0223.wav", "txt": "资源环境面临较大压力有的企业管理水平不高"} -{"key": "BAC009S0915W0224", "wav": "./aishell/wav/test/S0915/BAC009S0915W0224.wav", "txt": "非主业投资存在不少经营风险"} -{"key": "BAC009S0915W0225", "wav": "./aishell/wav/test/S0915/BAC009S0915W0225.wav", "txt": "境外资产监管有待加强"} -{"key": "BAC009S0915W0226", "wav": "./aishell/wav/test/S0915/BAC009S0915W0226.wav", "txt": "中央企业实现营业总收入十六点八亿元"} -{"key": "BAC009S0915W0227", "wav": "./aishell/wav/test/S0915/BAC009S0915W0227.wav", "txt": "上交税金一万亿元"} -{"key": "BAC009S0915W0228", "wav": "./aishell/wav/test/S0915/BAC009S0915W0228.wav", "txt": "增长百分之三十净利润一千亿元"} -{"key": "BAC009S0915W0229", "wav": "./aishell/wav/test/S0915/BAC009S0915W0229.wav", "txt": "二零一一年一月至七月"} -{"key": "BAC009S0915W0230", "wav": "./aishell/wav/test/S0915/BAC009S0915W0230.wav", "txt": "实现营业总收入十一亿元"} -{"key": "BAC009S0915W0231", "wav": "./aishell/wav/test/S0915/BAC009S0915W0231.wav", "txt": "同比增加迅速上缴税金三亿元"} -{"key": "BAC009S0915W0232", "wav": "./aishell/wav/test/S0915/BAC009S0915W0232.wav", "txt": "增长非常迅速"} -{"key": "BAC009S0915W0233", "wav": "./aishell/wav/test/S0915/BAC009S0915W0233.wav", "txt": "进入世界五百强的企业增加"} -{"key": "BAC009S0915W0234", "wav": "./aishell/wav/test/S0915/BAC009S0915W0234.wav", "txt": "包括七座以下小客车及摩托车都被列入免费范范围"} -{"key": "BAC009S0915W0235", "wav": "./aishell/wav/test/S0915/BAC009S0915W0235.wav", "txt": "江苏省交通厅相关负责人昨日对记者表示"} -{"key": "BAC009S0915W0236", "wav": "./aishell/wav/test/S0915/BAC009S0915W0236.wav", "txt": "今年国庆小长假期间私家车主们就可以免费上路了"} -{"key": "BAC009S0915W0237", "wav": "./aishell/wav/test/S0915/BAC009S0915W0237.wav", "txt": "免费时段从节假日第一天开始"} -{"key": "BAC009S0915W0238", "wav": "./aishell/wav/test/S0915/BAC009S0915W0238.wav", "txt": "节假日最后一天结束"} -{"key": "BAC009S0915W0239", "wav": "./aishell/wav/test/S0915/BAC009S0915W0239.wav", "txt": "普通公路以车辆通过收费站收费车道的时间为准"} -{"key": "BAC009S0915W0240", "wav": "./aishell/wav/test/S0915/BAC009S0915W0240.wav", "txt": "高速公路以车辆驶离出口收费车车道的时间为准"} -{"key": "BAC009S0915W0241", "wav": "./aishell/wav/test/S0915/BAC009S0915W0241.wav", "txt": "允许在普通收费公路行驶的摩托车"} -{"key": "BAC009S0915W0242", "wav": "./aishell/wav/test/S0915/BAC009S0915W0242.wav", "txt": "各地机场高速公路是否实行免费通行"} -{"key": "BAC009S0915W0243", "wav": "./aishell/wav/test/S0915/BAC009S0915W0243.wav", "txt": "由各省区市人民政府决定"} -{"key": "BAC009S0915W0244", "wav": "./aishell/wav/test/S0915/BAC009S0915W0244.wav", "txt": "各地机场高速公路是否实行免费通行"} -{"key": "BAC009S0915W0245", "wav": "./aishell/wav/test/S0915/BAC009S0915W0245.wav", "txt": "由各省区市人民政府决定"} -{"key": "BAC009S0915W0246", "wav": "./aishell/wav/test/S0915/BAC009S0915W0246.wav", "txt": "比如南京机场高速一到节假日"} -{"key": "BAC009S0915W0247", "wav": "./aishell/wav/test/S0915/BAC009S0915W0247.wav", "txt": "是南京往南的重要通道"} -{"key": "BAC009S0915W0248", "wav": "./aishell/wav/test/S0915/BAC009S0915W0248.wav", "txt": "对于江苏的机场高速是否免费"} -{"key": "BAC009S0915W0249", "wav": "./aishell/wav/test/S0915/BAC009S0915W0249.wav", "txt": "省交通部门称目前未定"} -{"key": "BAC009S0915W0250", "wav": "./aishell/wav/test/S0915/BAC009S0915W0250.wav", "txt": "但有关负责人认为我想"} -{"key": 
"BAC009S0915W0251", "wav": "./aishell/wav/test/S0915/BAC009S0915W0251.wav", "txt": "机场高速最大可能还是免费"} -{"key": "BAC009S0915W0252", "wav": "./aishell/wav/test/S0915/BAC009S0915W0252.wav", "txt": "另一个让南京市民特别关心的是"} -{"key": "BAC009S0915W0253", "wav": "./aishell/wav/test/S0915/BAC009S0915W0253.wav", "txt": "或许到二零一六年的时候"} -{"key": "BAC009S0915W0254", "wav": "./aishell/wav/test/S0915/BAC009S0915W0254.wav", "txt": "零售店就再也卖不出去一个实体钱包了"} -{"key": "BAC009S0915W0255", "wav": "./aishell/wav/test/S0915/BAC009S0915W0255.wav", "txt": "催生着移动支付技术的大跃进"} -{"key": "BAC009S0915W0256", "wav": "./aishell/wav/test/S0915/BAC009S0915W0256.wav", "txt": "最关键的两个属性莫过于安全和便捷"} -{"key": "BAC009S0915W0257", "wav": "./aishell/wav/test/S0915/BAC009S0915W0257.wav", "txt": "而且它们两个之间是非常对立的关系"} -{"key": "BAC009S0915W0258", "wav": "./aishell/wav/test/S0915/BAC009S0915W0258.wav", "txt": "安全性的提升需要牺牲一定的便携性"} -{"key": "BAC009S0915W0259", "wav": "./aishell/wav/test/S0915/BAC009S0915W0259.wav", "txt": "究竟哪个特特性更加重要"} -{"key": "BAC009S0915W0260", "wav": "./aishell/wav/test/S0915/BAC009S0915W0260.wav", "txt": "这也影响着移动支付市场的总体进程和发展方向"} -{"key": "BAC009S0915W0261", "wav": "./aishell/wav/test/S0915/BAC009S0915W0261.wav", "txt": "或许是受好莱坞艳照门的影响"} -{"key": "BAC009S0915W0263", "wav": "./aishell/wav/test/S0915/BAC009S0915W0263.wav", "txt": "重点强调了其安全性"} -{"key": "BAC009S0915W0264", "wav": "./aishell/wav/test/S0915/BAC009S0915W0264.wav", "txt": "最基本的逻辑就是我们不读取信息"} -{"key": "BAC009S0915W0265", "wav": "./aishell/wav/test/S0915/BAC009S0915W0265.wav", "txt": "牛师傅总说自己的面没有添加任何防腐剂"} -{"key": "BAC009S0915W0266", "wav": "./aishell/wav/test/S0915/BAC009S0915W0266.wav", "txt": "任何的电子行为都不免会留下痕迹"} -{"key": "BAC009S0915W0267", "wav": "./aishell/wav/test/S0915/BAC009S0915W0267.wav", "txt": "移动支付又会产生非常敏感的操作信息"} -{"key": "BAC009S0915W0268", "wav": "./aishell/wav/test/S0915/BAC009S0915W0268.wav", "txt": "蕴含着巨大商业价值"} -{"key": "BAC009S0915W0269", "wav": "./aishell/wav/test/S0915/BAC009S0915W0269.wav", "txt": "有哪家支付机构愿意心无旁续地放弃这些金子呢"} -{"key": "BAC009S0915W0270", "wav": "./aishell/wav/test/S0915/BAC009S0915W0270.wav", "txt": "安全真的是移动支付的第一属性吗"} -{"key": "BAC009S0915W0271", "wav": "./aishell/wav/test/S0915/BAC009S0915W0271.wav", "txt": "消费者对便捷性的要求可能会更高"} -{"key": "BAC009S0915W0272", "wav": "./aishell/wav/test/S0915/BAC009S0915W0272.wav", "txt": "按照国内消费者的习惯"} -{"key": "BAC009S0915W0273", "wav": "./aishell/wav/test/S0915/BAC009S0915W0273.wav", "txt": "他们通常会单独办一张银行卡来绑定移动支付系统"} -{"key": "BAC009S0915W0274", "wav": "./aishell/wav/test/S0915/BAC009S0915W0274.wav", "txt": "而不是拿着主卡到处刷"} -{"key": "BAC009S0915W0275", "wav": "./aishell/wav/test/S0915/BAC009S0915W0275.wav", "txt": "移动支付可调用的只能是消费者的小额度的钱财"} -{"key": "BAC009S0915W0276", "wav": "./aishell/wav/test/S0915/BAC009S0915W0276.wav", "txt": "一般不会给消费者带来巨大损失"} -{"key": "BAC009S0915W0277", "wav": "./aishell/wav/test/S0915/BAC009S0915W0277.wav", "txt": "消费者会在特定情况下牺牲安全性来提升支付的便捷性"} -{"key": "BAC009S0915W0278", "wav": "./aishell/wav/test/S0915/BAC009S0915W0278.wav", "txt": "她们宁愿可花五个小时讨论是否买一条裙子"} -{"key": "BAC009S0915W0279", "wav": "./aishell/wav/test/S0915/BAC009S0915W0279.wav", "txt": "也不愿意花五分钟重新输一定密码"} -{"key": "BAC009S0915W0281", "wav": "./aishell/wav/test/S0915/BAC009S0915W0281.wav", "txt": "大概十年前就有了这样的说法"} -{"key": "BAC009S0915W0283", "wav": "./aishell/wav/test/S0915/BAC009S0915W0283.wav", "txt": "也正是看中了中国消费者的消费潜力"} -{"key": "BAC009S0915W0284", "wav": "./aishell/wav/test/S0915/BAC009S0915W0284.wav", "txt": "华尔街才对阿里巴巴情有独钟"} -{"key": "BAC009S0915W0286", "wav": "./aishell/wav/test/S0915/BAC009S0915W0286.wav", "txt": 
"就让业界讨论它会带给中国移动支付市场怎样的影响"} -{"key": "BAC009S0915W0287", "wav": "./aishell/wav/test/S0915/BAC009S0915W0287.wav", "txt": "苹果要想在中国本土化"} -{"key": "BAC009S0915W0288", "wav": "./aishell/wav/test/S0915/BAC009S0915W0288.wav", "txt": "最大难点在于如何改变国内的消费习惯"} -{"key": "BAC009S0915W0289", "wav": "./aishell/wav/test/S0915/BAC009S0915W0289.wav", "txt": "如何说服四大银行一起与之愉快合作"} -{"key": "BAC009S0915W0290", "wav": "./aishell/wav/test/S0915/BAC009S0915W0290.wav", "txt": "如何重修与中国政府的良好关系"} -{"key": "BAC009S0915W0291", "wav": "./aishell/wav/test/S0915/BAC009S0915W0291.wav", "txt": "这对于苹果来说不是件容易的事儿"} -{"key": "BAC009S0915W0292", "wav": "./aishell/wav/test/S0915/BAC009S0915W0292.wav", "txt": "现在是不是也该长点心了吧"} -{"key": "BAC009S0915W0293", "wav": "./aishell/wav/test/S0915/BAC009S0915W0293.wav", "txt": "国内移动支付需主要有两股力量"} -{"key": "BAC009S0915W0295", "wav": "./aishell/wav/test/S0915/BAC009S0915W0295.wav", "txt": "前者有长时间的沉淀"} -{"key": "BAC009S0915W0296", "wav": "./aishell/wav/test/S0915/BAC009S0915W0296.wav", "txt": "银行们对此已深耕多年"} -{"key": "BAC009S0915W0297", "wav": "./aishell/wav/test/S0915/BAC009S0915W0297.wav", "txt": "而后者则是刚刚涌现的后起之秀"} -{"key": "BAC009S0915W0298", "wav": "./aishell/wav/test/S0915/BAC009S0915W0298.wav", "txt": "二零一四年春天打车软件补贴大战"} -{"key": "BAC009S0915W0299", "wav": "./aishell/wav/test/S0915/BAC009S0915W0299.wav", "txt": "两股力量基本上都有一统天下的野心"} -{"key": "BAC009S0915W0300", "wav": "./aishell/wav/test/S0915/BAC009S0915W0300.wav", "txt": "这三个优势能在短时间内颠复美国移动支付市场的格局"} -{"key": "BAC009S0915W0301", "wav": "./aishell/wav/test/S0915/BAC009S0915W0301.wav", "txt": "最终促使苹果成为主流标准但中国市场有其特殊性"} -{"key": "BAC009S0915W0302", "wav": "./aishell/wav/test/S0915/BAC009S0915W0302.wav", "txt": "首先银联和苹果的合作谈判不会顺利"} -{"key": "BAC009S0915W0303", "wav": "./aishell/wav/test/S0915/BAC009S0915W0303.wav", "txt": "今后所有空调产品还将实现联机运行"} -{"key": "BAC009S0915W0304", "wav": "./aishell/wav/test/S0915/BAC009S0915W0304.wav", "txt": "这台设备就不会开机运转"} -{"key": "BAC009S0915W0305", "wav": "./aishell/wav/test/S0915/BAC009S0915W0305.wav", "txt": "这个在美的空调的南沙工厂武汉工厂已全面试点"} -{"key": "BAC009S0915W0306", "wav": "./aishell/wav/test/S0915/BAC009S0915W0306.wav", "txt": "自动化制造是未来唯一的出路"} -{"key": "BAC009S0915W0307", "wav": "./aishell/wav/test/S0915/BAC009S0915W0307.wav", "txt": "未来的制造业方向要实现无人化"} -{"key": "BAC009S0915W0308", "wav": "./aishell/wav/test/S0915/BAC009S0915W0308.wav", "txt": "美的计划在二零一八年"} -{"key": "BAC009S0915W0309", "wav": "./aishell/wav/test/S0915/BAC009S0915W0309.wav", "txt": "将家用空调事业部员工工人数缩减至两万人"} -{"key": "BAC009S0915W0310", "wav": "./aishell/wav/test/S0915/BAC009S0915W0310.wav", "txt": "除了四轴或三轴机器人外"} -{"key": "BAC009S0915W0311", "wav": "./aishell/wav/test/S0915/BAC009S0915W0311.wav", "txt": "今年还将新增二百台"} -{"key": "BAC009S0915W0312", "wav": "./aishell/wav/test/S0915/BAC009S0915W0312.wav", "txt": "机器人维护成本是挑战"} -{"key": "BAC009S0915W0313", "wav": "./aishell/wav/test/S0915/BAC009S0915W0313.wav", "txt": "广东东莞顺德等城市已经掀起大量机器换人计划"} -{"key": "BAC009S0915W0314", "wav": "./aishell/wav/test/S0915/BAC009S0915W0314.wav", "txt": "家电企业机器人智造也正在加速进行"} -{"key": "BAC009S0915W0315", "wav": "./aishell/wav/test/S0915/BAC009S0915W0315.wav", "txt": "从美的海尔使用机器人操作来看来"} -{"key": "BAC009S0915W0316", "wav": "./aishell/wav/test/S0915/BAC009S0915W0316.wav", "txt": "机器换人确实能够大大降低企业的用工数量"} -{"key": "BAC009S0915W0317", "wav": "./aishell/wav/test/S0915/BAC009S0915W0317.wav", "txt": "实现自动化升级也没那么简单"} -{"key": "BAC009S0915W0318", "wav": "./aishell/wav/test/S0915/BAC009S0915W0318.wav", "txt": "美的集团对项目在一定年限内有投入产出的规定"} -{"key": "BAC009S0915W0319", "wav": "./aishell/wav/test/S0915/BAC009S0915W0319.wav", 
"txt": "这对我们来说是个很大的挑战"} -{"key": "BAC009S0915W0320", "wav": "./aishell/wav/test/S0915/BAC009S0915W0320.wav", "txt": "同时也卡住了自动化的投入"} -{"key": "BAC009S0915W0321", "wav": "./aishell/wav/test/S0915/BAC009S0915W0321.wav", "txt": "一定年限内的投入产出"} -{"key": "BAC009S0915W0322", "wav": "./aishell/wav/test/S0915/BAC009S0915W0322.wav", "txt": "我们必须要有衡量标准"} -{"key": "BAC009S0915W0323", "wav": "./aishell/wav/test/S0915/BAC009S0915W0323.wav", "txt": "美的不能因自动化生产增加制造成本而让用户买单"} -{"key": "BAC009S0915W0324", "wav": "./aishell/wav/test/S0915/BAC009S0915W0324.wav", "txt": "美的空调进行自动化升级"} -{"key": "BAC009S0915W0325", "wav": "./aishell/wav/test/S0915/BAC009S0915W0325.wav", "txt": "一定是为了降低制造成本"} -{"key": "BAC009S0915W0326", "wav": "./aishell/wav/test/S0915/BAC009S0915W0326.wav", "txt": "比如降低人工费用运作费用等"} -{"key": "BAC009S0915W0327", "wav": "./aishell/wav/test/S0915/BAC009S0915W0327.wav", "txt": "机器人后期维护运行成本及技术也是一个高门槛"} -{"key": "BAC009S0915W0328", "wav": "./aishell/wav/test/S0915/BAC009S0915W0328.wav", "txt": "因为机器人生产商派遣技术人员不可能长期驻起驻点企业"} -{"key": "BAC009S0915W0329", "wav": "./aishell/wav/test/S0915/BAC009S0915W0329.wav", "txt": "高工机器人董事长张小飞表示"} -{"key": "BAC009S0915W0330", "wav": "./aishell/wav/test/S0915/BAC009S0915W0330.wav", "txt": "家电企业自动化升级改造必须进行"} -{"key": "BAC009S0915W0331", "wav": "./aishell/wav/test/S0915/BAC009S0915W0331.wav", "txt": "但伴随一定的投资风险"} -{"key": "BAC009S0915W0332", "wav": "./aishell/wav/test/S0915/BAC009S0915W0332.wav", "txt": "除了后期技术维护能力外"} -{"key": "BAC009S0915W0333", "wav": "./aishell/wav/test/S0915/BAC009S0915W0333.wav", "txt": "对于国内家电企业而言"} -{"key": "BAC009S0915W0334", "wav": "./aishell/wav/test/S0915/BAC009S0915W0334.wav", "txt": "自动化生产线的柔性改造也是其面临的一大难题"} -{"key": "BAC009S0915W0335", "wav": "./aishell/wav/test/S0915/BAC009S0915W0335.wav", "txt": "空调产品越来越追求个性化"} -{"key": "BAC009S0915W0336", "wav": "./aishell/wav/test/S0915/BAC009S0915W0336.wav", "txt": "这需要通过机器人的柔性改变来对此进行处理"} -{"key": "BAC009S0915W0337", "wav": "./aishell/wav/test/S0915/BAC009S0915W0337.wav", "txt": "家电企业要建立数字化工厂才能真正提升生产效率"} -{"key": "BAC009S0915W0338", "wav": "./aishell/wav/test/S0915/BAC009S0915W0338.wav", "txt": "她的表现也更加全面"} -{"key": "BAC009S0915W0339", "wav": "./aishell/wav/test/S0915/BAC009S0915W0339.wav", "txt": "对阵俄罗斯的比赛中"} -{"key": "BAC009S0915W0340", "wav": "./aishell/wav/test/S0915/BAC009S0915W0340.wav", "txt": "在张常宁一度进行进攻受阻"} -{"key": "BAC009S0915W0341", "wav": "./aishell/wav/test/S0915/BAC009S0915W0341.wav", "txt": "刘晓彤替补上场打得缩手缩脚的情况下"} -{"key": "BAC009S0915W0342", "wav": "./aishell/wav/test/S0915/BAC009S0915W0342.wav", "txt": "不断地为中国女排得分"} -{"key": "BAC009S0915W0343", "wav": "./aishell/wav/test/S0915/BAC009S0915W0343.wav", "txt": "只要中国队需要有人挺身而出"} -{"key": "BAC009S0915W0344", "wav": "./aishell/wav/test/S0915/BAC009S0915W0344.wav", "txt": "朱婷在中韩之战中一度受伤"} -{"key": "BAC009S0915W0345", "wav": "./aishell/wav/test/S0915/BAC009S0915W0345.wav", "txt": "但她在中国队遇到困难的时候坚持带伤上阵"} -{"key": "BAC009S0915W0346", "wav": "./aishell/wav/test/S0915/BAC009S0915W0346.wav", "txt": "最终掠队拿下了比赛"} -{"key": "BAC009S0915W0347", "wav": "./aishell/wav/test/S0915/BAC009S0915W0347.wav", "txt": "在队长惠若琪因为身体原因无缘世界杯的情况下"} -{"key": "BAC009S0915W0348", "wav": "./aishell/wav/test/S0915/BAC009S0915W0348.wav", "txt": "朱婷就是中国女排的核心"} -{"key": "BAC009S0915W0349", "wav": "./aishell/wav/test/S0915/BAC009S0915W0349.wav", "txt": "朱婷再度扮演了场上头脑的角色"} -{"key": "BAC009S0915W0350", "wav": "./aishell/wav/test/S0915/BAC009S0915W0350.wav", "txt": "队员们也对于她在技术上和心理上都颇为依赖和信服"} -{"key": "BAC009S0915W0351", "wav": "./aishell/wav/test/S0915/BAC009S0915W0351.wav", "txt": 
"朱婷扣球拿下一百一十三分"} -{"key": "BAC009S0915W0352", "wav": "./aishell/wav/test/S0915/BAC009S0915W0352.wav", "txt": "总共贡献了一百四十一分"} -{"key": "BAC009S0915W0353", "wav": "./aishell/wav/test/S0915/BAC009S0915W0353.wav", "txt": "反超张常宁成为中国队的得分王"} -{"key": "BAC009S0915W0354", "wav": "./aishell/wav/test/S0915/BAC009S0915W0354.wav", "txt": "让朱婷最佳球员的身份和价值再度彰显"} -{"key": "BAC009S0915W0355", "wav": "./aishell/wav/test/S0915/BAC009S0915W0355.wav", "txt": "尚不足十八岁的她身高为一米八六"} -{"key": "BAC009S0915W0356", "wav": "./aishell/wav/test/S0915/BAC009S0915W0356.wav", "txt": "徐建德统领的中国青年队八战全胜夺得冠军"} -{"key": "BAC009S0915W0357", "wav": "./aishell/wav/test/S0915/BAC009S0915W0357.wav", "txt": "作为主力主攻的朱婷"} -{"key": "BAC009S0915W0358", "wav": "./aishell/wav/test/S0915/BAC009S0915W0358.wav", "txt": "从而被授予最有价值球员荣誉"} -{"key": "BAC009S0915W0360", "wav": "./aishell/wav/test/S0915/BAC009S0915W0360.wav", "txt": "当时身披八号战袍的她身高达到了一米九五公分"} -{"key": "BAC009S0915W0361", "wav": "./aishell/wav/test/S0915/BAC009S0915W0361.wav", "txt": "朱婷斩获了一六七分"} -{"key": "BAC009S0915W0362", "wav": "./aishell/wav/test/S0915/BAC009S0915W0362.wav", "txt": "与多米尼加的马丁内斯一起摘得最佳得分奖"} -{"key": "BAC009S0915W0363", "wav": "./aishell/wav/test/S0915/BAC009S0915W0363.wav", "txt": "随后还以百分之五十三点五六的得分率拿到了最佳进攻的大奖"} -{"key": "BAC009S0915W0364", "wav": "./aishell/wav/test/S0915/BAC009S0915W0364.wav", "txt": "朱婷荣膺最有价值球员"} -{"key": "BAC009S0915W0365", "wav": "./aishell/wav/test/S0915/BAC009S0915W0365.wav", "txt": "还与巴西队的加比一起入选最佳主攻"} -{"key": "BAC009S0915W0366", "wav": "./aishell/wav/test/S0915/BAC009S0915W0366.wav", "txt": "当年的整个世青赛上"} -{"key": "BAC009S0915W0367", "wav": "./aishell/wav/test/S0915/BAC009S0915W0367.wav", "txt": "中国队虽然如愿夺冠"} -{"key": "BAC009S0915W0368", "wav": "./aishell/wav/test/S0915/BAC009S0915W0368.wav", "txt": "朱婷却是唯一的硕果"} -{"key": "BAC009S0915W0369", "wav": "./aishell/wav/test/S0915/BAC009S0915W0369.wav", "txt": "去年六月下旬举行的中国国际精英赛北仑站"} -{"key": "BAC009S0915W0371", "wav": "./aishell/wav/test/S0915/BAC009S0915W0371.wav", "txt": "当时郎平率队三战全胜名列第一"} -{"key": "BAC009S0915W0372", "wav": "./aishell/wav/test/S0915/BAC009S0915W0372.wav", "txt": "朱婷两场比赛担任首发"} -{"key": "BAC009S0915W0374", "wav": "./aishell/wav/test/S0915/BAC009S0915W0374.wav", "txt": "而在今年的香港站上"} -{"key": "BAC009S0915W0375", "wav": "./aishell/wav/test/S0915/BAC009S0915W0375.wav", "txt": "中国队三比二力克美国队收获分站赛九连胜"} -{"key": "BAC009S0915W0376", "wav": "./aishell/wav/test/S0915/BAC009S0915W0376.wav", "txt": "赛后主攻朱婷获最有价值球员和最受欢迎球员"} -{"key": "BAC009S0915W0377", "wav": "./aishell/wav/test/S0915/BAC009S0915W0377.wav", "txt": "主教练郎平获得最佳教练"} -{"key": "BAC009S0915W0378", "wav": "./aishell/wav/test/S0915/BAC009S0915W0378.wav", "txt": "三场比赛朱婷均有出色表现"} -{"key": "BAC009S0915W0379", "wav": "./aishell/wav/test/S0915/BAC009S0915W0379.wav", "txt": "朱婷共计拿下二十四分"} -{"key": "BAC009S0915W0380", "wav": "./aishell/wav/test/S0915/BAC009S0915W0380.wav", "txt": "第二场对阵日本也拿下全队第二高的十二分"} -{"key": "BAC009S0915W0381", "wav": "./aishell/wav/test/S0915/BAC009S0915W0381.wav", "txt": "获得二十三分荣誉全场得分王"} -{"key": "BAC009S0915W0382", "wav": "./aishell/wav/test/S0915/BAC009S0915W0382.wav", "txt": "在分站赛总得分榜上"} -{"key": "BAC009S0915W0383", "wav": "./aishell/wav/test/S0915/BAC009S0915W0383.wav", "txt": "朱婷以一百五十七分领先群芳"} -{"key": "BAC009S0915W0384", "wav": "./aishell/wav/test/S0915/BAC009S0915W0384.wav", "txt": "其中扣球拿到一百三十二分"} -{"key": "BAC009S0915W0385", "wav": "./aishell/wav/test/S0915/BAC009S0915W0385.wav", "txt": "扣球成功率五十四点百分之十高居榜首"} -{"key": "BAC009S0915W0386", "wav": "./aishell/wav/test/S0915/BAC009S0915W0386.wav", "txt": "人们首先会想到她的高度"} -{"key": 
"BAC009S0915W0387", "wav": "./aishell/wav/test/S0915/BAC009S0915W0387.wav", "txt": "其一米九五的身高三米二七的扣球高度"} -{"key": "BAC009S0915W0388", "wav": "./aishell/wav/test/S0915/BAC009S0915W0388.wav", "txt": "在比赛中确实非常有利"} -{"key": "BAC009S0915W0389", "wav": "./aishell/wav/test/S0915/BAC009S0915W0389.wav", "txt": "朱婷进攻相对比较简单"} -{"key": "BAC009S0915W0390", "wav": "./aishell/wav/test/S0915/BAC009S0915W0390.wav", "txt": "主要是四号位的高点强攻和六号位的后排进攻"} -{"key": "BAC009S0915W0391", "wav": "./aishell/wav/test/S0915/BAC009S0915W0391.wav", "txt": "四号位进攻以大斜线为主"} -{"key": "BAC009S0915W0392", "wav": "./aishell/wav/test/S0915/BAC009S0915W0392.wav", "txt": "她进攻的变化逐渐多了起来"} -{"key": "BAC009S0915W0393", "wav": "./aishell/wav/test/S0915/BAC009S0915W0393.wav", "txt": "首先是增加了二号位的进攻"} -{"key": "BAC009S0915W0394", "wav": "./aishell/wav/test/S0915/BAC009S0915W0394.wav", "txt": "即当自己轮转到前排二号位时"} -{"key": "BAC009S0915W0395", "wav": "./aishell/wav/test/S0915/BAC009S0915W0395.wav", "txt": "临时客串接应在二号位参与强攻"} -{"key": "BAC009S0915W0396", "wav": "./aishell/wav/test/S0915/BAC009S0915W0396.wav", "txt": "这样既丰富了自己也增加了全队的进攻变化"} -{"key": "BAC009S0915W0397", "wav": "./aishell/wav/test/S0915/BAC009S0915W0397.wav", "txt": "再就是四号位的进攻除了斜线"} -{"key": "BAC009S0915W0398", "wav": "./aishell/wav/test/S0915/BAC009S0915W0398.wav", "txt": "还增加了直线直线和斜线之间的所谓二直线"} -{"key": "BAC009S0915W0399", "wav": "./aishell/wav/test/S0915/BAC009S0915W0399.wav", "txt": "不时还施以非常巧妙的吊球"} -{"key": "BAC009S0915W0400", "wav": "./aishell/wav/test/S0915/BAC009S0915W0400.wav", "txt": "视频中国三一大胜俄罗斯独占女排世界杯榜首"} -{"key": "BAC009S0915W0401", "wav": "./aishell/wav/test/S0915/BAC009S0915W0401.wav", "txt": "日本二零一五女排世界杯单循环赛战至第十轮"} -{"key": "BAC009S0915W0402", "wav": "./aishell/wav/test/S0915/BAC009S0915W0402.wav", "txt": "由郎平挂帅的中国女排在名古屋赛区"} -{"key": "BAC009S0915W0403", "wav": "./aishell/wav/test/S0915/BAC009S0915W0403.wav", "txt": "提升战绩为九胜一负反超至榜首位置"} -{"key": "BAC009S0915W0404", "wav": "./aishell/wav/test/S0915/BAC009S0915W0404.wav", "txt": "上周在北美电影市场上遭遇票房惨剧"} -{"key": "BAC009S0915W0405", "wav": "./aishell/wav/test/S0915/BAC009S0915W0405.wav", "txt": "只以六百四十八万美元的进账排名第八"} -{"key": "BAC009S0915W0406", "wav": "./aishell/wav/test/S0915/BAC009S0915W0406.wav", "txt": "这部电影的失败并没有影响囧瑟夫的心情"} -{"key": "BAC009S0915W0407", "wav": "./aishell/wav/test/S0915/BAC009S0915W0407.wav", "txt": "将自导自演一部名为睡魔的科幻大片"} -{"key": "BAC009S0915W0408", "wav": "./aishell/wav/test/S0915/BAC009S0915W0408.wav", "txt": "让体重维持在四十五公斤左右"} -{"key": "BAC009S0915W0409", "wav": "./aishell/wav/test/S0915/BAC009S0915W0409.wav", "txt": "但网友的吐槽却一直没有停息"} -{"key": "BAC009S0915W0410", "wav": "./aishell/wav/test/S0915/BAC009S0915W0410.wav", "txt": "她在台湾出席活动"} -{"key": "BAC009S0915W0411", "wav": "./aishell/wav/test/S0915/BAC009S0915W0411.wav", "txt": "坦言刚开拍的一个月中"} -{"key": "BAC009S0915W0412", "wav": "./aishell/wav/test/S0915/BAC009S0915W0412.wav", "txt": "心情低落到崩溃大哭"} -{"key": "BAC009S0915W0413", "wav": "./aishell/wav/test/S0915/BAC009S0915W0413.wav", "txt": "甚至出现忧郁症状况"} -{"key": "BAC009S0915W0414", "wav": "./aishell/wav/test/S0915/BAC009S0915W0414.wav", "txt": "搜狐娱乐讯陈妍希传出和陈晓的恋情之后"} -{"key": "BAC009S0915W0415", "wav": "./aishell/wav/test/S0915/BAC009S0915W0415.wav", "txt": "二人一直鲜少回应"} -{"key": "BAC009S0915W0416", "wav": "./aishell/wav/test/S0915/BAC009S0915W0416.wav", "txt": "陈妍希回到台北代言悠游卡"} -{"key": "BAC009S0915W0417", "wav": "./aishell/wav/test/S0915/BAC009S0915W0417.wav", "txt": "外传她可能已经怀孕"} -{"key": "BAC009S0915W0418", "wav": "./aishell/wav/test/S0915/BAC009S0915W0418.wav", "txt": "但陈妍希在出席活动时"} -{"key": "BAC009S0915W0419", "wav": 
"./aishell/wav/test/S0915/BAC009S0915W0419.wav", "txt": "穿高跟鞋快步走"} -{"key": "BAC009S0915W0420", "wav": "./aishell/wav/test/S0915/BAC009S0915W0420.wav", "txt": "似乎也让传言不攻自破"} -{"key": "BAC009S0915W0421", "wav": "./aishell/wav/test/S0915/BAC009S0915W0421.wav", "txt": "贵州都市报十月二十九日报道据台湾媒体报道艺人陈妍希认爱小四岁的大陆小生陈晓"} -{"key": "BAC009S0915W0423", "wav": "./aishell/wav/test/S0915/BAC009S0915W0423.wav", "txt": "两人因合作神雕侠侣擦出爱火"} -{"key": "BAC009S0915W0424", "wav": "./aishell/wav/test/S0915/BAC009S0915W0424.wav", "txt": "恋情发展备受关注"} -{"key": "BAC009S0915W0425", "wav": "./aishell/wav/test/S0915/BAC009S0915W0425.wav", "txt": "更在日前爆出交往七个月准备闪婚"} -{"key": "BAC009S0915W0426", "wav": "./aishell/wav/test/S0915/BAC009S0915W0426.wav", "txt": "连男方在法国包游艇求婚的照片都被网友扒出"} -{"key": "BAC009S0915W0427", "wav": "./aishell/wav/test/S0915/BAC009S0915W0427.wav", "txt": "她坦承当时很惊喜很感动"} -{"key": "BAC009S0915W0428", "wav": "./aishell/wav/test/S0915/BAC009S0915W0428.wav", "txt": "男友受访时也首度大方松口确实已经进入求婚阶段"} -{"key": "BAC009S0915W0429", "wav": "./aishell/wav/test/S0915/BAC009S0915W0429.wav", "txt": "让粉丝听了又惊又喜"} -{"key": "BAC009S0915W0430", "wav": "./aishell/wav/test/S0915/BAC009S0915W0430.wav", "txt": "搜狐娱乐讯据台湾媒体报道"} -{"key": "BAC009S0915W0431", "wav": "./aishell/wav/test/S0915/BAC009S0915W0431.wav", "txt": "记者调查湖南张家界国家森林公园低价团问题"} -{"key": "BAC009S0915W0432", "wav": "./aishell/wav/test/S0915/BAC009S0915W0432.wav", "txt": "四零零元左右的低价两日游在当地非常普遍"} -{"key": "BAC009S0915W0433", "wav": "./aishell/wav/test/S0915/BAC009S0915W0433.wav", "txt": "这种低价游自称费用全包"} -{"key": "BAC009S0915W0434", "wav": "./aishell/wav/test/S0915/BAC009S0915W0434.wav", "txt": "原本自费项目变成必须交费项目"} -{"key": "BAC009S0915W0435", "wav": "./aishell/wav/test/S0915/BAC009S0915W0435.wav", "txt": "导游还诱骗游客加钱走特殊路线"} -{"key": "BAC009S0915W0436", "wav": "./aishell/wav/test/S0915/BAC009S0915W0436.wav", "txt": "面对游客质疑和退团要求"} -{"key": "BAC009S0915W0437", "wav": "./aishell/wav/test/S0915/BAC009S0915W0437.wav", "txt": "导游放言此树是我栽"} -{"key": "BAC009S0915W0438", "wav": "./aishell/wav/test/S0915/BAC009S0915W0438.wav", "txt": "你不可能一分钱不花"} -{"key": "BAC009S0915W0439", "wav": "./aishell/wav/test/S0915/BAC009S0915W0439.wav", "txt": "游客赴港游买瑞士表半个月停摆旅行社久拖不管"} -{"key": "BAC009S0915W0440", "wav": "./aishell/wav/test/S0915/BAC009S0915W0440.wav", "txt": "市民刘先生和江西环球国际旅行社的沟通协商再次失败"} -{"key": "BAC009S0915W0441", "wav": "./aishell/wav/test/S0915/BAC009S0915W0441.wav", "txt": "双方矛盾的焦点是一只瑞士名表"} -{"key": "BAC009S0915W0442", "wav": "./aishell/wav/test/S0915/BAC009S0915W0442.wav", "txt": "游客踩敦煌千年古城遗址拍照反问踩了会掉吗"} -{"key": "BAC009S0915W0443", "wav": "./aishell/wav/test/S0915/BAC009S0915W0443.wav", "txt": "现场图一零月五日下午"} -{"key": "BAC009S0915W0444", "wav": "./aishell/wav/test/S0915/BAC009S0915W0444.wav", "txt": "在甘肃敦煌大方盘城遗址"} -{"key": "BAC009S0915W0445", "wav": "./aishell/wav/test/S0915/BAC009S0915W0445.wav", "txt": "几位游客轮流翻越护栏"} -{"key": "BAC009S0915W0446", "wav": "./aishell/wav/test/S0915/BAC009S0915W0446.wav", "txt": "一位游客在拍照中说人家几千年都没有掉下来"} -{"key": "BAC009S0915W0447", "wav": "./aishell/wav/test/S0915/BAC009S0915W0447.wav", "txt": "踩一下就掉下来了"} -{"key": "BAC009S0915W0448", "wav": "./aishell/wav/test/S0915/BAC009S0915W0448.wav", "txt": "澎湃新闻在现场看到"} -{"key": "BAC009S0915W0449", "wav": "./aishell/wav/test/S0915/BAC009S0915W0449.wav", "txt": "遗址附近有多处警示牌写明严禁跨入保护区"} -{"key": "BAC009S0915W0450", "wav": "./aishell/wav/test/S0915/BAC009S0915W0450.wav", "txt": "游客进店未购物被导游嘲讽官方正在立案处理"} -{"key": "BAC009S0915W0451", "wav": "./aishell/wav/test/S0915/BAC009S0915W0451.wav", "txt": "游客铜缸刻字秀恩爱故宫已报警"} -{"key": "BAC009S0915W0452", "wav": 
"./aishell/wav/test/S0915/BAC009S0915W0452.wav", "txt": "法制晚报讯记者李洁今天傍晚"} -{"key": "BAC009S0915W0453", "wav": "./aishell/wav/test/S0915/BAC009S0915W0453.wav", "txt": "严厉谴责这一不文明应为"} -{"key": "BAC009S0915W0454", "wav": "./aishell/wav/test/S0915/BAC009S0915W0454.wav", "txt": "并称故宫博物院已就此事件向公安机关报案"} -{"key": "BAC009S0915W0455", "wav": "./aishell/wav/test/S0915/BAC009S0915W0455.wav", "txt": "游客青岛遭遇天价虾当地人最多几十元一斤"} -{"key": "BAC009S0915W0456", "wav": "./aishell/wav/test/S0915/BAC009S0915W0456.wav", "txt": "肖先生在上菜后高兴地拍下图片"} -{"key": "BAC009S0915W0457", "wav": "./aishell/wav/test/S0915/BAC009S0915W0457.wav", "txt": "当时他还不知道自己会被暗算"} -{"key": "BAC009S0915W0458", "wav": "./aishell/wav/test/S0915/BAC009S0915W0458.wav", "txt": "游客骑着明孝陵驮碑龟趺拍照市民大煞风景"} -{"key": "BAC009S0915W0459", "wav": "./aishell/wav/test/S0915/BAC009S0915W0459.wav", "txt": "游客骑在龟趺身上报料人供图"} -{"key": "BAC009S0915W0460", "wav": "./aishell/wav/test/S0915/BAC009S0915W0460.wav", "txt": "游戏主播花样作死声称天津是他炸的直播被抓游戏室老板因冲突开枪将人射伤致死"} -{"key": "BAC009S0915W0461", "wav": "./aishell/wav/test/S0915/BAC009S0915W0461.wav", "txt": "一五年后落网"} -{"key": "BAC009S0915W0462", "wav": "./aishell/wav/test/S0915/BAC009S0915W0462.wav", "txt": "贵港民警追凶未言弃嫌犯一五年后落法网"} -{"key": "BAC009S0915W0463", "wav": "./aishell/wav/test/S0915/BAC009S0915W0463.wav", "txt": "游戏平台称投千元可收百万数十民上当"} -{"key": "BAC009S0915W0464", "wav": "./aishell/wav/test/S0915/BAC009S0915W0464.wav", "txt": "信息时报讯记者周伟龙天上不会掉馅饼"} -{"key": "BAC009S0915W0465", "wav": "./aishell/wav/test/S0915/BAC009S0915W0465.wav", "txt": "数十名市民赶到越秀区一酒家维权"} -{"key": "BAC009S0915W0466", "wav": "./aishell/wav/test/S0915/BAC009S0915W0466.wav", "txt": "称他们曾在这里被人游说注册了一游戏平台的账户"} -{"key": "BAC009S0915W0467", "wav": "./aishell/wav/test/S0915/BAC009S0915W0467.wav", "txt": "花费几千元至上万元不等"} -{"key": "BAC009S0915W0468", "wav": "./aishell/wav/test/S0915/BAC009S0915W0468.wav", "txt": "原以为可以按照游戏规则定期分红提现"} -{"key": "BAC009S0915W0469", "wav": "./aishell/wav/test/S0915/BAC009S0915W0469.wav", "txt": "孰料从上月底开始平台关闭"} -{"key": "BAC009S0915W0470", "wav": "./aishell/wav/test/S0915/BAC009S0915W0470.wav", "txt": "随后众人一起到东山派出所报案"} -{"key": "BAC009S0915W0471", "wav": "./aishell/wav/test/S0915/BAC009S0915W0471.wav", "txt": "有待警方进一步调查"} -{"key": "BAC009S0915W0472", "wav": "./aishell/wav/test/S0915/BAC009S0915W0472.wav", "txt": "游戏网站频遭攻击每周交二零零零元保护费息事宁人"} -{"key": "BAC009S0915W0473", "wav": "./aishell/wav/test/S0915/BAC009S0915W0473.wav", "txt": "办案民警检查作案设备金华警方供图昨天"} -{"key": "BAC009S0915W0474", "wav": "./aishell/wav/test/S0915/BAC009S0915W0474.wav", "txt": "记者从金华市公安局获悉"} -{"key": "BAC009S0915W0475", "wav": "./aishell/wav/test/S0915/BAC009S0915W0475.wav", "txt": "仅半年就敲诈勒索了五七二万元"} -{"key": "BAC009S0915W0476", "wav": "./aishell/wav/test/S0915/BAC009S0915W0476.wav", "txt": "该案也被列为公安部督办大案"} -{"key": "BAC009S0915W0477", "wav": "./aishell/wav/test/S0915/BAC009S0915W0477.wav", "txt": "警方已抓获一五名犯罪嫌疑人"} -{"key": "BAC009S0915W0478", "wav": "./aishell/wav/test/S0915/BAC009S0915W0478.wav", "txt": "湖北一七二名教师转岗当保安其中有人曾是校长"} -{"key": "BAC009S0915W0479", "wav": "./aishell/wav/test/S0915/BAC009S0915W0479.wav", "txt": "一身保安制服的他准时站在校门口"} -{"key": "BAC009S0915W0480", "wav": "./aishell/wav/test/S0915/BAC009S0915W0480.wav", "txt": "手握电动栅栏遥控器"} -{"key": "BAC009S0915W0481", "wav": "./aishell/wav/test/S0915/BAC009S0915W0481.wav", "txt": "眼睛警惕地注视着进出校门的车辆和学生"} -{"key": "BAC009S0915W0482", "wav": "./aishell/wav/test/S0915/BAC009S0915W0482.wav", "txt": "湖北一九岁女护士深夜遭抢劫杀害嫌疑嫌犯已落网"} -{"key": "BAC009S0915W0483", "wav": "./aishell/wav/test/S0915/BAC009S0915W0483.wav", "txt": "凶手被抓捕归案钟欣摄"} -{"key": 
"BAC009S0915W0484", "wav": "./aishell/wav/test/S0915/BAC009S0915W0484.wav", "txt": "湖北二五岁女子从未来例假基因检查是男身"} -{"key": "BAC009S0915W0485", "wav": "./aishell/wav/test/S0915/BAC009S0915W0485.wav", "txt": "家住汉阳的莎莎化名"} -{"key": "BAC009S0915W0486", "wav": "./aishell/wav/test/S0915/BAC009S0915W0486.wav", "txt": "近日在医院检查才发现"} -{"key": "BAC009S0915W0487", "wav": "./aishell/wav/test/S0915/BAC009S0915W0487.wav", "txt": "她的基因竟是个纯爷们"} -{"key": "BAC009S0915W0488", "wav": "./aishell/wav/test/S0915/BAC009S0915W0488.wav", "txt": "湖北三亿打造亚洲玫瑰基地多个种植园杂草丛生"} -{"key": "BAC009S0915W0489", "wav": "./aishell/wav/test/S0915/BAC009S0915W0489.wav", "txt": "湖北四名被捅法官脱离危险一女法官尚在哺乳期"} -{"key": "BAC009S0915W0490", "wav": "./aishell/wav/test/S0915/BAC009S0915W0490.wav", "txt": "经十堰市中级人民法院确认"} -{"key": "BAC009S0915W0491", "wav": "./aishell/wav/test/S0915/BAC009S0915W0491.wav", "txt": "四名法官系送达法律文书时被刺伤"} -{"key": "BAC009S0915W0492", "wav": "./aishell/wav/test/S0915/BAC009S0915W0492.wav", "txt": "目前均暂无生命危险"} -{"key": "BAC009S0915W0493", "wav": "./aishell/wav/test/S0915/BAC009S0915W0493.wav", "txt": "其中一女法官尚在哺乳期"} -{"key": "BAC009S0915W0494", "wav": "./aishell/wav/test/S0915/BAC009S0915W0494.wav", "txt": "湖北六零后求婚九零后被指责欠款六千万因诈骗取保候审"} -{"key": "BAC009S0915W0495", "wav": "./aishell/wav/test/S0915/BAC009S0915W0495.wav", "txt": "湖北黄石市一家商场前"} -{"key": "BAC009S0916W0121", "wav": "./aishell/wav/test/S0916/BAC009S0916W0121.wav", "txt": "真正落地的产品却非常地少"} -{"key": "BAC009S0916W0122", "wav": "./aishell/wav/test/S0916/BAC009S0916W0122.wav", "txt": "而落地后的产品与客户的期待甚远"} -{"key": "BAC009S0916W0123", "wav": "./aishell/wav/test/S0916/BAC009S0916W0123.wav", "txt": "这些状况每日均上演发生"} -{"key": "BAC009S0916W0124", "wav": "./aishell/wav/test/S0916/BAC009S0916W0124.wav", "txt": "大部分的创业者举步艰辛"} -{"key": "BAC009S0916W0125", "wav": "./aishell/wav/test/S0916/BAC009S0916W0125.wav", "txt": "钱烧完了东西出不来"} -{"key": "BAC009S0916W0126", "wav": "./aishell/wav/test/S0916/BAC009S0916W0126.wav", "txt": "创业者成了智慧时代的贡品"} -{"key": "BAC009S0916W0127", "wav": "./aishell/wav/test/S0916/BAC009S0916W0127.wav", "txt": "这不是这个时代的不公平"} -{"key": "BAC009S0916W0128", "wav": "./aishell/wav/test/S0916/BAC009S0916W0128.wav", "txt": "而是我们对这个时代了解的太少"} -{"key": "BAC009S0916W0129", "wav": "./aishell/wav/test/S0916/BAC009S0916W0129.wav", "txt": "如果我们懂得多一点智能家居产品市场的法则"} -{"key": "BAC009S0916W0130", "wav": "./aishell/wav/test/S0916/BAC009S0916W0130.wav", "txt": "我们的路也许会好走得多"} -{"key": "BAC009S0916W0131", "wav": "./aishell/wav/test/S0916/BAC009S0916W0131.wav", "txt": "一智能产品的安全"} -{"key": "BAC009S0916W0132", "wav": "./aishell/wav/test/S0916/BAC009S0916W0132.wav", "txt": "连接的最高代价就是安全问题"} -{"key": "BAC009S0916W0133", "wav": "./aishell/wav/test/S0916/BAC009S0916W0133.wav", "txt": "成千上万的产品通过无线连接"} -{"key": "BAC009S0916W0134", "wav": "./aishell/wav/test/S0916/BAC009S0916W0134.wav", "txt": "只要一个单品存在安全漏洞"} -{"key": "BAC009S0916W0135", "wav": "./aishell/wav/test/S0916/BAC009S0916W0135.wav", "txt": "整个系统的安全就会出现问题"} -{"key": "BAC009S0916W0136", "wav": "./aishell/wav/test/S0916/BAC009S0916W0136.wav", "txt": "产生非常可怕的结果"} -{"key": "BAC009S0916W0137", "wav": "./aishell/wav/test/S0916/BAC009S0916W0137.wav", "txt": "现阶段市场上落地的产品大多对安全的认知都存在缺陷"} -{"key": "BAC009S0916W0138", "wav": "./aishell/wav/test/S0916/BAC009S0916W0138.wav", "txt": "普遍认为现在的市场很小且还是单品"} -{"key": "BAC009S0916W0139", "wav": "./aishell/wav/test/S0916/BAC009S0916W0139.wav", "txt": "不用花那么大的成本去解决安全的问题"} -{"key": "BAC009S0916W0140", "wav": "./aishell/wav/test/S0916/BAC009S0916W0140.wav", "txt": "可大家必须明白一个道理"} -{"key": "BAC009S0916W0141", "wav": 
"./aishell/wav/test/S0916/BAC009S0916W0141.wav", "txt": "当大家习惯安全的问题留以后解决的时候"} -{"key": "BAC009S0916W0142", "wav": "./aishell/wav/test/S0916/BAC009S0916W0142.wav", "txt": "安全问题立即会成为你的内伤"} -{"key": "BAC009S0916W0143", "wav": "./aishell/wav/test/S0916/BAC009S0916W0143.wav", "txt": "但综观国内同类企业"} -{"key": "BAC009S0916W0144", "wav": "./aishell/wav/test/S0916/BAC009S0916W0144.wav", "txt": "以深圳智能锁业代表为例"} -{"key": "BAC009S0916W0145", "wav": "./aishell/wav/test/S0916/BAC009S0916W0145.wav", "txt": "在安全加解密认证等方面也做足了功夫"} -{"key": "BAC009S0916W0147", "wav": "./aishell/wav/test/S0916/BAC009S0916W0147.wav", "txt": "软件与硬件都做了深度的对接"} -{"key": "BAC009S0916W0148", "wav": "./aishell/wav/test/S0916/BAC009S0916W0148.wav", "txt": "把顾客个人资料全部归客户自己保管"} -{"key": "BAC009S0916W0149", "wav": "./aishell/wav/test/S0916/BAC009S0916W0149.wav", "txt": "企业不接触客户个人资料"} -{"key": "BAC009S0916W0150", "wav": "./aishell/wav/test/S0916/BAC009S0916W0150.wav", "txt": "许多企业都把取得顾客个人资料当作资本"} -{"key": "BAC009S0916W0151", "wav": "./aishell/wav/test/S0916/BAC009S0916W0151.wav", "txt": "这是智能家居行业的先例"} -{"key": "BAC009S0916W0152", "wav": "./aishell/wav/test/S0916/BAC009S0916W0152.wav", "txt": "必须具有高度习惯融合性和耐用性"} -{"key": "BAC009S0916W0153", "wav": "./aishell/wav/test/S0916/BAC009S0916W0153.wav", "txt": "这决不是八零九零的消费习惯这么单纯的问题"} -{"key": "BAC009S0916W0154", "wav": "./aishell/wav/test/S0916/BAC009S0916W0154.wav", "txt": "是每个家庭成员体验的统一"} -{"key": "BAC009S0916W0155", "wav": "./aishell/wav/test/S0916/BAC009S0916W0155.wav", "txt": "也就是每个成员综合体验的最大公约数"} -{"key": "BAC009S0916W0156", "wav": "./aishell/wav/test/S0916/BAC009S0916W0156.wav", "txt": "以情怀代替体验是非常错误的"} -{"key": "BAC009S0916W0157", "wav": "./aishell/wav/test/S0916/BAC009S0916W0157.wav", "txt": "产品的核心是客户的体验"} -{"key": "BAC009S0916W0158", "wav": "./aishell/wav/test/S0916/BAC009S0916W0158.wav", "txt": "顾客体验的核心是真善美"} -{"key": "BAC009S0916W0159", "wav": "./aishell/wav/test/S0916/BAC009S0916W0159.wav", "txt": "近来看到的许多创新型产品"} -{"key": "BAC009S0916W0160", "wav": "./aishell/wav/test/S0916/BAC009S0916W0160.wav", "txt": "可使用起来让人啼笑皆非"} -{"key": "BAC009S0916W0161", "wav": "./aishell/wav/test/S0916/BAC009S0916W0161.wav", "txt": "加解密的措施如同虚设"} -{"key": "BAC009S0916W0162", "wav": "./aishell/wav/test/S0916/BAC009S0916W0162.wav", "txt": "没有智慧手机的成员无法开门"} -{"key": "BAC009S0916W0163", "wav": "./aishell/wav/test/S0916/BAC009S0916W0163.wav", "txt": "这是一帮精英自恋情怀的产品"} -{"key": "BAC009S0916W0164", "wav": "./aishell/wav/test/S0916/BAC009S0916W0164.wav", "txt": "可美国的月亮总是比中国的亮"} -{"key": "BAC009S0916W0165", "wav": "./aishell/wav/test/S0916/BAC009S0916W0165.wav", "txt": "国内许多媒体或企业都在为其背书"} -{"key": "BAC009S0916W0166", "wav": "./aishell/wav/test/S0916/BAC009S0916W0166.wav", "txt": "而对国内比它更优秀的产品却集体失声"} -{"key": "BAC009S0916W0167", "wav": "./aishell/wav/test/S0916/BAC009S0916W0167.wav", "txt": "只要了解一点核桃锁信息的人都能第一时间感受到"} -{"key": "BAC009S0916W0168", "wav": "./aishell/wav/test/S0916/BAC009S0916W0168.wav", "txt": "一智能家居产品的销售渠道"} -{"key": "BAC009S0916W0169", "wav": "./aishell/wav/test/S0916/BAC009S0916W0169.wav", "txt": "你要懂既然不是电子产品不是易损品不是玩品"} -{"key": "BAC009S0916W0170", "wav": "./aishell/wav/test/S0916/BAC009S0916W0170.wav", "txt": "他是家居产品依托互联网技术升级的家居耐用品"} -{"key": "BAC009S0916W0171", "wav": "./aishell/wav/test/S0916/BAC009S0916W0171.wav", "txt": "这产品的换代周期会较长"} -{"key": "BAC009S0916W0172", "wav": "./aishell/wav/test/S0916/BAC009S0916W0172.wav", "txt": "购买的机会受时间的制约"} -{"key": "BAC009S0916W0173", "wav": "./aishell/wav/test/S0916/BAC009S0916W0173.wav", "txt": "而未来借助更多的互联网技术"} -{"key": "BAC009S0916W0174", "wav": 
"./aishell/wav/test/S0916/BAC009S0916W0174.wav", "txt": "产品的升级速度一定加快"} -{"key": "BAC009S0916W0175", "wav": "./aishell/wav/test/S0916/BAC009S0916W0175.wav", "txt": "而智能家居产品的特殊属性决定了销售渠道的模式"} -{"key": "BAC009S0916W0176", "wav": "./aishell/wav/test/S0916/BAC009S0916W0176.wav", "txt": "他不能按电子产品或传统居家产品的模式去销售"} -{"key": "BAC009S0916W0177", "wav": "./aishell/wav/test/S0916/BAC009S0916W0177.wav", "txt": "除了做好传统门店的体验销售电商平台销售外"} -{"key": "BAC009S0916W0178", "wav": "./aishell/wav/test/S0916/BAC009S0916W0178.wav", "txt": "希望智能家居产品企业在短期的高回报率也是不现实的"} -{"key": "BAC009S0916W0179", "wav": "./aishell/wav/test/S0916/BAC009S0916W0179.wav", "txt": "但可以肯定的他一定是最高成长的企行业"} -{"key": "BAC009S0916W0180", "wav": "./aishell/wav/test/S0916/BAC009S0916W0180.wav", "txt": "一大数据云计算不是你谈的"} -{"key": "BAC009S0916W0181", "wav": "./aishell/wav/test/S0916/BAC009S0916W0181.wav", "txt": "好像不谈你就不属于这个时代的人"} -{"key": "BAC009S0916W0182", "wav": "./aishell/wav/test/S0916/BAC009S0916W0182.wav", "txt": "作用大并不代表每个人"} -{"key": "BAC009S0916W0183", "wav": "./aishell/wav/test/S0916/BAC009S0916W0183.wav", "txt": "大数据云计算是非常烧钱的"} -{"key": "BAC009S0916W0184", "wav": "./aishell/wav/test/S0916/BAC009S0916W0184.wav", "txt": "不是一般的企业个人玩得起的"} -{"key": "BAC009S0916W0185", "wav": "./aishell/wav/test/S0916/BAC009S0916W0185.wav", "txt": "与其厌不其烦的谈论大数据云计算"} -{"key": "BAC009S0916W0186", "wav": "./aishell/wav/test/S0916/BAC009S0916W0186.wav", "txt": "不如做一款实实在在的好产品"} -{"key": "BAC009S0916W0187", "wav": "./aishell/wav/test/S0916/BAC009S0916W0187.wav", "txt": "但是却不在国家文件所指的收费公路范围内"} -{"key": "BAC009S0916W0188", "wav": "./aishell/wav/test/S0916/BAC009S0916W0188.wav", "txt": "而是一条市内快速路"} -{"key": "BAC009S0916W0189", "wav": "./aishell/wav/test/S0916/BAC009S0916W0189.wav", "txt": "对于这条特殊的隧道"} -{"key": "BAC009S0916W0190", "wav": "./aishell/wav/test/S0916/BAC009S0916W0190.wav", "txt": "省交通部门表示应该不会特殊"} -{"key": "BAC009S0916W0191", "wav": "./aishell/wav/test/S0916/BAC009S0916W0191.wav", "txt": "长江隧道估计也顶不住"} -{"key": "BAC009S0916W0192", "wav": "./aishell/wav/test/S0916/BAC009S0916W0192.wav", "txt": "对于提高重大节假日公路通行能力和服务水平"} -{"key": "BAC009S0916W0193", "wav": "./aishell/wav/test/S0916/BAC009S0916W0193.wav", "txt": "降低公众假日出行成本具有重要意义"} -{"key": "BAC009S0916W0194", "wav": "./aishell/wav/test/S0916/BAC009S0916W0194.wav", "txt": "具体工作将由各省区市政府负责统一组织实施"} -{"key": "BAC009S0916W0195", "wav": "./aishell/wav/test/S0916/BAC009S0916W0195.wav", "txt": "国务院及五部门并没有明确实施时间"} -{"key": "BAC009S0916W0196", "wav": "./aishell/wav/test/S0916/BAC009S0916W0196.wav", "txt": "着实让不少网友有些着急"} -{"key": "BAC009S0916W0197", "wav": "./aishell/wav/test/S0916/BAC009S0916W0197.wav", "txt": "免费新规究竟啥时能享受到"} -{"key": "BAC009S0916W0198", "wav": "./aishell/wav/test/S0916/BAC009S0916W0198.wav", "txt": "记者昨日第一时间从江苏省交通运输厅获悉"} -{"key": "BAC009S0916W0199", "wav": "./aishell/wav/test/S0916/BAC009S0916W0199.wav", "txt": "就国家方案我省还会进行再研究"} -{"key": "BAC009S0916W0200", "wav": "./aishell/wav/test/S0916/BAC009S0916W0200.wav", "txt": "具体执行时间由省政府定"} -{"key": "BAC009S0916W0201", "wav": "./aishell/wav/test/S0916/BAC009S0916W0201.wav", "txt": "今年国庆应该可以实施"} -{"key": "BAC009S0916W0202", "wav": "./aishell/wav/test/S0916/BAC009S0916W0202.wav", "txt": "可是通过收费站的车有大客车中型客车还有货车"} -{"key": "BAC009S0916W0203", "wav": "./aishell/wav/test/S0916/BAC009S0916W0203.wav", "txt": "到时候会不会乱成一锅粥"} -{"key": "BAC009S0916W0204", "wav": "./aishell/wav/test/S0916/BAC009S0916W0204.wav", "txt": "在国务院下发的文件中提及"} -{"key": "BAC009S0916W0205", "wav": "./aishell/wav/test/S0916/BAC009S0916W0205.wav", "txt": "为确保免费政策实施后车辆有序通行"} -{"key": "BAC009S0916W0206", "wav": 
"./aishell/wav/test/S0916/BAC009S0916W0206.wav", "txt": "各地区要对公路收费站现有车道进行全面调查"} -{"key": "BAC009S0916W0207", "wav": "./aishell/wav/test/S0916/BAC009S0916W0207.wav", "txt": "合理规划和利用现有收费车道和免费专用通道"} -{"key": "BAC009S0916W0208", "wav": "./aishell/wav/test/S0916/BAC009S0916W0208.wav", "txt": "确保过往车辆分类分车道有序通行"} -{"key": "BAC009S0916W0209", "wav": "./aishell/wav/test/S0916/BAC009S0916W0209.wav", "txt": "记者昨日从省交通部门了解到"} -{"key": "BAC009S0916W0210", "wav": "./aishell/wav/test/S0916/BAC009S0916W0210.wav", "txt": "这是一个比较复杂的问题"} -{"key": "BAC009S0916W0211", "wav": "./aishell/wav/test/S0916/BAC009S0916W0211.wav", "txt": "估计未来系统可能会改造"} -{"key": "BAC009S0916W0212", "wav": "./aishell/wav/test/S0916/BAC009S0916W0212.wav", "txt": "应该不会开免费车道"} -{"key": "BAC009S0916W0213", "wav": "./aishell/wav/test/S0916/BAC009S0916W0213.wav", "txt": "如果开了小车免费车道"} -{"key": "BAC009S0916W0214", "wav": "./aishell/wav/test/S0916/BAC009S0916W0214.wav", "txt": "有大车或是货车误闯或者闯进去了就不好办了"} -{"key": "BAC009S0916W0215", "wav": "./aishell/wav/test/S0916/BAC009S0916W0215.wav", "txt": "有关负责人告诉记者"} -{"key": "BAC009S0916W0216", "wav": "./aishell/wav/test/S0916/BAC009S0916W0216.wav", "txt": "省里会对此进行专门研究讨论"} -{"key": "BAC009S0916W0217", "wav": "./aishell/wav/test/S0916/BAC009S0916W0217.wav", "txt": "看看山东之前是怎么做的记者了解到"} -{"key": "BAC009S0916W0218", "wav": "./aishell/wav/test/S0916/BAC009S0916W0218.wav", "txt": "面对上述这些问题山东是怎么免费放行的呢"} -{"key": "BAC009S0916W0219", "wav": "./aishell/wav/test/S0916/BAC009S0916W0219.wav", "txt": "免费期间收费员还是按照正常放行的"} -{"key": "BAC009S0916W0220", "wav": "./aishell/wav/test/S0916/BAC009S0916W0220.wav", "txt": "山东潍坊的一位李先生告诉记者"} -{"key": "BAC009S0916W0221", "wav": "./aishell/wav/test/S0916/BAC009S0916W0221.wav", "txt": "今年大年初一他开车去海南"} -{"key": "BAC009S0916W0222", "wav": "./aishell/wav/test/S0916/BAC009S0916W0222.wav", "txt": "一路上很多省份的高速公路收费站都是免费放行"} -{"key": "BAC009S0916W0223", "wav": "./aishell/wav/test/S0916/BAC009S0916W0223.wav", "txt": "到了出口车道再把通行卡收回去"} -{"key": "BAC009S0916W0224", "wav": "./aishell/wav/test/S0916/BAC009S0916W0224.wav", "txt": "由于山东免费放行的时间不在春运最高峰"} -{"key": "BAC009S0916W0225", "wav": "./aishell/wav/test/S0916/BAC009S0916W0225.wav", "txt": "大年初一路上都没什么车"} -{"key": "BAC009S0916W0226", "wav": "./aishell/wav/test/S0916/BAC009S0916W0226.wav", "txt": "所以倒也没产生收费站排队的现象"} -{"key": "BAC009S0916W0227", "wav": "./aishell/wav/test/S0916/BAC009S0916W0227.wav", "txt": "扩大到四个小长假之后"} -{"key": "BAC009S0916W0228", "wav": "./aishell/wav/test/S0916/BAC009S0916W0228.wav", "txt": "国务院批准银行系基金公司再扩容"} -{"key": "BAC009S0916W0229", "wav": "./aishell/wav/test/S0916/BAC009S0916W0229.wav", "txt": "本报记者蔡宗琦中国证券报记者获悉"} -{"key": "BAC009S0916W0230", "wav": "./aishell/wav/test/S0916/BAC009S0916W0230.wav", "txt": "公募基金管理业务有关工作"} -{"key": "BAC009S0916W0231", "wav": "./aishell/wav/test/S0916/BAC009S0916W0231.wav", "txt": "积极推动基金产品审核制度改革"} -{"key": "BAC009S0916W0232", "wav": "./aishell/wav/test/S0916/BAC009S0916W0232.wav", "txt": "鼓励更多资金投资资本市场"} -{"key": "BAC009S0916W0233", "wav": "./aishell/wav/test/S0916/BAC009S0916W0233.wav", "txt": "先后两批共八家商业银行设立或参股八家基金管理公司"} -{"key": "BAC009S0916W0234", "wav": "./aishell/wav/test/S0916/BAC009S0916W0234.wav", "txt": "试点基金管理公司发展态势良好"} -{"key": "BAC009S0916W0235", "wav": "./aishell/wav/test/S0916/BAC009S0916W0235.wav", "txt": "工商银行建设银行和交通银行为首批试点银行"} -{"key": "BAC009S0916W0236", "wav": "./aishell/wav/test/S0916/BAC009S0916W0236.wav", "txt": "增加机构投资者数量"} -{"key": "BAC009S0916W0237", "wav": "./aishell/wav/test/S0916/BAC009S0916W0237.wav", "txt": "促进基金行业规范发展"} -{"key": "BAC009S0916W0238", "wav": 
"./aishell/wav/test/S0916/BAC009S0916W0238.wav", "txt": "为商业银行探索跨业经营运作积累经验"} -{"key": "BAC009S0916W0239", "wav": "./aishell/wav/test/S0916/BAC009S0916W0239.wav", "txt": "此举可能将进一步推动金融混业经营"} -{"key": "BAC009S0916W0240", "wav": "./aishell/wav/test/S0916/BAC009S0916W0240.wav", "txt": "随着对商业银行设立基金管理公司门槛放宽"} -{"key": "BAC009S0916W0241", "wav": "./aishell/wav/test/S0916/BAC009S0916W0241.wav", "txt": "我国资本市场将迎来更多机构投资者"} -{"key": "BAC009S0916W0242", "wav": "./aishell/wav/test/S0916/BAC009S0916W0242.wav", "txt": "更加有利于价值投资理念形成"} -{"key": "BAC009S0916W0243", "wav": "./aishell/wav/test/S0916/BAC009S0916W0243.wav", "txt": "保险资产管理公司如符合有关规定"} -{"key": "BAC009S0916W0244", "wav": "./aishell/wav/test/S0916/BAC009S0916W0244.wav", "txt": "可以向有关金融监管部门申请"} -{"key": "BAC009S0916W0245", "wav": "./aishell/wav/test/S0916/BAC009S0916W0245.wav", "txt": "依法开展公募性质的资产管理业务"} -{"key": "BAC009S0916W0246", "wav": "./aishell/wav/test/S0916/BAC009S0916W0246.wav", "txt": "通知扩大保险资管公司业务范围"} -{"key": "BAC009S0916W0247", "wav": "./aishell/wav/test/S0916/BAC009S0916W0247.wav", "txt": "这体现出监管部门开放管理的思路"} -{"key": "BAC009S0916W0248", "wav": "./aishell/wav/test/S0916/BAC009S0916W0248.wav", "txt": "允许各类资产管理公司同台竞技"} -{"key": "BAC009S0916W0249", "wav": "./aishell/wav/test/S0916/BAC009S0916W0249.wav", "txt": "在遴选优质管理人提升保险资金投资收益率的同时"} -{"key": "BAC009S0916W0250", "wav": "./aishell/wav/test/S0916/BAC009S0916W0250.wav", "txt": "也通过机构间的竞争促进保险资管公司的转型发展"} -{"key": "BAC009S0916W0251", "wav": "./aishell/wav/test/S0916/BAC009S0916W0251.wav", "txt": "明确了参股基金管理公司股东"} -{"key": "BAC009S0916W0252", "wav": "./aishell/wav/test/S0916/BAC009S0916W0252.wav", "txt": "证监会新闻发言人邓给解释"} -{"key": "BAC009S0916W0253", "wav": "./aishell/wav/test/S0916/BAC009S0916W0253.wav", "txt": "中国的银行居垄断地位"} -{"key": "BAC009S0916W0254", "wav": "./aishell/wav/test/S0916/BAC009S0916W0254.wav", "txt": "作风向来无耻加强势苹果也因强势出名"} -{"key": "BAC009S0916W0255", "wav": "./aishell/wav/test/S0916/BAC009S0916W0255.wav", "txt": "可参照中国移动和苹果的合作传闻中"} -{"key": "BAC009S0916W0257", "wav": "./aishell/wav/test/S0916/BAC009S0916W0257.wav", "txt": "这估计很难让掉进钱眼儿的四大银行接受"} -{"key": "BAC009S0916W0260", "wav": "./aishell/wav/test/S0916/BAC009S0916W0260.wav", "txt": "但却鲜有人会像苹果的服务付费"} -{"key": "BAC009S0916W0261", "wav": "./aishell/wav/test/S0916/BAC009S0916W0261.wav", "txt": "更现实的的困难在于"} -{"key": "BAC009S0916W0263", "wav": "./aishell/wav/test/S0916/BAC009S0916W0263.wav", "txt": "粗估下来大概要七十亿"} -{"key": "BAC009S0916W0264", "wav": "./aishell/wav/test/S0916/BAC009S0916W0264.wav", "txt": "这还不包括改造过程中的渠道分食"} -{"key": "BAC009S0916W0265", "wav": "./aishell/wav/test/S0916/BAC009S0916W0265.wav", "txt": "以及给领导们的审批费用"} -{"key": "BAC009S0916W0266", "wav": "./aishell/wav/test/S0916/BAC009S0916W0266.wav", "txt": "从支付的大环境上看"} -{"key": "BAC009S0916W0268", "wav": "./aishell/wav/test/S0916/BAC009S0916W0268.wav", "txt": "它依旧要面对政府的刁难"} -{"key": "BAC009S0916W0270", "wav": "./aishell/wav/test/S0916/BAC009S0916W0270.wav", "txt": "政府失控的可不是什么隐私了"} -{"key": "BAC009S0916W0271", "wav": "./aishell/wav/test/S0916/BAC009S0916W0271.wav", "txt": "而是实实在在的金融命脉"} -{"key": "BAC009S0916W0272", "wav": "./aishell/wav/test/S0916/BAC009S0916W0272.wav", "txt": "慈禧太后就因乔致庸创办了票号"} -{"key": "BAC009S0916W0273", "wav": "./aishell/wav/test/S0916/BAC009S0916W0273.wav", "txt": "害怕其掌握国家金融命脉"} -{"key": "BAC009S0916W0274", "wav": "./aishell/wav/test/S0916/BAC009S0916W0274.wav", "txt": "而将他软禁十年之久"} -{"key": "BAC009S0916W0275", "wav": "./aishell/wav/test/S0916/BAC009S0916W0275.wav", "txt": "何况是一个来自美帝的小苹果呢"} -{"key": "BAC009S0916W0276", "wav": 
"./aishell/wav/test/S0916/BAC009S0916W0276.wav", "txt": "科幻星系康斯坦丁文"} -{"key": "BAC009S0916W0277", "wav": "./aishell/wav/test/S0916/BAC009S0916W0277.wav", "txt": "苹果一口气召开了两次新品发布会"} -{"key": "BAC009S0916W0278", "wav": "./aishell/wav/test/S0916/BAC009S0916W0278.wav", "txt": "就在会场的凳子和垃圾尚未收拾干净的时候"} -{"key": "BAC009S0916W0279", "wav": "./aishell/wav/test/S0916/BAC009S0916W0279.wav", "txt": "全世界的报道已经蜂拥而至"} -{"key": "BAC009S0916W0280", "wav": "./aishell/wav/test/S0916/BAC009S0916W0280.wav", "txt": "失望中夹杂着嘲讽的情绪霸占了各模块的头条"} -{"key": "BAC009S0916W0281", "wav": "./aishell/wav/test/S0916/BAC009S0916W0281.wav", "txt": "据华尔街日报网站报道"} -{"key": "BAC009S0916W0282", "wav": "./aishell/wav/test/S0916/BAC009S0916W0282.wav", "txt": "在自己全身心的努力和坚持之下"} -{"key": "BAC009S0916W0284", "wav": "./aishell/wav/test/S0916/BAC009S0916W0284.wav", "txt": "艾维本周四晚在旧金山现代艺术馆向大众表示"} -{"key": "BAC009S0916W0286", "wav": "./aishell/wav/test/S0916/BAC009S0916W0286.wav", "txt": "主要是因为社会对可穿戴智能手表的期望太高"} -{"key": "BAC009S0916W0287", "wav": "./aishell/wav/test/S0916/BAC009S0916W0287.wav", "txt": "手腕是配戴轻便型互动设备与休闲设备的理想之处"} -{"key": "BAC009S0916W0288", "wav": "./aishell/wav/test/S0916/BAC009S0916W0288.wav", "txt": "但不适合那些笨重的解读设备"} -{"key": "BAC009S0916W0289", "wav": "./aishell/wav/test/S0916/BAC009S0916W0289.wav", "txt": "艾维表示尽管苹果智能手表拥有诸多功能"} -{"key": "BAC009S0916W0290", "wav": "./aishell/wav/test/S0916/BAC009S0916W0290.wav", "txt": "这种产品的设计仍需考虑文化历史和未来等因素"} -{"key": "BAC009S0916W0291", "wav": "./aishell/wav/test/S0916/BAC009S0916W0291.wav", "txt": "艾维现为苹果主管设计业务的高级副总裁"} -{"key": "BAC009S0916W0292", "wav": "./aishell/wav/test/S0916/BAC009S0916W0292.wav", "txt": "帮助设计了苹果多项产品的外观和用户体验"} -{"key": "BAC009S0916W0294", "wav": "./aishell/wav/test/S0916/BAC009S0916W0294.wav", "txt": "苹果计划于明年初开始销售其智能手表"} -{"key": "BAC009S0916W0295", "wav": "./aishell/wav/test/S0916/BAC009S0916W0295.wav", "txt": "该公司于上个月简单地宣布了智能手表相关的情况"} -{"key": "BAC009S0916W0296", "wav": "./aishell/wav/test/S0916/BAC009S0916W0296.wav", "txt": "其将提供三种版本的智能手表"} -{"key": "BAC009S0916W0297", "wav": "./aishell/wav/test/S0916/BAC009S0916W0297.wav", "txt": "起步价为三百四九美元十"} -{"key": "BAC009S0916W0298", "wav": "./aishell/wav/test/S0916/BAC009S0916W0298.wav", "txt": "苹果没有透露更昂贵智能手表的具体售价"} -{"key": "BAC009S0916W0299", "wav": "./aishell/wav/test/S0916/BAC009S0916W0299.wav", "txt": "这些手表将配置不同的表带"} -{"key": "BAC009S0916W0300", "wav": "./aishell/wav/test/S0916/BAC009S0916W0300.wav", "txt": "以满足不同用户的需求"} -{"key": "BAC009S0916W0301", "wav": "./aishell/wav/test/S0916/BAC009S0916W0301.wav", "txt": "市场上还有诸多其他制造商也在尝试生产智能手表"} -{"key": "BAC009S0916W0302", "wav": "./aishell/wav/test/S0916/BAC009S0916W0302.wav", "txt": "但这些厂商的产品都难以进入主流"} -{"key": "BAC009S0916W0303", "wav": "./aishell/wav/test/S0916/BAC009S0916W0303.wav", "txt": "这是未来的必经之路"} -{"key": "BAC009S0916W0304", "wav": "./aishell/wav/test/S0916/BAC009S0916W0304.wav", "txt": "美的家用空调事业部总裁吴文新表示"} -{"key": "BAC009S0916W0305", "wav": "./aishell/wav/test/S0916/BAC009S0916W0305.wav", "txt": "每日经济新闻记者从美的家用空调事业部了解到"} -{"key": "BAC009S0916W0306", "wav": "./aishell/wav/test/S0916/BAC009S0916W0306.wav", "txt": "自二零一一年事业部启动自动化升级至今的四年里"} -{"key": "BAC009S0916W0307", "wav": "./aishell/wav/test/S0916/BAC009S0916W0307.wav", "txt": "工人数量减少近一半"} -{"key": "BAC009S0916W0308", "wav": "./aishell/wav/test/S0916/BAC009S0916W0308.wav", "txt": "美的家用空调事业部制造副总裁乌守保对记者"} -{"key": "BAC009S0916W0309", "wav": "./aishell/wav/test/S0916/BAC009S0916W0309.wav", "txt": "老板电器的新增量创新需求追求极致搜狐科技"} -{"key": "BAC009S0916W0310", "wav": "./aishell/wav/test/S0916/BAC009S0916W0310.wav", "txt": 
"质变中的世界工厂中国正在由中国制造向中国智造蜕变"} -{"key": "BAC009S0916W0311", "wav": "./aishell/wav/test/S0916/BAC009S0916W0311.wav", "txt": "如何借力拥抱互联网加这一全新变量"} -{"key": "BAC009S0916W0312", "wav": "./aishell/wav/test/S0916/BAC009S0916W0312.wav", "txt": "如何重新激活内部潜能"} -{"key": "BAC009S0916W0313", "wav": "./aishell/wav/test/S0916/BAC009S0916W0313.wav", "txt": "便是区别行业龙头企业经营智慧高低的关键时刻"} -{"key": "BAC009S0916W0314", "wav": "./aishell/wav/test/S0916/BAC009S0916W0314.wav", "txt": "身处传统白色家电领域中的重要一支到厨房电器"} -{"key": "BAC009S0916W0315", "wav": "./aishell/wav/test/S0916/BAC009S0916W0315.wav", "txt": "多年来保持奇高市占率的老板电器"} -{"key": "BAC009S0916W0316", "wav": "./aishell/wav/test/S0916/BAC009S0916W0316.wav", "txt": "在成名三十馀年后仍在竭力寻求业态的新鲜化和可能性"} -{"key": "BAC009S0916W0317", "wav": "./aishell/wav/test/S0916/BAC009S0916W0317.wav", "txt": "能否找到厨电行业下一个未知的增量"} -{"key": "BAC009S0916W0318", "wav": "./aishell/wav/test/S0916/BAC009S0916W0318.wav", "txt": "也成为老板电器和它的宿敌们能否领跑下半程的关键"} -{"key": "BAC009S0916W0319", "wav": "./aishell/wav/test/S0916/BAC009S0916W0319.wav", "txt": "阐述老板电器和内部创新外部国际化如何进行破题"} -{"key": "BAC009S0916W0320", "wav": "./aishell/wav/test/S0916/BAC009S0916W0320.wav", "txt": "老板电器如何看待公司的创新驱动"} -{"key": "BAC009S0916W0321", "wav": "./aishell/wav/test/S0916/BAC009S0916W0321.wav", "txt": "赵继宏老板电器做厨电已经三十多年了"} -{"key": "BAC009S0916W0322", "wav": "./aishell/wav/test/S0916/BAC009S0916W0322.wav", "txt": "作为企业理念和产品技术必须要走在时代的前面"} -{"key": "BAC009S0916W0323", "wav": "./aishell/wav/test/S0916/BAC009S0916W0323.wav", "txt": "现在中国的八十五后和九十后消费人群已经成为消费主体"} -{"key": "BAC009S0916W0324", "wav": "./aishell/wav/test/S0916/BAC009S0916W0324.wav", "txt": "他们需要的是智能厨房智能家居与家电"} -{"key": "BAC009S0916W0325", "wav": "./aishell/wav/test/S0916/BAC009S0916W0325.wav", "txt": "公司为此研发并推出市场的智能产品非常贴近市场"} -{"key": "BAC009S0916W0326", "wav": "./aishell/wav/test/S0916/BAC009S0916W0326.wav", "txt": "围绕消费者消费者需要什么"} -{"key": "BAC009S0916W0327", "wav": "./aishell/wav/test/S0916/BAC009S0916W0327.wav", "txt": "我们开发什么的产品研发策略"} -{"key": "BAC009S0916W0328", "wav": "./aishell/wav/test/S0916/BAC009S0916W0328.wav", "txt": "除了产品功能必须不错之外"} -{"key": "BAC009S0916W0329", "wav": "./aishell/wav/test/S0916/BAC009S0916W0329.wav", "txt": "以保证持续长久的黏性互动"} -{"key": "BAC009S0916W0330", "wav": "./aishell/wav/test/S0916/BAC009S0916W0330.wav", "txt": "产品创新其实也是一个双向互动的过程"} -{"key": "BAC009S0916W0331", "wav": "./aishell/wav/test/S0916/BAC009S0916W0331.wav", "txt": "现在消费者的需求越来越个性化差异化"} -{"key": "BAC009S0916W0332", "wav": "./aishell/wav/test/S0916/BAC009S0916W0332.wav", "txt": "可以和我们的消费者有很多的互动并提供超值服务"} -{"key": "BAC009S0916W0333", "wav": "./aishell/wav/test/S0916/BAC009S0916W0333.wav", "txt": "这些都是和消费者增添黏性互动的方式"} -{"key": "BAC009S0916W0334", "wav": "./aishell/wav/test/S0916/BAC009S0916W0334.wav", "txt": "这个方向的创新以后还有更多的东西可以发挥作用"} -{"key": "BAC009S0916W0335", "wav": "./aishell/wav/test/S0916/BAC009S0916W0335.wav", "txt": "如今的智能家电更多意义上是智能加上互动"} -{"key": "BAC009S0916W0336", "wav": "./aishell/wav/test/S0916/BAC009S0916W0336.wav", "txt": "也就是老板电器总结的自动加互动"} -{"key": "BAC009S0916W0338", "wav": "./aishell/wav/test/S0916/BAC009S0916W0338.wav", "txt": "只要在明天的最后一战中赢下东道主日本"} -{"key": "BAC009S0916W0339", "wav": "./aishell/wav/test/S0916/BAC009S0916W0339.wav", "txt": "高清女排力擒俄罗斯夺冠占主动众将喜极而泣"} -{"key": "BAC009S0916W0340", "wav": "./aishell/wav/test/S0916/BAC009S0916W0340.wav", "txt": "今天大家打得都挺好的"} -{"key": "BAC009S0916W0341", "wav": "./aishell/wav/test/S0916/BAC009S0916W0341.wav", "txt": "我们是一条心在打团结作战"} -{"key": "BAC009S0916W0342", "wav": "./aishell/wav/test/S0916/BAC009S0916W0342.wav", "txt": "赛后主攻手朱婷对记者说"} -{"key": 
"BAC009S0916W0343", "wav": "./aishell/wav/test/S0916/BAC009S0916W0343.wav", "txt": "本场比赛朱婷三七次扣球得到二十一分"} -{"key": "BAC009S0916W0344", "wav": "./aishell/wav/test/S0916/BAC009S0916W0344.wav", "txt": "此外她还凭借拦网和发球分别拿到七分和一分"} -{"key": "BAC009S0916W0345", "wav": "./aishell/wav/test/S0916/BAC009S0916W0345.wav", "txt": "我觉得自己的脚伤已经完全恢复了"} -{"key": "BAC009S0916W0346", "wav": "./aishell/wav/test/S0916/BAC009S0916W0346.wav", "txt": "对弹跳没有什么影响"} -{"key": "BAC009S0916W0347", "wav": "./aishell/wav/test/S0916/BAC009S0916W0347.wav", "txt": "当在新闻发布会上被问及伤情的时候"} -{"key": "BAC009S0916W0348", "wav": "./aishell/wav/test/S0916/BAC009S0916W0348.wav", "txt": "在第四轮与韩国队的比赛中"} -{"key": "BAC009S0916W0349", "wav": "./aishell/wav/test/S0916/BAC009S0916W0349.wav", "txt": "朱婷在第四局比赛中意外崴脚"} -{"key": "BAC009S0916W0350", "wav": "./aishell/wav/test/S0916/BAC009S0916W0350.wav", "txt": "今天出色的数据也佐证了她身体的康复情况良好"} -{"key": "BAC009S0916W0351", "wav": "./aishell/wav/test/S0916/BAC009S0916W0351.wav", "txt": "作为队里年龄最大的球员"} -{"key": "BAC009S0916W0352", "wav": "./aishell/wav/test/S0916/BAC009S0916W0352.wav", "txt": "最终拦网和扣球均得到六这些分"} -{"key": "BAC009S0916W0353", "wav": "./aishell/wav/test/S0916/BAC009S0916W0353.wav", "txt": "位列球队发球榜首位和拦网榜的第二位"} -{"key": "BAC009S0916W0354", "wav": "./aishell/wav/test/S0916/BAC009S0916W0354.wav", "txt": "大家今天打得非常出色"} -{"key": "BAC009S0916W0355", "wav": "./aishell/wav/test/S0916/BAC009S0916W0355.wav", "txt": "能够在这个集体与可爱的队友一起拼杀"} -{"key": "BAC009S0916W0356", "wav": "./aishell/wav/test/S0916/BAC009S0916W0356.wav", "txt": "我感到非常骄傲和自豪"} -{"key": "BAC009S0916W0357", "wav": "./aishell/wav/test/S0916/BAC009S0916W0357.wav", "txt": "在赛后发布会上颜妮对记者说"} -{"key": "BAC009S0916W0358", "wav": "./aishell/wav/test/S0916/BAC009S0916W0358.wav", "txt": "在复盘与俄罗斯一战时"} -{"key": "BAC009S0916W0359", "wav": "./aishell/wav/test/S0916/BAC009S0916W0359.wav", "txt": "这场比赛前教练给我们布置了很多"} -{"key": "BAC009S0916W0360", "wav": "./aishell/wav/test/S0916/BAC009S0916W0360.wav", "txt": "作为就是我上场多去贯彻教练意图"} -{"key": "BAC009S0916W0361", "wav": "./aishell/wav/test/S0916/BAC009S0916W0361.wav", "txt": "颜妮坦言今天俄罗斯表现很好"} -{"key": "BAC009S0916W0362", "wav": "./aishell/wav/test/S0916/BAC009S0916W0362.wav", "txt": "我们两家有时候比较像"} -{"key": "BAC009S0916W0363", "wav": "./aishell/wav/test/S0916/BAC009S0916W0363.wav", "txt": "当被问及新老队员相互担当弥补的话题时"} -{"key": "BAC009S0916W0364", "wav": "./aishell/wav/test/S0916/BAC009S0916W0364.wav", "txt": "颜妮坦言自己的发挥也不是特别稳定"} -{"key": "BAC009S0916W0365", "wav": "./aishell/wav/test/S0916/BAC009S0916W0365.wav", "txt": "但有起伏应该是正常的"} -{"key": "BAC009S0916W0366", "wav": "./aishell/wav/test/S0916/BAC009S0916W0366.wav", "txt": "作为老队员我要多承担"} -{"key": "BAC009S0916W0367", "wav": "./aishell/wav/test/S0916/BAC009S0916W0367.wav", "txt": "用实际行动来弥补不足"} -{"key": "BAC009S0916W0368", "wav": "./aishell/wav/test/S0916/BAC009S0916W0368.wav", "txt": "搜狐体育郭健文"} -{"key": "BAC009S0916W0369", "wav": "./aishell/wav/test/S0916/BAC009S0916W0369.wav", "txt": "女排众将手举国旗敬夺冠"} -{"key": "BAC009S0916W0370", "wav": "./aishell/wav/test/S0916/BAC009S0916W0370.wav", "txt": "拿到了明年里约奥运会的入场券"} -{"key": "BAC009S0916W0371", "wav": "./aishell/wav/test/S0916/BAC009S0916W0371.wav", "txt": "在接受中央电视台记者采访时朱婷表示"} -{"key": "BAC009S0916W0372", "wav": "./aishell/wav/test/S0916/BAC009S0916W0372.wav", "txt": "全队上下面对了巨大困难"} -{"key": "BAC009S0916W0373", "wav": "./aishell/wav/test/S0916/BAC009S0916W0373.wav", "txt": "其中郎平主教练最为辛苦"} -{"key": "BAC009S0916W0374", "wav": "./aishell/wav/test/S0916/BAC009S0916W0374.wav", "txt": "今晚的比赛中朱婷独得二十七分"} -{"key": "BAC009S0916W0375", "wav": 
"./aishell/wav/test/S0916/BAC009S0916W0375.wav", "txt": "再度成为了比赛的得分王"} -{"key": "BAC009S0916W0377", "wav": "./aishell/wav/test/S0916/BAC009S0916W0377.wav", "txt": "但今天能拿冠军真的是发自肺腑的想哭"} -{"key": "BAC009S0916W0378", "wav": "./aishell/wav/test/S0916/BAC009S0916W0378.wav", "txt": "面对日本队的魔鬼主场"} -{"key": "BAC009S0916W0379", "wav": "./aishell/wav/test/S0916/BAC009S0916W0379.wav", "txt": "中国女排表示承受了巨大的压力"} -{"key": "BAC009S0916W0380", "wav": "./aishell/wav/test/S0916/BAC009S0916W0380.wav", "txt": "朱婷表示我想日本肯定也会拼我们"} -{"key": "BAC009S0916W0381", "wav": "./aishell/wav/test/S0916/BAC009S0916W0381.wav", "txt": "做了很多很多困难准备"} -{"key": "BAC009S0916W0382", "wav": "./aishell/wav/test/S0916/BAC009S0916W0382.wav", "txt": "如果输了就不太好说了"} -{"key": "BAC009S0916W0383", "wav": "./aishell/wav/test/S0916/BAC009S0916W0383.wav", "txt": "但是里面不是淡定的"} -{"key": "BAC009S0916W0384", "wav": "./aishell/wav/test/S0916/BAC009S0916W0384.wav", "txt": "中国队连续三位主力因伤缺战"} -{"key": "BAC009S0916W0385", "wav": "./aishell/wav/test/S0916/BAC009S0916W0385.wav", "txt": "大家可能觉得我们这支队伍很苦"} -{"key": "BAC009S0916W0386", "wav": "./aishell/wav/test/S0916/BAC009S0916W0386.wav", "txt": "但我觉得郎导是最苦的"} -{"key": "BAC009S0916W0387", "wav": "./aishell/wav/test/S0916/BAC009S0916W0387.wav", "txt": "朱婷表示其实我也想"} -{"key": "BAC009S0916W0388", "wav": "./aishell/wav/test/S0916/BAC009S0916W0388.wav", "txt": "女排三零阿根廷朱婷复出扣杀状态神勇"} -{"key": "BAC009S0916W0389", "wav": "./aishell/wav/test/S0916/BAC009S0916W0389.wav", "txt": "全场比赛的焦点是休战三场后重新登场的名将朱婷"} -{"key": "BAC009S0916W0390", "wav": "./aishell/wav/test/S0916/BAC009S0916W0390.wav", "txt": "拿下全场最高分的朱婷赛后表示"} -{"key": "BAC009S0916W0391", "wav": "./aishell/wav/test/S0916/BAC009S0916W0391.wav", "txt": "在八月二十六日中国队和韩国队的比赛中"} -{"key": "BAC009S0916W0392", "wav": "./aishell/wav/test/S0916/BAC009S0916W0392.wav", "txt": "朱婷崴脚之后带伤率队取胜"} -{"key": "BAC009S0916W0393", "wav": "./aishell/wav/test/S0916/BAC009S0916W0393.wav", "txt": "主教练郎平都没有派她出场"} -{"key": "BAC009S0916W0394", "wav": "./aishell/wav/test/S0916/BAC009S0916W0394.wav", "txt": "一日晚的中阿之战"} -{"key": "BAC009S0916W0395", "wav": "./aishell/wav/test/S0916/BAC009S0916W0395.wav", "txt": "重新以首发身份登场的朱婷迅速找回比赛的感觉"} -{"key": "BAC009S0916W0396", "wav": "./aishell/wav/test/S0916/BAC009S0916W0396.wav", "txt": "赛后被评为当场最佳球员"} -{"key": "BAC009S0916W0397", "wav": "./aishell/wav/test/S0916/BAC009S0916W0397.wav", "txt": "这也是她在本届世界杯上第二次获得全场最佳"} -{"key": "BAC009S0916W0398", "wav": "./aishell/wav/test/S0916/BAC009S0916W0398.wav", "txt": "在场上移动很好"} -{"key": "BAC009S0916W0399", "wav": "./aishell/wav/test/S0916/BAC009S0916W0399.wav", "txt": "朱婷在谈到大家关心的脚伤时说"} -{"key": "BAC009S0916W0400", "wav": "./aishell/wav/test/S0916/BAC009S0916W0400.wav", "txt": "在冈山的桃太郎体育馆"} -{"key": "BAC009S0916W0401", "wav": "./aishell/wav/test/S0916/BAC009S0916W0401.wav", "txt": "当地华人团体组织了不少球迷为中国队加油"} -{"key": "BAC009S0916W0402", "wav": "./aishell/wav/test/S0916/BAC009S0916W0402.wav", "txt": "这样的氛围让朱婷感觉像是主场一样"} -{"key": "BAC009S0916W0403", "wav": "./aishell/wav/test/S0916/BAC009S0916W0403.wav", "txt": "大家赢球比自己获得最佳还要高兴"} -{"key": "BAC009S0916W0404", "wav": "./aishell/wav/test/S0916/BAC009S0916W0404.wav", "txt": "这部电影从二零一三年就已经开始筹备了"} -{"key": "BAC009S0916W0405", "wav": "./aishell/wav/test/S0916/BAC009S0916W0405.wav", "txt": "前后打磨了两年时间才得以完成"} -{"key": "BAC009S0916W0406", "wav": "./aishell/wav/test/S0916/BAC009S0916W0406.wav", "txt": "与奥斯卡影帝本金斯利同时出现在海报中央"} -{"key": "BAC009S0916W0407", "wav": "./aishell/wav/test/S0916/BAC009S0916W0407.wav", "txt": "雷诺兹持枪的造型和他在冥界警局里的颇为相似"} -{"key": "BAC009S0916W0408", "wav": 
"./aishell/wav/test/S0916/BAC009S0916W0408.wav", "txt": "那些年女神陈妍希近来瘦身有成"} -{"key": "BAC009S0916W0409", "wav": "./aishell/wav/test/S0916/BAC009S0916W0409.wav", "txt": "不仅摆脱神雕侠侣时期的小笼包名号"} -{"key": "BAC009S0916W0410", "wav": "./aishell/wav/test/S0916/BAC009S0916W0410.wav", "txt": "日前在大陆真人秀节目秀出两条雪白大长腿"} -{"key": "BAC009S0916W0411", "wav": "./aishell/wav/test/S0916/BAC009S0916W0411.wav", "txt": "更让粉丝看了鼻血直流"} -{"key": "BAC009S0916W0412", "wav": "./aishell/wav/test/S0916/BAC009S0916W0412.wav", "txt": "只不过好景不常"} -{"key": "BAC009S0916W0413", "wav": "./aishell/wav/test/S0916/BAC009S0916W0413.wav", "txt": "她最近又被拍到崩坏实录"} -{"key": "BAC009S0916W0414", "wav": "./aishell/wav/test/S0916/BAC009S0916W0414.wav", "txt": "乱糟糟的马尾加上宽松衣服的村姑打扮"} -{"key": "BAC009S0916W0415", "wav": "./aishell/wav/test/S0916/BAC009S0916W0415.wav", "txt": "搜狐娱乐讯名为娱乐圈八卦的自媒体"} -{"key": "BAC009S0916W0416", "wav": "./aishell/wav/test/S0916/BAC009S0916W0416.wav", "txt": "曝出陈妍希拍戏时突然干呕"} -{"key": "BAC009S0916W0417", "wav": "./aishell/wav/test/S0916/BAC009S0916W0417.wav", "txt": "推断其已怀孕"} -{"key": "BAC009S0916W0418", "wav": "./aishell/wav/test/S0916/BAC009S0916W0418.wav", "txt": "应该是月初吧"} -{"key": "BAC009S0916W0419", "wav": "./aishell/wav/test/S0916/BAC009S0916W0419.wav", "txt": "小笼包身体有反应"} -{"key": "BAC009S0916W0420", "wav": "./aishell/wav/test/S0916/BAC009S0916W0420.wav", "txt": "她突然就干呕"} -{"key": "BAC009S0916W0421", "wav": "./aishell/wav/test/S0916/BAC009S0916W0421.wav", "txt": "陈妍希还去医院做了检查"} -{"key": "BAC009S0916W0422", "wav": "./aishell/wav/test/S0916/BAC009S0916W0422.wav", "txt": "她的团队对她更加关心了"} -{"key": "BAC009S0916W0423", "wav": "./aishell/wav/test/S0916/BAC009S0916W0423.wav", "txt": "中新网六月十六日电六月十六日是容祖儿的生日"} -{"key": "BAC009S0916W0424", "wav": "./aishell/wav/test/S0916/BAC009S0916W0424.wav", "txt": "陈妍希晒出与容祖儿合照"} -{"key": "BAC009S0916W0425", "wav": "./aishell/wav/test/S0916/BAC009S0916W0425.wav", "txt": "并送上真挚祝福"} -{"key": "BAC009S0916W0426", "wav": "./aishell/wav/test/S0916/BAC009S0916W0426.wav", "txt": "祝可爱的你"} -{"key": "BAC009S0916W0427", "wav": "./aishell/wav/test/S0916/BAC009S0916W0427.wav", "txt": "每一天都要快乐喔"} -{"key": "BAC009S0916W0428", "wav": "./aishell/wav/test/S0916/BAC009S0916W0428.wav", "txt": "中新网九月二十五日电据台湾东森新闻报道"} -{"key": "BAC009S0916W0429", "wav": "./aishell/wav/test/S0916/BAC009S0916W0429.wav", "txt": "陈晓与陈妍希承认恋情"} -{"key": "BAC009S0916W0430", "wav": "./aishell/wav/test/S0916/BAC009S0916W0430.wav", "txt": "获得粉丝祝福"} -{"key": "BAC009S0916W0431", "wav": "./aishell/wav/test/S0916/BAC009S0916W0431.wav", "txt": "湖北六小伙温州偷硬币称代表诸葛后人战刘伯温后人"} -{"key": "BAC009S0916W0432", "wav": "./aishell/wav/test/S0916/BAC009S0916W0432.wav", "txt": "专偷摇摇车里的硬币"} -{"key": "BAC009S0916W0433", "wav": "./aishell/wav/test/S0916/BAC009S0916W0433.wav", "txt": "运气好时一天能偷几千枚一元硬币"} -{"key": "BAC009S0916W0434", "wav": "./aishell/wav/test/S0916/BAC009S0916W0434.wav", "txt": "湖北六岁女童被继母虐打下阴撕裂警方已介入"} -{"key": "BAC009S0916W0435", "wav": "./aishell/wav/test/S0916/BAC009S0916W0435.wav", "txt": "湖北七人冒充福彩工作人员兜售中奖秘籍骗取三零零万"} -{"key": "BAC009S0916W0436", "wav": "./aishell/wav/test/S0916/BAC009S0916W0436.wav", "txt": "湖北省黄冈市公安局通报称"} -{"key": "BAC009S0916W0437", "wav": "./aishell/wav/test/S0916/BAC009S0916W0437.wav", "txt": "打掉一个以传授彩票中奖秘籍为名的特大电信诈骗团伙"} -{"key": "BAC009S0916W0438", "wav": "./aishell/wav/test/S0916/BAC009S0916W0438.wav", "txt": "破获电信诈骗案二三三起"} -{"key": "BAC009S0916W0439", "wav": "./aishell/wav/test/S0916/BAC009S0916W0439.wav", "txt": "涉案金额三零零多万元"} -{"key": "BAC009S0916W0440", "wav": "./aishell/wav/test/S0916/BAC009S0916W0440.wav", "txt": 
"湖北九岁女童遇害案告破凶手强奸不成推下窗外"} -{"key": "BAC009S0916W0441", "wav": "./aishell/wav/test/S0916/BAC009S0916W0441.wav", "txt": "湖北五道杠少年捐出二万元政府奖学金"} -{"key": "BAC009S0916W0442", "wav": "./aishell/wav/test/S0916/BAC009S0916W0442.wav", "txt": "学校里有些同学家里条件不好"} -{"key": "BAC009S0916W0443", "wav": "./aishell/wav/test/S0916/BAC009S0916W0443.wav", "txt": "但是想让更多需要帮助的水高学子感受到温暖"} -{"key": "BAC009S0916W0444", "wav": "./aishell/wav/test/S0916/BAC009S0916W0444.wav", "txt": "湖北卷人电梯设计不合理同型号已售四六四八部"} -{"key": "BAC009S0916W0445", "wav": "./aishell/wav/test/S0916/BAC009S0916W0445.wav", "txt": "事故电梯仍处于拆解状态"} -{"key": "BAC009S0916W0446", "wav": "./aishell/wav/test/S0916/BAC009S0916W0446.wav", "txt": "新华社记者梁建强摄"} -{"key": "BAC009S0916W0447", "wav": "./aishell/wav/test/S0916/BAC009S0916W0447.wav", "txt": "湖北吃人同型号电梯全国四六四八部分布三一省市"} -{"key": "BAC009S0916W0448", "wav": "./aishell/wav/test/S0916/BAC009S0916W0448.wav", "txt": "安良百货商场正常营业"} -{"key": "BAC009S0916W0449", "wav": "./aishell/wav/test/S0916/BAC009S0916W0449.wav", "txt": "但各楼层的自动扶梯均已关停供图新华"} -{"key": "BAC009S0916W0450", "wav": "./aishell/wav/test/S0916/BAC009S0916W0450.wav", "txt": "湖北吃人电梯品牌四年被曝光五次"} -{"key": "BAC009S0916W0452", "wav": "./aishell/wav/test/S0916/BAC009S0916W0452.wav", "txt": "湖北吞人电梯三月份刚检测合格"} -{"key": "BAC009S0916W0453", "wav": "./aishell/wav/test/S0916/BAC009S0916W0453.wav", "txt": "事故电梯出厂刚满一年"} -{"key": "BAC009S0916W0454", "wav": "./aishell/wav/test/S0916/BAC009S0916W0454.wav", "txt": "今年三月份经检验为合格"} -{"key": "BAC009S0916W0455", "wav": "./aishell/wav/test/S0916/BAC009S0916W0455.wav", "txt": "涉事厂家生产的电梯此前曾发生多起事故"} -{"key": "BAC009S0916W0456", "wav": "./aishell/wav/test/S0916/BAC009S0916W0456.wav", "txt": "目前湖北省质监局已要求全省暂停使用涉事厂家电梯"} -{"key": "BAC009S0916W0457", "wav": "./aishell/wav/test/S0916/BAC009S0916W0457.wav", "txt": "湖北咬人电梯厂家曾为盖板支架申请专利"} -{"key": "BAC009S0916W0458", "wav": "./aishell/wav/test/S0916/BAC009S0916W0458.wav", "txt": "湖北荆州吃人电梯盖板设计不合理供图"} -{"key": "BAC009S0916W0459", "wav": "./aishell/wav/test/S0916/BAC009S0916W0459.wav", "txt": "湖北电梯吃人定性为责任事故"} -{"key": "BAC009S0916W0460", "wav": "./aishell/wav/test/S0916/BAC009S0916W0460.wav", "txt": "看过湖北电梯吃人视频的不少上海年轻人"} -{"key": "BAC009S0916W0461", "wav": "./aishell/wav/test/S0916/BAC009S0916W0461.wav", "txt": "在经过商场自动扶梯时会选择跳过视频中的跳板"} -{"key": "BAC009S0916W0462", "wav": "./aishell/wav/test/S0916/BAC009S0916W0462.wav", "txt": "晨报记者张佳琪晨报讯昨晚九时三零分"} -{"key": "BAC009S0916W0463", "wav": "./aishell/wav/test/S0916/BAC009S0916W0463.wav", "txt": "湖北省荆州市安监局召开安良百货电梯事故情报通报会"} -{"key": "BAC009S0916W0464", "wav": "./aishell/wav/test/S0916/BAC009S0916W0464.wav", "txt": "此次事故调查组组长荆州市安监局局长陈观鑫通报称"} -{"key": "BAC009S0916W0465", "wav": "./aishell/wav/test/S0916/BAC009S0916W0465.wav", "txt": "初步认定这是一起安全生产责任事故"} -{"key": "BAC009S0916W0466", "wav": "./aishell/wav/test/S0916/BAC009S0916W0466.wav", "txt": "湖北电梯吃人调查报告电梯厂商及商场负主责"} -{"key": "BAC009S0916W0467", "wav": "./aishell/wav/test/S0916/BAC009S0916W0467.wav", "txt": "二零一五七二六"} -{"key": "BAC009S0916W0468", "wav": "./aishell/wav/test/S0916/BAC009S0916W0468.wav", "txt": "湖北荆州市安良百货公司事发手扶电梯已被关闭检修"} -{"key": "BAC009S0916W0470", "wav": "./aishell/wav/test/S0916/BAC009S0916W0470.wav", "txt": "申龙电梯和安良百货公司应对事故负主要责任"} -{"key": "BAC009S0916W0471", "wav": "./aishell/wav/test/S0916/BAC009S0916W0471.wav", "txt": "湖北飞踢女居民车道办主任被停职"} -{"key": "BAC009S0916W0472", "wav": "./aishell/wav/test/S0916/BAC009S0916W0472.wav", "txt": "网曝视频截图当街飞踢女群众"} -{"key": "BAC009S0916W0473", "wav": "./aishell/wav/test/S0916/BAC009S0916W0473.wav", "txt": "大喊我一脚方言"} -{"key": "BAC009S0916W0474", "wav": 
"./aishell/wav/test/S0916/BAC009S0916W0474.wav", "txt": "同踹死你的街道办主任"} -{"key": "BAC009S0916W0475", "wav": "./aishell/wav/test/S0916/BAC009S0916W0475.wav", "txt": "湖北一中学教师体罚学生致重伤被判刑三年"} -{"key": "BAC009S0916W0476", "wav": "./aishell/wav/test/S0916/BAC009S0916W0476.wav", "txt": "用右脚踢向董某左腹部"} -{"key": "BAC009S0916W0477", "wav": "./aishell/wav/test/S0916/BAC009S0916W0477.wav", "txt": "董某某所受损伤程度属二重伤二级"} -{"key": "BAC009S0916W0478", "wav": "./aishell/wav/test/S0916/BAC009S0916W0478.wav", "txt": "残疾等级为六级残疾"} -{"key": "BAC009S0916W0479", "wav": "./aishell/wav/test/S0916/BAC009S0916W0479.wav", "txt": "梁某某被一审法院以故意伤害罪判处有期徒刑三年"} -{"key": "BAC009S0916W0480", "wav": "./aishell/wav/test/S0916/BAC009S0916W0480.wav", "txt": "湖北一中学班长失踪坠亡教学楼四小时去向成谜"} -{"key": "BAC009S0916W0481", "wav": "./aishell/wav/test/S0916/BAC009S0916W0481.wav", "txt": "新洲一名高中新生因为没去教室上晚自习"} -{"key": "BAC009S0916W0482", "wav": "./aishell/wav/test/S0916/BAC009S0916W0482.wav", "txt": "老师发现后和学生一起寻找"} -{"key": "BAC009S0916W0483", "wav": "./aishell/wav/test/S0916/BAC009S0916W0483.wav", "txt": "直至晚上一零时左右"} -{"key": "BAC009S0916W0484", "wav": "./aishell/wav/test/S0916/BAC009S0916W0484.wav", "txt": "一名老师才发现学生坠楼摔落在教学楼前"} -{"key": "BAC009S0916W0485", "wav": "./aishell/wav/test/S0916/BAC009S0916W0485.wav", "txt": "今日二二日晨凌晨"} -{"key": "BAC009S0916W0486", "wav": "./aishell/wav/test/S0916/BAC009S0916W0486.wav", "txt": "这名一五岁的花季少年最终送医救治无效死亡"} -{"key": "BAC009S0916W0487", "wav": "./aishell/wav/test/S0916/BAC009S0916W0487.wav", "txt": "湖北一传销头目归国投案骗取群众资金数亿元"} -{"key": "BAC009S0916W0488", "wav": "./aishell/wav/test/S0916/BAC009S0916W0488.wav", "txt": "湖北一骗取群众资金数亿元的传销头目近日归国投案"} -{"key": "BAC009S0916W0489", "wav": "./aishell/wav/test/S0916/BAC009S0916W0489.wav", "txt": "湖北一公司以员工名义贷款数十员工负债千万"} -{"key": "BAC009S0916W0490", "wav": "./aishell/wav/test/S0916/BAC009S0916W0490.wav", "txt": "阳逻一家公司以数十名员工的名义"} -{"key": "BAC009S0916W0491", "wav": "./aishell/wav/test/S0916/BAC009S0916W0491.wav", "txt": "向一家金融公司贷款一千多万元"} -{"key": "BAC009S0916W0492", "wav": "./aishell/wav/test/S0916/BAC009S0916W0492.wav", "txt": "公司承诺贷款本息都由公司负责偿还"} -{"key": "BAC009S0916W0493", "wav": "./aishell/wav/test/S0916/BAC009S0916W0493.wav", "txt": "公司却遇到了资金困难"} -{"key": "BAC009S0916W0494", "wav": "./aishell/wav/test/S0916/BAC009S0916W0494.wav", "txt": "存在无法如期还贷的风险"} -{"key": "BAC009S0916W0495", "wav": "./aishell/wav/test/S0916/BAC009S0916W0495.wav", "txt": "这令被贷款的员工们寝食难安"} diff --git a/models/audio/speech_recognition/conformer/igie/inference.py b/models/audio/speech_recognition/conformer/igie/inference.py deleted file mode 100644 index d0583eeebb82f13966d3a8363f6a2d45f95742dd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/inference.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -logging.basicConfig(level=logging.INFO, format = '[%(asctime)s %(filename)s line:%(lineno)d] %(levelname)s: %(message)s') -logging.getLogger('autotvm').setLevel(logging.ERROR) -logging.getLogger('strategy').setLevel(logging.ERROR) -logging.getLogger('te_compiler').setLevel(logging.ERROR) - -import sys - -from pprint import pprint -import numpy as np -import torch -from torch.utils.data import DataLoader -import yaml -import multiprocessing -import tvm -from tvm import relay -from tvm.contrib import graph_executor -import compute_cer - -from wenet.dataset.dataset import Dataset -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config -try: - from swig_decoders import map_batch -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--engine', required=True, help='igie engine path.') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', default='raw', choices=['raw', 'shard'], help='train and cv data type') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder', required=False, help='encoder magicmind model') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--label', required=True, help='label file path') - parser.add_argument('--batch_size', type=int, default=1, help='inference batch size.') - parser.add_argument('--seq_len', type=int, default=384, help='inference seq length.') - parser.add_argument("--input_name", - type=str, - nargs="+", - required=True, - help="input name of the model.") - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', - 'ctc_prefix_beam_search', - 'attention_rescoring' - ], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--num_workers", - type=int, - default=16, - help="number of workers used in pytorch dataloader.") - parser.add_argument("--warmup", - type=int, - default=3, - help="number of warmup before test.") - parser.add_argument('--fps_target', - type=float, - default=0.0) - parser.add_argument('--acc_target', - type=float, - default=0.0) - - parser.add_argument("--perf_only", - type=bool, - default=False, - help="Run performance test only") - - args = parser.parse_args() - return args - -def main(): - args = get_args() - pprint(vars(args), indent=2) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - 
test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['shuffle'] = False - test_conf['sort'] = True - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=args.num_workers) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - - - target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") - device = tvm.device(target.kind.name, 0) - - lib = tvm.runtime.load_module(args.engine) - module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) - - if args.perf_only: - ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) - prof_res = np.array(ftimer().results) * 1000 - fps = args.batch_size * 1000 / np.mean(prof_res) - print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") - else: - # warm up - for _ in range(args.warmup): - module.run() - - with open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, label, feats_lengths, label_lengths = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - seq_len = feats.shape[1] - if seq_len > args.seq_len: - continue - - if feats.shape[0] == args.batch_size: - - speech_data = tvm.nd.array(feats, device) - speech_lengths_data = tvm.nd.array([feats_lengths], device) - module.set_input("speech", speech_data) - module.set_input("speech_lengths", speech_lengths_data) - - module.run() - - encoder_out, encoder_out_lens, ctc_log_probs = module.get_output(0).asnumpy(), module.get_output(1).asnumpy(), module.get_output(2).asnumpy() - - preds = torch.from_numpy(ctc_log_probs) - beam_log_probs, beam_log_probs_idx = torch.topk(preds, k=4, dim=2) - - encoder_out = np.array(encoder_out, dtype="float32") - encoder_out_lens = np.array(encoder_out_lens, dtype="int32") - ctc_log_probs = np.array(ctc_log_probs, dtype="float32") - beam_log_probs = np.array(beam_log_probs, dtype="float32") - beam_log_probs_idx = np.array(beam_log_probs_idx, dtype="int64") - - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - - hyps = map_batch(batch_sents, vocabulary, num_processes, True, 0) - - for i, key in enumerate(keys): - content = hyps[i] - fout.write('{} {}\n'.format(key, content)) - - Acc = compute_cer.get_acc(args.label, args.result_file) - metricResult = {"metricResult": {"Accuracy": f"{Acc}%"}} - print(metricResult) - print(f"* Accuracy: {Acc} %") - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/ixrt_inference_accuracy.py b/models/audio/speech_recognition/conformer/igie/ixrt_inference_accuracy.py new file mode 100644 index 0000000000000000000000000000000000000000..7e5ec2b116d9368893cf5c9357f630176cf9510e --- /dev/null +++ b/models/audio/speech_recognition/conformer/igie/ixrt_inference_accuracy.py @@ 
-0,0 +1,253 @@ +# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +import argparse +import yaml +import copy +import numpy as np + +from tqdm.contrib import tqdm +from torch.utils.data import DataLoader +from wenet.file_utils import read_symbol_table +from wenet.dataset import Dataset +from tools.compute_cer import Calculator, characterize, normalize, default_cluster +import tensorrt +from tensorrt import Dims +from common import create_engine_context, get_io_bindings,trtapi,setup_io_bindings +import pickle + +import cuda.cuda as cuda +import cuda.cudart as cudart + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +import tvm +from tvm import relay +from tvm.contrib import graph_executor + +def get_args(): + parser = argparse.ArgumentParser(description="recognize with your model") + parser.add_argument( + "--infer_type", + default="fp16", + choices=["fp16", "int8"], + help="inference type: fp16 or int8", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--batch_size", type=int, default=24) + parser.add_argument("--data_dir", required=True, help="test data directory") + parser.add_argument( + "--model_dir", type=str, required=True, help="model for inference" + ) + args = parser.parse_args() + return args + + + +def ixrt_infer(module, input, seq_lengths): + module.set_input(key="input", value=input) + module.set_input(key="seq_lengths", value=seq_lengths) + module.run() + out = module.get_output() + return out[0] + + +def tensorrt_infer(engine,context, features, lengths): + + input_names=["input","seq_lengths"] + output_names=["output"] + input_idx = engine.get_binding_index(input_names[0]) + input_shape = features.shape + context.set_binding_shape(input_idx, Dims(input_shape)) + + seq_lengths_idx = engine.get_binding_index(input_names[1]) + seq_lengths_shape = lengths.shape + context.set_binding_shape(seq_lengths_idx, Dims(seq_lengths_shape)) + + inputs, outputs, allocations = setup_io_bindings(engine, context) + pred_output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], features, features.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyHtoD(inputs[1]["allocation"], lengths, lengths.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + context.execute_v2(allocations) + err, = cuda.cuMemcpyDtoH(pred_output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + return pred_output + + +def engine_init(engine): + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + engine, context = create_engine_context(engine, logger) + + return engine,context + + +def igie_infer(module, features, seq_lengths): + module.set_input("input", features) + module.set_input("seq_lengths", seq_lengths) + module.run() + out = 
module.get_output(0) + return out + +def igie_engine_init(engine_path): + device = tvm.device("iluvatar", 0) + lib = tvm.runtime.load_module(engine_path) + module = graph_executor.GraphModule(lib["default"](device)) + # engine, context = module.engine, module.context + return module + + + +def calculate_cer(data, reference_data): + calculator = Calculator() + tochar = True + split = None + case_sensitive = False + ignore_words = set() + rec_set = {} + for line in data: + if tochar: + array = characterize(line) + else: + array = line.strip().split() + if len(array) == 0: + continue + fid = array[0] + rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) + + default_clusters = {} + default_words = {} + for line in reference_data: + if tochar: + array = characterize(line) + else: + array = line.strip().split() + if len(array) == 0: + continue + fid = array[0] + if fid not in rec_set: + continue + lab = normalize(array[1:], ignore_words, case_sensitive, split) + rec = rec_set[fid] + + for word in rec + lab: + if word not in default_words: + default_cluster_name = default_cluster(word) + if default_cluster_name not in default_clusters: + default_clusters[default_cluster_name] = {} + if word not in default_clusters[default_cluster_name]: + default_clusters[default_cluster_name][word] = 1 + default_words[word] = default_cluster_name + result = calculator.calculate(lab, rec) + + result = calculator.overall() + cer = float(result["ins"] + result["sub"] + result["del"]) / result["all"] + corr = result["cor"] / result["all"] + + return cer, corr + + +def main(): + args = get_args() + + # 读取配置文件 + config_fn = os.path.join(args.model_dir, "config.yaml") + with open(config_fn, "r") as fin: + configs = yaml.load(fin, Loader=yaml.FullLoader) + + dataset_conf = copy.deepcopy(configs["dataset_conf"]) + dataset_conf["filter_conf"]["max_length"] = 102400 + dataset_conf["filter_conf"]["min_length"] = 0 + dataset_conf["filter_conf"]["token_max_length"] = 102400 + dataset_conf["filter_conf"]["token_min_length"] = 0 + dataset_conf["filter_conf"]["max_output_input_ratio"] = 102400 + dataset_conf["filter_conf"]["min_output_input_ratio"] = 0 + dataset_conf["speed_perturb"] = False + dataset_conf["spec_aug"] = False + dataset_conf["shuffle"] = False + dataset_conf["sort"] = True + dataset_conf["fbank_conf"]["dither"] = 0.0 + dataset_conf["batch_conf"]["batch_type"] = "static" + dataset_conf["batch_conf"]["batch_size"] = args.batch_size + + # Load dict + dict_fn = os.path.join(args.model_dir, "words.txt") + char_dict = {} + with open(dict_fn, "r", encoding="utf8") as fin: + for line in fin: + arr = line.strip().split() + assert len(arr) == 2 + char_dict[int(arr[1])] = arr[0] + eos = len(char_dict) - 1 + + print("*** 1. Prepare data ***") + data_type = "raw" + test_data_fn = os.path.join(args.data_dir, "data.list") + symbol_table = read_symbol_table(dict_fn) + test_dataset = Dataset( + data_type, test_data_fn, symbol_table, dataset_conf, partition=False + ) + test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) + + print("*** 2. Load engine ***") + engine_path = os.path.join(args.model_dir, f"conformer_{args.infer_type}_trt.engine") + module = igie_engine_init(engine_path) + + print("*** 3. 
Warm up ***") + if args.warm_up > 0: + for i in range(args.warm_up): + module.run() + + results = [] + for batch in test_data_loader: + keys, feats, target, feats_lengths, target_lengths = batch + feats = feats.cpu().numpy().astype(np.float16) + feats_lengths = feats_lengths.cpu().numpy().astype(np.int32) + hyps = igie_infer(module, feats, feats_lengths) + for i, key in enumerate(keys): + line = f"{key} " + for w in hyps[i]: + if w == eos: + break + line += char_dict[w] + results.append(line) + + # 3. 计算 CER + reference_file = os.path.join(args.data_dir, "text") + reference_data = [] + for line in open(reference_file, "r", encoding="utf-8"): + reference_data.append(line) + + cer, corr = calculate_cer(results, reference_data) + + target_cer = float(os.environ["Accuracy"]) + print("CER: ", cer, "target CER: ", target_cer) + if cer <= target_cer: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + +if __name__ == "__main__": + main() diff --git a/models/audio/speech_recognition/conformer/igie/ixrt_inference_performance.py b/models/audio/speech_recognition/conformer/igie/ixrt_inference_performance.py new file mode 100644 index 0000000000000000000000000000000000000000..3f871d9488fb21c208ccd53fccbbc5b523e5eb6d --- /dev/null +++ b/models/audio/speech_recognition/conformer/igie/ixrt_inference_performance.py @@ -0,0 +1,190 @@ +# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import tvm + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +import yaml +import time +import copy +import argparse +import pickle +import numpy as np + +from tqdm.contrib import tqdm +from torch.utils.data import DataLoader + +from wenet.file_utils import read_symbol_table +from wenet.dataset import Dataset + +import tensorrt +from tensorrt import Dims +from common import create_engine_context, get_io_bindings,trtapi,setup_io_bindings +import pickle + +import cuda.cuda as cuda +import cuda.cudart as cudart + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +import tvm +from tvm import relay +from tvm.contrib import graph_executor + +def get_args(): + parser = argparse.ArgumentParser(description="recognize with your model") + parser.add_argument( + "--infer_type", + default="fp16", + choices=["fp16", "int8"], + help="inference type: fp16 or int8", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--batch_size", type=int, default=24) + parser.add_argument("--data_dir", required=True, help="test data directory") + parser.add_argument( + "--model_dir", type=str, required=True, help="model for inference" + ) + args = parser.parse_args() + return args + +def engine_init(engine): + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + engine, context = create_engine_context(engine, logger) + + return engine,context + +def tensorrt_infer(engine,context, features, lengths): + + input_names=["input","seq_lengths"] + output_names=["output"] + input_idx = engine.get_binding_index(input_names[0]) + input_shape = features.shape + context.set_binding_shape(input_idx, Dims(input_shape)) + + seq_lengths_idx = engine.get_binding_index(input_names[1]) + seq_lengths_shape = lengths.shape + context.set_binding_shape(seq_lengths_idx, Dims(seq_lengths_shape)) + + inputs, outputs, allocations = setup_io_bindings(engine, context) + pred_output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], features, features.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyHtoD(inputs[1]["allocation"], lengths, lengths.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + context.execute_v2(allocations) + err, = cuda.cuMemcpyDtoH(pred_output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + return pred_output + +def igie_infer(module, features, seq_lengths): + start_time = time.time() + module.set_input("input", features) + module.set_input("seq_lengths", seq_lengths) + module.run() + out = module.get_output(0) + eval_time = time.time() - start_time + return out, eval_time + +def igie_engine_init(engine_path): + device = tvm.device("iluvatar", 0) + lib = tvm.runtime.load_module(engine_path) + module = graph_executor.GraphModule(lib["default"](device)) + # engine, context = module.engine, module.context + return module + +def main(): + args = get_args() + + # 读取配置文件 + config_fn = os.path.join(args.model_dir, "config.yaml") + with open(config_fn, "r") as fin: + configs = yaml.load(fin, Loader=yaml.FullLoader) + + dataset_conf = copy.deepcopy(configs["dataset_conf"]) + dataset_conf["filter_conf"]["max_length"] = 102400 + dataset_conf["filter_conf"]["min_length"] = 0 + dataset_conf["filter_conf"]["token_max_length"] = 102400 + dataset_conf["filter_conf"]["token_min_length"] = 0 + dataset_conf["filter_conf"]["max_output_input_ratio"] = 102400 + 
dataset_conf["filter_conf"]["min_output_input_ratio"] = 0 + dataset_conf["speed_perturb"] = False + dataset_conf["spec_aug"] = False + dataset_conf["shuffle"] = False + dataset_conf["sort"] = True + dataset_conf["fbank_conf"]["dither"] = 0.0 + dataset_conf["batch_conf"]["batch_type"] = "static" + dataset_conf["batch_conf"]["batch_size"] = args.batch_size + + # Load dict + dict_fn = os.path.join(args.model_dir, "words.txt") + char_dict = {} + with open(dict_fn, "r", encoding="utf8") as fin: + for line in fin: + arr = line.strip().split() + assert len(arr) == 2 + char_dict[int(arr[1])] = arr[0] + + print("*** 1. Prepare data ***") + data_type = "raw" + test_data_fn = os.path.join(args.data_dir, "data.list") + symbol_table = read_symbol_table(dict_fn) + test_dataset = Dataset( + data_type, test_data_fn, symbol_table, dataset_conf, partition=False + ) + + test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) + + print("*** 2. Load IxRT engine ***") + engine_path = os.path.join(args.model_dir, f"conformer_{args.infer_type}_trt.engine") + # engine, context = engine_init(engine_path) + module = igie_engine_init(engine_path) + + print("*** 3. Warm up ***") + if args.warm_up > 0: + for i in range(args.warm_up): + module.run() + + print("*** 4. Inference ***") + num_samples = 0 + results = [] + eval_time = 0.0 + for batch in test_data_loader: + keys, feats, target, feats_lengths, target_lengths = batch + feats = feats.cpu().numpy().astype(np.float16) + feats_lengths = feats_lengths.cpu().numpy().astype(np.int32) + num_samples += feats.shape[0] + hyps, batch_eval_time = igie_infer(module, feats, feats_lengths) + results.append([hyps, keys]) + eval_time += batch_eval_time + + QPS = num_samples / eval_time + print(f"Recognize {num_samples} sentences, {QPS} sentences/s") + target_qps = float(os.environ["Accuracy"]) + print("QPS: = ", QPS, "target QPS: ", target_qps) + if QPS >= target_qps: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + +if __name__ == "__main__": + main() diff --git a/models/audio/speech_recognition/conformer/igie/lang_char.txt b/models/audio/speech_recognition/conformer/igie/lang_char.txt deleted file mode 100644 index 9e63f9ec45cc9aa44fcfb5c3e8125beb6ff9b075..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/lang_char.txt +++ /dev/null @@ -1,4233 +0,0 @@ - 0 - 1 -一 2 -丁 3 -七 4 -万 5 -丈 6 -三 7 -上 8 -下 9 -不 10 -与 11 -丐 12 -丑 13 -专 14 -且 15 -世 16 -丘 17 -丙 18 -业 19 -丛 20 -东 21 -丝 22 -丞 23 -丢 24 -两 25 -严 26 -丧 27 -个 28 -丫 29 -中 30 -丰 31 -串 32 -临 33 -丸 34 -丹 35 -为 36 -主 37 -丽 38 -举 39 -乃 40 -久 41 -么 42 -义 43 -之 44 -乌 45 -乍 46 -乎 47 -乏 48 -乐 49 -乒 50 -乓 51 -乔 52 -乖 53 -乘 54 -乙 55 -九 56 -乞 57 -也 58 -习 59 -乡 60 -书 61 -买 62 -乱 63 -乳 64 -乾 65 -了 66 -予 67 -争 68 -事 69 -二 70 -于 71 -亏 72 -云 73 -互 74 -五 75 -井 76 -亚 77 -些 78 -亟 79 -亡 80 -亢 81 -交 82 -亥 83 -亦 84 -产 85 -亨 86 -亩 87 -享 88 -京 89 -亭 90 -亮 91 -亲 92 -亳 93 -亵 94 -人 95 -亿 96 -什 97 -仁 98 -仄 99 -仅 100 -仇 101 -今 102 -介 103 -仍 104 -从 105 -仑 106 -仓 107 -仔 108 -仕 109 -他 110 -仗 111 -付 112 -仙 113 -仡 114 -代 115 -令 116 -以 117 -仨 118 -仪 119 -们 120 -仰 121 -仲 122 -件 123 -价 124 -任 125 -份 126 -仿 127 -企 128 -伉 129 -伊 130 -伍 131 -伎 132 -伏 133 -伐 134 -休 135 -众 136 -优 137 -伙 138 -会 139 -伞 140 -伟 141 -传 142 -伢 143 -伤 144 -伦 145 -伪 146 -伯 147 -估 148 -伴 149 -伶 150 -伸 151 -伺 152 -似 153 -伽 154 -佃 155 -但 156 -位 157 -低 158 -住 159 -佐 160 -佑 161 -体 162 -何 163 -佘 164 -余 165 -佛 166 -作 167 -佟 168 -你 169 -佣 170 -佩 171 -佬 172 -佳 173 -佶 174 -佼 175 -使 176 -侃 177 -侄 178 -侈 179 -例 180 -侍 181 -侑 
182 -侗 183 -供 184 -依 185 -侠 186 -侣 187 -侥 188 -侦 189 -侧 190 -侨 191 -侬 192 -侮 193 -侯 194 -侵 195 -便 196 -促 197 -俄 198 -俊 199 -俏 200 -俐 201 -俗 202 -俘 203 -俚 204 -保 205 -俞 206 -信 207 -俨 208 -俩 209 -俪 210 -俭 211 -修 212 -俯 213 -俱 214 -俸 215 -俺 216 -俾 217 -倍 218 -倒 219 -倘 220 -候 221 -倚 222 -倜 223 -借 224 -倡 225 -倦 226 -倩 227 -倪 228 -债 229 -值 230 -倾 231 -假 232 -偏 233 -做 234 -停 235 -健 236 -偶 237 -偷 238 -偿 239 -傅 240 -傍 241 -傥 242 -储 243 -催 244 -傲 245 -傻 246 -像 247 -僚 248 -僧 249 -僮 250 -僵 251 -僻 252 -儒 253 -儿 254 -兀 255 -允 256 -元 257 -兄 258 -充 259 -兆 260 -先 261 -光 262 -克 263 -免 264 -兑 265 -兔 266 -兖 267 -党 268 -兜 269 -兢 270 -入 271 -全 272 -八 273 -公 274 -六 275 -兰 276 -共 277 -关 278 -兴 279 -兵 280 -其 281 -具 282 -典 283 -兹 284 -养 285 -兼 286 -兽 287 -冀 288 -内 289 -冈 290 -冉 291 -册 292 -再 293 -冒 294 -冕 295 -写 296 -军 297 -农 298 -冠 299 -冤 300 -冥 301 -冬 302 -冯 303 -冰 304 -冲 305 -决 306 -况 307 -冶 308 -冷 309 -冻 310 -净 311 -凄 312 -准 313 -凇 314 -凉 315 -凋 316 -凌 317 -减 318 -凑 319 -凝 320 -几 321 -凡 322 -凤 323 -凭 324 -凯 325 -凰 326 -凳 327 -凶 328 -凸 329 -凹 330 -出 331 -击 332 -函 333 -凿 334 -刀 335 -刁 336 -刃 337 -分 338 -切 339 -刊 340 -刑 341 -划 342 -列 343 -刘 344 -则 345 -刚 346 -创 347 -初 348 -删 349 -判 350 -刨 351 -利 352 -别 353 -刮 354 -到 355 -制 356 -刷 357 -券 358 -刹 359 -刺 360 -刻 361 -剁 362 -剂 363 -剃 364 -削 365 -前 366 -剐 367 -剑 368 -剔 369 -剖 370 -剥 371 -剧 372 -剩 373 -剪 374 -副 375 -割 376 -剽 377 -剿 378 -劈 379 -力 380 -劝 381 -办 382 -功 383 -加 384 -务 385 -劣 386 -动 387 -助 388 -努 389 -劫 390 -励 391 -劲 392 -劳 393 -劵 394 -势 395 -勃 396 -勇 397 -勉 398 -勋 399 -勒 400 -勘 401 -募 402 -勤 403 -勺 404 -勾 405 -勿 406 -匀 407 -包 408 -匆 409 -匈 410 -匕 411 -化 412 -北 413 -匙 414 -匝 415 -匠 416 -匡 417 -匣 418 -匪 419 -匮 420 -匹 421 -区 422 -医 423 -匾 424 -匿 425 -十 426 -千 427 -升 428 -午 429 -卉 430 -半 431 -华 432 -协 433 -卑 434 -卒 435 -卓 436 -单 437 -卖 438 -南 439 -博 440 -卜 441 -卞 442 -占 443 -卡 444 -卢 445 -卤 446 -卦 447 -卧 448 -卫 449 -卯 450 -印 451 -危 452 -卲 453 -即 454 -却 455 -卵 456 -卷 457 -卸 458 -卿 459 -厂 460 -厄 461 -厅 462 -历 463 -厉 464 -压 465 -厌 466 -厕 467 -厘 468 -厚 469 -原 470 -厢 471 -厥 472 -厦 473 -厨 474 -厩 475 -厮 476 -去 477 -县 478 -参 479 -又 480 -叉 481 -及 482 -友 483 -双 484 -反 485 -发 486 -叔 487 -取 488 -受 489 -变 490 -叙 491 -叛 492 -叠 493 -口 494 -古 495 -句 496 -另 497 -叨 498 -叩 499 -只 500 -叫 501 -召 502 -叭 503 -叮 504 -可 505 -台 506 -叱 507 -史 508 -右 509 -叵 510 -叶 511 -号 512 -司 513 -叹 514 -叼 515 -吁 516 -吃 517 -各 518 -吆 519 -合 520 -吉 521 -吊 522 -同 523 -名 524 -后 525 -吏 526 -吐 527 -向 528 -吓 529 -吕 530 -吗 531 -君 532 -吝 533 -吞 534 -吟 535 -否 536 -吧 537 -吨 538 -吩 539 -含 540 -听 541 -吭 542 -启 543 -吴 544 -吵 545 -吸 546 -吹 547 -吻 548 -吼 549 -吾 550 -吿 551 -呀 552 -呃 553 -呆 554 -呈 555 -告 556 -呐 557 -呕 558 -呗 559 -员 560 -呛 561 -呜 562 -呢 563 -呦 564 -周 565 -呲 566 -味 567 -呵 568 -呼 569 -命 570 -咀 571 -咄 572 -咋 573 -和 574 -咎 575 -咏 576 -咐 577 -咒 578 -咔 579 -咕 580 -咖 581 -咚 582 -咣 583 -咤 584 -咧 585 -咨 586 -咪 587 -咫 588 -咬 589 -咯 590 -咱 591 -咳 592 -咸 593 -咽 594 -哀 595 -品 596 -哄 597 -哆 598 -哇 599 -哈 600 -哉 601 -响 602 -哎 603 -哑 604 -哒 605 -哗 606 -哟 607 -哥 608 -哦 609 -哨 610 -哪 611 -哭 612 -哲 613 -哺 614 -哼 615 -哽 616 -唁 617 -唇 618 -唉 619 -唏 620 -唐 621 -唠 622 -唤 623 -唬 624 -售 625 -唯 626 -唱 627 -唾 628 -啃 629 -商 630 -啊 631 -啕 632 -啡 633 -啤 634 -啥 635 -啦 636 -啧 637 -啪 638 -啬 639 -啰 640 -啵 641 -啶 642 -啸 643 -啼 644 -喀 645 -喂 646 -善 647 -喆 648 -喇 649 -喉 650 -喊 651 -喔 652 -喘 653 -喜 654 -喝 655 -喧 656 -喱 657 -喵 658 -喷 659 -喻 660 -喽 661 -嗅 662 -嗑 663 -嗒 664 -嗓 665 -嗡 666 -嗣 667 -嗤 668 -嗦 669 -嗨 670 -嗬 671 -嗯 672 -嗲 673 -嗷 674 -嗽 675 -嘀 676 -嘉 677 -嘎 678 -嘘 679 -嘛 680 -嘟 681 -嘭 682 -嘱 683 -嘲 684 -嘴 685 -嘻 686 -噎 687 -器 688 -噩 689 
-噪 690 -噬 691 -噱 692 -噼 693 -嚎 694 -嚏 695 -嚓 696 -嚣 697 -嚷 698 -嚼 699 -囊 700 -囚 701 -四 702 -回 703 -因 704 -团 705 -囤 706 -囧 707 -园 708 -困 709 -围 710 -固 711 -国 712 -图 713 -圆 714 -圈 715 -土 716 -圣 717 -在 718 -圩 719 -圪 720 -圭 721 -地 722 -圳 723 -场 724 -圾 725 -址 726 -坂 727 -均 728 -坊 729 -坍 730 -坎 731 -坏 732 -坐 733 -坑 734 -块 735 -坚 736 -坛 737 -坝 738 -坞 739 -坟 740 -坠 741 -坡 742 -坤 743 -坦 744 -坪 745 -坯 746 -坷 747 -垂 748 -垃 749 -垄 750 -垅 751 -型 752 -垌 753 -垒 754 -垛 755 -垢 756 -垣 757 -垤 758 -垦 759 -垫 760 -垮 761 -埃 762 -埋 763 -城 764 -埔 765 -埜 766 -域 767 -培 768 -基 769 -堂 770 -堆 771 -堕 772 -堡 773 -堤 774 -堪 775 -堰 776 -堵 777 -塌 778 -塑 779 -塔 780 -塘 781 -塞 782 -填 783 -塬 784 -塾 785 -境 786 -墅 787 -墓 788 -墙 789 -增 790 -墟 791 -墨 792 -墩 793 -壁 794 -壑 795 -壕 796 -壤 797 -士 798 -壮 799 -声 800 -壳 801 -壶 802 -壹 803 -处 804 -备 805 -复 806 -夏 807 -夕 808 -外 809 -夙 810 -多 811 -夜 812 -够 813 -大 814 -天 815 -太 816 -夫 817 -夭 818 -央 819 -夯 820 -失 821 -头 822 -夷 823 -夸 824 -夹 825 -夺 826 -奂 827 -奇 828 -奈 829 -奉 830 -奋 831 -奎 832 -奏 833 -契 834 -奔 835 -奕 836 -奖 837 -套 838 -奘 839 -奚 840 -奠 841 -奢 842 -奥 843 -女 844 -奴 845 -奶 846 -奸 847 -她 848 -好 849 -如 850 -妃 851 -妄 852 -妆 853 -妇 854 -妈 855 -妊 856 -妍 857 -妒 858 -妖 859 -妙 860 -妞 861 -妤 862 -妥 863 -妧 864 -妨 865 -妩 866 -妮 867 -妯 868 -妹 869 -妻 870 -姆 871 -姊 872 -始 873 -姐 874 -姑 875 -姓 876 -委 877 -姗 878 -姚 879 -姜 880 -姝 881 -姣 882 -姥 883 -姨 884 -姬 885 -姻 886 -姿 887 -威 888 -娃 889 -娄 890 -娅 891 -娇 892 -娌 893 -娘 894 -娜 895 -娟 896 -娠 897 -娥 898 -娩 899 -娱 900 -娴 901 -娶 902 -娼 903 -婀 904 -婆 905 -婉 906 -婕 907 -婚 908 -婧 909 -婪 910 -婴 911 -婵 912 -婶 913 -婷 914 -婿 915 -媒 916 -媚 917 -媛 918 -媞 919 -媲 920 -媳 921 -嫁 922 -嫂 923 -嫉 924 -嫌 925 -嫔 926 -嫖 927 -嫚 928 -嫣 929 -嫦 930 -嫩 931 -嬉 932 -嬛 933 -嬷 934 -孀 935 -子 936 -孔 937 -孕 938 -字 939 -存 940 -孙 941 -孚 942 -孜 943 -孝 944 -孟 945 -孢 946 -季 947 -孤 948 -学 949 -孩 950 -孪 951 -孰 952 -孱 953 -孵 954 -孺 955 -宁 956 -它 957 -宅 958 -宇 959 -守 960 -安 961 -宋 962 -完 963 -宏 964 -宓 965 -宕 966 -宗 967 -官 968 -宙 969 -定 970 -宛 971 -宜 972 -宝 973 -实 974 -宠 975 -审 976 -客 977 -宣 978 -室 979 -宦 980 -宪 981 -宫 982 -宰 983 -害 984 -宴 985 -宵 986 -家 987 -宸 988 -容 989 -宽 990 -宾 991 -宿 992 -寂 993 -寄 994 -寅 995 -密 996 -寇 997 -富 998 -寐 999 -寒 1000 -寓 1001 -寝 1002 -寞 1003 -察 1004 -寡 1005 -寥 1006 -寨 1007 -寮 1008 -寰 1009 -寸 1010 -对 1011 -寺 1012 -寻 1013 -导 1014 -寿 1015 -封 1016 -射 1017 -将 1018 -尊 1019 -小 1020 -少 1021 -尔 1022 -尖 1023 -尘 1024 -尚 1025 -尝 1026 -尤 1027 -尧 1028 -尬 1029 -就 1030 -尴 1031 -尸 1032 -尹 1033 -尺 1034 -尼 1035 -尽 1036 -尾 1037 -尿 1038 -局 1039 -屁 1040 -层 1041 -居 1042 -屈 1043 -届 1044 -屋 1045 -屌 1046 -屎 1047 -屏 1048 -屑 1049 -展 1050 -属 1051 -屠 1052 -屡 1053 -履 1054 -屯 1055 -山 1056 -屹 1057 -屿 1058 -岁 1059 -岂 1060 -岌 1061 -岐 1062 -岔 1063 -岖 1064 -岗 1065 -岚 1066 -岛 1067 -岩 1068 -岬 1069 -岭 1070 -岱 1071 -岳 1072 -岷 1073 -岸 1074 -峁 1075 -峙 1076 -峡 1077 -峥 1078 -峨 1079 -峪 1080 -峭 1081 -峰 1082 -峻 1083 -崂 1084 -崃 1085 -崇 1086 -崎 1087 -崔 1088 -崖 1089 -崛 1090 -崧 1091 -崩 1092 -崭 1093 -崴 1094 -嵋 1095 -嵌 1096 -嵘 1097 -嵛 1098 -嵩 1099 -嶝 1100 -巅 1101 -巍 1102 -川 1103 -州 1104 -巡 1105 -巢 1106 -工 1107 -左 1108 -巧 1109 -巨 1110 -巩 1111 -巫 1112 -差 1113 -己 1114 -已 1115 -巴 1116 -巷 1117 -巾 1118 -巿 1119 -币 1120 -市 1121 -布 1122 -帅 1123 -帆 1124 -师 1125 -希 1126 -帐 1127 -帕 1128 -帖 1129 -帘 1130 -帚 1131 -帜 1132 -帝 1133 -带 1134 -席 1135 -帮 1136 -帷 1137 -常 1138 -帼 1139 -帽 1140 -幂 1141 -幄 1142 -幅 1143 -幌 1144 -幕 1145 -幢 1146 -干 1147 -平 1148 -年 1149 -并 1150 -幸 1151 -幺 1152 -幻 1153 -幼 1154 -幽 1155 -广 1156 -庄 1157 -庆 1158 -庇 1159 -床 1160 -序 1161 -庐 1162 -库 1163 -应 1164 -底 1165 -店 1166 -庙 1167 -庚 1168 -府 1169 -庞 1170 -废 1171 -度 1172 
-座 1173 -庭 1174 -庵 1175 -康 1176 -庸 1177 -庾 1178 -廉 1179 -廊 1180 -廓 1181 -廖 1182 -延 1183 -廷 1184 -建 1185 -开 1186 -异 1187 -弃 1188 -弄 1189 -弈 1190 -弊 1191 -式 1192 -弓 1193 -引 1194 -弗 1195 -弘 1196 -弛 1197 -弟 1198 -张 1199 -弥 1200 -弦 1201 -弧 1202 -弩 1203 -弯 1204 -弱 1205 -弹 1206 -强 1207 -归 1208 -当 1209 -录 1210 -彝 1211 -形 1212 -彤 1213 -彦 1214 -彩 1215 -彪 1216 -彬 1217 -彭 1218 -彰 1219 -影 1220 -彷 1221 -役 1222 -彻 1223 -彼 1224 -彿 1225 -往 1226 -征 1227 -径 1228 -待 1229 -徇 1230 -很 1231 -徉 1232 -徊 1233 -律 1234 -徐 1235 -徒 1236 -得 1237 -徘 1238 -徙 1239 -御 1240 -循 1241 -微 1242 -德 1243 -徽 1244 -心 1245 -必 1246 -忆 1247 -忌 1248 -忍 1249 -忐 1250 -忑 1251 -志 1252 -忘 1253 -忙 1254 -忠 1255 -忧 1256 -忪 1257 -快 1258 -忱 1259 -念 1260 -忽 1261 -怀 1262 -态 1263 -怂 1264 -怎 1265 -怒 1266 -怕 1267 -怖 1268 -怜 1269 -思 1270 -怠 1271 -怡 1272 -急 1273 -怦 1274 -性 1275 -怨 1276 -怪 1277 -怯 1278 -怵 1279 -总 1280 -恋 1281 -恍 1282 -恐 1283 -恒 1284 -恙 1285 -恢 1286 -恣 1287 -恤 1288 -恨 1289 -恩 1290 -恪 1291 -恬 1292 -恭 1293 -息 1294 -恰 1295 -恳 1296 -恶 1297 -恸 1298 -恺 1299 -恼 1300 -恿 1301 -悄 1302 -悉 1303 -悍 1304 -悔 1305 -悖 1306 -悚 1307 -悟 1308 -悠 1309 -患 1310 -悦 1311 -您 1312 -悬 1313 -悯 1314 -悲 1315 -悴 1316 -悸 1317 -悼 1318 -情 1319 -惊 1320 -惋 1321 -惑 1322 -惕 1323 -惚 1324 -惜 1325 -惟 1326 -惠 1327 -惦 1328 -惧 1329 -惨 1330 -惩 1331 -惫 1332 -惬 1333 -惮 1334 -惯 1335 -惰 1336 -想 1337 -惶 1338 -惹 1339 -惺 1340 -愁 1341 -愈 1342 -愉 1343 -意 1344 -愕 1345 -愚 1346 -感 1347 -愤 1348 -愧 1349 -愿 1350 -慈 1351 -慌 1352 -慎 1353 -慑 1354 -慕 1355 -慢 1356 -慧 1357 -慨 1358 -慰 1359 -慷 1360 -憋 1361 -憔 1362 -憧 1363 -憨 1364 -憩 1365 -憬 1366 -憷 1367 -憾 1368 -懂 1369 -懈 1370 -懊 1371 -懋 1372 -懒 1373 -懵 1374 -懿 1375 -戈 1376 -戎 1377 -戏 1378 -成 1379 -我 1380 -戒 1381 -或 1382 -战 1383 -戚 1384 -戛 1385 -戟 1386 -截 1387 -戬 1388 -戮 1389 -戳 1390 -戴 1391 -户 1392 -房 1393 -所 1394 -扁 1395 -扇 1396 -扉 1397 -手 1398 -才 1399 -扎 1400 -扑 1401 -扒 1402 -打 1403 -扔 1404 -托 1405 -扛 1406 -扣 1407 -执 1408 -扩 1409 -扫 1410 -扬 1411 -扭 1412 -扮 1413 -扯 1414 -扰 1415 -扳 1416 -扶 1417 -批 1418 -扼 1419 -找 1420 -承 1421 -技 1422 -抄 1423 -抉 1424 -把 1425 -抑 1426 -抒 1427 -抓 1428 -投 1429 -抖 1430 -抗 1431 -折 1432 -抚 1433 -抛 1434 -抠 1435 -抡 1436 -抢 1437 -护 1438 -报 1439 -抨 1440 -披 1441 -抬 1442 -抱 1443 -抵 1444 -抹 1445 -押 1446 -抽 1447 -抿 1448 -拄 1449 -担 1450 -拆 1451 -拇 1452 -拈 1453 -拉 1454 -拌 1455 -拍 1456 -拎 1457 -拐 1458 -拒 1459 -拓 1460 -拔 1461 -拖 1462 -拗 1463 -拘 1464 -拙 1465 -招 1466 -拜 1467 -拟 1468 -拢 1469 -拣 1470 -拥 1471 -拦 1472 -拧 1473 -拨 1474 -择 1475 -括 1476 -拭 1477 -拮 1478 -拯 1479 -拱 1480 -拳 1481 -拴 1482 -拷 1483 -拼 1484 -拽 1485 -拾 1486 -拿 1487 -持 1488 -挂 1489 -指 1490 -按 1491 -挎 1492 -挑 1493 -挖 1494 -挚 1495 -挛 1496 -挝 1497 -挟 1498 -挠 1499 -挡 1500 -挣 1501 -挤 1502 -挥 1503 -挨 1504 -挪 1505 -挫 1506 -振 1507 -挺 1508 -挽 1509 -捂 1510 -捅 1511 -捆 1512 -捉 1513 -捍 1514 -捎 1515 -捏 1516 -捐 1517 -捕 1518 -捞 1519 -损 1520 -捡 1521 -换 1522 -捣 1523 -捧 1524 -据 1525 -捷 1526 -捺 1527 -捻 1528 -掀 1529 -掂 1530 -授 1531 -掉 1532 -掌 1533 -掏 1534 -掐 1535 -排 1536 -掖 1537 -掘 1538 -掠 1539 -探 1540 -掣 1541 -接 1542 -控 1543 -推 1544 -掩 1545 -措 1546 -掬 1547 -掮 1548 -掰 1549 -掴 1550 -掷 1551 -掺 1552 -揉 1553 -揍 1554 -描 1555 -提 1556 -插 1557 -握 1558 -揣 1559 -揩 1560 -揪 1561 -揭 1562 -援 1563 -揽 1564 -搀 1565 -搁 1566 -搂 1567 -搅 1568 -搏 1569 -搜 1570 -搞 1571 -搡 1572 -搪 1573 -搬 1574 -搭 1575 -携 1576 -搽 1577 -摁 1578 -摄 1579 -摆 1580 -摇 1581 -摊 1582 -摒 1583 -摔 1584 -摘 1585 -摧 1586 -摩 1587 -摸 1588 -摹 1589 -撂 1590 -撇 1591 -撑 1592 -撒 1593 -撕 1594 -撞 1595 -撤 1596 -撩 1597 -撬 1598 -播 1599 -撮 1600 -撰 1601 -撵 1602 -撸 1603 -撼 1604 -擂 1605 -擅 1606 -操 1607 -擎 1608 -擒 1609 -擘 1610 -擞 1611 -擦 1612 -攀 1613 -攒 1614 -攥 1615 -支 1616 -收 
1617 -改 1618 -攻 1619 -放 1620 -政 1621 -故 1622 -效 1623 -敌 1624 -敏 1625 -救 1626 -敖 1627 -教 1628 -敛 1629 -敝 1630 -敞 1631 -敢 1632 -散 1633 -敦 1634 -敬 1635 -数 1636 -敲 1637 -整 1638 -敷 1639 -文 1640 -斌 1641 -斐 1642 -斑 1643 -斓 1644 -斗 1645 -料 1646 -斛 1647 -斜 1648 -斟 1649 -斤 1650 -斥 1651 -斧 1652 -斩 1653 -断 1654 -斯 1655 -新 1656 -方 1657 -施 1658 -旁 1659 -旅 1660 -旋 1661 -族 1662 -旗 1663 -无 1664 -既 1665 -日 1666 -旦 1667 -旧 1668 -旨 1669 -早 1670 -旬 1671 -旭 1672 -旱 1673 -时 1674 -旷 1675 -旺 1676 -昀 1677 -昂 1678 -昆 1679 -昊 1680 -昌 1681 -明 1682 -昏 1683 -易 1684 -昔 1685 -昕 1686 -昙 1687 -星 1688 -映 1689 -春 1690 -昧 1691 -昨 1692 -昭 1693 -是 1694 -昱 1695 -昵 1696 -昼 1697 -显 1698 -晃 1699 -晋 1700 -晏 1701 -晒 1702 -晓 1703 -晔 1704 -晕 1705 -晖 1706 -晗 1707 -晚 1708 -晟 1709 -晤 1710 -晦 1711 -晨 1712 -普 1713 -景 1714 -晰 1715 -晴 1716 -晶 1717 -智 1718 -晾 1719 -暂 1720 -暄 1721 -暇 1722 -暑 1723 -暖 1724 -暗 1725 -暧 1726 -暨 1727 -暮 1728 -暴 1729 -曙 1730 -曝 1731 -曦 1732 -曰 1733 -曲 1734 -更 1735 -曹 1736 -曼 1737 -曾 1738 -替 1739 -最 1740 -月 1741 -有 1742 -朋 1743 -服 1744 -朐 1745 -朔 1746 -朗 1747 -望 1748 -朝 1749 -期 1750 -朦 1751 -木 1752 -未 1753 -末 1754 -本 1755 -札 1756 -术 1757 -朱 1758 -朴 1759 -朵 1760 -机 1761 -朽 1762 -杀 1763 -杂 1764 -权 1765 -杆 1766 -杉 1767 -李 1768 -杏 1769 -材 1770 -村 1771 -杖 1772 -杜 1773 -杞 1774 -束 1775 -杠 1776 -条 1777 -来 1778 -杨 1779 -杭 1780 -杯 1781 -杰 1782 -杳 1783 -松 1784 -板 1785 -极 1786 -构 1787 -枉 1788 -析 1789 -枕 1790 -林 1791 -枚 1792 -果 1793 -枝 1794 -枞 1795 -枢 1796 -枣 1797 -枪 1798 -枫 1799 -枭 1800 -枯 1801 -架 1802 -枷 1803 -柄 1804 -柏 1805 -某 1806 -染 1807 -柔 1808 -柜 1809 -柞 1810 -柠 1811 -查 1812 -柬 1813 -柯 1814 -柱 1815 -柳 1816 -柴 1817 -柿 1818 -栅 1819 -标 1820 -栈 1821 -栋 1822 -栏 1823 -树 1824 -栓 1825 -栖 1826 -栗 1827 -校 1828 -株 1829 -样 1830 -核 1831 -根 1832 -格 1833 -栽 1834 -栾 1835 -桂 1836 -桃 1837 -框 1838 -案 1839 -桉 1840 -桌 1841 -桎 1842 -桐 1843 -桑 1844 -桓 1845 -桔 1846 -档 1847 -桥 1848 -桦 1849 -桩 1850 -桶 1851 -梁 1852 -梅 1853 -梓 1854 -梗 1855 -梦 1856 -梧 1857 -梨 1858 -梭 1859 -梯 1860 -械 1861 -梳 1862 -梵 1863 -检 1864 -棉 1865 -棋 1866 -棍 1867 -棒 1868 -棕 1869 -棘 1870 -棚 1871 -棠 1872 -森 1873 -棱 1874 -棵 1875 -棺 1876 -椅 1877 -椋 1878 -植 1879 -椎 1880 -椒 1881 -椰 1882 -椿 1883 -楂 1884 -楔 1885 -楚 1886 -楞 1887 -楠 1888 -楣 1889 -楷 1890 -楼 1891 -概 1892 -榄 1893 -榆 1894 -榈 1895 -榉 1896 -榔 1897 -榕 1898 -榜 1899 -榨 1900 -榭 1901 -榴 1902 -榷 1903 -榻 1904 -槌 1905 -槎 1906 -槐 1907 -槛 1908 -槟 1909 -槽 1910 -槿 1911 -樊 1912 -樟 1913 -模 1914 -横 1915 -樱 1916 -橄 1917 -橘 1918 -橙 1919 -橡 1920 -橱 1921 -檀 1922 -檐 1923 -檬 1924 -欠 1925 -次 1926 -欢 1927 -欣 1928 -欧 1929 -欲 1930 -欺 1931 -款 1932 -歆 1933 -歇 1934 -歉 1935 -歌 1936 -止 1937 -正 1938 -此 1939 -步 1940 -武 1941 -歧 1942 -歪 1943 -歹 1944 -死 1945 -殃 1946 -殆 1947 -殉 1948 -殊 1949 -残 1950 -殒 1951 -殓 1952 -殖 1953 -殚 1954 -殡 1955 -殭 1956 -殴 1957 -段 1958 -殷 1959 -殿 1960 -毁 1961 -毂 1962 -毅 1963 -毋 1964 -母 1965 -每 1966 -毒 1967 -毓 1968 -比 1969 -毕 1970 -毗 1971 -毙 1972 -毛 1973 -毫 1974 -毯 1975 -毽 1976 -氏 1977 -民 1978 -氓 1979 -气 1980 -氛 1981 -氟 1982 -氢 1983 -氦 1984 -氧 1985 -氨 1986 -氪 1987 -氮 1988 -氯 1989 -氰 1990 -水 1991 -永 1992 -汀 1993 -汁 1994 -求 1995 -汇 1996 -汉 1997 -汕 1998 -汗 1999 -汛 2000 -汝 2001 -汞 2002 -江 2003 -池 2004 -污 2005 -汤 2006 -汪 2007 -汰 2008 -汲 2009 -汴 2010 -汶 2011 -汹 2012 -汽 2013 -汾 2014 -沁 2015 -沃 2016 -沅 2017 -沈 2018 -沉 2019 -沏 2020 -沐 2021 -沓 2022 -沙 2023 -沛 2024 -沟 2025 -没 2026 -沣 2027 -沥 2028 -沦 2029 -沧 2030 -沪 2031 -沫 2032 -沮 2033 -沱 2034 -河 2035 -沸 2036 -油 2037 -治 2038 -沼 2039 -沽 2040 -沾 2041 -沿 2042 -泄 2043 -泉 2044 -泊 2045 -泌 2046 -泓 2047 -泔 2048 -法 2049 -泗 2050 -泛 2051 -泞 2052 -泠 2053 -泡 2054 -波 2055 -泣 2056 -泥 2057 -注 2058 -泪 2059 -泯 2060 -泰 
2061 -泱 2062 -泳 2063 -泵 2064 -泷 2065 -泸 2066 -泻 2067 -泼 2068 -泽 2069 -泾 2070 -洁 2071 -洋 2072 -洒 2073 -洗 2074 -洙 2075 -洛 2076 -洞 2077 -津 2078 -洪 2079 -洱 2080 -洲 2081 -洵 2082 -活 2083 -洼 2084 -洽 2085 -派 2086 -流 2087 -浅 2088 -浆 2089 -浇 2090 -浈 2091 -浊 2092 -测 2093 -济 2094 -浏 2095 -浑 2096 -浓 2097 -浙 2098 -浚 2099 -浦 2100 -浩 2101 -浪 2102 -浮 2103 -浴 2104 -海 2105 -浸 2106 -涂 2107 -涅 2108 -消 2109 -涉 2110 -涌 2111 -涎 2112 -涓 2113 -涕 2114 -涛 2115 -涝 2116 -涞 2117 -涠 2118 -涡 2119 -涤 2120 -润 2121 -涧 2122 -涨 2123 -涩 2124 -涮 2125 -涯 2126 -液 2127 -涵 2128 -涿 2129 -淀 2130 -淄 2131 -淆 2132 -淇 2133 -淋 2134 -淌 2135 -淑 2136 -淖 2137 -淘 2138 -淝 2139 -淞 2140 -淡 2141 -淤 2142 -淫 2143 -淮 2144 -深 2145 -淳 2146 -混 2147 -淹 2148 -添 2149 -淼 2150 -渀 2151 -清 2152 -渊 2153 -渍 2154 -渎 2155 -渐 2156 -渔 2157 -渗 2158 -渚 2159 -渝 2160 -渠 2161 -渡 2162 -渣 2163 -渤 2164 -渥 2165 -温 2166 -渭 2167 -港 2168 -渲 2169 -渴 2170 -游 2171 -渺 2172 -湃 2173 -湍 2174 -湖 2175 -湘 2176 -湛 2177 -湾 2178 -湿 2179 -溃 2180 -溅 2181 -溉 2182 -源 2183 -溜 2184 -溢 2185 -溥 2186 -溧 2187 -溪 2188 -溯 2189 -溶 2190 -溺 2191 -滁 2192 -滇 2193 -滋 2194 -滑 2195 -滔 2196 -滕 2197 -滚 2198 -滞 2199 -满 2200 -滢 2201 -滤 2202 -滥 2203 -滨 2204 -滩 2205 -滴 2206 -漂 2207 -漆 2208 -漏 2209 -漓 2210 -演 2211 -漕 2212 -漠 2213 -漩 2214 -漫 2215 -漭 2216 -漯 2217 -漱 2218 -漳 2219 -漾 2220 -潇 2221 -潘 2222 -潜 2223 -潞 2224 -潢 2225 -潭 2226 -潮 2227 -潼 2228 -澄 2229 -澈 2230 -澎 2231 -澜 2232 -澡 2233 -澳 2234 -激 2235 -濑 2236 -濒 2237 -濠 2238 -濡 2239 -濮 2240 -瀑 2241 -瀚 2242 -瀛 2243 -灌 2244 -灞 2245 -火 2246 -灭 2247 -灯 2248 -灰 2249 -灵 2250 -灶 2251 -灼 2252 -灾 2253 -灿 2254 -炅 2255 -炉 2256 -炊 2257 -炎 2258 -炒 2259 -炕 2260 -炖 2261 -炙 2262 -炜 2263 -炫 2264 -炬 2265 -炭 2266 -炮 2267 -炯 2268 -炳 2269 -炷 2270 -炸 2271 -点 2272 -炼 2273 -炽 2274 -烁 2275 -烂 2276 -烃 2277 -烈 2278 -烊 2279 -烘 2280 -烙 2281 -烟 2282 -烤 2283 -烦 2284 -烧 2285 -烨 2286 -烫 2287 -热 2288 -烯 2289 -烷 2290 -烹 2291 -烽 2292 -焉 2293 -焊 2294 -焕 2295 -焖 2296 -焘 2297 -焚 2298 -焦 2299 -焯 2300 -焰 2301 -焱 2302 -然 2303 -煊 2304 -煌 2305 -煎 2306 -煜 2307 -煞 2308 -煤 2309 -煦 2310 -照 2311 -煮 2312 -煲 2313 -熄 2314 -熊 2315 -熏 2316 -熔 2317 -熙 2318 -熟 2319 -熠 2320 -熨 2321 -熬 2322 -熹 2323 -燃 2324 -燊 2325 -燎 2326 -燕 2327 -燥 2328 -爆 2329 -爪 2330 -爬 2331 -爱 2332 -爵 2333 -父 2334 -爷 2335 -爸 2336 -爹 2337 -爽 2338 -片 2339 -版 2340 -牌 2341 -牙 2342 -牛 2343 -牟 2344 -牡 2345 -牢 2346 -牧 2347 -物 2348 -牲 2349 -牵 2350 -特 2351 -牺 2352 -牾 2353 -犀 2354 -犊 2355 -犒 2356 -犬 2357 -犯 2358 -状 2359 -犷 2360 -犹 2361 -狂 2362 -狄 2363 -狈 2364 -狐 2365 -狗 2366 -狙 2367 -狞 2368 -狠 2369 -狡 2370 -狩 2371 -独 2372 -狭 2373 -狮 2374 -狰 2375 -狱 2376 -狸 2377 -狼 2378 -猎 2379 -猖 2380 -猛 2381 -猜 2382 -猝 2383 -猥 2384 -猩 2385 -猪 2386 -猫 2387 -猬 2388 -献 2389 -猴 2390 -猾 2391 -猿 2392 -獒 2393 -獗 2394 -獾 2395 -玄 2396 -率 2397 -玉 2398 -王 2399 -玖 2400 -玛 2401 -玟 2402 -玥 2403 -玩 2404 -玫 2405 -玮 2406 -环 2407 -现 2408 -玲 2409 -玳 2410 -玺 2411 -玻 2412 -珀 2413 -珉 2414 -珊 2415 -珍 2416 -珏 2417 -珑 2418 -珜 2419 -珠 2420 -班 2421 -珮 2422 -珲 2423 -珺 2424 -球 2425 -琅 2426 -理 2427 -琉 2428 -琊 2429 -琏 2430 -琐 2431 -琛 2432 -琢 2433 -琥 2434 -琦 2435 -琪 2436 -琬 2437 -琰 2438 -琳 2439 -琴 2440 -琵 2441 -琶 2442 -琼 2443 -瑁 2444 -瑄 2445 -瑕 2446 -瑙 2447 -瑚 2448 -瑛 2449 -瑜 2450 -瑞 2451 -瑟 2452 -瑰 2453 -瑶 2454 -瑾 2455 -璀 2456 -璃 2457 -璇 2458 -璋 2459 -璐 2460 -璞 2461 -璧 2462 -璨 2463 -瓜 2464 -瓢 2465 -瓣 2466 -瓦 2467 -瓮 2468 -瓯 2469 -瓶 2470 -瓷 2471 -甄 2472 -甘 2473 -甚 2474 -甜 2475 -生 2476 -甥 2477 -用 2478 -甩 2479 -甫 2480 -甬 2481 -田 2482 -由 2483 -甲 2484 -申 2485 -电 2486 -男 2487 -甸 2488 -町 2489 -画 2490 -畅 2491 -畊 2492 -界 2493 -畏 2494 -畔 2495 -留 2496 -畜 2497 -略 2498 -番 2499 -畴 2500 -畸 2501 -畿 2502 -疃 2503 -疆 2504 -疏 
2505 -疑 2506 -疗 2507 -疚 2508 -疝 2509 -疤 2510 -疫 2511 -疯 2512 -疲 2513 -疵 2514 -疹 2515 -疼 2516 -疾 2517 -病 2518 -症 2519 -痉 2520 -痊 2521 -痒 2522 -痕 2523 -痘 2524 -痛 2525 -痣 2526 -痪 2527 -痫 2528 -痰 2529 -痱 2530 -痴 2531 -痹 2532 -痼 2533 -瘀 2534 -瘁 2535 -瘟 2536 -瘠 2537 -瘤 2538 -瘦 2539 -瘩 2540 -瘪 2541 -瘫 2542 -瘸 2543 -瘾 2544 -癌 2545 -癖 2546 -癣 2547 -癫 2548 -登 2549 -白 2550 -百 2551 -皂 2552 -的 2553 -皆 2554 -皇 2555 -皋 2556 -皎 2557 -皓 2558 -皖 2559 -皙 2560 -皮 2561 -皱 2562 -盆 2563 -盈 2564 -益 2565 -盎 2566 -盐 2567 -监 2568 -盒 2569 -盔 2570 -盖 2571 -盗 2572 -盘 2573 -盛 2574 -盟 2575 -目 2576 -盯 2577 -盲 2578 -直 2579 -相 2580 -盹 2581 -盼 2582 -盾 2583 -省 2584 -眈 2585 -眉 2586 -看 2587 -真 2588 -眠 2589 -眨 2590 -眬 2591 -眯 2592 -眶 2593 -眷 2594 -眺 2595 -眼 2596 -着 2597 -睁 2598 -睐 2599 -睛 2600 -睡 2601 -督 2602 -睦 2603 -睫 2604 -睬 2605 -睹 2606 -睿 2607 -瞄 2608 -瞅 2609 -瞌 2610 -瞎 2611 -瞒 2612 -瞟 2613 -瞧 2614 -瞩 2615 -瞪 2616 -瞬 2617 -瞰 2618 -瞳 2619 -瞻 2620 -瞿 2621 -矗 2622 -矛 2623 -矜 2624 -矢 2625 -矣 2626 -知 2627 -矩 2628 -矫 2629 -短 2630 -矮 2631 -石 2632 -矶 2633 -矿 2634 -码 2635 -砂 2636 -砌 2637 -砍 2638 -砒 2639 -研 2640 -砖 2641 -砚 2642 -砝 2643 -砥 2644 -砰 2645 -砲 2646 -破 2647 -砷 2648 -砸 2649 -砺 2650 -砾 2651 -础 2652 -硅 2653 -硕 2654 -硚 2655 -硝 2656 -硫 2657 -硬 2658 -确 2659 -碉 2660 -碌 2661 -碍 2662 -碎 2663 -碑 2664 -碗 2665 -碘 2666 -碚 2667 -碟 2668 -碧 2669 -碰 2670 -碱 2671 -碳 2672 -碴 2673 -碾 2674 -磁 2675 -磅 2676 -磊 2677 -磋 2678 -磐 2679 -磕 2680 -磡 2681 -磨 2682 -磴 2683 -磷 2684 -磺 2685 -礁 2686 -示 2687 -礼 2688 -社 2689 -祁 2690 -祈 2691 -祉 2692 -祖 2693 -祛 2694 -祝 2695 -神 2696 -祠 2697 -祢 2698 -祥 2699 -票 2700 -祭 2701 -祯 2702 -祷 2703 -祸 2704 -祺 2705 -禀 2706 -禁 2707 -禄 2708 -禅 2709 -福 2710 -禧 2711 -禹 2712 -禺 2713 -离 2714 -禽 2715 -禾 2716 -秀 2717 -私 2718 -秃 2719 -秆 2720 -秉 2721 -秋 2722 -种 2723 -科 2724 -秒 2725 -秘 2726 -租 2727 -秣 2728 -秤 2729 -秦 2730 -秧 2731 -秩 2732 -积 2733 -称 2734 -秸 2735 -移 2736 -秽 2737 -稀 2738 -程 2739 -稍 2740 -税 2741 -稚 2742 -稠 2743 -稣 2744 -稳 2745 -稻 2746 -稼 2747 -稽 2748 -稿 2749 -穆 2750 -穗 2751 -穴 2752 -究 2753 -穷 2754 -空 2755 -穿 2756 -突 2757 -窃 2758 -窄 2759 -窈 2760 -窍 2761 -窑 2762 -窒 2763 -窕 2764 -窖 2765 -窗 2766 -窘 2767 -窜 2768 -窝 2769 -窟 2770 -窥 2771 -窦 2772 -窨 2773 -窿 2774 -立 2775 -竖 2776 -站 2777 -竞 2778 -竟 2779 -章 2780 -竣 2781 -童 2782 -竭 2783 -端 2784 -竲 2785 -竹 2786 -竺 2787 -竽 2788 -竿 2789 -笃 2790 -笈 2791 -笋 2792 -笑 2793 -笔 2794 -笙 2795 -笛 2796 -符 2797 -笨 2798 -第 2799 -笼 2800 -等 2801 -筋 2802 -筐 2803 -筑 2804 -筒 2805 -答 2806 -策 2807 -筛 2808 -筱 2809 -筵 2810 -筷 2811 -筹 2812 -签 2813 -简 2814 -箍 2815 -算 2816 -管 2817 -箫 2818 -箭 2819 -箱 2820 -篇 2821 -篡 2822 -篪 2823 -篮 2824 -篷 2825 -簇 2826 -簧 2827 -簸 2828 -簿 2829 -籁 2830 -籍 2831 -米 2832 -类 2833 -籽 2834 -粉 2835 -粒 2836 -粕 2837 -粗 2838 -粘 2839 -粟 2840 -粤 2841 -粥 2842 -粪 2843 -粮 2844 -粱 2845 -粹 2846 -精 2847 -糊 2848 -糕 2849 -糖 2850 -糗 2851 -糙 2852 -糟 2853 -糯 2854 -系 2855 -紊 2856 -素 2857 -索 2858 -紧 2859 -紫 2860 -累 2861 -絮 2862 -綦 2863 -繁 2864 -纠 2865 -红 2866 -纣 2867 -纤 2868 -约 2869 -级 2870 -纪 2871 -纬 2872 -纯 2873 -纰 2874 -纱 2875 -纲 2876 -纳 2877 -纵 2878 -纶 2879 -纷 2880 -纸 2881 -纹 2882 -纺 2883 -纽 2884 -线 2885 -练 2886 -组 2887 -绅 2888 -细 2889 -织 2890 -终 2891 -绊 2892 -绌 2893 -绍 2894 -绎 2895 -经 2896 -绑 2897 -绒 2898 -结 2899 -绕 2900 -绘 2901 -给 2902 -绚 2903 -络 2904 -绝 2905 -绞 2906 -统 2907 -绣 2908 -继 2909 -绩 2910 -绪 2911 -续 2912 -绮 2913 -绯 2914 -绰 2915 -绳 2916 -维 2917 -绵 2918 -绷 2919 -绸 2920 -综 2921 -绽 2922 -绿 2923 -缀 2924 -缄 2925 -缅 2926 -缆 2927 -缇 2928 -缉 2929 -缓 2930 -缔 2931 -缕 2932 -编 2933 -缘 2934 -缙 2935 -缚 2936 -缜 2937 -缝 2938 -缠 2939 -缤 2940 -缨 2941 -缩 2942 -缪 2943 -缭 2944 -缮 2945 -缰 2946 -缴 2947 -缸 2948 -缺 
2949 -罂 2950 -罄 2951 -罐 2952 -网 2953 -罕 2954 -罗 2955 -罚 2956 -罡 2957 -罢 2958 -罩 2959 -罪 2960 -置 2961 -署 2962 -罹 2963 -羁 2964 -羊 2965 -美 2966 -羚 2967 -羞 2968 -羡 2969 -羣 2970 -群 2971 -羲 2972 -羹 2973 -羽 2974 -羿 2975 -翁 2976 -翅 2977 -翌 2978 -翔 2979 -翘 2980 -翟 2981 -翠 2982 -翡 2983 -翩 2984 -翰 2985 -翱 2986 -翻 2987 -翼 2988 -耀 2989 -老 2990 -考 2991 -耄 2992 -者 2993 -耋 2994 -而 2995 -耍 2996 -耐 2997 -耒 2998 -耕 2999 -耗 3000 -耘 3001 -耳 3002 -耶 3003 -耷 3004 -耸 3005 -耻 3006 -耽 3007 -耿 3008 -聂 3009 -聆 3010 -聊 3011 -聋 3012 -职 3013 -联 3014 -聘 3015 -聚 3016 -聪 3017 -肃 3018 -肆 3019 -肇 3020 -肉 3021 -肋 3022 -肌 3023 -肖 3024 -肘 3025 -肚 3026 -肛 3027 -肝 3028 -肠 3029 -股 3030 -肢 3031 -肤 3032 -肥 3033 -肩 3034 -肪 3035 -肮 3036 -肯 3037 -育 3038 -肴 3039 -肺 3040 -肾 3041 -肿 3042 -胀 3043 -胁 3044 -胃 3045 -胆 3046 -背 3047 -胎 3048 -胖 3049 -胚 3050 -胛 3051 -胜 3052 -胞 3053 -胡 3054 -胤 3055 -胧 3056 -胫 3057 -胯 3058 -胰 3059 -胱 3060 -胳 3061 -胶 3062 -胸 3063 -胺 3064 -能 3065 -脂 3066 -脆 3067 -脉 3068 -脊 3069 -脍 3070 -脏 3071 -脐 3072 -脑 3073 -脖 3074 -脚 3075 -脯 3076 -脱 3077 -脸 3078 -脾 3079 -腆 3080 -腊 3081 -腋 3082 -腌 3083 -腐 3084 -腑 3085 -腓 3086 -腔 3087 -腕 3088 -腥 3089 -腩 3090 -腰 3091 -腱 3092 -腹 3093 -腺 3094 -腻 3095 -腼 3096 -腾 3097 -腿 3098 -膀 3099 -膊 3100 -膏 3101 -膑 3102 -膛 3103 -膜 3104 -膝 3105 -膨 3106 -膳 3107 -膺 3108 -臀 3109 -臂 3110 -臃 3111 -臆 3112 -臣 3113 -自 3114 -臭 3115 -至 3116 -致 3117 -臻 3118 -舀 3119 -舅 3120 -舆 3121 -舌 3122 -舍 3123 -舒 3124 -舛 3125 -舜 3126 -舞 3127 -舟 3128 -航 3129 -般 3130 -舰 3131 -舱 3132 -舵 3133 -舶 3134 -舸 3135 -船 3136 -艇 3137 -艋 3138 -艘 3139 -良 3140 -艰 3141 -色 3142 -艳 3143 -艺 3144 -艾 3145 -节 3146 -芊 3147 -芋 3148 -芒 3149 -芙 3150 -芜 3151 -芝 3152 -芦 3153 -芬 3154 -芭 3155 -芮 3156 -芯 3157 -花 3158 -芳 3159 -芷 3160 -芸 3161 -芽 3162 -苇 3163 -苍 3164 -苏 3165 -苑 3166 -苗 3167 -苛 3168 -苟 3169 -苡 3170 -苣 3171 -若 3172 -苦 3173 -苯 3174 -英 3175 -苹 3176 -茁 3177 -茂 3178 -范 3179 -茄 3180 -茅 3181 -茆 3182 -茎 3183 -茗 3184 -茜 3185 -茨 3186 -茫 3187 -茵 3188 -茶 3189 -茸 3190 -茹 3191 -荃 3192 -荆 3193 -草 3194 -荐 3195 -荒 3196 -荔 3197 -荚 3198 -荞 3199 -荟 3200 -荡 3201 -荣 3202 -荤 3203 -荧 3204 -荫 3205 -药 3206 -荷 3207 -荼 3208 -莅 3209 -莆 3210 -莉 3211 -莎 3212 -莓 3213 -莘 3214 -莞 3215 -莠 3216 -莫 3217 -莱 3218 -莲 3219 -莴 3220 -获 3221 -莹 3222 -莺 3223 -莽 3224 -菁 3225 -菇 3226 -菊 3227 -菌 3228 -菜 3229 -菠 3230 -菡 3231 -菩 3232 -菱 3233 -菲 3234 -萃 3235 -萄 3236 -萋 3237 -萌 3238 -萍 3239 -萎 3240 -萝 3241 -萤 3242 -营 3243 -萦 3244 -萧 3245 -萨 3246 -萱 3247 -落 3248 -葆 3249 -著 3250 -葛 3251 -葡 3252 -董 3253 -葩 3254 -葫 3255 -葬 3256 -葱 3257 -葵 3258 -蒂 3259 -蒋 3260 -蒙 3261 -蒜 3262 -蒲 3263 -蒸 3264 -蒿 3265 -蓁 3266 -蓄 3267 -蓉 3268 -蓝 3269 -蓟 3270 -蓬 3271 -蔑 3272 -蔓 3273 -蔗 3274 -蔚 3275 -蔡 3276 -蔫 3277 -蔬 3278 -蔷 3279 -蔺 3280 -蔽 3281 -蕉 3282 -蕊 3283 -蕙 3284 -蕲 3285 -蕴 3286 -蕾 3287 -薄 3288 -薇 3289 -薛 3290 -薪 3291 -薯 3292 -薰 3293 -藏 3294 -藜 3295 -藤 3296 -藩 3297 -藻 3298 -蘑 3299 -虎 3300 -虐 3301 -虑 3302 -虚 3303 -虞 3304 -虫 3305 -虱 3306 -虹 3307 -虽 3308 -虾 3309 -蚀 3310 -蚁 3311 -蚂 3312 -蚊 3313 -蚌 3314 -蚓 3315 -蚕 3316 -蚝 3317 -蚣 3318 -蚯 3319 -蛀 3320 -蛇 3321 -蛋 3322 -蛐 3323 -蛙 3324 -蛛 3325 -蛟 3326 -蛮 3327 -蛰 3328 -蜀 3329 -蜂 3330 -蜇 3331 -蜈 3332 -蜊 3333 -蜒 3334 -蜓 3335 -蜕 3336 -蜘 3337 -蜚 3338 -蜜 3339 -蜡 3340 -蜥 3341 -蜴 3342 -蜷 3343 -蜿 3344 -蝇 3345 -蝉 3346 -蝎 3347 -蝗 3348 -蝙 3349 -蝠 3350 -蝴 3351 -蝶 3352 -螂 3353 -螃 3354 -融 3355 -螳 3356 -螺 3357 -蟑 3358 -蟹 3359 -蠢 3360 -血 3361 -衅 3362 -行 3363 -衍 3364 -衔 3365 -街 3366 -衙 3367 -衡 3368 -衣 3369 -补 3370 -表 3371 -衫 3372 -衬 3373 -衰 3374 -衷 3375 -袁 3376 -袂 3377 -袄 3378 -袆 3379 -袈 3380 -袋 3381 -袍 3382 -袒 3383 -袖 3384 -袜 3385 -被 3386 -袭 3387 -袱 3388 -裁 3389 -裂 3390 -装 3391 -裆 3392 -裔 
3393 -裕 3394 -裙 3395 -裟 3396 -裤 3397 -裳 3398 -裴 3399 -裸 3400 -裹 3401 -褂 3402 -褒 3403 -褓 3404 -褚 3405 -褛 3406 -褪 3407 -褴 3408 -褶 3409 -襁 3410 -襄 3411 -襟 3412 -西 3413 -要 3414 -覃 3415 -覆 3416 -见 3417 -观 3418 -规 3419 -觅 3420 -视 3421 -览 3422 -觉 3423 -觊 3424 -觎 3425 -觐 3426 -觑 3427 -角 3428 -解 3429 -觥 3430 -触 3431 -言 3432 -詹 3433 -誉 3434 -誓 3435 -警 3436 -譬 3437 -计 3438 -订 3439 -认 3440 -讧 3441 -讨 3442 -让 3443 -讪 3444 -训 3445 -议 3446 -讯 3447 -记 3448 -讲 3449 -讳 3450 -讶 3451 -许 3452 -讹 3453 -论 3454 -讼 3455 -讽 3456 -设 3457 -访 3458 -诀 3459 -证 3460 -评 3461 -诅 3462 -识 3463 -诈 3464 -诉 3465 -诊 3466 -词 3467 -译 3468 -诓 3469 -试 3470 -诗 3471 -诙 3472 -诚 3473 -话 3474 -诞 3475 -诟 3476 -诠 3477 -诡 3478 -询 3479 -该 3480 -详 3481 -诧 3482 -诩 3483 -诫 3484 -诬 3485 -语 3486 -误 3487 -诱 3488 -诲 3489 -说 3490 -诵 3491 -诶 3492 -请 3493 -诸 3494 -诺 3495 -读 3496 -诽 3497 -课 3498 -诿 3499 -谀 3500 -谁 3501 -调 3502 -谅 3503 -谈 3504 -谊 3505 -谋 3506 -谌 3507 -谍 3508 -谎 3509 -谐 3510 -谑 3511 -谓 3512 -谕 3513 -谙 3514 -谚 3515 -谜 3516 -谢 3517 -谣 3518 -谤 3519 -谦 3520 -谨 3521 -谩 3522 -谬 3523 -谭 3524 -谱 3525 -谴 3526 -谷 3527 -豁 3528 -豆 3529 -豚 3530 -象 3531 -豪 3532 -豫 3533 -豹 3534 -貅 3535 -貉 3536 -貌 3537 -貔 3538 -贝 3539 -贞 3540 -负 3541 -贡 3542 -财 3543 -责 3544 -贤 3545 -败 3546 -账 3547 -货 3548 -质 3549 -贩 3550 -贪 3551 -贫 3552 -贬 3553 -购 3554 -贮 3555 -贯 3556 -贱 3557 -贴 3558 -贵 3559 -贷 3560 -贸 3561 -费 3562 -贺 3563 -贼 3564 -贾 3565 -贿 3566 -赁 3567 -赂 3568 -赃 3569 -资 3570 -赋 3571 -赌 3572 -赎 3573 -赏 3574 -赐 3575 -赔 3576 -赖 3577 -赘 3578 -赚 3579 -赛 3580 -赝 3581 -赞 3582 -赠 3583 -赡 3584 -赢 3585 -赣 3586 -赤 3587 -赦 3588 -赫 3589 -走 3590 -赴 3591 -赵 3592 -赶 3593 -起 3594 -趁 3595 -超 3596 -越 3597 -趋 3598 -趟 3599 -趣 3600 -足 3601 -趴 3602 -趸 3603 -趾 3604 -跃 3605 -跄 3606 -跆 3607 -跌 3608 -跑 3609 -跛 3610 -距 3611 -跟 3612 -跤 3613 -跨 3614 -跪 3615 -路 3616 -跳 3617 -践 3618 -跷 3619 -跺 3620 -跻 3621 -踉 3622 -踊 3623 -踏 3624 -踝 3625 -踞 3626 -踢 3627 -踩 3628 -踪 3629 -踵 3630 -踹 3631 -蹂 3632 -蹄 3633 -蹈 3634 -蹊 3635 -蹚 3636 -蹦 3637 -蹬 3638 -蹭 3639 -蹲 3640 -蹴 3641 -蹶 3642 -蹼 3643 -蹿 3644 -躁 3645 -躏 3646 -身 3647 -躬 3648 -躯 3649 -躲 3650 -躺 3651 -车 3652 -轧 3653 -轨 3654 -轩 3655 -转 3656 -轮 3657 -软 3658 -轰 3659 -轴 3660 -轶 3661 -轻 3662 -载 3663 -轿 3664 -较 3665 -辄 3666 -辅 3667 -辆 3668 -辈 3669 -辉 3670 -辍 3671 -辐 3672 -辑 3673 -输 3674 -辖 3675 -辗 3676 -辘 3677 -辙 3678 -辛 3679 -辜 3680 -辞 3681 -辟 3682 -辣 3683 -辨 3684 -辩 3685 -辫 3686 -辰 3687 -辱 3688 -边 3689 -辽 3690 -达 3691 -迁 3692 -迂 3693 -迄 3694 -迅 3695 -过 3696 -迈 3697 -迎 3698 -运 3699 -近 3700 -返 3701 -还 3702 -这 3703 -进 3704 -远 3705 -违 3706 -连 3707 -迟 3708 -迢 3709 -迥 3710 -迪 3711 -迫 3712 -迭 3713 -述 3714 -迷 3715 -迸 3716 -迹 3717 -追 3718 -退 3719 -送 3720 -适 3721 -逃 3722 -逅 3723 -逆 3724 -选 3725 -逊 3726 -逍 3727 -透 3728 -逐 3729 -递 3730 -途 3731 -逗 3732 -通 3733 -逛 3734 -逝 3735 -逞 3736 -速 3737 -造 3738 -逡 3739 -逢 3740 -逮 3741 -逵 3742 -逸 3743 -逻 3744 -逼 3745 -逾 3746 -遁 3747 -遂 3748 -遇 3749 -遍 3750 -遏 3751 -遐 3752 -道 3753 -遗 3754 -遛 3755 -遢 3756 -遣 3757 -遥 3758 -遨 3759 -遭 3760 -遮 3761 -遴 3762 -遵 3763 -避 3764 -邀 3765 -邂 3766 -邃 3767 -邋 3768 -邑 3769 -邓 3770 -邛 3771 -邝 3772 -邢 3773 -那 3774 -邦 3775 -邪 3776 -邬 3777 -邮 3778 -邯 3779 -邱 3780 -邵 3781 -邹 3782 -邺 3783 -邻 3784 -郁 3785 -郊 3786 -郎 3787 -郑 3788 -郜 3789 -郝 3790 -郡 3791 -部 3792 -郫 3793 -郭 3794 -郸 3795 -都 3796 -鄂 3797 -鄙 3798 -鄞 3799 -鄢 3800 -酋 3801 -酌 3802 -配 3803 -酒 3804 -酗 3805 -酝 3806 -酣 3807 -酪 3808 -酬 3809 -酯 3810 -酱 3811 -酵 3812 -酶 3813 -酷 3814 -酸 3815 -酿 3816 -醇 3817 -醉 3818 -醋 3819 -醍 3820 -醐 3821 -醒 3822 -醛 3823 -采 3824 -釉 3825 -释 3826 -里 3827 -重 3828 -野 3829 -量 3830 -金 3831 -釜 3832 -鉴 3833 -鏖 3834 -鑫 3835 -针 3836 -钉 
3837 -钊 3838 -钓 3839 -钛 3840 -钝 3841 -钞 3842 -钟 3843 -钠 3844 -钢 3845 -钥 3846 -钦 3847 -钧 3848 -钩 3849 -钮 3850 -钰 3851 -钱 3852 -钵 3853 -钻 3854 -钾 3855 -铀 3856 -铁 3857 -铂 3858 -铃 3859 -铅 3860 -铆 3861 -铉 3862 -铎 3863 -铐 3864 -铜 3865 -铝 3866 -铠 3867 -铣 3868 -铨 3869 -铬 3870 -铭 3871 -铮 3872 -铰 3873 -铲 3874 -银 3875 -铸 3876 -铺 3877 -链 3878 -铿 3879 -销 3880 -锁 3881 -锂 3882 -锄 3883 -锅 3884 -锆 3885 -锈 3886 -锋 3887 -锌 3888 -锏 3889 -锐 3890 -错 3891 -锜 3892 -锟 3893 -锡 3894 -锢 3895 -锣 3896 -锤 3897 -锥 3898 -锦 3899 -锭 3900 -键 3901 -锯 3902 -锰 3903 -锵 3904 -锷 3905 -锹 3906 -锻 3907 -镀 3908 -镁 3909 -镇 3910 -镉 3911 -镊 3912 -镍 3913 -镑 3914 -镖 3915 -镜 3916 -镯 3917 -镳 3918 -镶 3919 -长 3920 -门 3921 -闪 3922 -闫 3923 -闭 3924 -问 3925 -闯 3926 -闰 3927 -闲 3928 -闳 3929 -间 3930 -闵 3931 -闷 3932 -闸 3933 -闹 3934 -闺 3935 -闻 3936 -闽 3937 -阀 3938 -阁 3939 -阂 3940 -阅 3941 -阎 3942 -阐 3943 -阔 3944 -阙 3945 -阚 3946 -阜 3947 -队 3948 -阮 3949 -阱 3950 -防 3951 -阳 3952 -阴 3953 -阵 3954 -阶 3955 -阻 3956 -阿 3957 -陀 3958 -陂 3959 -附 3960 -际 3961 -陆 3962 -陈 3963 -陋 3964 -陌 3965 -降 3966 -限 3967 -陕 3968 -陡 3969 -院 3970 -除 3971 -陨 3972 -险 3973 -陪 3974 -陬 3975 -陵 3976 -陶 3977 -陷 3978 -隅 3979 -隆 3980 -隋 3981 -隍 3982 -随 3983 -隐 3984 -隔 3985 -隘 3986 -隙 3987 -障 3988 -隧 3989 -隶 3990 -隼 3991 -隽 3992 -难 3993 -雀 3994 -雁 3995 -雄 3996 -雅 3997 -集 3998 -雇 3999 -雌 4000 -雍 4001 -雏 4002 -雕 4003 -雨 4004 -雪 4005 -雯 4006 -雳 4007 -零 4008 -雷 4009 -雾 4010 -需 4011 -霁 4012 -霄 4013 -霆 4014 -震 4015 -霈 4016 -霉 4017 -霍 4018 -霎 4019 -霏 4020 -霖 4021 -霜 4022 -霞 4023 -露 4024 -霸 4025 -霹 4026 -霾 4027 -靑 4028 -青 4029 -靓 4030 -靖 4031 -静 4032 -靛 4033 -非 4034 -靠 4035 -靡 4036 -面 4037 -革 4038 -靳 4039 -靴 4040 -靶 4041 -鞋 4042 -鞍 4043 -鞘 4044 -鞠 4045 -鞭 4046 -韦 4047 -韧 4048 -韩 4049 -韬 4050 -音 4051 -韵 4052 -韶 4053 -页 4054 -顶 4055 -顷 4056 -项 4057 -顺 4058 -须 4059 -顽 4060 -顾 4061 -顿 4062 -颁 4063 -颂 4064 -预 4065 -颅 4066 -领 4067 -颇 4068 -颈 4069 -颊 4070 -颍 4071 -颐 4072 -频 4073 -颓 4074 -颖 4075 -颗 4076 -题 4077 -颚 4078 -颜 4079 -额 4080 -颠 4081 -颤 4082 -风 4083 -飒 4084 -飓 4085 -飘 4086 -飙 4087 -飚 4088 -飞 4089 -食 4090 -餐 4091 -餮 4092 -饕 4093 -饥 4094 -饪 4095 -饭 4096 -饮 4097 -饰 4098 -饱 4099 -饲 4100 -饵 4101 -饶 4102 -饺 4103 -饼 4104 -饽 4105 -饿 4106 -馀 4107 -馅 4108 -馆 4109 -馈 4110 -馊 4111 -馋 4112 -馑 4113 -馒 4114 -首 4115 -馗 4116 -香 4117 -馥 4118 -馨 4119 -马 4120 -驭 4121 -驯 4122 -驰 4123 -驱 4124 -驳 4125 -驴 4126 -驶 4127 -驻 4128 -驼 4129 -驾 4130 -驿 4131 -骁 4132 -骂 4133 -骄 4134 -骅 4135 -骆 4136 -骇 4137 -骊 4138 -骋 4139 -验 4140 -骏 4141 -骐 4142 -骑 4143 -骗 4144 -骚 4145 -骜 4146 -骤 4147 -骥 4148 -骨 4149 -骷 4150 -骸 4151 -骼 4152 -髅 4153 -髋 4154 -髓 4155 -高 4156 -髦 4157 -鬼 4158 -魁 4159 -魂 4160 -魄 4161 -魅 4162 -魇 4163 -魏 4164 -魔 4165 -鱼 4166 -鲁 4167 -鲍 4168 -鲜 4169 -鲟 4170 -鲨 4171 -鲶 4172 -鲷 4173 -鲸 4174 -鳄 4175 -鳅 4176 -鳌 4177 -鳖 4178 -鳝 4179 -鳞 4180 -鸟 4181 -鸠 4182 -鸡 4183 -鸣 4184 -鸥 4185 -鸦 4186 -鸭 4187 -鸯 4188 -鸳 4189 -鸵 4190 -鸽 4191 -鸾 4192 -鸿 4193 -鹃 4194 -鹅 4195 -鹊 4196 -鹏 4197 -鹜 4198 -鹞 4199 -鹤 4200 -鹭 4201 -鹰 4202 -鹿 4203 -麋 4204 -麒 4205 -麓 4206 -麟 4207 -麦 4208 -麻 4209 -麾 4210 -黄 4211 -黍 4212 -黎 4213 -黏 4214 -黑 4215 -黔 4216 -默 4217 -黛 4218 -黝 4219 -黯 4220 -鼎 4221 -鼓 4222 -鼠 4223 -鼻 4224 -鼾 4225 -齐 4226 -齿 4227 -龄 4228 -龙 4229 -龚 4230 -龟 4231 - 4232 diff --git a/models/audio/speech_recognition/conformer/igie/load_ixrt_plugin.py b/models/audio/speech_recognition/conformer/igie/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..f4452f2edf9877ea84e31f34bdadefcc247e5b52 --- /dev/null +++ b/models/audio/speech_recognition/conformer/igie/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import 
tensorrt
+from os.path import join, dirname, exists
+def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""):
+    if not dynamic_path:
+        dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so")
+    if not exists(dynamic_path):
+        raise FileNotFoundError(
+            f"The ixrt_plugin lib {dynamic_path} does not exist; please provide a valid plugin path!")
+    ctypes.CDLL(dynamic_path)
+    tensorrt.init_libnvinfer_plugins(logger, namespace)
+    print(f"Loaded plugin from {dynamic_path}")
diff --git a/models/audio/speech_recognition/conformer/igie/requirements.txt b/models/audio/speech_recognition/conformer/igie/requirements.txt
index 8820eb754dec653c319dc0c86d53049346c7f7b6..3dcea1ccc8337478e16d50942acc6175d270b9b5 100644
--- a/models/audio/speech_recognition/conformer/igie/requirements.txt
+++ b/models/audio/speech_recognition/conformer/igie/requirements.txt
@@ -1,4 +1,5 @@
 tqdm
 onnx
-typeguard==2.13.3
-onnxsim
\ No newline at end of file
+onnxsim
+librosa
+soundfile
\ No newline at end of file
diff --git a/models/audio/speech_recognition/conformer/igie/scripts/aishell_data_prepare.sh b/models/audio/speech_recognition/conformer/igie/scripts/aishell_data_prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..985564c2294b2a413531d6ced018029ec911fb23
--- /dev/null
+++ b/models/audio/speech_recognition/conformer/igie/scripts/aishell_data_prepare.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# Copyright 2019 Mobvoi Inc. All Rights Reserved.
+# set -euox pipefail
+
+data_dir=$1
+tool_dir=$2
+
+wav_dir=${data_dir}/wav
+aishell_text=${data_dir}/transcript/aishell_transcript_v0.8.txt
+
+# data directory check
+if [ ! -d $wav_dir ] || [ ! -f $aishell_text ]; then
+  echo "Error: wav directory and aishell text not found!"
+  exit 1;
+fi
+
+# find test wav file
+local_dir=${data_dir}/local
+mkdir -p $local_dir
+find $wav_dir -iname "*.wav" > $local_dir/wav.flist || exit 1;
+
+# Transcriptions preparation
+sed -e 's/\.wav//' $local_dir/wav.flist | awk -F '/' '{print $NF}' > $local_dir/utt.list
+paste -d' ' $local_dir/utt.list $local_dir/wav.flist > $local_dir/wav.scp_all
+${tool_dir}/filter_scp.pl -f 1 $local_dir/utt.list $aishell_text > $local_dir/transcripts.txt
+awk '{print $1}' $local_dir/transcripts.txt > $local_dir/utt.list
+${tool_dir}/filter_scp.pl -f 1 $local_dir/utt.list $local_dir/wav.scp_all | sort -u > $local_dir/wav.scp
+sort -u $local_dir/transcripts.txt > $local_dir/text
+echo "Preparing transcriptions succeeded!"
+
+test_dir=${data_dir}/test
+mkdir -p ${test_dir}
+for f in wav.scp text; do
+  cp $local_dir/$f ${test_dir}/$f || exit 1;
+done
+rm -r ${data_dir}/local
+
+# data_type can be `raw` or `shard`. Typically, `raw` is used for small datasets;
+# `shard` is used for large datasets (over 1k hours), and `shard` is
+# faster for reading data and training.
+data_type=raw
+num_utts_per_shard=1000
+
+# remove the space between the text labels for Mandarin dataset
+cp $test_dir/text $test_dir/text.org
+paste -d " " <(cut -f 1 -d" " ${test_dir}/text.org) \
+    <(cut -f 2- -d" " ${test_dir}/text.org | tr -d " ") \
+    > ${test_dir}/text
+rm ${test_dir}/text.org
+
+# Prepare required format
+if [ $data_type == "shard" ]; then
+  ${tool_dir}/make_shard_list.py --num_utts_per_shard $num_utts_per_shard \
+    --num_threads 16 $test_dir/wav.scp $test_dir/text \
+    $(realpath $test_dir/shards) $test_dir/data.list
+else
+  ${tool_dir}/make_raw_list.py $test_dir/wav.scp $test_dir/text \
+    $test_dir/data.list
+fi
+
+echo "AISHELL data preparation succeeded!"
\ No newline at end of file
diff --git a/models/audio/speech_recognition/conformer/igie/scripts/infer_conformer_fp16_accuracy.sh b/models/audio/speech_recognition/conformer/igie/scripts/infer_conformer_fp16_accuracy.sh
index 7b9d18cc7e8305cb86cc31a000ba44e60afde7e9..1f9b7fb21850f9b793887701bc542bcd30f75cf0 100644
--- a/models/audio/speech_recognition/conformer/igie/scripts/infer_conformer_fp16_accuracy.sh
+++ b/models/audio/speech_recognition/conformer/igie/scripts/infer_conformer_fp16_accuracy.sh
@@ -14,10 +14,8 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
-
+set -euo pipefail
 batchsize=24
-seqlen=384
-model_path="encoder_bs24_seq384_static_opt_matmul.onnx"
 
 # Update arguments
 index=0
@@ -33,22 +31,33 @@ done
 
 echo "batch size is ${batchsize}"
 
-# build engine
-python3 build_engine.py \
-    --model_path ${model_path} \
-    --input speech:${batchsize},${seqlen},80 speech_lengths:${batchsize} \
-    --precision fp16 \
-    --engine_path encoder_bs${batchsize}_seq${seqlen}_fp16.so
-
-# inference
-python3 inference.py \
-    --engine encoder_bs${batchsize}_seq${seqlen}_fp16.so \
-    --input speech speech_lengths \
-    --label text \
-    --config train.yaml \
-    --test_data data.list \
-    --dict lang_char.txt \
-    --mode ctc_greedy_search \
-    --batch_size ${batchsize} \
-    --seq_len ${seqlen} \
-    --result_file conformer_output_log
\ No newline at end of file
+EXIT_STATUS=0
+check_status()
+{
+    ret_code=${PIPESTATUS[0]}
+    if [ ${ret_code} != 0 ]; then
+        echo "fails"
+        [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1
+    fi
+}
+
+current_path=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
+
+PROJECT_DIR=${current_path}/..
+DATA_DIR=${current_path}/../aishell_test_data/test
+MODEL_DIR=${current_path}/../conformer_checkpoints
+
+export Accuracy=${Accuracy:=0.05}
+
+cd ${PROJECT_DIR}
+
+python3 build_engine.py \
+    --onnx_model ${MODEL_DIR}/conformer_fp16_trt.onnx \
+    --engine ${MODEL_DIR}/conformer_fp16_trt.engine "$@" ;check_status
+
+python3 ixrt_inference_accuracy.py \
+    --infer_type fp16 \
+    --batch_size ${batchsize} \
+    --data_dir ${DATA_DIR} \
+    --model_dir ${MODEL_DIR} "$@"; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/audio/speech_recognition/conformer/igie/scripts/infer_conformer_fp16_performance.sh b/models/audio/speech_recognition/conformer/igie/scripts/infer_conformer_fp16_performance.sh
index 311beac35ee379ac13c11153a347e573eaf09e46..499021223e97726aecc2eff66849278ac6dfc25d 100644
--- a/models/audio/speech_recognition/conformer/igie/scripts/infer_conformer_fp16_performance.sh
+++ b/models/audio/speech_recognition/conformer/igie/scripts/infer_conformer_fp16_performance.sh
@@ -14,10 +14,9 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the # License for the specific language governing permissions and limitations # under the License. +set -euo pipefail batchsize=24 -seqlen=384 -model_path="encoder_bs24_seq384_static_opt_matmul.onnx" # Update arguments index=0 @@ -33,23 +32,33 @@ done echo "batch size is ${batchsize}" -# build engine -python3 build_engine.py \ - --model_path ${model_path} \ - --input speech:${batchsize},${seqlen},80 speech_lengths:${batchsize} \ - --precision fp16 \ - --engine_path encoder_bs${batchsize}_seq${seqlen}_fp16.so - -# inference -python3 inference.py \ - --engine encoder_bs${batchsize}_seq${seqlen}_fp16.so \ - --input speech speech_lengths \ - --label text \ - --config train.yaml \ - --test_data data.list \ - --dict lang_char.txt \ - --mode ctc_greedy_search \ - --batch_size ${batchsize} \ - --seq_len ${seqlen} \ - --result_file conformer_output_log \ - --perf_only True \ No newline at end of file +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + echo "fails" + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +current_path=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) + +PROJECT_DIR=${current_path}/.. +DATA_DIR=${current_path}/../aishell_test_data/test +MODEL_DIR=${current_path}/../conformer_checkpoints + +export Accuracy=${Accuracy:=529} + +cd ${PROJECT_DIR} + +python3 build_engine.py \ + --onnx_model ${MODEL_DIR}/conformer_fp16_trt.onnx \ + --engine ${MODEL_DIR}/conformer_fp16_trt.engine "$@" ;check_status + +python3 ixrt_inference_performance.py \ + --infer_type fp16 \ + --batch_size ${batchsize} \ + --data_dir ${DATA_DIR} \ + --model_dir ${MODEL_DIR} "$@"; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/text b/models/audio/speech_recognition/conformer/igie/text deleted file mode 100644 index 93f768e9349b68e0705cfefb694d3c57a397b2c4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/text +++ /dev/null @@ -1,7176 +0,0 @@ -BAC009S0764W0121 甚至出现交易几乎停滞的情况 -BAC009S0764W0122 一二线城市虽然也处于调整中 -BAC009S0764W0123 但因为聚集了过多公共资源 -BAC009S0764W0124 为了规避三四线城市明显过剩的市场风险 -BAC009S0764W0125 标杆房企必然调整市场战略 -BAC009S0764W0126 因此土地储备至关重要 -BAC009S0764W0127 中原地产首席分析师张大伟说 -BAC009S0764W0128 一线城市土地供应量减少 -BAC009S0764W0129 也助推了土地市场的火爆 -BAC009S0764W0130 北京仅新增住宅土地供应十宗 -BAC009S0764W0131 开发边界将作为城市发展的刚性约定 -BAC009S0764W0132 不得超越界限盲目扩张 -BAC009S0764W0133 目前挂牌的只有几宗土地 -BAC009S0764W0134 再加上近期一二线楼市升温 -BAC009S0764W0135 房企对土地的争抢更加积极 -BAC009S0764W0136 土地市场体现了房企对一二线市场的看重 -BAC009S0764W0137 面包价格会跟风上涨吗 -BAC009S0764W0138 成交量环比大幅增加 -BAC009S0764W0139 国家统计局的数据显示 -BAC009S0764W0140 其中广州深圳甚至出现了多个日光盘 -BAC009S0764W0141 零三年到去年 -BAC009S0764W0142 市场基数已不可同日而语 -BAC009S0764W0143 在市场整体从高速增长进入中高速增长区间的同时 -BAC009S0764W0144 一线城市在价格较高的基础上整体回升并领涨全国 -BAC009S0764W0145 绝大部分三线城市房价仍然下降 -BAC009S0764W0146 一线楼市成交量激增 -BAC009S0764W0147 三四线城市依然冷清 -BAC009S0764W0148 根据中原地产研究中心最新数据 -BAC009S0764W0149 一线城市签约十七万套 -BAC009S0764W0150 同比涨幅达到百分之四 -BAC009S0764W0151 三线城市签约十六万套 -BAC009S0764W0152 四线城市成交量有轻微下调 -BAC009S0764W0153 住房城乡建设部政策研究中心主任秦虹表示 -BAC009S0764W0154 我国房地产市场过去从体偏紧部分地区过紧 -BAC009S0764W0155 总体偏松部分地区过剩 -BAC009S0764W0156 当供给远快于需求时 -BAC009S0764W0157 很难出现去年那样的楼市暴涨 -BAC009S0764W0158 即便是北上广深等供应偏紧的一线城市 -BAC009S0764W0159 也有限购政策在控制需求规模 -BAC009S0764W0160 从而有利于抑制楼市过快上涨 -BAC009S0764W0161 楼市调控供的行政手段宜减不宜加 -BAC009S0764W0162 稳增长措施需更全面地考虑化解楼市风险问题 -BAC009S0764W0163 楼市调控将去向何方 -BAC009S0764W0164 进一步发挥市场在资源配置中的决定性作用 -BAC009S0764W0165 楼市调控的行政手段宜减不宜加 -BAC009S0764W0166 去行政化 -BAC009S0764W0167 随着市场调整的深入 
-BAC009S0764W0168 一些三线城市取消限购及限贷 -BAC009S0764W0169 实施较大幅度的补贴政策 -BAC009S0764W0170 当地新建商品住宅的房价多在每平方米三四千元 -BAC009S0764W0171 政府出台每平方米补贴五百元的托市政策 -BAC009S0764W0172 由于不可能从根本上改变供求关系 -BAC009S0764W0173 类似的补贴政策常常是短效刺激 -BAC009S0764W0174 会对市场造成新一轮的干扰 -BAC009S0764W0175 安徽铜陵结束了当地契税补贴政策 -BAC009S0764W0176 当月住宅类商品房成交套数骤跌 -BAC009S0764W0177 在经济下行压力加大的背景下 -BAC009S0764W0178 稳增长措施需更全面地考虑化解楼市风险问题 -BAC009S0764W0179 国务院发展研究中心市场经济研究所副所长邓郁松认为 -BAC009S0764W0180 可能引发房价泡沫风险 -BAC009S0764W0181 在经济增速放缓阶段运用货币政策工具时 -BAC009S0764W0183 基本住房需求得到满足后 -BAC009S0764W0184 对绿色高效宜居的高品质住房需求快速上升 -BAC009S0764W0185 通过改革和政策调整 -BAC009S0764W0186 实现我国房地产市场的平稳运行 -BAC009S0764W0187 及时发现产业发展中的倾向性苗头性问题 -BAC009S0764W0188 促进战略性新兴产业健康发展 -BAC009S0764W0189 有关部门和社会各界积极响应 -BAC009S0764W0190 采取了一系列的政策措施 -BAC009S0764W0191 促使我国战略性新兴产业发展实现了良好开局 -BAC009S0764W0192 战略性新兴产业在各地呈现出集聚蓬勃发展的态势 -BAC009S0764W0193 先后出台的战略性新兴产业的政策措施主要有六项 -BAC009S0764W0194 在加强宏观引导方面 -BAC009S0764W0195 形成了系统完整的规划体系 -BAC009S0764W0196 明确了发展目标和重点任务 -BAC009S0764W0197 在加大要素支持方面 -BAC009S0764W0198 新批复了七只创投基金的设立方案 -BAC009S0764W0199 吸引社会资本七亿元 -BAC009S0764W0200 在加快体制改革方面 -BAC009S0764W0201 组织了第一批七个地区城市开展三网融合试点 -BAC009S0764W0202 第二批三网融合试点工作业已启动 -BAC009S0764W0203 制定了可再生能源电价附加补贴和配额交易方案 -BAC009S0764W0204 发改委双节期间重点关注电商促销行为 -BAC009S0764W0205 本报记者王颖春国家发改委近日发出通知 -BAC009S0764W0206 相关公司股票走势农产品 -BAC009S0764W0207 积极防范和妥善应对市场价格异常波动 -BAC009S0764W0208 维护正常的市场价格秩序 -BAC009S0764W0209 严厉打击春运期间违规上调票价价外收费等违法行为 -BAC009S0764W0210 切实降低农产品流通成本 -BAC009S0764W0211 要加强节日期间旅游市场价格监管 -BAC009S0764W0212 以及提供服务中的变相涨价或价格欺诈行为 -BAC009S0764W0213 构建良好的旅游市场环境 -BAC009S0764W0214 要继续开展商贸零售领域价格秩序整治 -BAC009S0764W0215 重点关注大型电子商务经营者的促销行为 -BAC009S0764W0216 规范降价打折返券赠送等促销行为 -BAC009S0764W0217 营造良好的消费环境 -BAC009S0764W0218 发改委多渠道筹集保障房建设资金到 -BAC009S0764W0219 要加大保障性安居工程建设资计划落实力度 -BAC009S0764W0220 二零一二年中央进一步加大了资金支持力度 -BAC009S0764W0221 地方政府也要加大资金筹措力度 -BAC009S0764W0222 加强建设资金统筹和组织实施工作 -BAC009S0764W0223 确保保障性安居工程年度建设任务的完成 -BAC009S0764W0224 充分发挥地方政府融资平台作用 -BAC009S0764W0225 鼓励引导社会力量参与建设保障性住房及配套设施 -BAC009S0764W0226 尽快将中央补助投资和省级配套资金分解下达到市县 -BAC009S0764W0227 二零一二保障房建设 -BAC009S0764W0228 七千万套保障房多少钢材 -BAC009S0764W0229 如何在五天内筹集到七万元 -BAC009S0764W0230 各地保障房建设的套数 -BAC009S0764W0231 保障房和水利建设概念股 -BAC009S0764W0232 发改委将订制战略避免境外投资恶性竞争到 -BAC009S0764W0233 新京报讯记者钟晶晶发改委昨日表示 -BAC009S0764W0234 政府将制订境外投资总体战略 -BAC009S0764W0235 避免中国企业境外恶性竞争 -BAC009S0764W0236 并鼓励企业在境外上市 -BAC009S0764W0237 加强海外信息监测为企业提供对外投资指导 -BAC009S0764W0238 形成一批具有国际竞争力的中国企业 -BAC009S0764W0239 十一五期间我国累计境外投资七千亿美元 -BAC009S0764W0240 年均增速百分之七 -BAC009S0764W0242 单项投资规模日益增大 -BAC009S0764W0243 几个亿美元的项目不断出现 -BAC009S0764W0244 规划对十二五的投资规模未做预测 -BAC009S0764W0245 但在鼓励企业走出去方面释出多个信号 -BAC009S0764W0246 鼓励传统纺织家电汽车等一般制造业外移 -BAC009S0764W0247 鼓励商业银行去境外开设分支机构 -BAC009S0764W0248 政府将完善境外投资统计制度 -BAC009S0764W0249 实行全口径统计和动态监测 -BAC009S0764W0250 确保境外企业和人员安全 -BAC009S0764W0251 但目前还存在服务架构不完善 -BAC009S0764W0252 缺乏对外投资长远规划等问题 -BAC009S0764W0253 可控是病毒武器最基本的要求 -BAC009S0764W0254 它必须尽量做到只针对敌对国家的计算机和网络 -BAC009S0764W0255 不能波及和影响其他无关国家甚至本国 -BAC009S0764W0256 具有精确的目标定位和识别能力 -BAC009S0764W0257 一旦战事结束或出于特殊需要可以实现自毁 -BAC009S0764W0258 病毒武器的传染性超强 -BAC009S0764W0259 它可以跨硬件平台传染 -BAC009S0764W0260 除了普通计算机以外 -BAC009S0764W0261 病毒武器的隐蔽性极佳 -BAC009S0764W0262 可以实现在敌国网络中的长期潜伏 -BAC009S0764W0263 是威力巨大的定时炸弹 -BAC009S0764W0264 用电脑进行战争比用核武器还有效 -BAC009S0764W0265 核武器并不能征服类似美国这样的国家 -BAC009S0764W0266 利用电脑病毒却可以在一秒钟内从银行盗走过亿美元 -BAC009S0764W0267 足够使美国失去战争基础因此彻底失败 -BAC009S0764W0268 但是病毒武器的出现 -BAC009S0764W0269 预示着未来战争模样将完全改变 -BAC009S0764W0270 病毒武器被认为是目前最具有代表性的网络武器 -BAC009S0764W0271 美国芯片行业兴起并购热潮搜狐科技 -BAC009S0764W0272 反映了芯片行业出现整合热潮 -BAC009S0764W0273 英特尔是世界头号芯片制造商 -BAC009S0764W0274 此次以一百六十七亿美元收购拓朗 
-BAC009S0764W0275 将创下该公司成立四七年来最大收购交易的记录 -BAC009S0764W0276 正在寻求扩大移动市场份额 -BAC009S0764W0277 拓朗的主打产品是现场可编程门阵列芯片 -BAC009S0764W0278 可供客户为特定任务重新编程 -BAC009S0764W0279 应用于汽车医疗等行业 -BAC009S0764W0280 英特尔首席执行官布赖恩克尔扎尼奇在一份声明中说 -BAC009S0764W0281 合并拓朗之后将推出新的产品 -BAC009S0764W0282 满足数据中心和物联网细分市场的用户需求 -BAC009S0764W0283 形成高度定制化的集成产品 -BAC009S0764W0284 微芯片科技公司表示 -BAC009S0764W0285 两家公司是联网汽车的主要芯片供应商 -BAC009S0764W0287 今年芯片行业并购交易额在八百亿美元以上 -BAC009S0764W0288 半导体行业的大公司正在寻求通过并购 -BAC009S0764W0289 扩大它们在新的芯片市场的份额 -BAC009S0764W0290 随着个人计算机芯片的需求放慢 -BAC009S0764W0291 英特尔需要找到新的增长点 -BAC009S0764W0292 高德纳咨询公司分析师马克黄说 -BAC009S0764W0293 如今则猛增到一两亿美元 -BAC009S0764W0294 解决小小芯片上的连线和物理问题需要大量昂贵设备 -BAC009S0764W0295 芯片行业的并购风体现了整个科技行业的一种趋势 -BAC009S0764W0296 即一些财大气粗的科技公司自己不创新 -BAC009S0764W0297 而是寻求收购规模较小更为灵活的公司 -BAC009S0764W0298 反映了芯片行业出现整合热 -BAC009S0764W0299 因为难以忍受股价长期被低估 -BAC009S0764W0300 中国游戏公司纷纷忙着退市 -BAC009S0764W0301 巨人网络盛大游戏以及完美世界均已选择了私有化 -BAC009S0764W0302 是这类公司在美国市场估值长期受低估 -BAC009S0764W0303 北京商报讯记者王晔君日前 -BAC009S0764W0304 裁员二千人是由于销售模式发生改变 -BAC009S0764W0305 公司已将原有的直销模式改为经销模式 -BAC009S0764W0306 因此需要的人员大幅下降 -BAC009S0764W0307 由于去年底制定的销售战略是直销模式 -BAC009S0764W0308 所以今年上半年公司在全国各地的员工人数大幅增加 -BAC009S0764W0309 由于近期销售模式的调整 -BAC009S0764W0310 即由直销模式转变为经销模式 -BAC009S0764W0311 公司将更多地依靠经销商进行销售 -BAC009S0764W0312 正是由于销售模式的改变 -BAC009S0764W0313 汉能直接销售人员大幅度减少 -BAC009S0764W0314 汉能发布中期财报披露 -BAC009S0764W0315 上半年营业收入二十一点零八亿港元 -BAC009S0764W0316 同比减少百分之三十四毛利十四点六一亿港元 -BAC009S0764W0317 同比减少约百分之四十六亏损额为五百九十三二万港元 -BAC009S0764W0318 而去年同期盈利十六点七六亿港元 -BAC009S0764W0319 是自二零一一年借壳上市以来首次出现亏损 -BAC009S0764W0320 同时公布了重组计划 -BAC009S0764W0321 撤销旗下高端产业集团和产品开发集团 -BAC009S0764W0322 并将从总部事业部及各区域公司共裁员二千人 -BAC009S0764W0323 汉能曾计划今年底前将这一数字提高到三百家 -BAC009S0764W0324 汉能上半年业绩出现大幅下滑 -BAC009S0764W0325 当务之急是扭转业绩 -BAC009S0764W0326 而由直销模式改为经销模式 -BAC009S0764W0327 可以缩减很多人力成本 -BAC009S0764W0328 有利于降低公司运营成本 -BAC009S0764W0329 但是由于直销改为经销 -BAC009S0764W0330 汉能对自身产品的议价能力推广力都将减弱 -BAC009S0764W0331 公司已经暂停或终止部分关联交易项目 -BAC009S0764W0332 已经花费了一定的资源和成本 -BAC009S0764W0333 因此暂停或终止这些项目 -BAC009S0764W0334 对本公司的上半年业绩带来了负面影响 -BAC009S0764W0335 北京商报讯记者王晔君日前 -BAC009S0764W0336 裁员两千人是由 -BAC009S0764W0338 他们在训练和比赛过程之中的速度也会逐渐慢下来 -BAC009S0764W0339 但是根据国外科学家最新的研究结果 -BAC009S0764W0340 通过对脚踝和小腿等部位的强化 -BAC009S0764W0341 可以有效的抵消年龄所带来的速度劣势 -BAC009S0764W0342 使上年纪的跑者也能保持较快的速度 -BAC009S0764W0343 美国东卡罗莱纳大学和维克森林大学的研究者认为 -BAC009S0764W0344 脚踝和小腿的能力变弱 -BAC009S0764W0345 如果能够加强这方面的锻炼 -BAC009S0764W0346 他们会拥有较快的速度 -BAC009S0764W0347 研究者们选取了一些年龄大的跑者作为研究对象 -BAC009S0764W0348 并让年轻跑者作为参照 -BAC009S0764W0349 他们的步频大致相同 -BAC009S0764W0350 年龄大跑者的步幅明显短于年轻人 -BAC009S0764W0351 使得他们的速度变慢了 -BAC009S0764W0352 研究者们选取了十九位跑者 -BAC009S0764W0353 年龄从二十三岁到五十九岁 -BAC009S0764W0354 身体质量指数平均为二十三点四 -BAC009S0764W0355 身材偏瘦而且比较健康 -BAC009S0764W0356 跑者从二十多岁到五十九岁 -BAC009S0764W0357 步幅长度和跑步速度大约下降了百分之二十 -BAC009S0764W0358 脚踝的能力损失了大约百分之四十八 -BAC009S0764W0359 按照平时训练的速度进行跑步 -BAC009S0764W0360 二十岁的跑者平均每英里耗时八分十八秒 -BAC009S0764W0361 而六十岁的跑者每英里耗时十分十八秒 -BAC009S0764W0362 已经有过不少关于这方面的研究 -BAC009S0764W0363 但是研究对象都是年轻跑者和年老跑者 -BAC009S0764W0364 年龄段的复盖范围比较窄 -BAC009S0764W0365 最令德维塔感到不可思议的是 -BAC009S0764W0366 跑者们随着年龄的增长 -BAC009S0764W0367 速度呈现出直线下降 -BAC009S0764W0368 速度下降的更加明显 -BAC009S0764W0369 很多六七十岁的跑者看到这个研究结果时 -BAC009S0764W0370 意思是他们比较认同这个结果 -BAC009S0764W0371 研究者们希望年龄大的跑者能够注意脚踝的锻炼 -BAC009S0764W0372 但德维塔觉得归根到底还是小腿肌肉的问题 -BAC009S0764W0373 尤其是比目鱼肌和腓肠肌 -BAC009S0764W0374 这才是产生跑步力量的根源 -BAC009S0764W0375 这两种方式的结合能够有效锻炼小腿肌肉 -BAC009S0764W0376 对于高年龄跑者来说 -BAC009S0764W0377 开始一项新的锻炼方式具有一定的风险性 -BAC009S0764W0378 想通过训练提升脚踝和小腿的能力 -BAC009S0764W0379 这些常年坚持跑步的人身体质量指数偏低 -BAC009S0764W0380 长期跑步可能是一种不需要药物来保持身材的有效方式 -BAC009S0764W0381 在二零二二年冬季奥运会的竞选当中 
-BAC009S0764W0382 北京和张家口最终击败了强大的对手阿拉木图 -BAC009S0764W0383 顺利获得了冬奥会的主办权 -BAC009S0764W0384 这也是这项冰雪顶级盛事首次来到中国 -BAC009S0764W0385 在此次申办冬奥会的过程中 -BAC009S0764W0386 我们看到了自身强大的综合实力 -BAC009S0764W0387 也看到了在冰雪运动综合实力上的欠缺和不足 -BAC009S0764W0388 经历过夏奥会的沉淀 -BAC009S0764W0389 加上近几年承办诸多国际性赛事的经验积累 -BAC009S0764W0390 在这场亚洲国家锁定胜局的申办博弈中 -BAC009S0764W0391 北申办此次冬奥会的价值要远远超过承办本身 -BAC009S0764W0392 对于北京申办冬奥会的最终结果 -BAC009S0764W0393 我们也应该抱着更加长远和开阔的视角来看待 -BAC009S0764W0394 北京申办冬奥强大实力成获胜武器 -BAC009S0764W0395 此次北京联手张家口申办冬奥会 -BAC009S0764W0396 在与阿拉木图的直接博弈中 -BAC009S0764W0397 财政能力和硬件设施的优势是我们最终取胜的关键原因 -BAC009S0764W0398 而二零零八年举办夏季奥运会所留下的宝贵遗产 -BAC009S0764W0399 也是最终打动国家奥运委会评审团的法宝 -BAC009S0764W0400 从经济实力和基础设施建设上看 -BAC009S0764W0401 北京和张家口要占据着相当明显的优势 -BAC009S0764W0402 北京和张家口两地的生产总值是二万二千七百三十点八亿元 -BAC009S0764W0403 而阿拉木图仅为四百亿美元 -BAC009S0764W0404 影片将在二零一五年一月在慕尼黑正式开机 -BAC009S0764W0405 好莱坞当红明星之前曾被盛传将扮演斯诺登 -BAC009S0764W0406 好莱坞当红明星之前曾被盛传将扮演斯诺登 -BAC009S0764W0407 他确实拿下了这个角色 -BAC009S0764W0408 对男友有什么条件 -BAC009S0764W0409 她表示最重要的就是诚恳 -BAC009S0764W0410 对于姊弟恋也不排斥 -BAC009S0764W0411 搜狐娱乐讯七月十日消息 -BAC009S0764W0412 据台湾媒体报道 -BAC009S0764W0413 许玮甯最近到法国工作 -BAC009S0764W0414 仍在个人社群网站频繁更新动态 -BAC009S0764W0415 甚至被外界揣测是因为和阮经天分手后所刺 -BAC009S0764W0416 她近日终于在受访时松口公开正解 -BAC009S0764W0417 背后意义竟只是不要忘记自己从哪里来 -BAC009S0764W0418 搜狐娱乐讯据台湾媒体报道 -BAC009S0764W0419 阮经天和许玮甯交往八年屡传婚讯 -BAC009S0764W0420 今年三月底惊爆分手 -BAC009S0764W0421 当时女方坦承已分居 -BAC009S0764W0422 但小天坚持玮甯依然是我的女人 -BAC009S0764W0423 有网友日前目击他俩在大稻埕分食炒饭 -BAC009S0764W0424 昨天她出席保养品活动 -BAC009S0764W0425 松口仍有联络 -BAC009S0764W0426 但称自己单身 -BAC009S0764W0427 恰巧昨日记者碰见阮经天出门倒垃圾 -BAC009S0764W0428 对许玮甯单身说语气落寞表示我没有什么看法 -BAC009S0764W0429 搜狐娱乐讯男方和小三还藕断丝连 -BAC009S0764W0430 因而痛斩情丝她除了拥有模特儿火辣身材 -BAC009S0764W0431 快报讯记者赵丹丹快递实名制时代终于到来了 -BAC009S0764W0432 按照国家邮政总局统一部署 -BAC009S0764W0433 从下月起全面实施快递实名制登记 -BAC009S0764W0434 现代快报记者从省邮政管理局了解到 -BAC009S0764W0435 江苏快递实名制登记动真格 -BAC009S0764W0436 本周内动员部署全省九零零多家快递企业按要求执行 -BAC009S0764W0437 个人寄快递必须登记有效的身份证件 -BAC009S0764W0438 本山传媒回应赵本山将有新作品没听说 -BAC009S0764W0439 不仅赢得观众好口碑 -BAC009S0764W0440 特别是师父赵本山也公开出面为大鹏点赞 -BAC009S0764W0441 本月二八日超级月亮和最圆中秋月喜相逢 -BAC009S0764W0442 月亮和地球之间的平均距离仅为三五六八九六万公里 -BAC009S0764W0443 月亮看起来会比往常大 -BAC009S0764W0444 也就是我们常说的超级月亮 -BAC009S0764W0445 这一天还将上演月全食 -BAC009S0764W0446 超级月亮碰上月全食 -BAC009S0764W0447 错过了这次就要到二零三三年了 -BAC009S0764W0448 本月下旬天宇将现五星连线奇观 -BAC009S0764W0449 中科院紫金山天文台公布了一零月天象 -BAC009S0764W0450 现代快报记者注意到 -BAC009S0764W0451 天龙座流星雨猎户座流星雨 -BAC009S0764W0452 让一零月的天空有点甜蜜蜜的味道 -BAC009S0764W0453 水星金星也将迎来观测良机 -BAC009S0764W0454 现代快报记者胡玉梅 -BAC009S0764W0455 本月中下旬小行星撞地球 -BAC009S0764W0456 专家没有科学依据 -BAC009S0764W0457 京华时报讯记者任珊记者从北京市教育考试院获悉 -BAC009S0764W0458 高招本科二批今天开始进行征集志愿录取 -BAC009S0764W0459 一八一所院校将补录一九四九人 -BAC009S0764W0460 朱军系阅兵世家曾参与一九八四年阅兵军乐演奏 -BAC009S0764W0461 朱圣祎爆王思聪女朋友被诉法官送达起诉书遇阻 -BAC009S0764W0462 王思聪将朱圣祎诉至北京朝阳法院 -BAC009S0764W0463 要求停止侵权公开道歉赔偿精神损失一元 -BAC009S0764W0464 法官送达起诉书副本等应诉材料遇阻 -BAC009S0764W0465 朱茵说紫霞仙子谁来演不是我可以决定的 -BAC009S0764W0466 资料图片在湖南卫视上周开播的偶像来了中 -BAC009S0764W0467 永远的紫霞仙子朱茵的亮相引起粉丝的热捧 -BAC009S0764W0468 永远的紫霞仙子朱茵的亮相引起粉丝的热捧 -BAC009S0764W0469 来自全球四七个国家和地区的二零零零多名选手参赛 -BAC009S0764W0470 机器人服务员现身火锅店顾客直呼女神 -BAC009S0764W0471 女神机器人在火锅店内工作 -BAC009S0764W0472 机场严查匿打火机过安检放在鞋子里算藏匿 -BAC009S0764W0473 本报讯记者杨柳昨天 -BAC009S0764W0474 记者从首都机场公安分局航站区派出所获悉 -BAC009S0764W0475 首都机场公安分局航站区派出所联合驻场安检人员 -BAC009S0764W0476 坚持违法零容忍和高限处理的执法态度 -BAC009S0764W0477 严格搜集和固定相关证据 -BAC009S0764W0478 近日在违法事实认识清楚法律法规适用明确的基础上 -BAC009S0764W0479 依法对一名藏匿打火机过检的旅客进行了行政处罚 -BAC009S0764W0480 机场公安加航航班未发生性侵事件 -BAC009S0764W0482 网传该航班一名男性旅客对空姐试图性侵导致飞机返航 -BAC009S0764W0483 新京报记者从首都国际机场公安分局相关人员处获悉 -BAC009S0764W0484 冲突因空姐发餐时餐车碰到了一名旅客 -BAC009S0764W0485 双方因语言交流不畅导致纠纷 
-BAC009S0764W0486 该男子因影响航班正常秩序 -BAC009S0764W0487 明星刘晓庆又火了一把 -BAC009S0764W0488 她几乎刷遍了各大媒体 -BAC009S0764W0489 不是她的戏或是她的八卦 -BAC009S0764W0490 而是因为她也中了天价的招 -BAC009S0764W0491 机组成功处置深航机上纵火事件获奖二五零万 -BAC009S0764W0492 成功处置深航机上纵火事件 -BAC009S0764W0493 杀中传女生嫌犯就想找个人发泄 -BAC009S0764W0494 其室友在微博上所发的寻人启事 -BAC009S0764W0495 警方证实周云露遇害 -BAC009S0765W0121 一线城市出现日光盘 -BAC009S0765W0122 楼市地市交相升温房价会不会再度暴涨 -BAC009S0765W0123 经济热点导读专家认为 -BAC009S0765W0124 我国房地产市场过去总体偏紧部分地区过紧 -BAC009S0765W0125 为了将后辈的婚姻分险隔断 -BAC009S0765W0126 将受益人定为直系血亲后代非配偶继承人 -BAC009S0765W0127 按公司持有房产计征 -BAC009S0765W0128 相关公司股票走势 -BAC009S0765W0129 房价起飞前购置了十几套房产 -BAC009S0765W0130 目前总估值已过亿元 -BAC009S0765W0131 这些房产全由宋芳自己打理 -BAC009S0765W0132 每月光租金收入便已远大于自己和子女的总开销 -BAC009S0765W0133 宋芳最近却有点烦恼 -BAC009S0765W0134 我想把房产留给儿女 -BAC009S0765W0135 万一以后儿女的婚姻出了问题 -BAC009S0765W0136 他们的财产和生活不会受到太大影响 -BAC009S0765W0137 宋芳对北京银行私人银行的财富顾问说 -BAC009S0765W0138 该信托出资购入宋芳的房产 -BAC009S0765W0139 成立资金信托购买自家房产 -BAC009S0765W0140 在了解宋芳的资产情况与需求之后 -BAC009S0765W0141 设立一个单一资金信托 -BAC009S0765W0142 宋芳本人为信托的发起人和委托人 -BAC009S0765W0143 北京信托作为受托人 -BAC009S0765W0144 之后由该信托对宋芳指定的房产发出购买要约 -BAC009S0765W0145 实现该信托对房产的控制 -BAC009S0765W0146 虽然房产是在信托的名下 -BAC009S0765W0147 但您和您的儿女能自由支配 -BAC009S0765W0148 这相当于左兜掏右兜 -BAC009S0765W0149 通过信托实现了财产的隔离保护 -BAC009S0765W0150 未来子女出现姻缘风险 -BAC009S0765W0151 其中资金这一要素指基于信托登记的相关法规局限 -BAC009S0765W0152 为了购买自己想要传承给子女的房产 -BAC009S0765W0153 宋芳必须再掏出完全属于自己的资金 -BAC009S0765W0154 委托人以其持有的资金设立一个单一资金信托 -BAC009S0765W0155 该资金可以是委托人的自有资金 -BAC009S0765W0156 也可以是委托人合法获得的过桥资金 -BAC009S0765W0157 确保所设信托的合法性 -BAC009S0765W0158 按公司持有房产计税 -BAC009S0765W0159 由于家族信托的存续期通常较长 -BAC009S0765W0160 在信托收益的处置上 -BAC009S0765W0161 不同的客户对收益再投资的需求差别较大 -BAC009S0765W0162 对收益率的要求差别却不至于相去甚远 -BAC009S0765W0163 从我们遇到的客户来看 -BAC009S0765W0164 回报率普遍要求并不高 -BAC009S0765W0165 有些客户只要求收益率超过利率即可 -BAC009S0765W0166 看中的是其财产保护与传承的功能 -BAC009S0765W0167 我现在就是担心自己哪天突然出现个什么情况 -BAC009S0765W0168 他们说不定又有离婚风险 -BAC009S0765W0169 我又不指望设立信托来赚钱 -BAC009S0765W0170 主要目的是把后辈的婚姻风险隔断 -BAC009S0765W0171 在宋芳的资金信托购买其房产时 -BAC009S0765W0172 需要按北京当地的要求缴纳二手房交易费用 -BAC009S0765W0173 而在信托持有这些房产后 -BAC009S0765W0174 这是因为宋芳购买其房产 -BAC009S0765W0175 按照公司持有房产计征 -BAC009S0765W0176 各项费用的加总并不低 -BAC009S0765W0177 在目前的法律框架下 -BAC009S0765W0178 这些税费均无法避免 -BAC009S0765W0179 他认为跟后辈姻缘风险相比 -BAC009S0765W0180 点击进入股友会参与讨论 -BAC009S0765W0181 本世纪经济报道 -BAC009S0765W0182 为了将后辈的婚姻风险隔断 -BAC009S0765W0183 并将受益人定为直系血亲后代非配偶继承人 -BAC009S0765W0184 今久整合营销集团迎来了它的生日 -BAC009S0765W0185 从最初的几十人 -BAC009S0765W0186 事业版图遍布全中国的集团化整合营销公司 -BAC009S0765W0187 无序竞争甚至恶意竞争时常发生 -BAC009S0765W0188 将发挥部际会议联席制度作用 -BAC009S0765W0189 制订境外投资总体战略 -BAC009S0765W0190 对重大项目和重大问题进行协调 -BAC009S0765W0191 引导企业围绕重点国家和地区在重点领域展开投资 -BAC009S0765W0192 鼓励本土中介机构提供服务 -BAC009S0765W0193 建立起政府部门企业和中介机构各司其职的组织架构 -BAC009S0765W0194 北京科技大学教授刘澄表示 -BAC009S0765W0195 主要是如何提供服务及做好监管 -BAC009S0765W0196 规划提出诸多想法 -BAC009S0765W0197 政府提供的服务企业是否需要 -BAC009S0765W0198 如何为企业提供信息避免海外投资风险 -BAC009S0765W0199 避免海外一窝蜂上项目等 -BAC009S0765W0200 发改委将尽快建立地方政府债务管理体系到 -BAC009S0765W0201 他就上述关注问题指出 -BAC009S0765W0202 目前我国出现政府性债务违约可能性并不大 -BAC009S0765W0203 下一步将进一步完善城投债券发行制度和防范风险机制 -BAC009S0765W0204 并尽快建立我国地方政府债务管理体系等 -BAC009S0765W0205 政府性违约可能性不大 -BAC009S0765W0206 中国证券报随着欧美等国主权债务危机陆续爆发 -BAC009S0765W0207 您如何看待政府的举债行为和债务风险 -BAC009S0765W0208 徐林吸取欧美等国主权债务危机的教训 -BAC009S0765W0209 采取必要措施加强政府债务管理 -BAC009S0765W0210 防范我国政府债务风险 -BAC009S0765W0211 但在具体评估我国地方政府债务风险程度时 -BAC009S0765W0212 也要看到我国与欧美国家的不同之处 -BAC009S0765W0213 我国地方政府性债务 -BAC009S0765W0214 特别是地方投融资平台公司形成的债务 -BAC009S0765W0215 主要用于各地基础设施的投资建设 -BAC009S0765W0216 当代人和后代人共同承担债务还本付息责任 -BAC009S0765W0217 可以更好地体现代际公平 -BAC009S0765W0218 克服当期建设资金不足的瓶颈制约 -BAC009S0765W0219 
有利于加快完善基础设施和投资环境 -BAC009S0765W0220 是一种合理的基础设施投融资建设行为 -BAC009S0765W0221 政府举债建设形成了大量资产 -BAC009S0765W0222 相当部分资产具有长期的直接收益 -BAC009S0765W0223 一些没有直接收益的项目 -BAC009S0765W0224 也具有间接的经济效益或社会效益 -BAC009S0765W0225 对促进当地经济增长和政府财力的增长 -BAC009S0765W0226 不能简单地用寅吃卯粮来作价值判断 -BAC009S0765W0227 这并不意味着政府可以无节制地借债 -BAC009S0765W0228 关键是要把投资规模和债务规模 -BAC009S0765W0229 控制在合理的范围内 -BAC009S0765W0230 防止出现系统性的偿债风险 -BAC009S0765W0231 国务院高度重视防范地方政府债务风险 -BAC009S0765W0232 从二零零九年下半年就开始要求有关部门调研这一问题 -BAC009S0765W0233 国家审计署还专门组织力量 -BAC009S0765W0234 对全国各地的政府债务进行了严格审计 -BAC009S0765W0235 审计署的审计结论表明 -BAC009S0765W0236 我国地方政府的累积债务相对于偿付能力来看 -BAC009S0765W0237 远低于发生债务危机的欧美国家 -BAC009S0765W0238 考虑到我国正处在经济快速增长期 -BAC009S0765W0239 政府财力增长也相应较快 -BAC009S0765W0240 政府还拥有较多的可变现资产 -BAC009S0765W0241 相对于目前的负债规模 -BAC009S0765W0242 政府总体上具有较强的偿债能力 -BAC009S0765W0243 采取积极有效的措施化解部分地区和领域的债务风险 -BAC009S0765W0244 在我国出现政府性债务违约的可能性是不大的 -BAC009S0765W0245 债券市场城投债券发行不畅 -BAC009S0765W0246 从城投债券发行监管部门的角度 -BAC009S0765W0247 您如何看待这一现象 -BAC009S0765W0248 徐林出于对地方政府债务风险的担忧 -BAC009S0765W0249 投资者采取措施防范风险是成熟的表现 -BAC009S0765W0250 但出于对我国地方政府债务风险的不合理判断 -BAC009S0765W0251 并进而对城投债券进行唱空或做空 -BAC009S0765W0252 最近企业债券特别是城投债券的发行难度加大 -BAC009S0765W0253 其在纳斯达克上市时的发行价为一六美元 -BAC009S0765W0254 其股票价格在十五点七六美元上下徘徊 -BAC009S0765W0255 中国手游在退市之前的市盈率在十六十七倍左右 -BAC009S0765W0256 掌趣科技三零零三一五一度超过二百倍 -BAC009S0765W0257 华尔街并不认可游戏这种商业模式 -BAC009S0765W0258 并非仅仅针对中国公司 -BAC009S0765W0260 作为美国本土著名社交游戏开发商 -BAC009S0765W0261 在当年社交游戏风靡的时候 -BAC009S0765W0262 因为快速发展的业务和不断膨胀的营收受资本市场亲睐 -BAC009S0765W0265 亏损二千六百九十万美元相比上一季度 -BAC009S0765W0266 这一亏损已经收窄了百分之五十七 -BAC009S0765W0267 成熟的资本市场相对公平 -BAC009S0765W0268 这些被市场唱空的游戏公司本身业务模式遇到了困境 -BAC009S0765W0269 游戏公司往往靠一款游戏在市场上火爆 -BAC009S0765W0270 大多数游戏产品往往病毒式地成长 -BAC009S0765W0272 这些中国游戏公司大多成长于中国市场 -BAC009S0765W0273 他们的产品在海外市场也极少被认可 -BAC009S0765W0274 这些公司在海外市场上市往往除了获得融资机会 -BAC009S0765W0275 并未能给这些公司带来其他的效应 -BAC009S0765W0277 中国游戏产品和美国产品极为不同 -BAC009S0765W0278 美国玩家对游戏难度创造性要求较高 -BAC009S0765W0279 中国产品不可能照搬到美国市场 -BAC009S0765W0280 在融资和发展海外市场上 -BAC009S0765W0281 还希望拓展市场的话 -BAC009S0765W0282 触控科技全资韩国子公司在韩国上市 -BAC009S0765W0283 而从二零一三年开始 -BAC009S0765W0284 这家公司就在挖角当地游戏公司高管建立分公司 -BAC009S0765W0285 打造适合当地市场的产品 -BAC009S0765W0286 根据陈昊芝在二零一四年八月提供的数据 -BAC009S0765W0287 市场份额做到了前十位 -BAC009S0765W0288 未来上市能够放大公司的品牌效益 -BAC009S0765W0289 让当地更多的人知道这家公司 -BAC009S0765W0290 在韩国股市低迷情况下 -BAC009S0765W0291 触控科技子公司涨幅居前 -BAC009S0765W0292 对于游戏这种地域属性较重的产品 -BAC009S0765W0293 应慎重考虑上市时机和地点 -BAC009S0765W0294 反复检视自身商业模式 -BAC009S0765W0295 而不是迫不及待抓住一切可以上市圈钱的机会 -BAC009S0765W0296 往往连最直接的目标都无法达成 -BAC009S0765W0297 因为难以忍受股价长期被低估 -BAC009S0765W0298 中国游戏公司纷纷忙着退市 -BAC009S0765W0299 最近都在流行做预测 -BAC009S0765W0300 于是他也来凑凑热闹 -BAC009S0765W0301 他的预测有点毒基本上是在讨论谁会下台 -BAC009S0765W0303 每日经济新闻记者杨建江南嘉捷六万 -BAC009S0765W0304 收盘价十三点六五元于七月八日发布公告 -BAC009S0765W0305 为使股价与公司价值匹配 -BAC009S0765W0306 公司拟计划通过集中竞价交易方式回购公司股份 -BAC009S0765W0307 公司此次回购股份的价格不超过十五点一零七元股 -BAC009S0765W0308 用于回购的资金总额不超过一点五一七亿元 -BAC009S0765W0309 预计回购股份约一千万股 -BAC009S0765W0310 占公司总股本约二点百分之五十 -BAC009S0765W0311 公司股票于二零一五年七月八日复牌 -BAC009S0765W0312 每日经济新闻记者注意到 -BAC009S0765W0313 截至二零一四年十二月三十一日 -BAC009S0765W0314 资金来源为自有资金 -BAC009S0765W0315 回购期限为自回购股份方案之日起至今年底 -BAC009S0765W0317 收盘价四点九九元也于七月八日公告 -BAC009S0765W0321 其目前的股票市值已经不能完全反映公司价值 -BAC009S0765W0323 增持后持股比例为六十二点百分之二十三 -BAC009S0765W0325 拟在二零一五年二零一七年先行推出两期回购方案 -BAC009S0765W0326 其中第一期回购资金上限为二零一四年净利润的百分之二十五 -BAC009S0765W0327 第二期股票回购方案不晚于二零一七年六月三十日推出 -BAC009S0765W0328 回购期限为股东大会通过后不超过十二个月 -BAC009S0765W0329 预计可回购不少于七百九十一万股 -BAC009S0765W0330 每日经济新闻记者杨建江南嘉捷六万一千一百三十一三 -BAC009S0765W0331 收盘价十三点六五元于七月八日发布公告 -BAC009S0765W0332 为使股价与公司价值匹配 -BAC009S0765W0333 公司拟计划通过集中竞价交易方式回购公司股份 -BAC009S0765W0334 
锂电池在今年上半年成为诸多上市公司的业绩功臣 -BAC009S0765W0335 成飞集成百二十一九十 -BAC009S0765W0337 公司上半年营业收入六点三四亿元 -BAC009S0765W0338 折合人民币大概二千四八零亿 -BAC009S0765W0339 而与经济实力相关的一些基础设施建设方面 -BAC009S0765W0340 无论是城市交通建设还是机场运力 -BAC009S0765W0341 阿拉木图都无法和北京相比 -BAC009S0765W0342 远远无法和北京相提并论 -BAC009S0765W0343 从申办冬奥会的硬件基础上看 -BAC009S0765W0344 北京冬奥会的硬件基础要强于阿拉木图 -BAC009S0765W0345 北京张家口计划启用一二个竞赛场馆 -BAC009S0765W0346 其中五个场馆需要新建 -BAC009S0765W0347 其馀场馆改扩建后可以满足赛事需要 -BAC009S0765W0348 其中北京市区仅需要新建一座速滑场馆 -BAC009S0765W0349 阿拉木图方面将会使用十四个场馆作为比赛之用 -BAC009S0765W0350 目前八座为已有场馆并在使用中 -BAC009S0765W0351 其于六个场馆都需要新建 -BAC009S0765W0352 从举办大型体育赛事的经验来看 -BAC009S0765W0353 北京的经验比阿拉木图丰富 -BAC009S0765W0354 还有二零一五年的田径世锦赛 -BAC009S0765W0355 这些使得北京积累了大量的举办与运营经验 -BAC009S0765W0356 也证明了北京举办大型体育赛事的能力 -BAC009S0765W0357 自从哈萨克斯坦独立 -BAC009S0765W0358 二零一一年的亚冬会是其举办的第一个国际性综合赛事 -BAC009S0765W0359 之后就没有举办过的大型体育赛事 -BAC009S0765W0360 花样滑冰大奖赛中国杯常年在北京和上海之间轮换 -BAC009S0765W0361 二零一四年的冰壶世锦赛也在北京举行 -BAC009S0765W0362 一系列大型赛事的承办 -BAC009S0765W0363 让北京具备了承办冬奥会这种顶级赛事的经验和能力 -BAC009S0765W0364 北京申办冬奥影响远超承办本身 -BAC009S0765W0365 此次北京申办冬奥会 -BAC009S0765W0366 也让我们看到了自身存在着的不足 -BAC009S0765W0367 其中主要集中于冰雪运动本身实力上的有所欠缺 -BAC009S0765W0368 二零二二年冬奥会的举行 -BAC009S0765W0369 对于我国冰雪运动实力的提升会有巨大的推动作用 -BAC009S0765W0370 和夏季奥运会上的斩金夺银不同 -BAC009S0765W0371 中国的冬季运动还处于半起步阶段 -BAC009S0765W0372 这在我国体育发展史上具有划时代的意义 -BAC009S0765W0373 标志着我国体育开始走向国际化 -BAC009S0765W0374 成为了国际体育运动大家庭中的一员 -BAC009S0765W0375 但直到一二年后的法国阿尔贝维尔冬奥会上 -BAC009S0765W0376 我国选手才实现了冬奥奖牌零的突破 -BAC009S0765W0377 取得这一突破的领军人就包括轮椅英雄叶乔波 -BAC009S0765W0378 又是十年的空白期 -BAC009S0765W0379 二零零二年的美国盐湖城冬奥会上 -BAC009S0765W0380 杨扬拿到了五百米和一千米两项短道速的金牌 -BAC009S0765W0381 更具历史性意义的是 -BAC009S0765W0382 这是中国奥运代表团在冬季奥运会上取得的首枚金牌 -BAC009S0765W0383 经过二十多年的努力 -BAC009S0765W0384 中国冰雪健儿终于站到了冬奥会的最高领奖台 -BAC009S0765W0385 以及拥有陈露的女单项目 -BAC009S0765W0386 可以在世界范围内立足 -BAC009S0765W0387 但随着这一系列名将的退役 -BAC009S0765W0388 在中国的这一传统优势项目上 -BAC009S0765W0389 我们可以说已经输给了其他强敌 -BAC009S0765W0390 更直观的体现是在冰雪运动的核心项目冰球上 -BAC009S0765W0391 竞争对手哈萨克斯坦在这一点上要强过我们 -BAC009S0765W0392 中国国家男子冰球队目前排名第三十二位 -BAC009S0765W0393 而哈萨克斯坦则是第十六位 -BAC009S0765W0394 所有主办国的男子冰球成绩排位均在二十位之内 -BAC009S0765W0395 二零一八年冬奥会的主办地韩国平昌是一个绝好的例子 -BAC009S0765W0396 平昌曾经三次申办冬奥会 -BAC009S0765W0397 前两次申办的过程中 -BAC009S0765W0398 男子冰球的战绩均在二十五名左右 -BAC009S0765W0399 而第三次申办周期内 -BAC009S0765W0401 几乎帮助了平昌拿下二零一八年冬奥会的主办权 -BAC009S0765W0402 由于韩国冰球协会的四年规划 -BAC009S0765W0403 保证国家队水平不会被其他球队相差太远的承诺下 -BAC009S0765W0404 业已正式启动斯诺登事件电影的拍摄 -BAC009S0765W0405 影片发布了第一批定装照 -BAC009S0765W0406 以一身越野军装黑框眼镜的造型出现 -BAC009S0765W0407 看上去和人物原型相当贴合 -BAC009S0765W0408 演技也日渐精湛 -BAC009S0765W0409 更有一手好厨艺 -BAC009S0765W0410 可说是超完美女神 -BAC009S0765W0411 男友却仍然劈腿偷吃 -BAC009S0765W0412 好友林心如也心疼喊话我会陪她 -BAC009S0765W0413 中新网五月六日电据台湾媒体报道 -BAC009S0765W0414 刚与阮经天传出情变不久的许玮甯近日接拍恐怖片 -BAC009S0765W0415 称为了演好戏 -BAC009S0765W0416 她看了不少恐怖片 -BAC009S0765W0417 看完片后会睡不好做恶梦 -BAC009S0765W0418 上厕所都要把灯全部打开 -BAC009S0765W0419 搜狐娱乐讯日前 -BAC009S0765W0420 引发众多粉丝围堵 -BAC009S0765W0421 玩心大起的许绍洋与玩家一起比拼游戏 -BAC009S0765W0422 没想竟然惨败 -BAC009S0765W0423 这让自称游戏达人的他颇有些不好意思 -BAC009S0765W0424 金陵晚报八月十二日报道二零一四年 -BAC009S0765W0425 许茹芸与韩籍男友举行了婚礼 -BAC009S0765W0426 迎来了人生崭新阶段 -BAC009S0765W0427 不同于大家心中按部就班的乖乖女形象 -BAC009S0765W0428 许茹芸突然闪婚让当时的娱乐圈也惊起了一阵小波澜 -BAC009S0765W0429 在许茹芸看来 -BAC009S0765W0430 但几乎一个都没有实现 -BAC009S0765W0431 一四年前轰动东莞沙田的一起命案 -BAC009S0765W0432 日前因为广东省高院作出的无罪判决 -BAC009S0765W0433 再次吸引了众人的目光 -BAC009S0765W0434 八月一七日上午一一时 -BAC009S0765W0435 陈传钧从东莞市第二看守所出来 -BAC009S0765W0436 这是二零一零年四月二三日以来 -BAC009S0765W0437 杀人犯出狱后喊冤被驳回供述与鉴定相印证 -BAC009S0765W0438 丈夫关某身负多处刀伤 -BAC009S0765W0439 呼救报警时称有人入屋行凶 -BAC009S0765W0440 又供称是自己失手杀妻 -BAC009S0765W0441 关某先后被判死刑死缓 -BAC009S0765W0442 他向广东省高院申诉 -BAC009S0765W0443 
广东高院审理后驳回了关某的申诉 -BAC009S0765W0444 杀人犯受民警感召行刑前捐器官谢罪 -BAC009S0765W0445 杀人犯抢劫获刑未查出旧案警方指纹识别有遗漏 -BAC009S0765W0446 京华时报记者蒲东峰摄二零零七年 -BAC009S0765W0447 时年二三岁的杨柱军在北京抢劫杀害了一名出租车司机 -BAC009S0765W0448 此后他没有隐姓埋名逃往外地 -BAC009S0765W0449 公安机关并未查出其身上还背着命案 -BAC009S0765W0450 并于二零一五年一月将其抓获 -BAC009S0765W0451 曾多次比对二零零七年命案现场匕首上的指纹 -BAC009S0765W0452 但指纹比对识别系统会出现一定概率的遗漏 -BAC009S0765W0453 杨柱军因涉嫌抢劫罪在市二中院受审 -BAC009S0765W0454 杀害中传失联女主嫌犯想找个无辜的人发泄 -BAC009S0765W0455 视频截图新京报快讯记者杨锋昨日 -BAC009S0765W0456 杀害中传女学生犯罪嫌疑人从小家庭教育严格 -BAC009S0765W0457 失联近两天的中传研究生周云露 -BAC009S0765W0458 李斯达表示自己跟周云露并没有深仇大恨 -BAC009S0765W0459 称就是想找个无辜的人 -BAC009S0765W0460 目前李斯达被关押在朝阳区看守所 -BAC009S0765W0461 周云露的父母在昨天上午去过朝阳刑警队 -BAC009S0765W0462 杀害中传女生嫌犯曾私藏刺刀同学称其特立独行 -BAC009S0765W0463 李斯达手持尖刀的自拍照 -BAC009S0765W0464 新京报快讯记者杨锋凌晨今日下午 -BAC009S0765W0465 中国传媒大学官网发布消息称 -BAC009S0765W0466 在朝阳区百子湾阳光嘉园小区遇害 -BAC009S0765W0467 犯罪嫌疑人已被抓获 -BAC009S0765W0468 学校正在全力配合公安机关和家属进行善后处理 -BAC009S0765W0469 杀害夜跑女子嫌犯不言不语拾荒者身份尚未确认 -BAC009S0765W0470 杀害女教师疑犯行凶后脸有伤警方悬赏五万缉拿 -BAC009S0765W0471 遇害女教师昨晚七时五七分 -BAC009S0765W0472 其作案后身上有大量血迹 -BAC009S0765W0473 双手背脸部等裸露部位有刺伤划伤 -BAC009S0765W0474 通告呼吁广大群众积极检举揭发提供线索 -BAC009S0765W0475 对提供重大线索协助破案者 -BAC009S0765W0476 我局将给予五万元奖励 -BAC009S0765W0477 杀害女童凶手被抓指认现场上千民众喊打 -BAC009S0765W0478 四川广安一一岁女孩的失踪 -BAC009S0765W0479 九日晚女孩尸体被找到 -BAC009S0765W0480 凶手在郫县安靖镇被抓 -BAC009S0765W0481 凶手到岳池县石垭镇指认骗走孩子的现场 -BAC009S0765W0482 数千围观人群高呼打死这个杂碎 -BAC009S0765W0483 现场喊打声持续不断 -BAC009S0765W0484 杀害宝鸡夜跑教师嫌犯落网是否为拾荒者尚无定论 -BAC009S0765W0485 吕某于一零月一四日晚从家中外出锻炼失踪 -BAC009S0765W0486 尸体于一零月二零日在宝鸡渭河公园被发现 -BAC009S0765W0487 李克强集众智汇众力攻坚克难激发活力 -BAC009S0765W0488 李彬彬喂大象喝水略显老态提醒网友夏天要补水 -BAC009S0765W0489 联合国官方微博晒出一张李彬彬喂大象喝水的照片 -BAC009S0765W0490 华西都市报讯记者杜恩湖一零月二四日中午一二时 -BAC009S0765W0491 一零月二三曰现身成都平乐古城 -BAC009S0765W0492 应邀参加第二届天府古镇艺术节 -BAC009S0765W0493 现场李双江受到了观众的热烈欢迎 -BAC009S0765W0494 二零零多幅珍贵油画抵达南京 -BAC009S0765W0495 李嘉诚军师抛售马云一五亿买香港最贵单价豪宅 -BAC009S0766W0121 实现数字化整合营销 -BAC009S0766W0122 是当今广告行业的需要 -BAC009S0766W0123 消费者行为的变化及技术的进步 -BAC009S0766W0124 催生了广告领域新的变革和创新 -BAC009S0766W0125 唯有实力雄厚又颇具现代创新意识的广告企业 -BAC009S0766W0126 今久整合营销集团就是如此 -BAC009S0766W0127 成为圈子里首屈一指的超大企业 -BAC009S0766W0128 自成立以来 -BAC009S0766W0129 服务项目几千个 -BAC009S0766W0130 开创了蔓延全国的青年社区概念 -BAC009S0766W0131 确立了无人撼动的行业老大地位 -BAC009S0766W0132 成为房地产最信任的营销公司 -BAC009S0766W0133 然而这家雄心勃勃的公司并未止步于此 -BAC009S0766W0134 一个以互联网和大数据为核心的时代已经到来 -BAC009S0766W0135 今久必须担当起引领时代潮流的重任 -BAC009S0766W0136 蓝色光标以几亿人民币收购今久 -BAC009S0766W0137 这成为今久转型的起点 -BAC009S0766W0138 依托蓝色光标强大的技术和资源优势 -BAC009S0766W0139 今久率先提出整合营销的概念 -BAC009S0766W0140 其核心在于利用数字化工具 -BAC009S0766W0141 为房地产商提供系统化的服务 -BAC009S0766W0142 整合营销实现了从策略到执行的系统化服务 -BAC009S0766W0143 当地产商的效果预期不断提高 -BAC009S0766W0144 这时候更要求服务商具备思考和行动的一致性 -BAC009S0766W0145 这样也为开发商节省了运营成本 -BAC009S0766W0146 整合营销是利用全案思维和大数据技术 -BAC009S0766W0147 市场上就出现了各类新型技术软件 -BAC009S0766W0148 但大多是雷声大雨点小 -BAC009S0766W0149 与房地产商的需求相去甚远 -BAC009S0766W0150 大数据营销需要的是强大的技术实力 -BAC009S0766W0151 而非某些功能的简单嫁接 -BAC009S0766W0152 蓝色光标作为全球首屈一指的广告服务商 -BAC009S0766W0153 在大数据上的技术优势无可匹敌 -BAC009S0766W0154 今久正是在蓝色光标的技术支持下 -BAC009S0766W0155 实现了大数据营销的创新 -BAC009S0766W0156 许多数字新产品 -BAC009S0766W0157 广泛应用于移动端 -BAC009S0766W0158 分析用户的消费行为和生活方式 -BAC009S0766W0159 帮助广告主找出目标用户 -BAC009S0766W0160 然后对广告信息进行精确匹配 -BAC009S0766W0161 达到降低成本提升营销效果的目的 -BAC009S0766W0162 今久在大举创新的同时 -BAC009S0766W0163 保持原有业务的正常运作 -BAC009S0766W0164 这才是一个大企业应该有的战略方向 -BAC009S0766W0165 带动了区域板块的扩张 -BAC009S0766W0166 在海南成立了分公司 -BAC009S0766W0167 现在已经是海南本土最大的房地产推广公司 -BAC009S0766W0168 拥有许多优质客户 -BAC009S0766W0169 今久上海分公司又悄无声息地开张了 -BAC009S0766W0170 新媒体推广的业务扩张 -BAC009S0766W0171 逐渐地撬开了上海这个外来公司很难生根的大都市 -BAC009S0766W0172 郑州长春和哈尔滨三地办事处 -BAC009S0766W0173 
用蓝色光标强大的新媒体技术和资源 -BAC009S0766W0174 搭起了全国地产推广新媒体的版图 -BAC009S0766W0175 今久又出高价 -BAC009S0766W0176 收购了房地产互联网营销公司沈阳新维一半股份 -BAC009S0766W0177 今久又一次利用资本市场 -BAC009S0766W0178 实现区域扩张 -BAC009S0766W0179 区域产品和业务三大层面 -BAC009S0766W0180 今久成功实现了转型 -BAC009S0766W0181 后今久时代正式到来 -BAC009S0766W0182 转型后的今久整合营销集团 -BAC009S0766W0183 在全球大数据浪潮中 -BAC009S0766W0184 依托蓝色光标的强大平台 -BAC009S0766W0185 助力中国房地产开发企业发掘并实现更大的价值需求 -BAC009S0766W0186 在机遇与挑战共存的互联网时代 -BAC009S0766W0187 发行利率也有较大幅度上升 -BAC009S0766W0188 人民银行多次提高存款准备金率和存贷款基准利率 -BAC009S0766W0189 不仅是城投债券发行利率 -BAC009S0766W0190 债券市场所有品种发行利率整体上都表现出向上的走向 -BAC009S0766W0191 导致城投债券发行产生较高的风险溢价 -BAC009S0766W0192 城投债券收益率上升 -BAC009S0766W0193 对债券投资人来说不是坏事 -BAC009S0766W0194 有利于提升城投债券的资产配置价值 -BAC009S0766W0195 则需要在发债时机和发债规模上进行合理的把握 -BAC009S0766W0196 我个人不赞成这一判断 -BAC009S0766W0197 债券发行人是优质的 -BAC009S0766W0198 还本付息也是正常的 -BAC009S0766W0199 投资者对城投债券风险表现出的恐慌 -BAC009S0766W0200 加强城投债监管完善制度建设 -BAC009S0766W0201 有的媒体甚至用井喷来描述 -BAC009S0766W0202 您如何看待城投债券这几年的发展和作用 -BAC009S0766W0203 徐林这几年城投债券发行数量的确有所增加 -BAC009S0766W0204 地方投融资平台公司通过发行债券进行融资 -BAC009S0766W0205 符合提高直接融资比重的要求 -BAC009S0766W0206 城投债券也适应了发行人和投资人的需要 -BAC009S0766W0207 这是这几年城投债券发行规模不断扩大的主要原因 -BAC009S0766W0208 我委核准发行的企业债券累计为七千亿元 -BAC009S0766W0209 其中城投债券共发行七千亿元 -BAC009S0766W0210 占比只有百分之七 -BAC009S0766W0211 城投债券的发行有比较严格的条件 -BAC009S0766W0212 从已发行的城投债券用途看 -BAC009S0766W0213 保障房建设和棚户区改造 -BAC009S0766W0214 城市文化和体育设施 -BAC009S0766W0215 地震灾后重建等领域 -BAC009S0766W0216 都起到了积极的作用 -BAC009S0766W0217 随着我国资本市场的进一步发展 -BAC009S0766W0218 城投债券作为中国债券市场的准市政债 -BAC009S0766W0219 发行规模还会稳步扩大 -BAC009S0766W0220 中国证券报面对市场对城投债券风险的担忧 -BAC009S0766W0221 是如何更好地防范城投债券可能出现的风险的 -BAC009S0766W0222 虽然已发行的城投债券的还本付息都是正常的 -BAC009S0766W0223 城投债作为一个信用产品 -BAC009S0766W0224 不可能是完全无风险的 -BAC009S0766W0225 我看了以后很受震动 -BAC009S0766W0226 虽然报道内容并没有具体的城投债券还本付息违约案 -BAC009S0766W0227 但却提醒了我们要更加关注城投债券可能出现的风险 -BAC009S0766W0228 并采取措施切实保护债券投资人的合法权益 -BAC009S0766W0229 作为城投债券发行监管部门 -BAC009S0766W0230 我们对城投债券发行人的审核一直是比较严格的 -BAC009S0766W0231 地方投融资平台公司申请发行债券 -BAC009S0766W0232 必须符合一些基本的条件企业必须连续三年盈利 -BAC009S0766W0233 所投项目必须经过合规性审查 -BAC009S0766W0234 我们还控制了投融资平台公司发债的范围 -BAC009S0766W0235 才能申请发行城投债券 -BAC009S0766W0236 就不得再通过发行城投债券新增政府性债务 -BAC009S0766W0237 正是有了这样一些严格的规定 -BAC009S0766W0238 使得很多投融资平台公司 -BAC009S0766W0239 难以满足发行城投债券的资格和条件 -BAC009S0766W0240 这在相当程度上控制了城投债券的发行规模 -BAC009S0766W0241 也降低了城投债券的风险 -BAC009S0766W0242 为了控制地方政府本届发债下届还钱的道德风险 -BAC009S0766W0243 我们还安排了专门的偿债均摊机制 -BAC009S0766W0244 也就是将债券还本压力在债券存续期内进行合理分摊 -BAC009S0766W0245 避免在最后一年累积过大的还本压力和风险 -BAC009S0766W0246 有媒体报道了云投集团等发债企业转移核心资产 -BAC009S0766W0247 损害债券持有人利益的事件 -BAC009S0766W0248 并对债券市场形成了不小的冲击 -BAC009S0766W0249 我们如何考虑防止这类事件再次发生 -BAC009S0766W0250 更好地保护债券投资人的利益 -BAC009S0766W0251 徐林发债企业在债券存续期内进行资产转移 -BAC009S0766W0252 极有可能对债券持有人利益构成不利影响 -BAC009S0766W0253 华尔街的半兽人已经为他的离开紧锣密鼓地敲起退堂鼓 -BAC009S0766W0257 问题是他也想不出谁能干得更好 -BAC009S0766W0260 但亏损却达到了一点八亿美元 -BAC009S0766W0262 不应从用户身上榨取广告收入 -BAC009S0766W0263 试问又有哪位有魔法能挽回巨额亏损呢 -BAC009S0766W0268 对于高中生来说这会有点令人尴尬罢了 -BAC009S0766W0269 可对于一个成年人来说算什么 -BAC009S0766W0270 还有他对日本文化的迷恋 -BAC009S0766W0271 然后又要去竞选纽约州长 -BAC009S0766W0273 跟星巴克的合作就是灾难 -BAC009S0766W0285 梅姐待的已算够长了 -BAC009S0766W0287 但是至少会给股价刺激一下 -BAC009S0766W0288 而梅姐则可以陪陪小孩或者去搞搞政治 -BAC009S0766W0293 但亏损达到了一点七亿美元 -BAC009S0766W0294 这样的成绩已经比二零一三年要好 -BAC009S0766W0296 十年都还没赚钱的话 -BAC009S0766W0299 它已经失去了作为独立公司的存在意义 -BAC009S0766W0303 同比增长一百三十四点七百分之三 -BAC009S0766W0304 归属于上市公司股东的净利润二十五十二万元 -BAC009S0766W0305 去年同期则是亏损二百四十二万元 -BAC009S0766W0306 同比增长十一五十六点四百分之二 -BAC009S0766W0307 公司锂电池业务实现营业收入四点零一亿元 -BAC009S0766W0308 同比增长二百六十八点五百分之一 -BAC009S0766W0309 成飞集成相关人士告诉每日经济新闻记者 -BAC009S0766W0310 前两年锂电池行业整体处于市场培育期 -BAC009S0766W0311 
虽然国家在二零一零年就颁布了新能源补贴政策 -BAC009S0766W0312 但是市场启动不像预期那么快 -BAC009S0766W0313 基本上是从二零一四年下半年才有明显的感觉 -BAC009S0766W0314 目前公司锂电池订单比较充足 -BAC009S0766W0315 由于传统汽车产业步入寒冬 -BAC009S0766W0316 汽车厂商纷纷转型新能源汽车 -BAC009S0766W0317 新能源汽车的爆发使得锂电池供不应求 -BAC009S0766W0318 随着锂电池产业链迎来井喷 -BAC009S0766W0319 锂电需求带动业绩增长 -BAC009S0766W0320 成飞集成的锂电池业务在前两年情况并不好 -BAC009S0766W0321 新能源汽车市场在逐步启动 -BAC009S0766W0322 锂电池市场也在向好 -BAC009S0766W0323 成飞集成相关人士告诉记者 -BAC009S0766W0324 这是今年上半年锂电池业务爆发的原因 -BAC009S0766W0325 成飞集成的其他主营业务中 -BAC009S0766W0326 汽车模具以及汽车零部件表现一般 -BAC009S0766W0327 汽车模具实现营收一点一零一亿元 -BAC009S0766W0328 毛利率为十八点百分之三 -BAC009S0766W0329 毛利率为十九点六百分之六 -BAC009S0766W0330 同比增长四点百分之三十五 -BAC009S0766W0331 但是由于该项业务占比较小 -BAC009S0766W0332 所以对业绩的影响有限 -BAC009S0766W0333 汽车零部件总体规模不大 -BAC009S0766W0334 汽车模具毛利率下滑 -BAC009S0766W0335 一方面是由于上半年模具的比较基数较低 -BAC009S0766W0336 也就是去年和今年上半年营收总额都不高 -BAC009S0766W0337 另外今年上半年个别订单的价格也比较低 -BAC009S0766W0339 这一状况有望发生改变 -BAC009S0766W0340 七年之后宋安东二十五岁 -BAC009S0766W0341 正是冰球运动员的黄金年龄 -BAC009S0766W0342 年少成名的他带领国家队出征冬奥会 -BAC009S0766W0343 铁定会有助于提升我国的冰球水平 -BAC009S0766W0344 进而提升我国在冬奥会申办过程中的竞争力 -BAC009S0766W0345 二零二二年冬奥会在北京举行 -BAC009S0766W0346 以宋安东为首的运动员们可以说是鲜活的冬奥名片 -BAC009S0766W0347 让越来越多人关注并参与到其中来 -BAC009S0766W0348 建设三个相对集聚的场馆群 -BAC009S0766W0349 申奥过程本身已经推动了城际交通建设 -BAC009S0766W0350 因此对于北京申办冬奥会的最终结果 -BAC009S0766W0351 我们应该抱着更加长远和开阔的视角来看待 -BAC009S0766W0352 更要期待着中国冰雪运动真正强大起来的那一天 -BAC009S0766W0353 法国冰协同于放人五度世界冠军即将复出搜狐体育 -BAC009S0766W0354 北京时间十月二十七日 -BAC009S0766W0355 经过将近一年时间的漫长谈判 -BAC009S0766W0356 法国花样滑冰联合会终于同意 -BAC009S0766W0357 允许布鲁诺马塞洛特代表德国 -BAC009S0766W0358 两人的更改国籍禁赛期即将开始 -BAC009S0766W0359 这也意味着最晚在明年的各项赛事中 -BAC009S0766W0360 我们就能看到这对强大组合的身影 -BAC009S0766W0361 在金牌搭档罗宾索尔科维退役之后 -BAC009S0766W0362 萨维申科宣布会再坚持一个冬奥会周期 -BAC009S0766W0363 她所选择的新搭档就是法国猛男马塞洛特 -BAC009S0766W0364 但是因为涉及到男伴更改国籍问题 -BAC009S0766W0365 两人的联手十分不顺利 -BAC009S0766W0366 这也让他们虽然可以参加小型赛事 -BAC009S0766W0367 但是由于国籍不统一 -BAC009S0766W0368 无法参加奥运会的比赛 -BAC009S0766W0369 对于法国冰协的行为 -BAC009S0766W0370 不少粉丝都表达了谴责 -BAC009S0766W0371 支持他们继续训练参加比赛 -BAC009S0766W0372 显然处于最艰难时期的两人丝毫没有放弃 -BAC009S0766W0373 休赛期内他们参加了小型赛事 -BAC009S0766W0374 从目前已经传出的视频来看 -BAC009S0766W0375 男伴更是在最新公布的视频中 -BAC009S0766W0376 他们的不放弃换来了成功 -BAC009S0766W0377 马塞洛特的母亲表示 -BAC009S0766W0378 法国冰协方面的态度有了缓和 -BAC009S0766W0379 法国冰协提出最后要求 -BAC009S0766W0380 要求马塞洛特缴纳七万欧元的转国籍费用 -BAC009S0766W0381 随后冰迷们自发为其网上募集资金 -BAC009S0766W0382 马塞洛特来到法国花样滑冰联合会 -BAC009S0766W0383 双方进行了最后一次也是最成功的一次洽谈 -BAC009S0766W0384 能够保障他的职业生涯发展是我的荣幸 -BAC009S0766W0385 恭喜他与萨维申科走上正确的道路 -BAC009S0766W0386 我们的朋友将代表德国 -BAC009S0766W0387 继续征战花样滑冰的比赛 -BAC009S0766W0388 今年第二位离开法国冰协更换国籍的选手 -BAC009S0766W0389 对于这个万众期待的消息 -BAC009S0766W0390 我可以带着它回家了 -BAC009S0766W0391 谢谢每一个支持我们的人 -BAC009S0766W0392 没有你们的支持我们该如何度过最挣扎的时期呢 -BAC009S0766W0393 是时候去努力工作了 -BAC009S0766W0394 他们的禁赛期即将开始 -BAC009S0766W0395 我们或许就将看到他们征战各类大型赛事的身影 -BAC009S0766W0396 对于隋文静韩聪彭程张昊领衔的中国双人滑军团 -BAC009S0766W0397 五度世锦赛冠军萨维申科联手年轻新搭档马塞洛特 -BAC009S0766W0398 这会是一对绝对强大的对手 -BAC009S0766W0399 经过近两个星期的漫长等待 -BAC009S0766W0400 北京时间八月九日一五三零 -BAC009S0766W0401 为观众们奉上昆仑决鏖战香江的精彩赛事 -BAC009S0766W0402 泰拳黑王子播求无疑同小皇帝詹姆斯最为相似 -BAC009S0766W0403 并在各自领域中享受着各自粉丝们帝王般的顶礼膜拜 -BAC009S0766W0404 曾以分歧者星运里的错窜红的谢琳伍德蕾 -BAC009S0766W0405 将出演影片的女主角 -BAC009S0766W0406 搜狐娱乐讯文耷子备受关注的重拍版乌鸦 -BAC009S0766W0407 在经历了无数次的导演和演员更换之后 -BAC009S0766W0408 除了去年结婚 -BAC009S0766W0409 有时候人生是计划赶不上变化的 -BAC009S0766W0410 就顺着你的感觉走就好了 -BAC009S0766W0411 日前在初赛收官战中返场的她加盟猜评团 -BAC009S0766W0412 一袭土豪金西装简直潮爆 -BAC009S0766W0413 有眼尖的网友发现 -BAC009S0766W0414 与孙楠巫启贤共同起立鼓掌的许茹芸小腹凸起 -BAC009S0766W0415 搜狐娱乐讯九月十二日 -BAC009S0766W0416 许茹芸与韩国丈夫崔栽诚迎来结婚一周年纪念日 -BAC009S0766W0417 许茹芸特地发微博感谢婚姻带来的幸福 -BAC009S0766W0418 许茹芸重回舞台不做苦情女娱乐频道 
-BAC009S0766W0419 华西都市报讯闪婚欧巴一年后二零一四年 -BAC009S0766W0420 许茹芸与韩籍男朋友举行了婚礼 -BAC009S0766W0421 迎来了人生崭新阶段 -BAC009S0766W0422 不同于大家心中按部就班的乖乖女形象 -BAC009S0766W0423 许茹芸突然闪婚让当时的娱乐圈也惊起了一阵小波澜 -BAC009S0766W0424 此后便鲜有消息 -BAC009S0766W0425 和往日的端庄淑女形象大有不同 -BAC009S0766W0426 对于重回舞台夺下当日歌王 -BAC009S0766W0427 她也坦言内心感触很多 -BAC009S0766W0428 论眉毛重要性 -BAC009S0766W0430 中新网五月七日电据台湾中国时报消息 -BAC009S0766W0431 李嘉诚回应撤资不爱国指控完全不成立 -BAC009S0766W0432 李嘉诚首次公开回应撤资不爱国等质疑 -BAC009S0766W0433 称一篇似是而非的文章 -BAC009S0766W0434 在其发给记者的新闻稿中说 -BAC009S0766W0435 所谓撤资指控完全不成立 -BAC009S0766W0436 其集团在全球拥有一三零零零间店铺 -BAC009S0766W0437 其中内地由两年前的一三零零间增至今天的二三零零间 -BAC009S0766W0438 李嘉诚怎么回答与中央关系有变 -BAC009S0766W0439 李嘉诚或再抛售内地地产项目拟出售上海办公楼 -BAC009S0766W0440 中国日报网八月三日电据华尔街日报三日报道 -BAC009S0766W0441 据两名知情人透露 -BAC009S0766W0442 李嘉诚正式回应撤资指控不相信文革式思维复苏 -BAC009S0766W0443 李嘉诚首次对撤资做出回应 -BAC009S0766W0444 我明白言论自由是一把两刃刀 -BAC009S0766W0445 因此一篇似是而非的文章 -BAC009S0766W0446 李嘉诚首次回应撤资传闻对中国发展充满信心 -BAC009S0766W0447 中新网九月三零日电据香港文汇报报道 -BAC009S0766W0448 对中央坚定不移继续改革开放 -BAC009S0766W0449 致力优化营商环境有信心 -BAC009S0766W0450 对中国发展充满信心 -BAC009S0766W0451 李娜产女后首次亮相运动员掌握英语很重要 -BAC009S0766W0452 李娜在一个商业活动中谈退役后的生活 -BAC009S0766W0453 李娜不想大家一直记得我那说明中国网球没突破 -BAC009S0766W0454 虽然已经退役近一年 -BAC009S0766W0455 但曾经的中国网球一姐李娜仍然没有淡出媒体的关注 -BAC009S0766W0456 李娜媒体用一次性参赛是对运动员的侮辱 -BAC009S0766W0457 长江商报消息本报记者张萌昨日 -BAC009S0766W0458 家居养娃的李娜又重新出现在媒体大众的面前 -BAC009S0766W0459 带着辛吉斯逛完了黄鹤楼 -BAC009S0766W0460 当日的新闻发布会上 -BAC009S0766W0461 李娜一身素色休闲装 -BAC009S0766W0462 走进了武网的新闻大厅 -BAC009S0766W0463 她身上少了些以往的悍将拼劲 -BAC009S0766W0464 多了初为人母的幸福光彩 -BAC009S0766W0465 看似犀利不再的娜姐老将气场立刻显出 -BAC009S0766W0466 一语回击一次性参赛这种说法是一种侮辱 -BAC009S0766W0467 希望媒体不要用这样的词来形容所有网球运动员 -BAC009S0766W0468 因为没有哪个运动员不想表现出最好的自己 -BAC009S0766W0469 李岚清座谈戏称自己八零后退休不等于生命终结 -BAC009S0766W0470 不知不觉我成为一个八零后的老头 -BAC009S0766W0471 退休后不在其位不谋其政 -BAC009S0766W0472 退休并不等于生命的终结 -BAC009S0766W0473 如果放弃学习没有追求 -BAC009S0766W0474 一个人的精神生命就将走向衰老 -BAC009S0766W0475 因此我给自己规划了八个字的退休生活 -BAC009S0766W0476 戏称自己年过八零当为八零后 -BAC009S0766W0477 李开复经历死亡这一课学会看透和放下 -BAC009S0766W0478 李开复被医生宣判为第四期淋巴癌 -BAC009S0766W0479 不期而至的阴霾让他被迫抛下工作 -BAC009S0766W0480 在新书向死而生我修的死亡学分中 -BAC009S0766W0481 我从没想过自己竟会出版一本这样的书 -BAC009S0766W0482 李晨马震就是玩笑任何情况都力挺范冰冰 -BAC009S0766W0483 新京报快讯记者刘玮近日 -BAC009S0766W0484 由于电影王朝的女人杨贵妃中的一场激情戏 -BAC009S0766W0485 范冰冰承包了娱乐头条 -BAC009S0766W0486 出席活动时笑称今后拍激情戏会征求男友李晨的意见 -BAC009S0766W0487 李晨秀才遇到兵发布会后回应称 -BAC009S0766W0488 如果这个事情反过来 -BAC009S0766W0489 演员这个职业就是这样 -BAC009S0766W0490 李玉刚张学友黄琦雯入选一零大最涨姿势歌曲 -BAC009S0766W0491 李玉刚新歌点击逾一亿网友李家每人只需半次 -BAC009S0766W0492 李玉刚饰演的杨贵妃被指芳华绝代说到神曲 -BAC009S0766W0493 该歌曲二零零字的歌词用典竟达三六处之多 -BAC009S0766W0494 让一些网友有如猜谜 -BAC009S0766W0495 被称为二零一五年第一神曲 -BAC009S0767W0121 时刻保持创新和变革意识 -BAC009S0767W0122 引领中国房地产广告行业走向新的黄金时代 -BAC009S0767W0123 今久整合营销集团迎来了它的十岁生日 -BAC009S0767W0124 今久从最初的几十个人 -BAC009S0767W0125 今久商品房销售额首次上涨 -BAC009S0767W0126 房地产投资增速仍下降 -BAC009S0767W0127 大智慧阿思达克通讯社 -BAC009S0767W0128 一五年一月份 -BAC009S0767W0129 全国房地产开发投资三万亿元 -BAC009S0767W0130 同比名义增长许多 -BAC009S0767W0131 增速比一月份回落零点九个百分点 -BAC009S0767W0132 全国商品房销售额两万亿元 -BAC009S0767W0133 年内首次出现同比增长 -BAC009S0767W0134 住宅销售额也增长了 -BAC009S0767W0135 办公楼销售额下降了 -BAC009S0767W0136 商业营业用房销售额下降了 -BAC009S0767W0137 住宅成为全国房地产销售金额唯一增长的板块 -BAC009S0767W0138 一系列楼市新政效果逐步显现 -BAC009S0767W0139 德佑链家市场研究部总监陆骑麟表示 -BAC009S0767W0140 全国房地产开发投资增速仍然延续了增速放缓的渠势 -BAC009S0767W0141 尽管有央行降息等各方利好刺激 -BAC009S0767W0142 尤其是库存高企的三四线城市 -BAC009S0767W0143 开发商仍然面临着较大的销售压力 -BAC009S0767W0144 国家统计局公布的数据显示 -BAC009S0767W0145 无论是东部中部还是西部地区 -BAC009S0767W0146 商品房房的销售面积同比数据出现好转 -BAC009S0767W0147 商品房销售面积三亿平方米 -BAC009S0767W0148 降幅比四月份收窄六个百分点 -BAC009S0767W0149 在公积金松绑等作用的刺激下 -BAC009S0767W0150 五月份商品房销售的面积同比数据由负转正 -BAC009S0767W0151 
作为三四线城市最为集中的中部地区来说 -BAC009S0767W0152 房地产开发企业土地购置面积很大 -BAC009S0767W0153 同比下降不少 -BAC009S0767W0154 降幅收窄三个版百分点 -BAC009S0767W0155 各方原因的叠加导致了房企拿地量的明显减少 -BAC009S0767W0156 今年一线城市住宅用地价格涨五成 -BAC009S0767W0157 今年商品房销售一度低迷 -BAC009S0767W0158 一线城市土地市场成交火热 -BAC009S0767W0159 中介机构统计数据显示 -BAC009S0767W0160 平均价格为十万元每平方米 -BAC009S0767W0161 同比上涨五成 -BAC009S0767W0162 随着一线城市楼市企温回升 -BAC009S0767W0163 房企在一线城市拿地的热情还将提高 -BAC009S0767W0164 土地市场热度可能有所下降 -BAC009S0767W0165 大型房企低迷期拿地 -BAC009S0767W0166 中原地产市场研究部统计数据显示 -BAC009S0767W0167 土地成交价款三千亿元 -BAC009S0767W0168 预计全年有望突破四千亿元 -BAC009S0767W0169 一线城市住宅用地平均价格为十一万元每平方米 -BAC009S0767W0170 同比上涨约六成 -BAC009S0767W0171 中原地产首席分析师张大伟认为 -BAC009S0767W0172 住宅市场交易明显升温 -BAC009S0767W0173 迅速带动一线城市土地市场的整体成交 -BAC009S0767W0174 房地产业正经历一个调整阶段 -BAC009S0767W0175 大型房企实施低迷期拿地的策略 -BAC009S0767W0176 在整体市场供大于求区域分化严重的情况下 -BAC009S0767W0177 房企更加愿意扎堆一线城市 -BAC009S0767W0178 而非在三四线城市深耕 -BAC009S0767W0179 这使得一线城市的土地竞争激烈 -BAC009S0767W0180 今年一线城市宅地成交的溢价率不高 -BAC009S0767W0181 平均溢价率较低 -BAC009S0767W0182 较去年明显下降 -BAC009S0767W0183 这是因为土地一级开发成本提高 -BAC009S0767W0184 一线城市住宅用地的低价不断抬升 -BAC009S0767W0185 北京等城市在土地出让中 -BAC009S0767W0186 将保障房地块和商品房地块捆绑出让 -BAC009S0767W0187 直接涉及到债券持有人利益的保护问题 -BAC009S0767W0188 我们立即与云投集团进行了沟通 -BAC009S0767W0189 并严格按照合规程序进行 -BAC009S0767W0190 我委也注意到在企业债券存续期间 -BAC009S0767W0191 需要对发行人资产重组等重大事宜加强监管 -BAC009S0767W0192 在制度上对债券持有人的合法权益进行保护 -BAC009S0767W0193 建立地方政府债务管理体系 -BAC009S0767W0194 中国证券报从您刚才的介绍中我们了解到 -BAC009S0767W0195 城投债券对推动城市基础设施和市政设施的建设 -BAC009S0767W0196 起到了非常积极的作用 -BAC009S0767W0197 对丰富债券市场品种也具有积极意义 -BAC009S0767W0198 结合地方政府债务管理制度的完善 -BAC009S0767W0199 下一步我国的城投债券还需要做哪些完善 -BAC009S0767W0200 徐林这个问题涉及到一系列的制度完善 -BAC009S0767W0201 是一个比较复杂的问题 -BAC009S0767W0202 我个人是这么认认识的 -BAC009S0767W0203 我国还处于城市化快速发展期 -BAC009S0767W0204 需要为各地的城市建设提供规范的融资渠道 -BAC009S0767W0205 农业与非农产业之间劳动生产率的差距也很大 -BAC009S0767W0206 这决定了我国城市化动力十分强劲 -BAC009S0767W0207 城市化进程远未结束 -BAC009S0767W0208 城市化快速发展期的重要特征就是基础设施投资需求大 -BAC009S0767W0209 这是我国所处的发展阶段决定的 -BAC009S0767W0210 政府通过债务融资从事基础设施建设 -BAC009S0767W0211 我们应该建立风险可控的规范化的地方政府融资机制 -BAC009S0767W0212 为各地的基础设施建设提供有制度保障的融资渠道 -BAC009S0767W0213 城投债劵作为准市政债劵仍将是有效的融资工具 -BAC009S0767W0214 但还需要进一步改进 -BAC009S0767W0215 在政府投融资体制改革过程中 -BAC009S0767W0216 从事当地的基础设施建设 -BAC009S0767W0217 相对于过去的体制而言是更加市场化的 -BAC009S0767W0218 城投债劵作为投融资平台公司最透明的直接融资工具 -BAC009S0767W0219 仍然会存在并具有发展空间 -BAC009S0767W0220 由于目前城投债劵的发行需要符合企业债劵发行的条件 -BAC009S0767W0221 这使得我国城投债劵的发行利率相对偏高 -BAC009S0767W0222 城投债劵的发行期限和利率 -BAC009S0767W0223 未来应该在制度上作进一步完善 -BAC009S0767W0224 使得城投公司能够发行真正意义上的长期市政债劵 -BAC009S0767W0225 要尽快建立我国的地方政府债务管理体系 -BAC009S0767W0226 对于如何建立规范的地方政府融资渠道 -BAC009S0767W0227 加强地方政府债务管理和风险防控 -BAC009S0767W0228 一些专家学者提出了许多好的建议 -BAC009S0767W0229 如建立规范透明的地方政府融资渠道 -BAC009S0767W0230 并对地方政府债务进行监控和风险防范等 -BAC009S0767W0231 由于我国还没有建立统一的地方政府债务风险管理制度 -BAC009S0767W0232 设定政府性债务风险控制指标和标准 -BAC009S0767W0233 并对政府性债务实行馀额管理 -BAC009S0767W0234 使地方政府的债务融资规模控制在安全范围内 -BAC009S0767W0235 远低于发生债务危机的欧美国家 -BAC009S0767W0236 债券发行人是优质的 -BAC009S0767W0237 还本付息也是正常的 -BAC009S0767W0238 应该建立风险可控的规范化地方政府融资机制 -BAC009S0767W0239 为各地的基础设施建设提供有制度保障的融资渠道 -BAC009S0767W0240 本报记者曹志为王婷王颖春来源中国证券报 -BAC009S0767W0241 责任编辑廖一宁 -BAC009S0767W0242 据国家发改委网站消息 -BAC009S0767W0243 将考试费标准由各地自行制定改为实行上限管理 -BAC009S0767W0244 价格主管部门将按统一合理的平均成本确定考试费用 -BAC009S0767W0245 将切实减轻考生经济负担 -BAC009S0767W0246 针对目前职业资格考试收费项目增加 -BAC009S0767W0247 一些考试单位考务成本偏高 -BAC009S0767W0248 有的考试在不同地区收费标准差异较大 -BAC009S0767W0249 考生对考试收费问题反映较多等问题 -BAC009S0767W0250 改革了职业资格考试收费管理方式 -BAC009S0767W0251 对考务费标准实行统一标准化管理 -BAC009S0767W0252 通知按照不同考生规模考试类类别的合理平均成本 -BAC009S0767W0254 无疑也会成为投资者的宠儿 -BAC009S0767W0260 而苹果虽有可能卖出不少手表给忠实的粉丝 -BAC009S0767W0264 就开始追寻打造真正的机器人的梦想 
-BAC009S0767W0265 但是过去整整一年他都在秘密工作 -BAC009S0767W0266 没人知道他在干什么 -BAC009S0767W0269 无论他做的什么都是什么都会引人注目的 -BAC009S0767W0270 像索尼被黑那样的事 -BAC009S0767W0271 也可能会是受到国家支持的攻击 -BAC009S0767W0272 未来的战争形态有可能就是计算机对抗计算机 -BAC009S0767W0273 当年泡沫破裂前也是这样的情景 -BAC009S0767W0274 一堆不赚钱的公司赶着上市当然不是什么好事 -BAC009S0767W0276 疯狂估值局限于私有公司内 -BAC009S0767W0277 可现在那些公司纷纷上市后疯狂是不是就暴露了呢 -BAC009S0767W0278 而现在的股票市场也已经达到创纪录的新高 -BAC009S0767W0282 称有些技术公司烧钱太快可能会人间蒸发 -BAC009S0767W0284 连这些人都预测不准的话 -BAC009S0767W0285 还有谁能预测得准呢 -BAC009S0767W0287 但是需记住对风向保持敏感 -BAC009S0767W0288 高空飞航时战略无人机 -BAC009S0767W0289 全球鹰并不能独占鳌头 -BAC009S0767W0290 继二零一一年出现独特的连翼造型的翔龙无人机以后 -BAC009S0767W0291 又一款个性十足的双机身气动外形的大型无人机神雕 -BAC009S0767W0292 又一次引爆坊间议论 -BAC009S0767W0293 今年三月美国大众科学杂志刊文称 -BAC009S0767W0294 中国正在研制一种世界上尺寸最大的无人机 -BAC009S0767W0295 发表的想象图与最近曝光的飞机布局很像 -BAC009S0767W0296 这使得神雕在全球也成为最大的无人机之一 -BAC009S0767W0297 据可靠的网络消息源称 -BAC009S0767W0298 其相应的对手不是全球鹰 -BAC009S0767W0300 神雕的两个机身前后装有两对机翼 -BAC009S0767W0301 位于后方的主翼中央挂着两具涡轮风扇发动机 -BAC009S0767W0303 上述公司人士对每日经济新闻记者表示 -BAC009S0767W0304 成飞机成业绩增长主要是由于锂电需求增长 -BAC009S0767W0305 传统汽车业务并没有太大起色 -BAC009S0767W0306 现在汽车市场也在下滑 -BAC009S0767W0307 整个汽车体系都是随着汽车销量在变动 -BAC009S0767W0308 在锂电业务爆发的情况下 -BAC009S0767W0309 公司的汽车业务应该会有一些弱化 -BAC009S0767W0310 新能源汽车的爆发带动了锂电池供不应求 -BAC009S0767W0311 几乎所有锂电厂商都在满负荷生产 -BAC009S0767W0312 上述成飞集成人士表示 -BAC009S0767W0313 产能现在已经满足不了订单需求 -BAC009S0767W0314 八月初公司通过了增加产能建设的决议 -BAC009S0767W0315 今年初也在原来厂里新增了生产线 -BAC009S0767W0316 预计在三四季度会有陆续新增产能 -BAC009S0767W0317 每日经济新闻记者注意到 -BAC009S0767W0318 项目总投资十四点五亿元 -BAC009S0767W0319 总投资预计一百二十五亿元 -BAC009S0767W0320 上述成飞集成人士告诉记者 -BAC009S0767W0321 公司目前看好锂电池行业的发展渠势 -BAC009S0767W0322 但其并未向记者透露项目盈利水平预测 -BAC009S0767W0323 洛阳本部项目是一个增量投资 -BAC009S0767W0324 有一部分研发楼办公楼是利用现成的 -BAC009S0767W0325 包括管理人员等并不会因为新增生产线而增加 -BAC009S0767W0326 这个项目是自有资金投入 -BAC009S0767W0327 就没有要求专业机构做可研报告 -BAC009S0767W0328 我们内部做的盈利测算数据暂时无法公告 -BAC009S0767W0329 成飞集成与常州市金坛区政府合作的项目将分三期完成 -BAC009S0767W0330 一期投资额为二十五亿元 -BAC009S0767W0331 上述成飞集成人士告诉记者 -BAC009S0767W0332 随着国内新能源车产业的迅猛发展 -BAC009S0767W0333 锂电池作为新能源车的重要部件 -BAC009S0767W0334 锂电池生产企业将迎来业绩持续高增长阶段 -BAC009S0767W0335 二零一四年其市场规模已达七十一五亿元 -BAC009S0767W0336 随着锂电池产业链迎来井喷 -BAC009S0767W0337 锂电池在今年上半年成为诸多上市公司的业绩功臣 -BAC009S0767W0338 二者虽从事项目不同 -BAC009S0767W0339 也恰恰符合播求的个人风格 -BAC009S0767W0340 擂台上的黑王子肌肉强健 -BAC009S0767W0341 这也是他一次次在擂台上打出恐怖重击的最大资本 -BAC009S0767W0342 却可以演绎出撼人心魄的体育大美 -BAC009S0767W0343 此次播求面对的强敌 -BAC009S0767W0344 恰恰在风格打法和比赛理念上 -BAC009S0767W0345 同詹姆斯昔年头号强敌卡梅隆安东尼如出一辙 -BAC009S0767W0346 丰富的战斗技巧是我的特色 -BAC009S0767W0347 我希望自己可以像卡梅隆一样 -BAC009S0767W0348 在比赛中展示出更多击败对手的手段 -BAC009S0767W0349 对于我的对手制造更多的麻烦 -BAC009S0767W0350 俄罗斯搏击新生代旗帜性天才高手哈亚在接受采访时 -BAC009S0767W0351 而哈亚的表现也正如其自己所言 -BAC009S0767W0352 展示出了如同其偶像安东尼一样的全面犀利 -BAC009S0767W0353 直接将威瑟里诺夫击倒 -BAC009S0767W0354 其搏击技能之全面精湛格斗天赋之卓越令人惊叹 -BAC009S0767W0355 当搏击界的勒布朗与卡梅隆狭路相逢 -BAC009S0767W0356 激情战火必将以燎原之势 -BAC009S0767W0357 彭博一英里接力赛将在十月十五日首次登陆上海 -BAC009S0767W0358 今天赛事举行了赛前新闻发布会 -BAC009S0767W0359 宣告彭博一英里接力赛上海站全面启动 -BAC009S0767W0360 让他们能在工作之馀释放对于体育的热情 -BAC009S0767W0361 从二零零七年在伦敦创办至今 -BAC009S0767W0362 已在新加坡香港等城市成功落地 -BAC009S0767W0363 得到当地企业的强烈积极响应 -BAC009S0767W0364 在各城市都有至少百支队伍报名参加 -BAC009S0767W0365 彭博一英里接力赛区别于其他跑步活动的是 -BAC009S0767W0366 每支报名队伍以企业为单位每队十名成员 -BAC009S0767W0367 每人分别完成一英里即约一点六公里的路程 -BAC009S0767W0368 最后取全队用时最少者为胜 -BAC009S0767W0369 冠军皆由麦格理集团获得 -BAC009S0767W0370 现场参赛企业誓言要打破这项记录 -BAC009S0767W0371 本次赛事已经开始接受团队报名 -BAC009S0767W0372 目前报名仍在进行中 -BAC009S0767W0373 这一项目将为神农架林区的孩子筹建开放式体育空间 -BAC009S0767W0374 为他们搭建一个特色自由的体育室加户外体育课堂 -BAC009S0767W0375 让他们也可以有机会参与体育运动 -BAC009S0767W0376 高清女排金花手捧奖杯庆夺冠 -BAC009S0767W0377 刚刚在日本女排世界杯上夺冠的中国女排载誉回京 -BAC009S0767W0378 
在首都国际机场受到了各界人士的欢迎 -BAC009S0767W0379 这其中一位身材高挑的女孩子颇为引人关注 -BAC009S0767W0380 她就是因伤未能随队参加本次赛事的徐云丽 -BAC009S0767W0381 我觉得这冠军来之不易 -BAC009S0767W0382 特别是我们今年刚开始的时候特别艰难 -BAC009S0767W0383 整个队伍承受了很大的困难和考验 -BAC009S0767W0384 最后顶住困难和压力拿到冠军 -BAC009S0767W0385 我为她们感到骄傲和自豪 -BAC009S0767W0386 那就是姑娘们高举起惠若琪徐云丽和杨方旭的球衣 -BAC009S0767W0387 感谢这三位因伤未能参赛的姐妹对球队做出的巨大贡献 -BAC009S0767W0388 徐云丽透露在赛前队友曾经给自己发了一条短信 -BAC009S0767W0389 就此事征求她的意见 -BAC009S0767W0390 看到这一幕我控制不住自己了 -BAC009S0767W0391 一切都难以用言语来表达 -BAC009S0767W0392 自己此时此刻特别迫切地想要尽快恢复 -BAC009S0767W0393 希望能够跟大家一起再次站在领奖台上 -BAC009S0767W0394 徐云丽最后动情地说 -BAC009S0767W0395 搜狐体育郭健文 -BAC009S0767W0396 二零一五年八月十五 -BAC009S0767W0397 这次赛事是昆仑决二零一五欧洲之旅的第三站 -BAC009S0767W0398 四季如春的俄罗斯黑海东部沿岸 -BAC009S0767W0399 新一轮激战烽火即将炽烈点燃 -BAC009S0767W0400 我是这次中俄对抗赛第一个出场的中方选手 -BAC009S0767W0401 一定要尽全力打一场漂亮的比赛 -BAC009S0767W0402 为中国战队打响第一枪 -BAC009S0767W0403 徐永昊的站立打击技术在该级别中堪称翘楚 -BAC009S0767W0404 如今再遭厄运 -BAC009S0767W0406 最终又确定为科林哈迪 -BAC009S0767W0407 科林也退出了剧组 -BAC009S0767W0408 柯震东去年经历吸毒风波 -BAC009S0767W0409 演艺事业受挫 -BAC009S0767W0410 沉寂一段时间后 -BAC009S0767W0411 近期他积极复出 -BAC009S0767W0412 再度经营他的粉丝团与粉丝互动六日昨晚 -BAC009S0767W0414 他调皮地将自己的眉毛抹掉 -BAC009S0767W0415 搜狐娱乐讯据香港明晚九月十日报道 -BAC009S0767W0416 诸葛紫岐十日晚出席活动时表示 -BAC009S0767W0417 一个月内暴瘦了九至十三斤 -BAC009S0767W0418 有时甚至忘记吃饭 -BAC009S0767W0419 也有情绪问题 -BAC009S0767W0420 打算看医生寻求纾缓方式 -BAC009S0767W0421 她说之前打电话给医生 -BAC009S0767W0422 医生说得好恐怖 -BAC009S0767W0423 但不至于要吃药 -BAC009S0767W0424 我叫他不要吓我 -BAC009S0767W0425 现在有点怕要见他 -BAC009S0767W0426 搜狐娱乐讯九月十六日二十点二十七分 -BAC009S0767W0428 并自嘲的写道自幼就走性感风格 -BAC009S0767W0429 谢依霖穿着白色吊带裙 -BAC009S0767W0430 嘟嘴作亲吻状 -BAC009S0767W0431 李玉刚离开蒙面歌王网友遗憾没听见神曲 -BAC009S0767W0432 蒙面歌王迎来初赛的收官之战 -BAC009S0767W0433 千面娇娃绝地反击拿下最后一席歌王头衔 -BAC009S0767W0434 而绝代歌姬李玉刚揭面引起了广泛热议 -BAC009S0767W0435 也有网友发出疑问若是李玉刚演唱李的话 -BAC009S0767W0436 登上歌王宝座的概率是不是会要大很多倍呢 -BAC009S0767W0437 短短二零零字歌词运用诗词典故三六处 -BAC009S0767W0438 每句歌词都蕴含一段李姓历史文化在里头 -BAC009S0767W0439 该歌曲今年一经在各大音乐网站上线便收获无数好评 -BAC009S0767W0440 更有全球李氏宗亲大会将其列为祭祖主题曲 -BAC009S0767W0441 同时李也俘获了无数中国大妈的芳心 -BAC009S0767W0442 成为各国各地广场舞今年最流行的背景音乐之一 -BAC009S0767W0443 李磊灭门案遗产纠纷终结八零零馀万三人有份 -BAC009S0767W0444 大兴灭门案的凶犯李磊被执行死刑后 -BAC009S0767W0445 其身后的财产分割问题尘埃落定 -BAC009S0767W0446 市二中院终审认定李家遗产共计八零零多万元 -BAC009S0767W0447 李磊的奶奶继承四三七万馀元 -BAC009S0767W0448 姥姥继承二六六万馀元 -BAC009S0767W0449 岳父母继承一零九万馀元 -BAC009S0767W0450 李谷一曾怒揭东方歌舞团腐败事后被调离岗位 -BAC009S0767W0451 顾欣资料图片昨早九号一零时 -BAC009S0767W0452 东方演艺集团大门口戒备森严 -BAC009S0767W0453 中纪委监察部网站发布消息 -BAC009S0767W0454 顾欣因涉嫌严重违纪违法 -BAC009S0767W0455 集团新领导已经上任 -BAC009S0767W0456 是原中国文化集团党委书记宋官林 -BAC009S0767W0457 李连杰名誉维权案一审胜诉网站被判至致歉赔偿一零万 -BAC009S0767W0458 新京报快讯记者林野记者今天傍晚获悉 -BAC009S0767W0459 李银河的文学梦将出版虐恋小说三卷本 -BAC009S0767W0460 权义澎湃资料李银河在现实中是柔软的 -BAC009S0767W0461 不像她发表的那些先锋的观点一样冲击人眼球 -BAC009S0767W0462 李银河写虐恋不会伤害小波 -BAC009S0767W0463 北京南三环附近一家茶楼里 -BAC009S0767W0464 六三岁的李银河拿着钥匙袋走了进来 -BAC009S0767W0465 她身着湖蓝色细纱短袖黑长裤白运动鞋 -BAC009S0767W0466 手腕上还戴着一块与之呼应的白色塑料腕表 -BAC009S0767W0467 出门前我拿了两套衣服 -BAC009S0767W0468 面对第一财经日报记者 -BAC009S0767W0469 说起自己的伴侣大侠 -BAC009S0767W0470 村中数百亩农田干旱村民质疑水库断了灌溉水 -BAC009S0767W0471 高新区钓渭镇疙瘩沟村村民称 -BAC009S0767W0472 却为了发电断了灌溉农田的水 -BAC009S0767W0473 导致数百亩农田干旱 -BAC009S0767W0474 该镇农办一名主管水利负责人介绍 -BAC009S0767W0475 政府曾多次叫停电站发电 -BAC009S0767W0476 但是干旱原因主要系降水减少 -BAC009S0767W0477 今后将加强水库管理 -BAC009S0767W0478 努力处理好灌溉与发电之间的关系 -BAC009S0767W0479 村主任发环卫工一六零零元工资含一四张假钞 -BAC009S0767W0480 村主任发账号给村文书想要继续当拿四八万元 -BAC009S0767W0481 华商报商洛讯记者白鹏飞近日 -BAC009S0767W0482 并向其发送银行账号 -BAC009S0767W0483 原因是有人愿为该村垫资四八万元费用修桥 -BAC009S0767W0484 村委会主任建议由垫资人担任村文书 -BAC009S0767W0485 唐寨子村党支部书记村委会主任已被全镇通报批评 -BAC009S0767W0486 村主任向开发商索贿五二零万村组干部几乎全参与分赃 -BAC009S0767W0487 城改拆迁对很多村民来说 -BAC009S0767W0488 
意味着生活条件的改善 -BAC009S0767W0489 但对于部分村官及个别政府工作人员来说 -BAC009S0767W0490 却是一块大大的唐僧肉 -BAC009S0767W0491 想办法扑上去咬一口 -BAC009S0767W0492 村主任给狗盖房吞六万公款被判刑二年八个月 -BAC009S0767W0493 京华时报讯记者王晓飞在农村 -BAC009S0767W0494 几乎家家户户都会在院子里养狗 -BAC009S0767W0495 平时作为看家护院之用 -BAC009S0768W0121 成本的转嫁使得商品房用地成本更高 -BAC009S0768W0122 明年初料迎供应淡季 -BAC009S0768W0123 土地市场交易火热的局面可能降温 -BAC009S0768W0124 土地供应往往呈现前松后紧的态势 -BAC009S0768W0125 年初往往是土地供应的淡季 -BAC009S0768W0126 为完成全念土地供应计划 -BAC009S0768W0127 地方政府倾向于频繁推出优质地块 -BAC009S0768W0128 土地交易可能随着供应淡季的到来而降温 -BAC009S0768W0129 叠加春节因素的影响 -BAC009S0768W0130 这种情况在二月可能较明显 -BAC009S0768W0131 房地产企业的整体资金状况超紧 -BAC009S0768W0132 不利于继续大规模拿地 -BAC009S0768W0133 国家统计局数据显示 -BAC009S0768W0134 房地产开发企业到位资金十万亿元 -BAC009S0768W0135 增速比三月回落六个百分点 -BAC009S0768W0136 未来房企拿地投资新开工等指标可能受到影响 -BAC009S0768W0137 尽管降息等利好政策出台 -BAC009S0768W0138 但房地产市场仍处于调整期 -BAC009S0768W0139 预计不会在短期内迅速回暖 -BAC009S0768W0140 与之相联系的土地市场也会受到影响 -BAC009S0768W0141 中国证券报报道 -BAC009S0768W0142 今年商品房销售一度低日迷 -BAC009S0768W0143 一线城市土地市场成交火热 -BAC009S0768W0144 中介机构统计数据显示 -BAC009S0768W0145 五环内商品住宅的成交在市场中并非主流 -BAC009S0768W0146 一位房企人士认为五环内项目的稀缺性难以改变 -BAC009S0768W0147 新京报讯记者张徐报道 -BAC009S0768W0148 北京去年土地出让落下大幕 -BAC009S0768W0149 在丰台潘家村一宗商业用地低价成交后 -BAC009S0768W0150 北京今年的土地出让金锁定在两千亿元 -BAC009S0768W0151 同比前年增长五成 -BAC009S0768W0152 丰台区域潘家村危改三号地成为今年的收官地质块 -BAC009S0768W0153 这宗零售商业用地位于南三环外 -BAC009S0768W0154 邻近地铁十号线首竟贸站 -BAC009S0768W0155 规划建筑面积约五万平方米 -BAC009S0768W0156 潘家村地块体量较小 -BAC009S0768W0157 未必吸引太多擅长上规模开发的企业 -BAC009S0768W0158 因此最终仅有龙湖地产一家报价 -BAC009S0768W0159 龙湖即以低价五点五亿元拿地 -BAC009S0768W0160 楼面价折合约一万元每平方米 -BAC009S0768W0161 龙湖地产有关人士对记者表示 -BAC009S0768W0162 龙湖已经在丰台有土地储备 -BAC009S0768W0163 未来还将继续深耕丰台区域 -BAC009S0768W0164 龙湖在丰台西局撤资三十亿元拿地 -BAC009S0768W0165 纯商品房楼面价接近六万元每平方米 -BAC009S0768W0166 并不代表全年土地市场行情走低 -BAC009S0768W0167 今年北京土地市场仍然是高温状态 -BAC009S0768W0168 特别是一至四月土地出让金即破千亿元 -BAC009S0768W0169 根据北京中原地产统计 -BAC009S0768W0170 去年北京共出让五十宗居住楼用地 -BAC009S0768W0171 规划建筑面积合计九百万平方米 -BAC009S0768W0172 出让金合计一千亿元 -BAC009S0768W0173 整体平均楼面价折合一万元每平方米 -BAC009S0768W0174 这一平均楼面价较年前的九千元每平方米 -BAC009S0768W0175 北京中原地产首席分析师张大伟认为 -BAC009S0768W0176 一二线城市特别是京沪这样的核心城市 -BAC009S0768W0177 投资价值更好房企看好 -BAC009S0768W0178 加上优质地块的连续供应 -BAC009S0768W0179 促成了今年北京土地市场的走高 -BAC009S0768W0180 通州新城彩虹之门用地挂出三十日 -BAC009S0768W0181 记者从北京市国土局网站看到 -BAC009S0768W0182 通州运河核心区一宗多功能用地挂出 -BAC009S0768W0183 将于明年投标 -BAC009S0768W0184 该地块位于通州新城五河交汇处东南角 -BAC009S0768W0185 规划建筑面积为四十万平方米 -BAC009S0768W0186 据记者从多个渠道了解 -BAC009S0768W0187 分档制定了中央部门收取的考务费统一上限标准 -BAC009S0768W0188 考虑到地方组织考试的成本相对比较固定 -BAC009S0768W0189 即各省在考务费标准基础上 -BAC009S0768W0190 实践技能操作和面试类考试科目 -BAC009S0768W0191 需配备租赁精密仪器专业设备大型场地 -BAC009S0768W0192 考试过程需要消耗相关材料或需聘请专业面试考官的 -BAC009S0768W0193 由于影响成本的因素过多 -BAC009S0768W0194 由各省根据实际成本制定 -BAC009S0768W0195 三是促进考务成本降低和考试单位合并 -BAC009S0768W0196 对考务费实行统一标准化管理后 -BAC009S0768W0197 而是改按统一合理的平均成本确定 -BAC009S0768W0198 将切实减轻考生经济负担 -BAC009S0768W0199 改革将对考试单位的费用支出形成倒逼机制 -BAC009S0768W0200 促使考试单位自觉降低成本由于形不成规模效益 -BAC009S0768W0201 一些规模较小的考试机构也将自动寻求合并 -BAC009S0768W0202 利用价格杠杆促进考试单位向集约化发展 -BAC009S0768W0203 他就上述关注问题指出 -BAC009S0768W0204 目前我国出现政府性债务违约可能性并不大 -BAC009S0768W0205 下一步将进一步完善城投债卷发行制度和防范风险机制 -BAC009S0768W0206 并尽快建立我国地方政府债务管理体系等 -BAC009S0768W0207 政府性违约可能性不大 -BAC009S0768W0208 中国证卷报随着欧美等国主权债务危机陆续爆发 -BAC009S0768W0209 您如何看待政府的举债行为和债务风险 -BAC009S0768W0210 徐林吸取欧美等国主权债务危机的教训 -BAC009S0768W0211 采取必要措施加强政府债务管理 -BAC009S0768W0212 防范我国政府债务风险 -BAC009S0768W0213 但在具体评估我国地方政府债务风险程度时 -BAC009S0768W0214 也要看到我国与欧美国家的不同之处 -BAC009S0768W0215 我国地方政府性债务 -BAC009S0768W0216 特别是地方投投融资平台公司形成的债务 -BAC009S0768W0217 主要由于各各种基础设施的投资建设 -BAC009S0768W0218 当代人和后代人共同承担债务还本付息责任 -BAC009S0768W0219 可以更好地体现代际公平 -BAC009S0768W0220 
克服当期建设资金不足的瓶颈制约 -BAC009S0768W0221 有利于加快完善基础设施和投资环境 -BAC009S0768W0222 是一种合理的基础设施投融资金建设行为 -BAC009S0768W0223 政府举债建设形成大量资金 -BAC009S0768W0224 相当部分资产具有长期的直接收益 -BAC009S0768W0225 一些没有直接收益的项目 -BAC009S0768W0226 也具有间接的经济效益或社会效益 -BAC009S0768W0227 对促进当地经济增长和政府财力的增长 -BAC009S0768W0228 不能简单地用寅吃卯粮来作价值判断 -BAC009S0768W0229 这并不意味着政府可以无节制地借债 -BAC009S0768W0230 关键是要把投资规模和债务规模 -BAC009S0768W0231 控制在合理的范围内 -BAC009S0768W0232 防止出现系统性的偿债风险 -BAC009S0768W0233 国务院高度重视防范地方政府债务风险 -BAC009S0768W0234 从二零零九年下半年就开始要求有关部门调研这一问题 -BAC009S0768W0235 国家审计署还专门组织力量 -BAC009S0768W0236 对全国各地的政府债务进行啦严格审计 -BAC009S0768W0237 审计署的审计结论表明 -BAC009S0768W0238 我国地方政府的累积债务相对于偿付能力来看 -BAC009S0768W0239 远低于发生债务危机的欧美国家 -BAC009S0768W0240 考虑到我国正处在经济快速增长期 -BAC009S0768W0241 政府财力增长也相当较快 -BAC009S0768W0242 政府还拥有较多的可变现资产 -BAC009S0768W0243 相对于目前的负债规模 -BAC009S0768W0244 政府总体上具有较强的偿债能力 -BAC009S0768W0245 采取积极有效的措施化解部分地区和领域的债务风险 -BAC009S0768W0246 在我国出现政府性债务违约的可能性是不大的 -BAC009S0768W0247 债卷市场城投债卷发行不畅 -BAC009S0768W0248 从城投债券发行监管部门的角度 -BAC009S0768W0249 您如何看待这一现象 -BAC009S0768W0250 徐林出于对地方政府债务风险的担忧 -BAC009S0768W0251 投资者采取措施防范风险是成熟的表现 -BAC009S0768W0252 但出于对我国地方政府债务风险的不合理判断 -BAC009S0768W0253 神雕的机身四周装有分布式有源相控阵雷达天线 -BAC009S0768W0254 可以提供三六零度无死角的早期预警 -BAC009S0768W0255 它的雷达可能采用了双波段设计 -BAC009S0768W0259 该机的雷达还具备合成孔径工作能力 -BAC009S0768W0260 可用于侦察缓慢移动的地面和海面目标 -BAC009S0768W0261 神雕还有一定的隐身特性 -BAC009S0768W0262 加上它凭借远程雷达与对方舰队保持远距离 -BAC009S0768W0263 如果神雕大量服役和部署 -BAC009S0768W0264 在战区上空形成有效韧的信息网络 -BAC009S0768W0265 那将会是中国海空军的战力倍增器之一 -BAC009S0768W0266 高空长航时战略无人机 -BAC009S0768W0267 全球鹰并不能独占鳌头 -BAC009S0768W0268 继二零一一年出现独特的连翼造型的翔龙无人机以后 -BAC009S0768W0269 又一款个性十足的双机身气动外形的大型无人机神雕 -BAC009S0768W0271 据新华社电美国国际贸易委员会二十一日作出终裁 -BAC009S0768W0272 从台湾地区进口的此类产品存在切销行为 -BAC009S0768W0273 美国国际贸易委员会称 -BAC009S0768W0274 在征收反倾销或反补贴税之前 -BAC009S0768W0275 美商务部与国际贸易委员会都需作出肯定性终裁 -BAC009S0768W0276 商务部裁定切销或补贴幅度 -BAC009S0768W0277 根据美国商务部去年十二月份终裁确定的幅度 -BAC009S0768W0278 针对中美光伏贸易纠纷 -BAC009S0768W0279 中国商务部已明确表示 -BAC009S0768W0280 再次对中国光伏产品发起双反调查并试图征收高额关税 -BAC009S0768W0281 中方对此表示强烈不满 -BAC009S0768W0282 美方对中国产品进行限制的做法 -BAC009S0768W0283 是对贸易救济措施的滥用 -BAC009S0768W0284 势必使用中美光伏贸易纠纷再度升级 -BAC009S0768W0285 美国智库学学者和太阳能行业协会也多次警告 -BAC009S0768W0286 许多美国太阳太阳能制造商依赖于全球光伏供应链 -BAC009S0768W0287 并减少太阳能产业相关就业岗位 -BAC009S0768W0288 美初裁中国产轮胎倾销 -BAC009S0768W0289 据新华社电美国商务部二十一日宣布初裁结果 -BAC009S0768W0290 认定从中国进口的乘用车和轻型卡车轮胎存在倾销行为 -BAC009S0768W0291 美商务部当天发表声明说 -BAC009S0768W0292 倾销幅度从百分之十七至百分之九十九 -BAC009S0768W0293 基于倾销幅度的初裁结果 -BAC009S0768W0294 就美国对中国产轮胎发起双坊调查 -BAC009S0768W0295 中国商务部曾表示强烈反对 -BAC009S0768W0296 认为美国此举违反世界贸易组织规则和美国国内法 -BAC009S0768W0297 希望美方吸取前车之鉴 -BAC009S0768W0298 避免破坏两国相关产业的贸易与合作 -BAC009S0768W0299 据新华社电美国国际贸易委员会二十一日作出终裁 -BAC009S0768W0300 从台湾地区进口的此类产品存在倾销行为 -BAC009S0768W0301 这意味着美国将对相关产品 -BAC009S0768W0303 成飞集成百二十一九十 -BAC009S0768W0305 公司上半年营业收入六点三四亿元 -BAC009S0768W0307 从而获取用户信息的案件 -BAC009S0768W0308 杨某等四人一同在深圳成立了安丰公司 -BAC009S0768W0309 公司主要从事计算机手机的软件开发业务 -BAC009S0768W0310 由于安丰公司的业务不景气 -BAC009S0768W0311 杨某等四人经过商议 -BAC009S0768W0312 决定由麦德公司的技术部门研发静默插件 -BAC009S0768W0313 使用户在刷机过程中 -BAC009S0768W0314 不知不觉地安装上公司开发的插件 -BAC009S0768W0315 而手机被安装上这一插件后 -BAC009S0768W0316 公司不仅可以向手机推送软件广告等商业性电子信息 -BAC009S0768W0317 安丰公司的广告网页是他们推送的重要内容 -BAC009S0768W0318 他们通过这个插件已获利广告收入二十馀万元 -BAC009S0768W0319 同案被捕的马某等四人是公司技术部门的员工 -BAC009S0768W0320 软件开发是领导的授意 -BAC009S0768W0321 自己只是执行公司的工作要求 -BAC009S0768W0322 三百六十软件识别出了麦德公司的插件 -BAC009S0768W0323 将其列为恶意软件用户称其为流氓软件 -BAC009S0768W0324 马某等人进一步完善了插件 -BAC009S0768W0325 再次利用同样的静默安装方式继续推广软件 -BAC009S0768W0326 二十馀万部手机遭殃 -BAC009S0768W0327 在被公安机关查获后 -BAC009S0768W0328 警方在麦德公司数据库中发现 -BAC009S0768W0329 获取到的通讯录近两千万条 -BAC009S0768W0330 判处有期徒刑三年六个月 -BAC009S0768W0331 
其馀九人获刑一年五个月至三年不等 -BAC009S0768W0332 依据国家相关法律法规 -BAC009S0768W0333 杨某等人在明知插件功能的情况下 -BAC009S0768W0334 未经用户同意将该插件预置到呃用户手机中 -BAC009S0768W0335 非法获取用户身份认证信息 -BAC009S0768W0336 已经构成了对他人计算机信息系统的侵入控制 -BAC009S0768W0337 侵犯了公民的合法权益 -BAC009S0768W0338 强劲犀利的拳法与膝法破坏力惊人 -BAC009S0768W0339 二零一五年初在南京的笼斗中 -BAC009S0768W0340 徐永昊以雷霆万钧之势缔造了一场震撼的秒杀之作 -BAC009S0768W0341 迅即杀狠的站立技术令人惊叹不已 -BAC009S0768W0342 也是我喜欢的格斗方式 -BAC009S0768W0343 我都会对站立技术进行重点强化 -BAC009S0768W0344 让自己的攻击变得更快更狠 -BAC009S0768W0345 对于这场比赛的备战 -BAC009S0768W0346 我在重点强化站立技术的同时 -BAC009S0768W0347 也对地面技术和防摔技术上做了很多针对性的训练 -BAC009S0768W0348 对于综合能力的严苛要求 -BAC009S0768W0349 是综合格斗运动的一大特色 -BAC009S0768W0350 相较于其精湛凶猛的站立技术 -BAC009S0768W0351 徐永昊的地面技术无疑是其格斗体系中的一块短板 -BAC009S0768W0352 上一场同包尔江的比赛之后 -BAC009S0768W0353 一个强项跟弱项同样突出的拳手 -BAC009S0768W0354 是很难成为真正的王者 -BAC009S0768W0355 我必须要变的更加全面 -BAC009S0768W0356 此次昆罗决中俄对抗赛上 -BAC009S0768W0357 对于代表中国战队略先出阵的徐永昊来讲 -BAC009S0768W0358 这无疑又是一次严峻的考验 -BAC009S0768W0359 也是其对于自身技术全面性提高程度的一次检验 -BAC009S0768W0360 我这次的对手水平很高 -BAC009S0768W0361 拳法和摔跤能力很出色 -BAC009S0768W0362 而在谈及此次应敌的策略时 -BAC009S0768W0363 我不会改变自己擅长的风格 -BAC009S0768W0364 这次比赛我会用胜利证明自己的实力 -BAC009S0768W0365 二零一五年世界田径锦标赛即将在北京拉开序幕 -BAC009S0768W0366 近日德郭队公布了参加此次世锦赛的六六人大名单 -BAC009S0768W0367 上届莫斯科世锦赛上拿到了金牌的四位选手悉数出战 -BAC009S0768W0368 主教练对于这支以老带新的队伍也充满了自信 -BAC009S0768W0369 上届莫斯科世锦赛上拿到的金牌的四位选手悉数出战 -BAC009S0768W0370 包括前秋运动员维斯多尔 -BAC009S0768W0371 撑杆跳选手拉斐尔霍尔泽德佩 -BAC009S0768W0372 哈特灵今年饱受十字韧带伤势困扰 -BAC009S0768W0373 他是否接受外卡参赛要视情况而定 -BAC009S0768W0374 德国队此次以老带新 -BAC009S0768W0375 这也是他一年四记来第一次参加世锦赛 -BAC009S0768W0376 也有经验丰富的老队员 -BAC009S0768W0377 我相信每个人都会付出一切来为团队力争最好的成绩 -BAC009S0768W0378 附二零一五田径世锦赛德国队名单 -BAC009S0768W0379 一百米塞文基尼菲尔斯 -BAC009S0768W0380 二百米罗宾埃尔瓦 -BAC009S0768W0381 八百米罗宾斯切姆贝拉 -BAC009S0768W0382 五千米理查德灵格 -BAC009S0768W0383 一万米阿尔恩加比乌斯 -BAC009S0768W0384 一百一十米栏马特里亚斯布赫雷尔 -BAC009S0768W0385 格里格尔特拉贝尔 -BAC009S0768W0386 马特伍兹菲兹比亚尔科 -BAC009S0768W0387 撑杆跳拉斐尔霍尔泽德斯佩 -BAC009S0768W0388 托比亚斯斯切尔巴尔斯 -BAC009S0768W0389 跳远阿莱恩卡马拉 -BAC009S0768W0390 铅球达维斯多尔 -BAC009S0768W0391 铁饼克里斯托弗哈特灵 -BAC009S0768W0392 标枪拉尔斯哈曼恩 -BAC009S0768W0393 全能里科费雷姆斯 -BAC009S0768W0394 迈克尔斯齐莱德尔 -BAC009S0768W0395 二零千米竞走尼尔斯布莱姆巴号 -BAC009S0768W0396 五零千米竞走卡尔多赫曼 -BAC009S0768W0397 四乘一百米接力罗伯特哈特灵 -BAC009S0768W0398 卢卡斯亚库比泽克 -BAC009S0768W0399 亚历山大克塞诺科夫 -BAC009S0768W0400 雅莱克斯欧帕拉迪尼门格 -BAC009S0768W0401 一百米莱贝卡哈塞 -BAC009S0768W0402 吉娜卢克肯科姆普尔 -BAC009S0768W0403 八百米克里斯蒂娜哈灵 -BAC009S0768W0404 这部命运多旭的电影 -BAC009S0768W0405 原本计划在今年六月正式开机 -BAC009S0768W0406 可现在已经全部泡汤 -BAC009S0768W0407 科林之前已经积极的支持影片拍摄 -BAC009S0768W0408 圆圆的脸蛋非常的可爱 -BAC009S0768W0409 此照片萌翻众网友 -BAC009S0768W0410 纷纷留言点赞 -BAC009S0768W0411 称哈哈哈性感的不要不要的 -BAC009S0768W0412 自小卖得一脸好萌 -BAC009S0768W0413 搜狐娱乐讯据香港媒体报道 -BAC009S0768W0414 诞下很像天华的小宝贝 -BAC009S0768W0415 一向都是在圈子中人缘甚佳的谢天华 -BAC009S0768W0416 使得宝宝刚出生就有了一大班星星级干爹干娘 -BAC009S0768W0417 搜狐娱乐讯据香港媒体报道 -BAC009S0768W0418 艺人谢婷婷出席活动时 -BAC009S0768W0419 被问到有传其胞兄谢霆锋将与王菲结婚 -BAC009S0768W0420 她回应是么 -BAC009S0768W0421 没有人同我讲 -BAC009S0768W0422 好多传闻我都不会特别问他 -BAC009S0768W0423 是真的话他自己会同我讲 -BAC009S0768W0424 想不想他再次成家立室 -BAC009S0768W0425 他开心就好 -BAC009S0768W0426 不过要看他心情工作同家人相处同小朋友 -BAC009S0768W0427 各样都平衡得好处理得好 -BAC009S0768W0428 结婚都只是一张纸同戒指 -BAC009S0768W0430 搜狐娱乐讯据香港媒体报道 -BAC009S0768W0431 为了给自己的爱犬盖狗舍及休息场所 -BAC009S0768W0432 他指使他人虚开发票六万馀元用公款报销 -BAC009S0768W0433 贾某被市三中院终审判处有期徒刑两年八个月 -BAC009S0768W0434 村书记被村民驾车撞倒身亡肇事者已被刑拘 -BAC009S0768W0435 京华时报讯记者迟名常鑫前天中午近一一点半 -BAC009S0768W0436 大兴区礼贤镇紫各庄村 -BAC009S0768W0437 村书记乔俊然在家门前被一辆轿车撞倒后 -BAC009S0768W0438 肇事者为紫各庄村民乔某 -BAC009S0768W0439 大兴警方以涉嫌交通肇事罪将肇事者刑事拘留 -BAC009S0768W0440 案件正在进一步调查中 -BAC009S0768W0441 村书记骗补助被判一一年受审辩称不了解政策 -BAC009S0768W0442 新京报讯记者王巍利用村里遭受泥石流灾害后 -BAC009S0768W0443 
政府出钱搬迁盖房的机会 -BAC009S0768W0444 延庆县永宁镇偏坡峪村原党支部书记钱某 -BAC009S0768W0445 将不应享受政府的两个女儿作为搬迁户上报 -BAC009S0768W0446 骗取搬迁补助资金用于支付搬迁安置房费用 -BAC009S0768W0447 延庆法院一审判决认为 -BAC009S0768W0448 钱某贪污一二馀万元拆迁款 -BAC009S0768W0449 判处有期徒刑一一年 -BAC009S0768W0450 村内常有蛇出没疑从养蛇村民中爬出 -BAC009S0768W0451 信息时报讯记者陈子玉近日 -BAC009S0768W0452 白云区钟落潭竹一村民白云区钟落潭竹一村的村民跟记者报料 -BAC009S0768W0453 说最近他们村里经常有蛇出没 -BAC009S0768W0454 甚至还会爬到村民家中 -BAC009S0768W0455 他们怀疑是有人在村里养蛇所致 -BAC009S0768W0456 蛇主刘先生表示以后将不在家里养蛇 -BAC009S0768W0457 村医研发神奇止痛药网销全全国获刑三年 -BAC009S0768W0458 村卫生室医师兼职黑b超记者暗访结束被跟踪 -BAC009S0768W0459 明着是大兴区黄村镇狼垡三村的医师 -BAC009S0768W0460 暗地里却发布小广告揽客 -BAC009S0768W0461 村妇为缓解丈夫病痛种罂丽当药用被判刑六个月 -BAC009S0768W0462 曲靖一村妇竟在自家菜地内非法种植罂丽一零四二株 -BAC009S0768W0463 用罂丽熬汤为丈夫止痛 -BAC009S0768W0464 该村妇因犯非法种植毒品原植物罪 -BAC009S0768W0465 被麒麟区法院判处有期徒刑六个月 -BAC009S0768W0466 并处罚金人民币一千元 -BAC009S0768W0467 村妇将一零万元现金埋地底四年多已腐烂成碎块 -BAC009S0768W0468 村委会主任因经济问题两次被免第三次当选惹争议 -BAC009S0768W0469 张绵跃当选村委会主任 -BAC009S0768W0470 村委会在农田搭起违法建筑每年收租金一四万元 -BAC009S0768W0471 奉化江口儒江村村委会却带头盖起了违法建筑 -BAC009S0768W0472 记者接到这样的报料 -BAC009S0768W0473 三改一拆可以说是一条红线 -BAC009S0768W0474 村委会竟然会顶风作案 -BAC009S0768W0475 记者和宁波市三三改一拆办工作人员前往现场 -BAC009S0768W0476 这听起来多少有些匪夷所思的违建竟然是真的 -BAC009S0768W0477 村官一顿工作餐吃二六个菜挂钩蹲点领导被诫勉谈话 -BAC009S0768W0478 一顿工作餐竟上二六个菜 -BAC009S0768W0479 且逢餐必有烟酒从园区领导到村组干部 -BAC009S0768W0480 在严查四风的高压态势下 -BAC009S0768W0481 以公务招待为名大肆公款吃喝 -BAC009S0768W0482 村官借四零零多户居民三亿一携款失联 -BAC009S0768W0483 村官接连顶风违纪其子领证为热闹摆酒六七桌 -BAC009S0768W0484 编者按为深入贯彻落实中央八项规定精神 -BAC009S0768W0485 按照中央纪委宣传部的统一部署 -BAC009S0768W0486 陆续派记者深入采访 -BAC009S0768W0487 进一步加大舆论监督力度 -BAC009S0768W0488 通报一个教育一批震灭一片 -BAC009S0768W0489 释放出中央执纪必严紧抓不放的强烈信号 -BAC009S0768W0490 广大领导干部要以引以为戒守住底线 -BAC009S0768W0491 坚决不在四风问题上犯错犯错误跌跟头 -BAC009S0768W0492 村官涉不雅视频被免职饭桌上摸女子胸部臀部等 -BAC009S0768W0493 村官私刻公章侵占二八万粮补派人殴打上访村民 -BAC009S0768W0494 党和人民不会管到我身上来 -BAC009S0768W0495 侵吞征地种粮补偿款 -BAC009S0769W0121 该地块即为通州新城核心地标彩虹之门用地 -BAC009S0769W0122 北京通州新城投资公司网站显示 -BAC009S0769W0123 彩虹之门建筑净高三十米 -BAC009S0769W0124 为双拱形非中心对称建筑 -BAC009S0769W0125 新京报讯记者张旭报道 -BAC009S0769W0126 北京去年土地出让落下大幕 -BAC009S0769W0127 在丰台樊家村一宗商业用地底价成交后 -BAC009S0769W0128 北京今年的土地出让金锁定在两千亿元 -BAC009S0769W0129 同比去年增长五成 -BAC009S0769W0130 市政府决定今年将全面加快棚户区改造步伐 -BAC009S0769W0131 确保完成六万户搬迁改造任务 -BAC009S0769W0132 推进上百个棚改项目全面启动实施 -BAC009S0769W0133 今年北京要建设筹集各类保障房十万套 -BAC009S0769W0134 各区县力争完成十五万套开工任务竣工八万套 -BAC009S0769W0135 开工建设公租房不低于三万套 -BAC009S0769W0136 为了确保保障房住宅的优良品质 -BAC009S0769W0137 北京将继续改进住宅产业化推进方式 -BAC009S0769W0138 推行标准化装配式装修 -BAC009S0769W0139 前年至今年期间 -BAC009S0769W0140 北京要筹集建设各类保障性住房一百万套 -BAC009S0769W0141 为改善中低收入家庭住房条件 -BAC009S0769W0142 今年北京除了建设保障性住房外 -BAC009S0769W0143 还加大棚户区的改造任务 -BAC009S0769W0144 各区县各单位要按照下达的任务指标 -BAC009S0769W0145 确保完成今年六万户棚户区改造任务 -BAC009S0769W0146 今年是十二五规划的收官之年 -BAC009S0769W0147 各区县各单位要加强协作配合 -BAC009S0769W0148 要重点加大政策支持 -BAC009S0769W0149 破解棚户区改造征收瓶颈问题 -BAC009S0769W0150 各相关部门要主动服务区县服务各参建单位 -BAC009S0769W0151 对于今后棚户区改造中遇到的问题 -BAC009S0769W0152 各项目标任务已分解至各区县 -BAC009S0769W0153 今年北京将继续加大集体土地建设公租房试点力度 -BAC009S0769W0154 加快公租房的配租进度 -BAC009S0769W0155 力争配租三万户以上 -BAC009S0769W0156 今年北京还将加大社会单位泵租力度 -BAC009S0769W0157 市政府决定今年将全面加快棚户区改造步伐 -BAC009S0769W0158 确保完成六万户搬迁改造任务 -BAC009S0769W0159 今年土地收入预计近四万亿元 -BAC009S0769W0160 今年国有土地使用权出让收入四千亿元 -BAC009S0769W0161 继前年和去年连续两年突破四万亿元后 -BAC009S0769W0162 今年土地收入再维持稳定 -BAC009S0769W0163 相关公司股票走势 -BAC009S0769W0164 房地产市场竞争加大 -BAC009S0769W0165 房企应走差异化路线 -BAC009S0769W0166 还有多家机构分析认为 -BAC009S0769W0167 政府对今年的土地出让金收入预期下降 -BAC009S0769W0168 这暗示房地产的库存大 -BAC009S0769W0169 这直接影响到房地产的买地情况 -BAC009S0769W0170 相应的房价涨跌 -BAC009S0769W0171 如今房地产市场已经供需相对平衡 -BAC009S0769W0172 甚至开始进入了供过于求的局面 -BAC009S0769W0173 但去年住宅土地成交建筑面积仅十二亿平米 
-BAC009S0769W0174 远低于去年和前年平均的二十亿平米水平 -BAC009S0769W0175 除了开发商的买地行为减少 -BAC009S0769W0176 全国房地产库存正在堆积 -BAC009S0769W0177 而出让的住宅建筑面积总和至少为一百亿平米 -BAC009S0769W0178 约可供销售四年 -BAC009S0769W0179 开发商整体在手土地充足 -BAC009S0769W0180 瑞银分析师丁晓预测 -BAC009S0769W0181 预计明年全国土地市场仍难复苏 -BAC009S0769W0182 各路开发商一致看好一线城市房地产市场 -BAC009S0769W0183 从一月的一线城市的土地成交看 -BAC009S0769W0184 溢价率楼面价均处于高位 -BAC009S0769W0185 预计后市一二线城市拿地竞争将更加剧烈 -BAC009S0769W0186 中原地产首席市场分析师张大伟告诉南都记者 -BAC009S0769W0187 并进而对城投债券进行唱空或做空 -BAC009S0769W0188 最近企业债券特别是城投债券的发行难度加大 -BAC009S0769W0189 发行利率也有较大幅度上升 -BAC009S0769W0190 人民银行多次提高存款准备金率和存贷款基准利率 -BAC009S0769W0191 不仅是城投债券发行利率 -BAC009S0769W0192 债券市场所有品种发行利率整体上都表现出向上的走向 -BAC009S0769W0193 导致城投债券发行产生较高的风险溢价 -BAC009S0769W0194 城投债券收益率上升 -BAC009S0769W0195 对债券投资人来说不是坏事 -BAC009S0769W0196 有利于提升城投债券的资产配置价值 -BAC009S0769W0197 则需要在发债时机和发债规模上进行合理的把握 -BAC009S0769W0198 我个人不赞成这一判断 -BAC009S0769W0199 债券发行人是优质的 -BAC009S0769W0200 还本付息也是正常的 -BAC009S0769W0201 投资者对城投债券风险表现出的恐慌 -BAC009S0769W0202 加强城投债监管完善制度建设 -BAC009S0769W0203 有的媒体甚至用井喷来描述 -BAC009S0769W0204 您如何看待城投债券这几年的发展和作用 -BAC009S0769W0205 这几年城投债券发行数量的确有所增加 -BAC009S0769W0206 地方投融资平台公司通过发行债券进行融资 -BAC009S0769W0207 符合提高直接融资比重的要求 -BAC009S0769W0208 城投债券也适应了发行人和投资人的需要 -BAC009S0769W0209 这是这几年城投债券发行规模不断扩大的主要原因 -BAC009S0769W0210 我委核准发行的企业债券累计为七千亿元 -BAC009S0769W0211 其中城投债券共发行七千亿元 -BAC009S0769W0212 占比只有百分之七 -BAC009S0769W0213 城投债券的发行有比较严格的条件 -BAC009S0769W0214 从已发行的城投债券用途看 -BAC009S0769W0215 保障房建设和棚户区改造 -BAC009S0769W0216 城市文化和体育设施 -BAC009S0769W0217 地震灾后重建等领域 -BAC009S0769W0218 都起到了积极的作用 -BAC009S0769W0219 随着我国资本市场的进一步发展 -BAC009S0769W0220 城投债券作为中国债券市场的准市政债 -BAC009S0769W0221 发行规模还会稳步扩大 -BAC009S0769W0222 中国证券报面对市场对城投债券风险的担忧 -BAC009S0769W0223 是如何更好地防范城投债券可能出现的风险的 -BAC009S0769W0224 虽然已发行的城投债券的还本付息都是正常的 -BAC009S0769W0225 城投债作为一个信用产品 -BAC009S0769W0226 不可能是完全无风险的 -BAC009S0769W0227 我看了以后很受震动 -BAC009S0769W0228 虽然报道内容并没有具体的城投债券还本付息违约案 -BAC009S0769W0229 但却提醒了我们要更加关注城投债券可能出现的风险 -BAC009S0769W0230 并采取措施切实保护债券投资人的合法权益 -BAC009S0769W0231 作为城投债券发行监管部门 -BAC009S0769W0232 我们对城投债券发行人的审核一直是比较严格的 -BAC009S0769W0233 地方投融资平台公司申请发行债券 -BAC009S0769W0234 必须符合一些基本的条件企业必须连续三年盈利 -BAC009S0769W0235 所投项目必须经过合规性审查 -BAC009S0769W0236 我们还控制了投融资平台公司发债的范围 -BAC009S0769W0237 才能申请发行城投债券 -BAC009S0769W0238 就不得再通过发行城投债券新增政府性债务 -BAC009S0769W0239 正是有了这样一些严格的规定 -BAC009S0769W0240 使得很多投融资平台公司 -BAC009S0769W0241 难以满足发行城投债券的资格和条件 -BAC009S0769W0242 这在相当程度上控制了城投债劵的发行规模 -BAC009S0769W0243 也降低了城投债劵的风险 -BAC009S0769W0244 为了控制地方政府本届发债下届还钱的道德风险 -BAC009S0769W0245 我们还安排了专门的偿债均摊机制 -BAC009S0769W0246 也就是将债劵还本压力在债劵存续期内进行合理分摊 -BAC009S0769W0247 避免在最后一年累积过大的还本压力和风险 -BAC009S0769W0248 有媒体报道了云投集团等发债企业转移核心资产 -BAC009S0769W0249 损害债劵持有人利益的事件 -BAC009S0769W0250 并对债券市场形成了不小的冲击 -BAC009S0769W0251 你们如何考虑防止这类事件再次发生 -BAC009S0769W0252 更好地保护债券投资人的利益 -BAC009S0769W0253 据新华社电有病当然要吃药 -BAC009S0769W0254 但吃下去的药能否真正作用到病灶就很难说了 -BAC009S0769W0255 通过它能够实现药物的精准投送 -BAC009S0769W0256 他们开发出一种只有二十微米长的机器人 -BAC009S0769W0257 这个机器人由高分子材料制成 -BAC009S0769W0258 当它进入动物胃部时 -BAC009S0769W0259 锌就会与胃酸发生反应 -BAC009S0769W0260 从而推动机器人在胃部前行 -BAC009S0769W0261 这种技术很适合用来治疗胃溃疡等胃部疾病 -BAC009S0769W0262 高效精准投送药物不仅可降低用药量 -BAC009S0769W0263 这项技术离临床应用还有一段距离 -BAC009S0769W0264 据新华社电有病当然要吃药 -BAC009S0769W0265 但吃下去的药能否真正作用到病灶就很难说了 -BAC009S0769W0266 美国政府部门当地时间周四警示称 -BAC009S0769W0267 苹果设备的用户应当注意 -BAC009S0769W0269 不要在弹出窗口点击安装打开应用时 -BAC009S0769W0271 苹果公司也在第一时间发布官方声明 -BAC009S0769W0273 还没有任何一个用户真正遭受过此攻击 -BAC009S0769W0274 我们鼓励用户只从可信任的渠道 -BAC009S0769W0276 并注意下载过程中的任何警告 -BAC009S0769W0277 企业用户在安装定制应用程序时 -BAC009S0769W0278 须从他们公司的安全网站上进行下载并安装 -BAC009S0769W0279 美国政府部门当地时间周四警示称 -BAC009S0769W0280 苹果设备的用户应当注意 -BAC009S0769W0283 据新华社电印度官员透露 -BAC009S0769W0284 
美国将向印度转让两项军事技术 -BAC009S0769W0285 其中包括美国大鸦无人机今后将由印度工厂制造 -BAC009S0769W0286 印度斯坦时报二十四日援引消息人士的话报道 -BAC009S0769W0287 大鸦无人机由美国航空环境公司研制 -BAC009S0769W0288 由士兵直接用手投掷起飞 -BAC009S0769W0289 二零零三年以来在阿富汗得到了广泛应用 -BAC009S0769W0290 预计从二零一五年下半年开始 -BAC009S0769W0291 美国将不再生产大鸦无人机 -BAC009S0769W0292 改由设在印度本加卢鲁的一家美印合资公司生产 -BAC009S0769W0293 一名印度高级官员透露 -BAC009S0769W0294 眼下已有七个国家打算购买大鸦无人机 -BAC009S0769W0295 预计订单总额为三十亿美元 -BAC009S0769W0296 美国航空环境公司停止生产大鸦无人机后 -BAC009S0769W0297 印方工厂将继续完成剩馀订单 -BAC009S0769W0298 此外还将与美方联手研制一款升级版大鸦无人机 -BAC009S0769W0299 该技术可用于识别隐藏于伪装下的目标 -BAC009S0769W0300 从而把运输机转化为更为复杂的远程侦察机 -BAC009S0769W0301 美国外交消息人士透露 -BAC009S0769W0302 肯德尔定于二月二十三日访问印度 -BAC009S0769W0303 且达到情节特别严重程度 -BAC009S0769W0304 故依法裁定驳回上诉 -BAC009S0769W0306 从而获取用户信息的案件 -BAC009S0769W0308 虽然工信部很快就删除了后半句话 -BAC009S0769W0309 但还是引发业内广泛关注 -BAC009S0769W0310 这种宣传方式的目的是什么 -BAC009S0769W0311 截至中国经营报记者发稿前 -BAC009S0769W0312 浪潮官方尚未给出回应 -BAC009S0769W0313 旗下拥有浪潮信息浪潮软件浪潮国际三家上市公司 -BAC009S0769W0314 尽管政府对国产品牌有一定扶持 -BAC009S0769W0315 浪潮的发展也有可圈可可点之处 -BAC009S0769W0318 浪潮信息的研发支出约四亿元 -BAC009S0769W0319 占营业收入的比例是五点百分之四十七 -BAC009S0769W0320 较上年同期增长八十四点百分之三十九 -BAC009S0769W0321 研发支出主要用于服务器产品的研究开发和升级换代 -BAC009S0769W0322 研发投入是一个刚性指标 -BAC009S0769W0323 与技术的更新换代速度还是有相关性 -BAC009S0769W0324 国内几个服务器品牌的盘子还比较小 -BAC009S0769W0325 他们的硬件技术研发等力量 -BAC009S0769W0326 经验积累不足也是一个大问题 -BAC009S0769W0327 国产服务器即使是自主设计 -BAC009S0769W0329 核心架构也基本照抄国外厂商 -BAC009S0769W0330 在中低端市场或占有相应份额 -BAC009S0769W0331 但高端市场仍然难以企及 -BAC009S0769W0332 一位股份制银行科技部负责人如此讲述 -BAC009S0769W0334 国内厂商在高端核心技术上普遍存有差距 -BAC009S0769W0336 浪潮高管在接受媒体采访时表示 -BAC009S0769W0337 浪潮将通过产品渠道服务价格的全方位发力 -BAC009S0769W0338 一百米栏辛迪罗勒德尔 -BAC009S0769W0339 三千米障碍吉萨费里欣塔斯卡鲁塞 -BAC009S0769W0340 跳高玛丽劳伦斯荣格菲利斯 -BAC009S0769W0341 撑杆跳丽萨莱兹奇 -BAC009S0769W0342 跳远莱纳马尔库斯 -BAC009S0769W0343 三级跳克里斯丁吉尔奇 -BAC009S0769W0344 铅球克里斯蒂娜斯齐万兹 -BAC009S0769W0345 铁饼沙尼斯克拉夫特 -BAC009S0769W0346 链球贝蒂海德尔 -BAC009S0769W0347 标枪克里斯丁胡宋 -BAC009S0769W0348 克里斯蒂娜奥伯福尔 -BAC009S0769W0349 全能詹妮弗奥赛尔 -BAC009S0769W0350 四乘一百米接力亚历山大布尔格哈德特 -BAC009S0769W0351 安娜莱纳法拉塞 -BAC009S0769W0352 吉娜卢克肯科姆普尔 -BAC009S0769W0353 孙杨因心脏不适退出一千五百米自由泳决赛 -BAC009S0769W0354 无疑是刚刚结束的喀山世锦赛最大的遗憾 -BAC009S0769W0355 孙杨在一千五百米自由泳上的实力不容置疑 -BAC009S0769W0356 而这一次击败他的不是对手 -BAC009S0769W0357 孙杨的心脏不适早就不是秘密 -BAC009S0769W0358 是孙杨在二零一四年因治疗心脏不适 -BAC009S0769W0359 误服曲美他嗪导致兴奋剂检测呈阳性遭禁赛 -BAC009S0769W0360 正是治疗他心悸不适症状的 -BAC009S0769W0361 也第一次被媒体关注 -BAC009S0769W0362 记者从浙江省游泳协会了解到 -BAC009S0769W0363 孙杨就出现过心脏问题 -BAC009S0769W0364 孙杨因感冒后出现了胸闷心悸不适等症状 -BAC009S0769W0365 专家会诊之后认为孙杨存在心肌缺血情况 -BAC009S0769W0366 与感冒病毒感染损伤心肌有关 -BAC009S0769W0367 予服用处方药以治疗心肌缺血保护心肌 -BAC009S0769W0368 孙杨的心肌损伤是在感冒后引发的 -BAC009S0769W0369 心肌同位素扫描显示局部灌注差 -BAC009S0769W0370 达到保护心脏的作用 -BAC009S0769W0371 是去年备战亚运会选拔赛期间 -BAC009S0769W0372 直到二零一四年四月才解禁复出 -BAC009S0769W0373 尽管期间孙杨的训练并没有中断 -BAC009S0769W0374 但训练量几乎和正常时不可同日而语 -BAC009S0769W0375 为了备战亚运会选拔赛 -BAC009S0769W0376 在世锦赛决赛检录前突感不适 -BAC009S0769W0377 也是孙杨整个比赛期间疲劳所致 -BAC009S0769W0378 从四百米预赛到最后的一千五百米预赛 -BAC009S0769W0379 二百米的高强度无氧到一千五百米的有氧 -BAC009S0769W0380 师姐罗雪娟也忍不住落泪 -BAC009S0769W0381 回忆起自己从前训练时因心脏不适被抢救的事 -BAC009S0769W0382 更大的战场还在里约 -BAC009S0769W0383 华西都市报记者陈甘露 -BAC009S0769W0384 二零零八年北京奥运会时 -BAC009S0769W0385 曾经在鸟巢服务的志愿者们 -BAC009S0769W0386 顶级田径赛事再次落户鸟巢 -BAC009S0769W0387 如今为这次赛事服务的志愿者们更为年轻 -BAC009S0769W0388 他们几乎都是九零后 -BAC009S0769W0389 这批志愿者也被称为新鸟巢一代 -BAC009S0769W0390 而他们已经为这次田径世锦赛做好了准备 -BAC009S0769W0391 并要为国内外运动员献上一张张北京最美的名片 -BAC009S0769W0392 在每次大型赛事中志愿者都是必不可少的一部分 -BAC009S0769W0393 他们也是历届大赛的一个亮点 -BAC009S0769W0394 总共有二千七百六十人来为这项大赛志愿服务 -BAC009S0769W0395 他们最大的特点就是九零后占主角 -BAC009S0769W0396 比例超过百分之九十四的志愿者是九零后 -BAC009S0769W0397 在今年世锦赛的志愿者中 -BAC009S0769W0398 有的人还会八国语言 
-BAC009S0769W0399 志愿者除了要具备流利的英语交流能力外 -BAC009S0769W0400 还要求具备大型赛会或日常从事社会志愿服务的经验 -BAC009S0769W0401 北京青年报记者昨日在鸟巢采访了一些志愿者 -BAC009S0769W0402 发现他们中间真有不少是志愿达人 -BAC009S0769W0403 例如在竞赛部赛后控制中心的陈田希 -BAC009S0769W0404 也等待了很长时间 -BAC009S0769W0405 但最终却因为出品公司相对论影业申请破产 -BAC009S0769W0406 而不得不离开这个项目 -BAC009S0769W0407 乌鸦在没有其他公司愿意接手的情况下 -BAC009S0769W0408 谈到前日爸爸谢贤在宣传活动上出手打曾江 -BAC009S0769W0409 婷婷指收到消息时正在拍摄广告 -BAC009S0769W0410 亦未联络到爸爸了解他不是一个常打架的人 -BAC009S0769W0411 他是一个大人 -BAC009S0769W0412 他一定有他的原因 -BAC009S0769W0413 又指自己未试过受爸爸体罚 -BAC009S0769W0415 婷婷就坦言靠传媒得知 -BAC009S0769W0416 但会给哥哥谢霆锋传短信了解情况 -BAC009S0769W0417 中新网六月二十四日电六月二十三日 -BAC009S0769W0418 谢霆锋妹妹谢婷婷在微博晒出与父亲合影 -BAC009S0769W0419 谢婷婷将头挨着父亲的头 -BAC009S0769W0420 二人一脸笑容 -BAC009S0769W0421 搜狐娱乐讯据香港媒体报道 -BAC009S0769W0422 艺人谢婷婷从小就成为媒体焦点 -BAC009S0769W0423 而有鬼妹仔性格的婷婷不时以性感打扮亮相 -BAC009S0769W0424 她去游泳解暑 -BAC009S0769W0425 还在网上分享身穿比基尼泳装照 -BAC009S0769W0426 这种天气很适合搞池边派对 -BAC009S0769W0427 中新网五月二十一日报道据香港明报消息 -BAC009S0769W0428 谢婷婷为服装拍摄时装宣传照 -BAC009S0769W0429 她透露现在父母哥哥谢霆锋都各忙各的 -BAC009S0769W0430 一家人很难有机会团聚 -BAC009S0769W0431 施王祥被陆丰市纪委立案调查 -BAC009S0769W0432 二零一三一二二六 -BAC009S0769W0433 二零一四三七 -BAC009S0769W0434 陆丰市纪委决定给予施王祥开除党籍处分 -BAC009S0769W0435 南粤清风网通报该案详情 -BAC009S0769W0436 村官遭判刑处罚证据涉嫌造假公检法自查迟迟无果 -BAC009S0769W0437 山西省临汾市尧都区刘村镇刘南村一零名村干部 -BAC009S0769W0438 因决定取消刁天恩的土地承包合同移栽地上树苗 -BAC009S0769W0439 被法院以故意毁坏财物罪判刑或处罚 -BAC009S0769W0440 村小老师自掏腰包八零零零元为贫困生设奖学金 -BAC009S0769W0441 薛孝文在学生家中家访 -BAC009S0769W0442 从金堂县城驱车一个半小时至土桥镇的大禹村 -BAC009S0769W0443 就到了薛孝文任教的学校金堂县平桥学校 -BAC009S0769W0444 乡间公路也就四米宽 -BAC009S0769W0445 薛孝文还在给学生上课 -BAC009S0769W0446 在年轻时也有着跳龙门的梦 -BAC009S0769W0447 他辗转三所乡村学校 -BAC009S0769W0448 村干部大闹天宫孙大圣口碑爆棚 -BAC009S0769W0449 村干部强揽工程遭拒绝雇百名老人阻挠施工 -BAC009S0769W0450 犯罪嫌疑人刘德怀等六人被刑拘 -BAC009S0769W0451 村干部靠打架成名被抓后喊我是市人大代表 -BAC009S0769W0452 和平花苑现已更名为龙和华府 -BAC009S0769W0453 村庄晴天降奇冰十几斤重来历不明 -BAC009S0769W0454 天上掉下一块重约十几斤的冰块 -BAC009S0769W0455 虽然事情过去三天了 -BAC009S0769W0456 但嵩县德亭镇大王沟村村民们仍感到好奇 -BAC009S0769W0457 一零月一零日临近中午 -BAC009S0769W0458 砸到了村民的菜地里 -BAC009S0769W0459 还把地面砸了个大坑 -BAC009S0769W0460 附近村民闻讯纷纷赶来瞧个新鲜 -BAC009S0769W0461 捡拾一些冰块回家冰冻保存 -BAC009S0769W0462 专家排除了冰雹和飞机上落冰的两种可能 -BAC009S0769W0463 这块天降奇冰究竟是何物 -BAC009S0769W0464 村庄现两名村支书假支书无名有实村内掌权 -BAC009S0769W0465 村庄遭人倾倒数百吨化工废料附近植物全空死 -BAC009S0769W0466 非法倾倒数百吨化工废料 -BAC009S0769W0467 村民生活因此发生巨变井水变味田地减产前日 -BAC009S0769W0468 该村村民黎胜明向楚天快报求助 -BAC009S0769W0469 希望相关部门能处理此事 -BAC009S0769W0470 村支书一周只上二小时班村民称反映会遭报复 -BAC009S0769W0471 村支书上班时间带彩娱乐神秘人曝光视频证据 -BAC009S0769W0472 视频中正在带彩娱乐的灰衣男 -BAC009S0769W0473 被警方确认为新农村党支部书记毛家文 -BAC009S0769W0474 村支书为考公务员改小一零岁一四岁时三月内生两子 -BAC009S0769W0475 淅川县上集镇一名村支书被指将年龄改小一零岁 -BAC009S0769W0476 图为时上集镇派出所 -BAC009S0769W0477 三个月内连生两个儿子 -BAC009S0769W0478 村支书将两女儿家七口人列为搬迁户骗领搬迁款 -BAC009S0769W0479 村支书违法占地建加油站多部门介入处罚仍未拆 -BAC009S0769W0480 浙江在线零九月二一日讯浙江日报记者季建荣近日 -BAC009S0769W0481 村民多次向温岭市有关部门投诉反映 -BAC009S0769W0482 但问题至今没有解决 -BAC009S0769W0483 村支书违规建小产权房花钱买通所有关系 -BAC009S0769W0484 都说下属有困难找领导 -BAC009S0769W0485 灵璧县韦集镇韦集村原村支书石某 -BAC009S0769W0486 就花钱请领导为他撑腰 -BAC009S0769W0487 村支书遭集体举报买鼠药欲投毒报复村民 -BAC009S0769W0488 本报一零月五日讯国庆长假 -BAC009S0769W0489 省纪委要求新闻媒体主动参与到纠四风监督工作中 -BAC009S0769W0490 强化媒体根据群众举报开展调查采访和舆论监督 -BAC009S0769W0491 发生了一起村民举报村支书贪腐 -BAC009S0769W0492 村支书以在全村井水中投毒以报复村民的离奇事件 -BAC009S0769W0493 村支书醉驾撞伤孕妇刑满释放后仍当人大代表 -BAC009S0769W0494 华江瑶族乡十四届人大代表会第五次会议会务材料上 -BAC009S0769W0495 于二零一四年四月一九日晚 -BAC009S0770W0121 住宅土地出让金及成交面积均大幅下降 -BAC009S0770W0122 开发商进驻一二线城市 -BAC009S0770W0123 抛售三线城市 -BAC009S0770W0124 遇到毛利率低的问题 -BAC009S0770W0125 发现土地成本占比持续提升 -BAC009S0770W0126 目前全国该指标 -BAC009S0770W0127 一线城市超过三成 -BAC009S0770W0128 三线及以下为一成 -BAC009S0770W0129 一二线城市用地紧张 -BAC009S0770W0130 房地产商需要解决毛利率低的问题 
-BAC009S0770W0131 中指院广州公司总经理张化学向南都记者表示 -BAC009S0770W0132 三线城市库存积压又逼倒房地产商在一线城市抢地 -BAC009S0770W0133 建议房企不要一味强调做大 -BAC009S0770W0134 可以重点关注如何做强 -BAC009S0770W0135 在自身优势领域发力 -BAC009S0770W0136 发现无论是千亿巨头地产商 -BAC009S0770W0137 多数在积极剑指一线城市 -BAC009S0770W0138 从今年房企的买地情况来看 -BAC009S0770W0139 今年万科拿下九宗地块 -BAC009S0770W0140 包括五个一二线城市 -BAC009S0770W0141 保利地产开始进军成都珠海 -BAC009S0770W0142 中海地产作为国企龙头 -BAC009S0770W0143 也在厦门拿下几宗商住用地和济南几宗居住用地 -BAC009S0770W0144 在房企扎堆一二线城市时 -BAC009S0770W0145 更致命的是中小房企在融资方面的短板 -BAC009S0770W0146 相比千亿房企的借贷利率 -BAC009S0770W0147 中小房企要面临高达两位数利率 -BAC009S0770W0148 中国市场空间多样化 -BAC009S0770W0149 房企除了像千亿地产一样做大 -BAC009S0770W0150 在某一方面找到自己的企业竞争力 -BAC009S0770W0151 行业的玩家门槛越来越高 -BAC009S0770W0152 主动退出和寻求并购的中小开发商增多 -BAC009S0770W0153 大开发商有机会借此提高行业集中程度 -BAC009S0770W0154 张大伟向南都记者分析 -BAC009S0770W0155 在三线城市库存高攀销售停滞的情况下 -BAC009S0770W0156 没有雄厚的资金良好的业绩以及成熟的融资平台 -BAC009S0770W0157 似乎难以在一二线城市站稳 -BAC009S0770W0158 房地产业将在明年有所洗牌 -BAC009S0770W0159 点击进入股友会参与讨论 -BAC009S0770W0160 今年国有土地出让权收入四千亿元 -BAC009S0770W0161 今年房地产市场地域分化将加剧 -BAC009S0770W0162 政策放松和高库存背景下 -BAC009S0770W0163 开发商均面临不均衡的复苏前景 -BAC009S0770W0164 今年中国房地产开发商仍将面临供应过剩 -BAC009S0770W0165 房价不太可能强劲反弹 -BAC009S0770W0166 房地产在不同城市之间的复苏也将存在分化 -BAC009S0770W0167 一线城市或将复苏率先复苏 -BAC009S0770W0168 三四线城市可能在继续因高库存而承压 -BAC009S0770W0169 中国房地产的市场价格和销量将继续调整 -BAC009S0770W0170 但下半年的销售可能会回升 -BAC009S0770W0171 开发商只需选择继续降价 -BAC009S0770W0172 尤其是在三四线城市 -BAC009S0770W0173 中国经济增速放缓的背景下 -BAC009S0770W0174 预期政府将继续放松政策 -BAC009S0770W0175 而政府放松限购按揭和内地融资政策 -BAC009S0770W0176 房地产需求可能会上升 -BAC009S0770W0177 这将有助于开发商明年维持销量 -BAC009S0770W0178 政府放松政策对房地产销售的正面影响可能会提升 -BAC009S0770W0179 标普信用分析师孔磊说道 -BAC009S0770W0180 关于明年的房价走势 -BAC009S0770W0181 标普在基准情景假设下的预期是 -BAC009S0770W0182 明年平均售价将维持不变 -BAC009S0770W0183 销售额则将维持不变 -BAC009S0770W0184 房地产价格调整还未完全结束 -BAC009S0770W0185 未来一年内中国房地产价格不太可能强劲反弹 -BAC009S0770W0186 虽然过去一年一些获评级开发商的信用状况变差 -BAC009S0770W0187 徐林发债企业在债劵存续期内进行资产转移 -BAC009S0770W0188 极可能对债劵持有人利益构成不利影响 -BAC009S0770W0189 直接涉及到债劵持有人的利益保护问题 -BAC009S0770W0190 我们立即与云投集团进行了沟通 -BAC009S0770W0191 并严格按照合规程序进行 -BAC009S0770W0192 我委也注意到在企业债劵存续期内 -BAC009S0770W0193 需要对发行人资产重组等重大事宜加强监管 -BAC009S0770W0194 在制度上对债券人的合法权益进行保护 -BAC009S0770W0195 建立地方政府债务管理体系 -BAC009S0770W0196 从您刚才的介绍中我们了解到 -BAC009S0770W0197 城投债劵对公司城市基础设施和市政的建设 -BAC009S0770W0198 起到了非常积极的作用 -BAC009S0770W0199 对丰富债劵市场品种也具有积极意义 -BAC009S0770W0200 结合地方政府债务管理制度的完善 -BAC009S0770W0201 下一步我国的城投债劵还需要做哪些完善 -BAC009S0770W0202 这个问题涉及到一系列的制度完善 -BAC009S0770W0203 是一个比较复杂的问题 -BAC009S0770W0204 我个人是这么认识的 -BAC009S0770W0205 我国还处于城市化快速发展期 -BAC009S0770W0206 需要为各地的城市建设提供规范的融资渠道 -BAC009S0770W0207 农业与非农产业之间劳动生产率的差距也很大 -BAC009S0770W0208 这决定了我国城市化动力十分强劲 -BAC009S0770W0209 城市化进程远未结束 -BAC009S0770W0210 城市化快速发展期的重要特征就是基础设施投资需求量大 -BAC009S0770W0211 这是我国所处的发展阶段决定的 -BAC009S0770W0212 政府通过债务融资从事基础建设 -BAC009S0770W0213 我们应该建设可控的规范化的地方政府融资机制 -BAC009S0770W0214 为各地的基础建设设提供有制度保障的融资渠道 -BAC009S0770W0215 城投债劵作为准市政债劵仍将是有效的融资工具 -BAC009S0770W0216 但是还需要进一步改进 -BAC009S0770W0217 在政府投融资体制改革过程中 -BAC009S0770W0218 从事当地基础建设 -BAC009S0770W0219 相当于过去体制而言是更加市场化的 -BAC009S0770W0220 城投债劵作为融资平台公司最透明的直接融资工具 -BAC009S0770W0221 仍然存在并具有发展空间 -BAC009S0770W0222 由于目前城投债劵的发行需要符合企业的债劵发行的条件 -BAC009S0770W0223 这使得我国城投债劵的发行利率相对偏高 -BAC009S0770W0224 城投债劵的发行期限和利率 -BAC009S0770W0225 未来应该在制度上进一步完善 -BAC009S0770W0226 使得城投公司能够发行真正意义上的长期市政债劵 -BAC009S0770W0227 要尽快建立我国的地方政府债务管理体系 -BAC009S0770W0228 对于如何建立规范的地方政府融资渠道 -BAC009S0770W0229 加强地方政府债务管理和风险防控 -BAC009S0770W0230 一些专家学者提出了许多好的建议 -BAC009S0770W0231 如建立规范透明的地方政府融资渠道 -BAC009S0770W0232 并对地方政府债务进行监控和风险防范等 -BAC009S0770W0233 由于我国还没有建立统一的地方政府债务风险管理制度 -BAC009S0770W0234 设定政府性债务风险控制指标和标准 -BAC009S0770W0235 并对政府性债务进行馀额管理 -BAC009S0770W0236 
使用地方政府的债务融资规模控制在安全范围内 -BAC009S0770W0237 远低于发生债务危机的欧美国家 -BAC009S0770W0238 债劵发行人是优质的 -BAC009S0770W0239 还本付息也是正常的 -BAC009S0770W0240 应该建立风险可控的规范化地方政府融资机制 -BAC009S0770W0241 为各地的基础建设提供有力的保障的融资渠道 -BAC009S0770W0242 责任编辑廖一宁 -BAC009S0770W0243 该政策将于二零一二年施行 -BAC009S0770W0244 要继续深化天然气价格改革 -BAC009S0770W0245 加快理顺天然气价格与可代替能源的比价关系 -BAC009S0770W0246 引导天然气合理消费 -BAC009S0770W0247 提高天然气利用率支持天然气贸易机制创新 -BAC009S0770W0248 天然气用户为优先允许限制类和禁止类 -BAC009S0770W0249 限制类主要是指天然化工 -BAC009S0770W0250 各地要按照天然气利用优先顺序加强需求侧管理 -BAC009S0770W0251 鼓励优先类支持允许类天然气利用项目发展 -BAC009S0770W0252 对限制类项目的核准和审核要从严把握 -BAC009S0770W0253 商议向印度转移更多军事技术的事宜 -BAC009S0770W0254 据新华社电印度官员透露 -BAC009S0770W0255 美国将向印度转让两项军事技术 -BAC009S0770W0256 其中包括美国大鸦无人机今后将由印度工厂制造 -BAC009S0770W0257 印度斯坦时报对二十四日援引消息人士的话报道 -BAC009S0770W0258 二零一五年最适宜供职的公司仍在科技领域 -BAC009S0770W0259 该网站根据雇员的反馈 -BAC009S0770W0260 给出了前五十名的公司排名 -BAC009S0770W0261 排名前十的科技公司 -BAC009S0770W0263 不仅在科技公司领域排名第一 -BAC009S0770W0264 而且在整个榜单也位居首位 -BAC009S0770W0265 谷歌不仅会以优厚薪酬招募顶尖人才 -BAC009S0770W0267 该应用交付网络在整个榜单中位居第四 -BAC009S0770W0268 在科技领域排名第二 -BAC009S0770W0270 这家社交网络巨头对待员工也是相当慷慨 -BAC009S0770W0271 谷歌的福利待遇他家基本都有 -BAC009S0770W0272 之前刚刚提出为女性员工提供冷冻卵子费用 -BAC009S0770W0274 去年高通被评为最佳实习科技公司 -BAC009S0770W0276 对于苹果公司来说这是很关键的一年 -BAC009S0770W0279 都是该公司的强心剂 -BAC009S0770W0280 雇员们也在很大程度上受到了鼓舞 -BAC009S0770W0282 作为全球最大的职业社交网站 -BAC009S0770W0283 领英在榜单上的成绩也是相当不错的 -BAC009S0770W0284 提供免费房地产估价服务的网站 -BAC009S0770W0285 在美国一上线就造成大轰动 -BAC009S0770W0290 且把服务范围特别局限在医疗健康领域 -BAC009S0770W0291 搜狐消息外媒消息 -BAC009S0770W0292 二零一五年最适宜供职的公司仍在科技领域 -BAC009S0770W0293 该网站根据雇员的反馈 -BAC009S0770W0297 排名从第十三位上升至第十一位 -BAC009S0770W0298 高通二零一四年所获专利也增长了百分之二十三 -BAC009S0770W0299 排名从第九升至第七 -BAC009S0770W0300 以上大多数专利都与计算软件及相关技术有关 -BAC009S0770W0304 加速推进中国服务器市场份额的第一目标 -BAC009S0770W0305 这是浪潮借助政策东风来做的营销手段 -BAC009S0770W0306 对于企业提高股价促成业务 -BAC009S0770W0307 某个银行的系统采购 -BAC009S0770W0308 在确保系统顺利运行的情况下 -BAC009S0770W0309 大家可能因为国家政策扶持国产品牌的大势 -BAC009S0770W0310 而选择国产的服务器 -BAC009S0770W0311 就更加愿意长期持有他们的股票 -BAC009S0770W0312 核心技术待突破自棱镜门事件之后 -BAC009S0770W0313 国家信息安全的问题被推到了风口浪尖 -BAC009S0770W0314 而体现在服务器产业上 -BAC009S0770W0315 由于中国政府的大力扶持 -BAC009S0770W0316 国产服务器厂商迎来利好 -BAC009S0770W0317 在国内四大厂商浪潮华为联想曙光中 -BAC009S0770W0318 浪潮的特点在于定制化策略 -BAC009S0770W0319 与互联网企业深度合作 -BAC009S0770W0320 而这种策略带来的结果是市场份额的快速提升 -BAC009S0770W0322 至于像整机柜这类深度定制化的细分市场 -BAC009S0770W0323 百分之百为国产品牌 -BAC009S0770W0324 其中浪潮达到了百分之六十的市场占有率 -BAC009S0770W0325 近年来随着国内互联网企业的快速发展 -BAC009S0770W0326 宽带和服务器的采购量也水涨船高 -BAC009S0770W0327 由于各家之间竞争激烈 -BAC009S0770W0328 往往在采购过程中尽量压低报价 -BAC009S0770W0329 再加上互联网企业对服务器技术性可能等要求很高 -BAC009S0770W0330 很多服务器厂商进入做一两年 -BAC009S0770W0331 而浪潮从二零一零坚持做到现在 -BAC009S0770W0332 业内对其做法的解读是先凭着低价杀入市场 -BAC009S0770W0333 以品质和服务黏住用户 -BAC009S0770W0334 虽然面对赔钱赚吆喝的质疑 -BAC009S0770W0335 浪潮与海关总署启动战略合作助推智慧海关搜狐科技 -BAC009S0770W0336 浪潮集团与海关总署启动战略合作 -BAC009S0770W0337 合作范围遍及全国各直属海关及隶属海关 -BAC009S0770W0338 对于我而言现在已经成为了一种习惯与本能 -BAC009S0770W0339 有着较为丰富志愿服务经历的九零后吴雯的话 -BAC009S0770W0340 只是本次田径世锦志愿者这个大群体的一个缩影 -BAC009S0770W0341 他们有理由相信九零后同样可以做好 -BAC009S0770W0342 我希望能通过这次田径世锦赛 -BAC009S0770W0343 以及未来几年更多志愿经历 -BAC009S0770W0344 来为二零二二年的冬奥会积累经验 -BAC009S0770W0345 到时将会成为冬奥会志愿者的主力 -BAC009S0770W0346 张锦麟将为自己称为鸟巢新一代志愿者 -BAC009S0770W0347 他在为此时刻准备着 -BAC009S0770W0348 本报记者宋翔王薇 -BAC009S0770W0349 著名双人滑运动员庞清和董健虽未正式宣布退役 -BAC009S0770W0350 但现在的生活已经进入了准退役状态 -BAC009S0770W0351 两人把更多的精力放到了花滑运动的推广上 -BAC009S0770W0352 他俩组建了工作团队 -BAC009S0770W0353 过上了比运动员复杂得多的生活 -BAC009S0770W0355 九月初顺利通过了考试 -BAC009S0770W0356 佟健已经完成了第一个学模块的学习 -BAC009S0770W0357 常年的专业训练给身体带来了各种伤病 -BAC009S0770W0358 二零一四年索契冬奥会上 -BAC009S0770W0359 早到了退役年龄的庞清和佟健克服了伤病困难 -BAC009S0770W0360 但这对老将却以追梦无悔的精神 -BAC009S0770W0361 
赢得了同行媒体和观众的敬意 -BAC009S0770W0362 庞清和佟健没有马上退役 -BAC009S0770W0363 而是坚持参加了今年三月的世界花滑锦标赛 -BAC009S0770W0364 一方面是他们从事花样滑冰二零多年 -BAC009S0770W0365 与这项运动结下深厚感情 -BAC009S0770W0366 始终对那块冰面恋恋不舍 -BAC009S0770W0367 也是中国双人滑在申雪赵宏退役后 -BAC009S0770W0368 庞清和佟健仍肩负着扛起中国双人滑大旗的重任 -BAC009S0770W0369 这让他们的退役迟迟没有提上日程 -BAC009S0770W0370 中国双人滑项目的后续发展应当有了较为清晰的前景 -BAC009S0770W0371 庞清和佟健终于可以放心地考虑退役的事情了 -BAC009S0770W0372 受大学生的提议启发 -BAC009S0770W0373 该公众号已经举办了两期公益活动 -BAC009S0770W0374 佟健又将国内部分优秀的单人滑和冰舞运动员集合起来 -BAC009S0770W0375 组建了花滑表演团队 -BAC009S0770W0376 与商业性冰场达成合作协议 -BAC009S0770W0377 以表演的方式推广花样滑冰 -BAC009S0770W0378 现成的选择就在面前 -BAC009S0770W0379 或进入体育行政机关 -BAC009S0770W0380 这些出路也是中国运动员比较常见的退役选择 -BAC009S0770W0381 但庞清和佟健并不愿意随遇而安地安排自己的后半生 -BAC009S0770W0382 自己和庞清曾在赛场上努力地追求优秀更优秀 -BAC009S0770W0383 他们对退役后的人生同样也有追求 -BAC009S0770W0384 佟健给自己定下了要做就做到最好 -BAC009S0770W0385 和绝不会是短期行为的基调 -BAC009S0770W0386 对于工作中遇到的管理经验和能力欠缺问题 -BAC009S0770W0387 佟健的解决办法就只能努力提高自己 -BAC009S0770W0388 佟健报考了北大光华管理学院 -BAC009S0770W0389 在九月初参加考试时 -BAC009S0770W0390 佟健做好了考不上的思想准备 -BAC009S0770W0391 佟健因此顺利通过了入学考试 -BAC009S0770W0392 佟健是同班同学里唯一运动员出身的 -BAC009S0770W0393 记者查阅相关资料发现 -BAC009S0770W0395 来自体育圈的并不多见 -BAC009S0770W0396 只有姚明和刘国梁等少数几个人 -BAC009S0770W0397 佟健希望自己能真的学到管理知识 -BAC009S0770W0398 管理知识肯定都是用的上的 -BAC009S0770W0399 至于中国花滑运动的推广 -BAC009S0770W0400 佟健更希望能有实实在在的发展 -BAC009S0770W0401 这同样需要有效的办法和手段 -BAC009S0770W0402 借着北京将要举办二零二二年冬奥会的东风 -BAC009S0770W0403 冰雪运动在中国势必会有一次发展高潮 -BAC009S0770W0404 很可能被雪藏下去 -BAC009S0770W0405 搜狐娱乐赛文耷子备受关注重拍版乌鸦 -BAC009S0770W0406 在原定男主角卢克伊万斯退出剧组之后 -BAC009S0770W0407 将双双加盟该片 -BAC009S0770W0408 搜狐娱乐据香港媒体报道 -BAC009S0770W0409 谢婷婷九月七日三十三岁生日 -BAC009S0770W0410 网友纷纷留言祝谢婷婷生日快乐 -BAC009S0770W0411 还拉赞姑还是那么漂亮 -BAC009S0770W0412 搜狐娱乐讯北京时间八月十二日消息 -BAC009S0770W0413 据香港媒体报道 -BAC009S0770W0414 谢贤昨天庆祝七十九岁生日 -BAC009S0770W0415 相约家人到谢霆锋家中上演十二道锋味私房菜 -BAC009S0770W0416 由于谢霆锋亲为家人做大厨 -BAC009S0770W0417 同场更有两个神秘嘉宾 -BAC009S0770W0419 搜狐娱乐讯据香港媒体报道 -BAC009S0770W0420 谢贤怒打曾江 -BAC009S0770W0421 究竟是演戏还是积怨已深 -BAC009S0770W0422 只有他们才知道 -BAC009S0770W0423 有不少幕后花絮片花 -BAC009S0770W0424 节目推出至今收视很高 -BAC009S0770W0425 下星期更进入结局周 -BAC009S0770W0426 曾江谢贤四哥及胡枫修哥大谈往日情时 -BAC009S0770W0427 曾江当时说我和谢贤相识多年 -BAC009S0770W0428 也没有发生什么冲突 -BAC009S0770W0429 不好的事情发生 -BAC009S0770W0430 怎料无心说话却一语成谶 -BAC009S0770W0431 经兴安县人大常委会许可 -BAC009S0770W0432 杨爱明被兴安警方刑事拘留 -BAC009S0770W0433 二零一四九二 -BAC009S0770W0434 兴安县法院判杨爱明拘役四个月 -BAC009S0770W0435 杨爱明却参加了兴安县第十五届人大五次会议 -BAC009S0770W0436 村支书骗拆迁款一二万获刑一一年 -BAC009S0770W0437 骗取搬迁补偿金一二二万元 -BAC009S0770W0438 北京晨报记者昨天获悉 -BAC009S0770W0439 延庆法院一审以贪污罪判处钱某有期徒刑一一年 -BAC009S0770W0440 村支书村民被政府工作人员土埋系邻里纠纷 -BAC009S0770W0441 河南省新乡市封丘县留光镇政府东五零零米左右 -BAC009S0770W0442 当地村民孙秋英在自家门口因是否垫路与邻居产生争执 -BAC009S0770W0443 遭到镇政府工作人员用土掩埋 -BAC009S0770W0444 肇事方为镇政府安全保卫人员 -BAC009S0770W0445 所开拉土车辆是镇政府扣押车辆 -BAC009S0770W0446 村支委办公室猥亵女童被刑拘的孩子奶奶在隔壁开会 -BAC009S0770W0447 海峡都市报大白天 -BAC009S0770W0448 在村委会办公楼书记办公室 -BAC009S0770W0449 五一岁的村支委猥亵一名一零岁的留守儿童隔壁 -BAC009S0770W0450 孩子的奶奶正在参加村里的道路环境综合治理工作会议 -BAC009S0770W0451 这事发生在福建省漳州市诏安县林头村 -BAC009S0770W0452 该村支委李某因涉嫌猥亵儿童被警方传唤 -BAC009S0770W0453 村民一零年在沙洲植树造林已成林却被指种错地方 -BAC009S0770W0454 两个村子之间的长江江面上 -BAC009S0770W0455 有一块面积近五零零零面积的沙洲 -BAC009S0770W0456 沙洲几乎年年被淹 -BAC009S0770W0457 村民二六零棵梨树被连根推倒在地里住房被夷为平地 -BAC009S0770W0458 华商报讯记者张林实习生邓泽惠一夜之间 -BAC009S0770W0459 村民地里二六零馀棵正在挂果的梨树被连根推倒 -BAC009S0770W0460 地头边的一间平房也被夷为平地 -BAC009S0770W0461 至今未找到肇事者 -BAC009S0770W0462 村民不满地讨说法要求楼盘开发商停工被拘留 -BAC009S0770W0463 去年一二月四日在村民多次上访无果的情况下 -BAC009S0770W0464 大家到施工现场的临时大门外 -BAC009S0770W0465 尽管检察管最后以事事实不清 -BAC009S0770W0466 但在张关押了三七天后 -BAC009S0770W0467 公安局仍采取了取保候审的手段 -BAC009S0770W0468 没有发生任何肢体冲突 -BAC009S0770W0469 更没有扰乱社会秩序 -BAC009S0770W0470 
村民不满行政批复诉市区政府区长庭应诉 -BAC009S0770W0471 门头沟雁翅镇村民李冬梅因不服行政批复 -BAC009S0770W0472 将市区两级政府告上法庭 -BAC009S0770W0473 门头沟区长张贵林出庭应诉 -BAC009S0770W0474 门头沟雁翅镇村民李冬梅向市政府提起了行政复议 -BAC009S0770W0475 复议维持了区政府的认定结论 -BAC009S0770W0476 村民为多拿补偿在拆迁前突击装修全用劣质建材 -BAC009S0770W0477 村里随处可见装潢小广告 -BAC009S0770W0478 村民为救坠井男童身亡被拉出时呈托举姿势 -BAC009S0770W0479 为了一名坠入废井的男童 -BAC009S0770W0480 邳州几名村民先后下井救人 -BAC009S0770W0481 第一个下井救人的大叔却再也没能爬上来 -BAC009S0770W0482 他的双手还保持着托举的姿势 -BAC009S0770W0483 他的义举感动了四里八乡 -BAC009S0770W0484 七月一三日的葬礼上 -BAC009S0770W0485 数百名乡邻自发赶来送他一程 -BAC009S0770W0486 实习生郭杨雪通讯员耿万志现代快报记者李伟豪 -BAC009S0770W0487 村民为解决问题给领导建庙官员其诉求不合规 -BAC009S0770W0488 其在村西旁花费万元建起一名叫清明堂的家庙 -BAC009S0770W0489 每天烧香敬拜办事处主任 -BAC009S0770W0490 该事件引发社会关注 -BAC009S0770W0491 以上两村民所要求的内容不符合相关规定 -BAC009S0770W0492 村民为阻止儿子与女友相见编造偷小孩谎言 -BAC009S0770W0493 涉嫌编造谣言非法拘禁被刑拘 -BAC009S0770W0494 村民举报县城干部建十馀栋别墅纪检委部门介入调查 -BAC009S0770W0495 村小组干部未经过小组集集体讨论 -BAC009S0901W0121 作为一线城市的北京 -BAC009S0901W0122 其市管国管住房公积金政策也均进行调整 -BAC009S0901W0123 公积金贷款最高额度由七万元提升至十万元 -BAC009S0901W0124 公积金政策调整方式各异对楼市影响几何 -BAC009S0901W0125 盘活各地公积金资源 -BAC009S0901W0126 以北京提高公积金贷款最高额度为例 -BAC009S0901W0127 据伟嘉安捷数据统计显示 -BAC009S0901W0128 该政策在七月份实施一周后 -BAC009S0901W0129 公积金贷款额度的提高 -BAC009S0901W0130 将使更多购房者具备买房支付能力 -BAC009S0901W0131 中原地产首席分析师张大伟认为 -BAC009S0901W0132 放宽提取住房公积金支付房租条件则对楼市影响甚微 -BAC009S0901W0133 对楼市也有较大影响 -BAC009S0901W0134 利用公积金可以减少租赁者负担 -BAC009S0901W0135 使其缓冲过度到买房阶段 -BAC009S0901W0136 对楼市消化库存起到正面作用 -BAC009S0901W0137 中新网房产频道每每 -BAC009S0901W0138 要求各地放宽公积金贷款条件后 -BAC009S0901W0139 美丽北京大型绿色公益品牌项目 -BAC009S0901W0140 住建部等三部委再次联合发 -BAC009S0901W0141 美丽北京大型绿色公益品牌项目 -BAC009S0901W0142 随着广州住房公积金贷款政策的调整实施 -BAC009S0901W0143 公积金贷款最高额度也不同程度上调 -BAC009S0901W0144 住房公积金贷款因其利率较低的优势 -BAC009S0901W0145 一直以来广受购房者青睐 -BAC009S0901W0146 本轮住房公积金房贷政策调整 -BAC009S0901W0147 进一步加速了消费者的入市节奏 -BAC009S0901W0148 广州调整住房公积金个人住房贷款政策 -BAC009S0901W0149 同时对申请公积金贷款的缴纳时限调整为五个月 -BAC009S0901W0150 据广州日报昨天报道 -BAC009S0901W0151 公积金贷款首付比例降低的消息令购房者喜出望外 -BAC009S0901W0152 其中刚需买家入市积极性明显提高 -BAC009S0901W0153 据伟嘉安捷提供的数据显示 -BAC009S0901W0154 北京公积金贷款首付比例松绑一周后 -BAC009S0901W0155 公积金贷款及组合贷咨询量明显上涨 -BAC009S0901W0156 尤其组合贷的咨询量较上月月初一周上涨百分之五左右 -BAC009S0901W0157 上海深圳等主要城市也在公积金新政推动下 -BAC009S0901W0158 呈现购房者积极入市的行情 -BAC009S0901W0159 全国已有超百个城市发布了不同力度的公积金松绑政策 -BAC009S0901W0160 加之降息降准等政策组合拳 -BAC009S0901W0161 呈现出量价齐涨的局面 -BAC009S0901W0162 据中国指数研究院最新数据显示 -BAC009S0901W0163 深圳环比上涨百分之七 -BAC009S0901W0164 涨幅据十大城市之首 -BAC009S0901W0165 五月份多地楼市的成交量明显上涨 -BAC009S0901W0166 是房地产当前发展格局下的一个必然 -BAC009S0901W0167 唯独这样才能盘活公积金资源 -BAC009S0901W0168 促使更多购房者积极入市 -BAC009S0901W0169 伴随着各地中住房公积金新政的落地实施 -BAC009S0901W0170 楼市进展仍需进一步观望 -BAC009S0901W0171 购房者受惠于政策利好的同时 -BAC009S0901W0172 公积金在申请放贷流程上并未提速 -BAC009S0901W0173 相反相关环节上审批更加严格 -BAC009S0901W0174 从目前上海住房公积金的具体政策看 -BAC009S0901W0175 购房的扶持力度在加大 -BAC009S0901W0176 但主要还是体现在贷款成本的降低 -BAC009S0901W0177 而申请公积金贷款方面还是需要走严格的流程 -BAC009S0901W0178 公积金提取一直是目前试图突破的内容 -BAC009S0901W0179 但目前还未出现大面积提取行为 -BAC009S0901W0180 来自广州日报的报道称 -BAC009S0901W0181 从申请到最后的拨放款 -BAC009S0901W0182 部分客户甚至等两个多月 -BAC009S0901W0183 如果申请公积金贷款或公积金贷款与商业贷款的组合贷 -BAC009S0901W0184 伟嘉安捷对中新网房产频道表示 -BAC009S0901W0185 现在公积金贷款办理需要一个月左右的时间 -BAC009S0901W0186 而申请办理组合贷款的手续则更为复杂 -BAC009S0901W0187 农业现代化水平显着提升 -BAC009S0901W0188 发展现代农业的条件更加有利 -BAC009S0901W0189 加快发展现代农业机遇遇得 -BAC009S0901W0190 一是工业化城镇化的引领推动作用将更加明显 -BAC009S0901W0191 信息化水平不断提高 -BAC009S0901W0192 农村劳动力大量转移 -BAC009S0901W0193 以及扩大内需战略的实施 -BAC009S0901W0194 二是政策支持将更加强化 -BAC009S0901W0195 随着我国综合国力和财政实力不断增强 -BAC009S0901W0196 强农惠农富农政策力度将进一步加大 -BAC009S0901W0197 支持现代农业发展的物质基础更加牢固 -BAC009S0901W0198 三是科技支撑将更加有力 -BAC009S0901W0199 科技创新孕育新突破 -BAC009S0901W0200 全球绿色经济低碳技术正在兴起 -BAC009S0901W0201 
现代农业发展的动力更加强劲 -BAC009S0901W0202 四是外部环境将更加优化 -BAC009S0901W0203 形成合力推进现代农业发展的新局面 -BAC009S0901W0204 广大农民的积极性创造性将得到进一步激发和释放 -BAC009S0901W0205 发展现代农业的要求更加迫切 -BAC009S0901W0206 在工业化城镇化快速推进时期 -BAC009S0901W0207 农业面临着容易被忽视或削弱的风险 -BAC009S0901W0208 我国工业化城镇化快速发展 -BAC009S0901W0209 但农业现代化明显滞后 -BAC009S0901W0210 面临着一系列严峻挑战 -BAC009S0901W0211 科技创新和推广新应用能力不强 -BAC009S0901W0212 农业社会化服务体系不健全 -BAC009S0901W0213 国际农产品市场投机炒作及传导影响加深 -BAC009S0901W0214 我国现代农业发展面临更多的外部不确定性 -BAC009S0901W0215 必须珍惜抓住用好难得的历史机遇 -BAC009S0901W0216 坚持用现代物质条件装备农业 -BAC009S0901W0217 努力探索出一条具有中国特色的农业现代化道路 -BAC009S0901W0218 指导思想基本原则与发展目标 -BAC009S0901W0219 以邓小平理论和三个代表重要思想为指导 -BAC009S0901W0220 深入贯彻落实科学发展观 -BAC009S0901W0221 坚持走中国特色农业现代化道路 -BAC009S0901W0222 以转变农业发展方式为主线 -BAC009S0901W0223 着力强化政策科技设施装备人才和体制支撑 -BAC009S0901W0224 着力完善现代农业产业体系 -BAC009S0901W0225 提高农业现代化水平农民生活水平和新农村建设水平 -BAC009S0901W0226 坚持确保国家粮食安全 -BAC009S0901W0227 坚持立足国内实现粮食基本自给的方针 -BAC009S0901W0228 实行最严格的耕地保护和节约用地制度 -BAC009S0901W0229 加强农业基础设施建设 -BAC009S0901W0230 着力提高粮食综合生产能力 -BAC009S0901W0231 坚持和完善农村基本经营制度 -BAC009S0901W0232 在保持农村土地承包关系稳定并长久不变的前提下 -BAC009S0901W0233 推进农业经营体系体制创新 -BAC009S0901W0234 坚持科教兴农和人才强农 -BAC009S0901W0235 加快农业科技自主创新和农业农村人才培养 -BAC009S0901W0236 加快农业科技成果转化与推广应用 -BAC009S0901W0237 提高农业物质技术水装备水平 -BAC009S0901W0238 坚持政府支持农民主体社会参与 -BAC009S0901W0239 加大强农惠农富农力度 -BAC009S0901W0240 充分发挥农民的主体作用和首创精神 -BAC009S0901W0241 引导和鼓励社会资本投入农业 -BAC009S0901W0242 合力推进现代农业发展 -BAC009S0901W0243 坚持分类指导重点突破梯次推进 -BAC009S0901W0244 进一步优化农业生产力布局 -BAC009S0901W0245 因地制宜地采取有选择差别化扶持政策 -BAC009S0901W0246 支持主要农产品优势产区建设 -BAC009S0901W0247 鼓励有条件地区率先实现农业现代化 -BAC009S0901W0248 推动其他地区加快发展 -BAC009S0901W0249 全面提高农业现代化水平 -BAC009S0901W0250 现代农业建设取得明显进展 -BAC009S0901W0251 粮食等主要农产品供给得到有效保障 -BAC009S0901W0252 物质装备水平明显提高 -BAC009S0901W0253 并没有提供什么帮助 -BAC009S0901W0254 由于关于乔布斯的电话即将上演了 -BAC009S0901W0255 想了解苹果最初的事 -BAC009S0901W0256 乔布斯在最初产品开发过程中 -BAC009S0901W0257 到底发挥了什么作用 -BAC009S0901W0259 乔布斯几乎没发挥什么作用 -BAC009S0901W0261 而这都是我自己的努力 -BAC009S0901W0262 乔布斯在它出现之前都不知道它的存在 -BAC009S0901W0263 不过这话他在去年就说过 -BAC009S0901W0264 其中一个回答就说过 -BAC009S0901W0265 乔布斯不是一名工程师 -BAC009S0901W0266 他从来没有写过代码 -BAC009S0901W0267 也没有参与过任何产品的原始设计 -BAC009S0901W0268 乔帮主并没有他说的那么不堪 -BAC009S0901W0269 沃兹尼亚克自己也说 -BAC009S0901W0270 乔布斯想成为重要人物 -BAC009S0901W0271 而这种人通常是商业人士 -BAC009S0901W0272 他是一个杰出的商人 -BAC009S0901W0273 一个公司不能缺少两种人 -BAC009S0901W0274 公司的成功缺一不可 -BAC009S0901W0275 而沃兹尼亚克似乎乐于承担驱魅的角色 -BAC009S0901W0276 车库没有发挥过太大作用 -BAC009S0901W0277 除了有时候让他们觉得那里像家 -BAC009S0901W0278 车库虽然最能够代表初期创业 -BAC009S0901W0279 但是在那没做任何设计工作 -BAC009S0901W0280 他还吐槽过乔布斯电影中的桥段 -BAC009S0901W0281 他从未对产品被偷发表过任何评论 -BAC009S0901W0282 并不像乔布斯那样激动 -BAC009S0901W0283 我们外人是无法知道真相的 -BAC009S0901W0284 原创张驰乔布斯逝世已久 -BAC009S0901W0285 而苹果的另一位联合创始人沃兹尼亚克还活跃在科技圈 -BAC009S0901W0286 而且以喜欢点评各家公司着称 -BAC009S0901W0287 乔帮主在首批苹果产品的开发中 -BAC009S0901W0288 苹果股价下跌百分之五分析师出现重大分歧搜狐科技 -BAC009S0901W0289 本报记者纪佳鹏北京报道北京时间八月十二日 -BAC009S0901W0290 作为科技股领头羊的苹果股份当天下挫百分之二 -BAC009S0901W0291 人民币的贬值很可能会增加苹果设备进口的费用 -BAC009S0901W0292 这也是影响股价的一大因素 -BAC009S0901W0293 苹果股价的这轮连续下跌 -BAC009S0901W0294 从今年的七月二十一日便开始了 -BAC009S0901W0295 苹果股价已下挫了百分之七十九 -BAC009S0901W0296 不少报道与评论表示 -BAC009S0901W0299 也过分依赖于大中华地区 -BAC009S0901W0300 甚至是负增长而其中 -BAC009S0901W0301 根据近期公布的苹果第三财季业业绩 -BAC009S0901W0302 该季度苹果大中华区营收为一百三十二点三亿美元 -BAC009S0901W0303 为中国的智能制造产业做出贡献 -BAC009S0901W0304 由于该项目尚处于保密期 -BAC009S0901W0305 赵伟国并未透露更多内容 -BAC009S0901W0306 沈阳机床董事长关锡友认为 -BAC009S0901W0307 中国企业与世界企业同在同一起跑线上 -BAC009S0901W0308 中国的中高端嵌入式芯片全部从德国日本进口 -BAC009S0901W0309 德国制造业最核心的技术就是嵌入式系统 -BAC009S0901W0310 在体积能耗上存在一定的不足 -BAC009S0901W0311 紫光与沈阳机床可以在此布局 -BAC009S0901W0312 三十一九二零一五 -BAC009S0901W0313 紫光集团系清华控股旗下最主要的资产 
-BAC009S0901W0314 二零一三年二零一四年 -BAC009S0901W0315 并一举成为中国最大全球第三大通讯芯片设计公司 -BAC009S0901W0316 紫光集团还计划布局物联网网络设备芯片 -BAC009S0901W0317 二零一五年紫光集团预计收入约四百亿元 -BAC009S0901W0318 资产规模将达到六十五亿八百亿元 -BAC009S0901W0319 中国机床龙头企业沈阳机床在北京举行战略发布会 -BAC009S0901W0320 紫光股份云计算股收涨停搜狐科技 -BAC009S0901W0321 大盘股仍是毫无作为 -BAC009S0901W0322 题材股继续扮演黑马角色 -BAC009S0901W0323 紫光股份在公告扩展云计算市场后 -BAC009S0901W0324 盘中有二千六百八十六万元资金净流入 -BAC009S0901W0325 主营信息电子和环保 -BAC009S0901W0326 公司昨日发布公告称 -BAC009S0901W0327 各方本着互惠互利优势互补合作共赢的原则 -BAC009S0901W0328 通过搭建具有领先技术水平的混合云解决方案平台 -BAC009S0901W0329 共同拓展国内云计算市场 -BAC009S0901W0330 紫光股份将与世纪互联共同出资组建合资公司 -BAC009S0901W0332 搭建混合云解决方案平台 -BAC009S0901W0333 满足政府和企业级客户云计算下的定制化需求 -BAC009S0901W0334 推动公司云服务战略的实施 -BAC009S0901W0335 紫光股份拟定增募资二百二十五亿元 -BAC009S0901W0336 公司继续推进云服务战略 -BAC009S0901W0337 紫光集团和员工持股计划参与非公开增发 -BAC009S0901W0338 医生此次将对惠若琪的心脏进行微创手术 -BAC009S0901W0339 彻底解决目前存在的隐患 -BAC009S0901W0340 惠若琪将在微创手术后回到南京调养 -BAC009S0901W0341 张蓉芳主持排管中心 -BAC009S0901W0342 成就了中国女排五连冠伟业 -BAC009S0901W0343 北京时间九月十七日 -BAC009S0901W0344 已经确定本赛季不会参加任何的比赛 -BAC009S0901W0345 明年春天普鲁申科将再次进行手术 -BAC009S0901W0346 作为有史以来天赋最高的花样滑冰运动员之一 -BAC009S0901W0347 普鲁申科的职业生涯却堪称多灾多难 -BAC009S0901W0348 他屡次受到伤病的困扰 -BAC009S0901W0349 背伤更是常年阻碍着他的发挥 -BAC009S0901W0350 去年的索契冬奥会上 -BAC009S0901W0351 赛后有媒体发布了他几乎扭曲的背部肌肉的照片 -BAC009S0901W0352 照片中看到普鲁申科的背部肌肉伤痕累累 -BAC009S0901W0353 全都是手术缝合的痕迹 -BAC009S0901W0354 他不仅动过多次肌肉手术 -BAC009S0901W0355 连身上的痛觉神经都进行了更换 -BAC009S0901W0356 普鲁申科出人意料地宣布复出 -BAC009S0901W0357 表示愿意再征战一个冬奥会周期 -BAC009S0901W0358 但就在外界期待着冰王子的卷土重来时 -BAC009S0901W0359 本赛季的各项赛事参赛名单上却都没有见到他的身影 -BAC009S0901W0360 据外媒最新的爆料显示 -BAC009S0901W0361 普鲁申科被诊断患上了一种新的脊椎疾病 -BAC009S0901W0362 这也让他必须在明年春天进行一次小手术来加以治疗 -BAC009S0901W0363 普鲁申科将错过整个二零一五二零一六季一六赛季的比赛 -BAC009S0901W0364 普鲁申科丝毫没有隐退的打算 -BAC009S0901W0365 他还在积极地为二零一八年韩国平昌冬奥会进行着准备 -BAC009S0901W0367 因为卷入兴奋剂丑闻 -BAC009S0901W0368 朴泰桓无法加入海外先进的训练团队进行训练 -BAC009S0901W0369 转投到昔日恩师卢民相任教练的游泳俱乐部训练 -BAC009S0901W0370 但遭到了韩国国内舆论的非议 -BAC009S0901W0372 到今年十二月为止将在东京的法政大学进行训练 -BAC009S0901W0373 备战明年的里约奥运会 -BAC009S0901W0374 但法政大学很快公开辟谣 -BAC009S0901W0375 韩国媒体报道称朴泰桓确实人在日本 -BAC009S0901W0376 他状告首尔某美容医院的官司将在十一月迎来终审 -BAC009S0901W0377 判决结果成为他能否参加里约奥运的变数 -BAC009S0901W0378 据韩国体育首尔的最新消息 -BAC009S0901W0379 二十一日抵达日本的朴泰桓目前确实在东京 -BAC009S0901W0380 计划在那里进行三个月的封闭训练 -BAC009S0901W0381 备战明年的里约奥运 -BAC009S0901W0382 但他的具体行踪成为谜团 -BAC009S0901W0383 能否在里约奥运东山再起 -BAC009S0901W0384 不仅要看他的竞技状态恢复程度 -BAC009S0901W0385 首先要跨过大韩体育会这一关 -BAC009S0901W0386 朴泰桓的禁期禁赛期将在明年三月期满 -BAC009S0901W0387 因为服用禁药被停赛的选手在禁赛期满起的三年内 -BAC009S0901W0388 都无法代表韩国参加国际比赛 -BAC009S0901W0389 义不容辞地想拯救运动生涯在绝境中的朴泰桓 -BAC009S0901W0390 废除这个第五条第六项 -BAC009S0901W0391 为他参加里约奥运扫清最后的障碍 -BAC009S0901W0392 体育首尔的报道分析 -BAC009S0901W0393 大韩体育会这一计划的顺利实施 -BAC009S0901W0394 最终判决结果将在十一月出炉 -BAC009S0901W0395 如果该医院罪名被判成立的话 -BAC009S0901W0396 这样一来可以获得韩国舆论的同情和理解 -BAC009S0901W0397 民众自然会支持大韩体育会给他一个人修改规则 -BAC009S0901W0398 如果美容院的医疗过失罪名不成立 -BAC009S0901W0399 朴泰桓会面临更加严峻的舆论环境 -BAC009S0901W0400 这场官司的前五次公判 -BAC009S0901W0401 朴泰桓和美容院都互不相让 -BAC009S0901W0402 一度让不少粉丝心碎不已 -BAC009S0901W0403 退役之后的高桥大辅并未远离公众视线 -BAC009S0901W0404 瓦尔兹将精心演绎这个著名角色 -BAC009S0901W0405 其首脑恩斯特布鲁菲尔是邦德的最终敌人 -BAC009S0901W0406 这个角色拥有一只白色的波斯猫作为自己的宠物 -BAC009S0901W0407 值得一提的是 -BAC009S0901W0408 搞怪调皮吐舌卖萌娱乐频道 -BAC009S0901W0409 搜狐娱乐讯八月九日晚 -BAC009S0901W0410 陈冠希在微博晒出一段小视频 -BAC009S0901W0411 陈冠希开始一直把镜头对着帽子上的皮卡丘 -BAC009S0901W0412 后来突然冒出头来 -BAC009S0901W0413 对着镜头吐舌卖萌 -BAC009S0901W0414 搜狐娱乐讯九月二日凌晨 -BAC009S0901W0415 陈冠希在微博晒出一张自拍照 -BAC009S0901W0416 陈冠希穿休闲短袖配宽松裤子 -BAC009S0901W0417 网友纷纷留言越来越像潮流教父了 -BAC009S0901W0418 这裙子娇艳 -BAC009S0901W0419 帅出新高度 -BAC009S0901W0420 搜狐娱乐讯据台湾媒体报道 -BAC009S0901W0421 多次想复合却无下文 -BAC009S0901W0422 感情事备受关注 
-BAC009S0901W0423 前天他在脸书晒出自拍照 -BAC009S0901W0424 满脸黑斑与大眼袋 -BAC009S0901W0425 老残样再度乍现 -BAC009S0901W0426 搜狐娱乐讯据台湾媒体报道 -BAC009S0901W0427 事后解释是生活观不同才分开 -BAC009S0901W0428 但隔年三月却又分享一张女方坐他大腿的照片 -BAC009S0901W0429 一度让外界以为两人复合 -BAC009S0901W0430 但现在又有别的女孩坐上他的大腿 -BAC009S0901W0431 校长邱勇上任后首次参加学生毕业典礼并演讲 -BAC009S0901W0432 追求使命需要有强大的定力昨日上午 -BAC009S0901W0433 他叮嘱五千馀名毕业生 -BAC009S0901W0434 要有清晰的目标人文情怀和做到执着坚守 -BAC009S0901W0435 清华法学院教授司法改革应限制两长权力 -BAC009S0901W0436 本报讯记者汪红日前 -BAC009S0901W0437 对允许其亲自过问的案件提出严格限定标准 -BAC009S0901W0438 清华辟谣保安迫降无人机为人为诋毁 -BAC009S0901W0439 该事件引发广泛关注 -BAC009S0901W0440 清华大学通过调取监控录线发现 -BAC009S0901W0441 该保安为附近大厦保安 -BAC009S0901W0442 目前该保安承认有人花二百元雇他进行拍照 -BAC009S0901W0443 称当时几位学生模样的人让他帮忙配合拍照用来宣传 -BAC009S0901W0444 抓着男生的动作为摆拍 -BAC009S0901W0445 摔毁无人机一事为杜撰 -BAC009S0901W0446 保安得知自己被骗后表示我真的很恨他们 -BAC009S0901W0447 记者联系发微博男子 -BAC009S0901W0448 他表示我没想到弄这么大 -BAC009S0901W0449 目前该男子已将微博内容全部删除 -BAC009S0901W0450 清华附小昨迎百年校庆校长诠释成志教育理念 -BAC009S0901W0451 清华附小校长窦桂海诠释成志教育理念 -BAC009S0901W0452 清晨飘来辣眼白雾济南八名村民中毒入院 -BAC009S0901W0453 赵女士的公公躺在病床上 -BAC009S0901W0454 目前神志已恢复清醒 -BAC009S0901W0455 记者李焜染摄十三日早晨 -BAC009S0901W0456 历城区港沟镇神武村飘来多股白色不明气体 -BAC009S0901W0457 八位村民先后出现中毒症状 -BAC009S0901W0458 目前八人均已脱离生命危险 -BAC009S0901W0459 神秘气体成分及来源正在进一步核实 -BAC009S0901W0460 清洁工开宝马上下班真实身份为在逃诈骗犯 -BAC009S0901W0461 彭某下班后准备开车离开 -BAC009S0901W0462 清洁工开宝马上班被称励志故事经查系逃犯 -BAC009S0901W0463 十四日开宝马来上班重庆晨报记者罗伟雷罗伟雷键摄 -BAC009S0901W0464 清洁工被电梯咬断腿曾反映这样擦电源危险 -BAC009S0901W0465 制图黄欣晨报记者佟继萍王亦菲实习生张诗欢 -BAC009S0901W0466 网络时代信息的存在有了新方式 -BAC009S0901W0467 云盘就是一种直接把信息存在网络空间里的存储工具 -BAC009S0901W0468 和传统硬盘不同的是 -BAC009S0901W0469 用户不需要把它带在身上 -BAC009S0901W0470 只需要一个账户名和密码 -BAC009S0901W0471 就可以在网络环境下 -BAC009S0901W0472 上传读取和下载里面的信息 -BAC009S0901W0473 本来云盘的出现方便了人们的生活和工作 -BAC009S0901W0474 把云盘变成了一个淫秽色情信息的隐蔽聚散地 -BAC009S0901W0475 清风正在吹散互联网雾霾 -BAC009S0901W0476 四年前的一幕仍没从夏英俊的记忆中抹去 -BAC009S0901W0477 渐冻男孩驾驶电动轮椅上班医生曾诊断活不过十八岁 -BAC009S0901W0478 蔡兴桥在妈妈的帮助下靠墙练习站立 -BAC009S0901W0479 渔民南海捞出外国间谍潜航器搜集情报或已传回 -BAC009S0901W0480 在许多人眼里这都是小说和电影里才会出现的情节 -BAC009S0901W0481 可实际上维护国家边海防安全保护国家利益不受侵犯 -BAC009S0901W0482 这样的斗争和考验有时就发生在我们身边 -BAC009S0901W0483 南海的渔民在捕鱼的时候就曾捞出过一个奇怪的东西 -BAC009S0901W0484 由此引出一起重大安全案件 -BAC009S0901W0485 渔民在南海打捞起可疑电子装置确系无人潜航器 -BAC009S0901W0486 经国家安全部门会同有关技术权威部门鉴定 -BAC009S0901W0487 它既能搜集我国重要海域内各类环境数据 -BAC009S0901W0488 又能探测获取我海军舰队活动动向 -BAC009S0901W0489 实现近距离侦查和情报收集任务 -BAC009S0901W0490 渔民投诉遭离奇执法被派出所讨价还价式罚款 -BAC009S0901W0491 海南临高籍多位渔民向中新网记者反应称 -BAC009S0901W0492 二十二日在文昌市清澜港边防派出所执法检查时 -BAC009S0901W0493 渔民缴纳罚款后在摁手印时 -BAC009S0901W0494 被民警用针扎破手指 -BAC009S0901W0495 让他们很担心会不会相互传染疾病 -BAC009S0902W0121 所以审批加上放款的时间最快也要在七个半月左右 -BAC009S0902W0122 作为取之于民用之于民的住房公积金 -BAC009S0902W0123 缴存者还可以在租房装修离退休时提取 -BAC009S0902W0124 因此操作环节的快捷性与便捷性非常重要 -BAC009S0902W0125 后续要加大公积金贷款的便利性 -BAC009S0902W0126 鼓励购房者积极缴纳公积金 -BAC009S0902W0127 进而选择此类方式购房 -BAC009S0902W0128 另外要处理公积金异地使用的问题 -BAC009S0902W0129 这对于目前一线城市来说很紧要 -BAC009S0902W0130 很多人受限购政策的影响 -BAC009S0902W0131 难以在周边城市用公积金购房 -BAC009S0902W0132 导致公积金资源闲置的问题出现 -BAC009S0902W0133 美丽北京大型绿色公益品牌项目 -BAC009S0902W0134 随着广州住房公积金贷款政策的调整实施 -BAC009S0902W0135 政策内容主要涉及购房 -BAC009S0902W0136 随着广州住房公积金贷款政策的调整实施 -BAC009S0902W0137 公积金贷款最高额度亦不同程度上调 -BAC009S0902W0138 住房公积金贷款因其利率较低的优势 -BAC009S0902W0139 一直以来广受购房者青睐 -BAC009S0902W0140 本轮本轮住房公积金房贷政策调整 -BAC009S0902W0141 进一步加速了消费者的入市节奏 -BAC009S0902W0142 广州调整住房公积金个人住房贷款政策 -BAC009S0902W0143 同时对申请公积金贷款的缴纳时限调整为七个月 -BAC009S0902W0144 据广州日报昨天报道 -BAC009S0902W0145 公积金贷款首付比例降低的消息令购房者喜出望外 -BAC009S0902W0146 其中刚需要买入市积极性明显提高 -BAC009S0902W0147 据伟嘉安捷提供的数据显示 -BAC009S0902W0148 北京公积金贷款首付比例松绑一周后 -BAC009S0902W0149 公积金贷款及组合贷咨询量明显上涨 -BAC009S0902W0150 尤其组合贷的咨询量较上月月初一上涨百分之七左右 
-BAC009S0902W0151 上海深圳等主要城市也在公积金新政推动下 -BAC009S0902W0152 呈现购房者积入市的行情 -BAC009S0902W0153 全国已有超百个城市发布了不同力度的公积金松绑政策 -BAC009S0902W0154 加之降息降准等政策组合拳 -BAC009S0902W0155 呈现出量价齐涨的局面 -BAC009S0902W0156 据中国指数研究院最新数据显示 -BAC009S0902W0157 深圳环比上上涨百分之七 -BAC009S0902W0158 涨幅据十大城市之首 -BAC009S0902W0159 五月份多地楼市的成交量明显上涨 -BAC009S0902W0160 是房地产当前发局格局下的一个必然 -BAC009S0902W0161 唯独这样才能盘活公积金资源 -BAC009S0902W0162 促使更多购房者积极入市 -BAC009S0902W0163 伴随着各地住房公积金新政的落地实施 -BAC009S0902W0164 楼市进展仍需进一步观望 -BAC009S0902W0165 购房者受惠于政策利好的同时 -BAC009S0902W0166 公积金在申请放贷流程上并未提速 -BAC009S0902W0167 相反相关环节上审批更加严格 -BAC009S0902W0168 从目前上海住房公积金的具体政策看 -BAC009S0902W0169 购房的扶持力度在加大 -BAC009S0902W0170 但主要还是体现在贷款成本的降低 -BAC009S0902W0171 而申请公积金贷款方面还是需要走严格的流程 -BAC009S0902W0172 公积金提取一直是目前试图突破的内容 -BAC009S0902W0173 但目前还未出现大面积提取行为 -BAC009S0902W0174 来自广州日报的报道称 -BAC009S0902W0175 从申请到最后的放款 -BAC009S0902W0176 部分客户甚至等两个多月 -BAC009S0902W0177 如果申请公积金贷款及公积金贷款与商业贷款的组合贷 -BAC009S0902W0178 伟嘉安捷对中新网房产频道表示 -BAC009S0902W0179 现在公积金贷款办理需要一个月左右的时间 -BAC009S0902W0180 而申请办理组合贷款的手续则更为复杂 -BAC009S0902W0181 所以审批加上放款的时间最快也要在五个半月左右 -BAC009S0902W0182 作为取之于民用之于民的住房公积金 -BAC009S0902W0183 缴存者还可以在租房装修离退休时提取 -BAC009S0902W0184 因此操作环节的快捷性与便捷性非常重要 -BAC009S0902W0185 后续要加大公积金贷款的便利性 -BAC009S0902W0186 鼓励购房者积极缴纳公积金 -BAC009S0902W0187 科技支撑能力显着增强 -BAC009S0902W0188 生产经营方式不断优化 -BAC009S0902W0189 农业产业体系更趋完善 -BAC009S0902W0190 土地产出率劳动生产率资源利用率显着提高 -BAC009S0902W0191 现代农业建设取得突破性进展 -BAC009S0902W0192 主要农产品优势区基本实行农业现代化 -BAC009S0902W0193 现代农业发展主要指标类别 -BAC009S0902W0194 粮食综合生产能力五亿吨 -BAC009S0902W0195 粮食播种面积五亿亩棉花总产量七万吨 -BAC009S0902W0196 油料总产量七万吨 -BAC009S0902W0197 肉类总产量五万吨 -BAC009S0902W0198 奶类总产量七万吨水产品总产量七万吨 -BAC009S0902W0199 农产品质量安全例行监测总体合格率百分之五十 -BAC009S0902W0200 畜牧业产值占农业总产值比重百分之 -BAC009S0902W0201 渔业产值占农业总产值比重百分之 -BAC009S0902W0202 农产品加工业产值与农业总产值 -BAC009S0902W0203 丰富和解调仲裁诉等维权内容和方式 -BAC009S0902W0204 新增农田有效灌溉面积万亩 -BAC009S0902W0205 耕种收综合机械化水平百分之五 -BAC009S0902W0206 丰富和解调解仲裁诉诉讼等 -BAC009S0902W0207 科技科技进步贡献率百分之七 -BAC009S0902W0208 农村实用人才总量万人 -BAC009S0902W0209 农业产业化组织带动农户数量亿户 -BAC009S0902W0210 团结就是力量 -BAC009S0902W0211 适宜农户沼气普及率百分之五 -BAC009S0902W0212 农作物秸秆综合利用率百分之五 -BAC009S0902W0213 薛之谦的歌儿很棒 -BAC009S0902W0214 农林牧渔业增长值年均增长率百分之五 -BAC009S0902W0215 增长速度按可比价格计算 -BAC009S0902W0216 从加快转变农业发展的方式关键环节入手 -BAC009S0902W0217 完善现代农业产业体系 -BAC009S0902W0218 稳定发展粮食和棉油糖生产 -BAC009S0902W0219 实施全国增长千亿斤粮食生产能力规划 -BAC009S0902W0220 积极推进南方稻区单改双 -BAC009S0902W0221 扩大东北优势区粳稻种植面积 -BAC009S0902W0222 稳步推进江淮等粳高稻生产适宜区糟改粳 -BAC009S0902W0223 稳定增加玉米播种面积 -BAC009S0902W0224 积极恢复和稳定大豆种植面积 -BAC009S0902W0225 积极开发和选育马铃薯优质专用高产品种 -BAC009S0902W0226 提高脱毒种薯供给能力 -BAC009S0902W0227 继续加强优质棉花生产基地建设 -BAC009S0902W0228 多油并举稳定食用植物油自给率 -BAC009S0902W0229 基本满足国内棉花消费需求 -BAC009S0902W0230 积极发展菜篮子产品生产 -BAC009S0902W0231 加强蔬菜水果肉蛋奶水产品等产品优势产区建设 -BAC009S0902W0232 扩大大中城市郊区菜篮子产品生产基地规模 -BAC009S0902W0233 推动苹果柑橘等优势园艺产品生产 -BAC009S0902W0234 稳定发展生猪和蛋禽 -BAC009S0902W0235 大力发展农产品加工和流通业 -BAC009S0902W0236 加强主要农产品优势产区加工基地建设 -BAC009S0902W0237 引导农产品加工业向种养业优势区域和城市郊区集中 -BAC009S0902W0238 启动实施农产品加工提升工程 -BAC009S0902W0239 提高生产流通组织化程度 -BAC009S0902W0240 培育一批产值过百亿元的大型加工和流通企业集团 -BAC009S0902W0241 强化流通基础设施建设和产销信息引导 -BAC009S0902W0242 升级改造农产品批发市场 -BAC009S0902W0243 支持优势产区现代化鲜活农产品批发市场建设 -BAC009S0902W0244 大力发展冷链体系和生鲜农产品配送 -BAC009S0902W0245 推进订单生产和农超对接 -BAC009S0902W0246 落实鲜活农产品运输绿化通道政策 -BAC009S0902W0247 降低农产品流通成本 -BAC009S0902W0248 规范和完善农产品期货市场 -BAC009S0902W0249 强化农业科技和人才支撑 -BAC009S0902W0250 增强农业科技自主创新能力 -BAC009S0902W0251 明确农业科技的公共性基础社会性地位 -BAC009S0902W0252 加强基础性前沿性公益性重大农业科学技术研究 -BAC009S0902W0253 比去年同期的六十二点三十亿美元大幅增长百分之十二 -BAC009S0902W0254 系涨幅最为明显的地区 -BAC009S0902W0255 占总营收的二十六点百分之六十点六十七 -BAC009S0902W0257 苹果的股价有一定的波动规律 -BAC009S0902W0258 
即是在新品发布前的一个多季度的时间内 -BAC009S0902W0259 因为在新品发布之前 -BAC009S0902W0260 由于许多用户都持币待购 -BAC009S0902W0261 因此需求会暂时被抑制住 -BAC009S0902W0262 销量都会有一定的影响 -BAC009S0902W0263 孙永杰对二十一世纪报道记者表示 -BAC009S0902W0264 苹果的股价会随着销量相反 -BAC009S0902W0266 缺乏新的业务增长点 -BAC009S0902W0267 苹果在二零一四年营收为二百二十二亿美元 -BAC009S0902W0269 就手机领域的发展趋势 -BAC009S0902W0270 苹果高端市场已经确立了一个独一无二的地位 -BAC009S0902W0271 以前在高端智能手机市场领域 -BAC009S0902W0272 苹果有两个竞争对手 -BAC009S0902W0274 今年股价已经累计下跌了百分之六十而三星的情况也不佳 -BAC009S0902W0275 在三星第二季度财报中 -BAC009S0902W0277 降至二十六点零六万亿韩元 -BAC009S0902W0278 其中手机的销售额下降了七点百分之三 -BAC009S0902W0279 至二十五点五万亿韩元 -BAC009S0902W0280 在安卓手机的总体交付量中 -BAC009S0902W0281 价格高于六百美元的高端手机占比为百分之一 -BAC009S0902W0282 价格高于六百美元的高端手机 -BAC009S0902W0283 在安卓出货量中的占比减少到了百分之六 -BAC009S0902W0285 价格高于六百美元的占比从百分之七十增加到了百分之八十 -BAC009S0902W0287 苹果在高端市场击溃了对手 -BAC009S0902W0288 而这对于未来苹果保持高利润和利润率至关重要 -BAC009S0902W0289 这对苹果是一个利好 -BAC009S0902W0290 意味着只要用户要选择高端手机 -BAC009S0902W0291 在类似印度之类的新兴市场 -BAC009S0902W0292 因此从全球的角度来看 -BAC009S0902W0293 智能手机仍然有增长和爆发的空间 -BAC009S0902W0295 但是作为一个仍在成长没有其他对手的市场来讲 -BAC009S0902W0296 苹果已然可以单点突破 -BAC009S0902W0297 至于新的业务增长点 -BAC009S0902W0298 但是瑞士联合银行分析师估计 -BAC009S0902W0299 较最初的预期减少了一半左右 -BAC009S0902W0302 已经占有了全球智能手表市场的百分之五 -BAC009S0902W0303 云计算和大数据时代 -BAC009S0902W0305 紫光股份曾经出现一连波连续十六个一字涨停的狂飙行市 -BAC009S0902W0306 近日的走势也强于大盘 -BAC009S0902W0307 两个机构专用席位列于买一和卖二的位置 -BAC009S0902W0308 买卖前五名共计净出于该股六十二点九三万元 -BAC009S0902W0309 大盘股仍是毫无作为 -BAC009S0902W0310 题材股继续扮演黑马角色 -BAC009S0902W0311 紫光股份千九十三八在公布拓展云计算市场后 -BAC009S0902W0312 盘中有两千六百八十六万元资金净流入 -BAC009S0902W0313 给孩子买儿童电话手表有必要吗 -BAC009S0902W0314 消费者在听销售人员介绍小天才手表 -BAC009S0902W0315 消费者在听销售人员介绍小天才手表 -BAC009S0902W0316 消费者在听销售人员介绍小天才电话手表 -BAC009S0902W0317 很多家长都在给孩子购置各种学习用品 -BAC009S0902W0318 除了传统的书包文具以及辅导书外 -BAC009S0902W0319 这个儿童电话手表以其强大的定位通话微聊等功能 -BAC009S0902W0320 深受家长和儿童欢迎 -BAC009S0902W0321 很多孩子都以拥有一款电话手表为豪 -BAC009S0902W0322 而不少品牌的电话手表量销售量更是突破百万大关 -BAC009S0902W0323 电话手表对儿童健康安全是否有危险 -BAC009S0902W0324 老师是否允许孩子戴手表上学 -BAC009S0902W0325 电话手表应该如何选购 -BAC009S0902W0326 笔者进行了深度的了解 -BAC009S0902W0327 儿童电话手表到底有多火 -BAC009S0902W0328 年龄或大或小的孩子 -BAC009S0902W0330 都会目不转睛的盯着 -BAC009S0902W0331 或者跟着广告哼起歌曲来 -BAC009S0902W0332 随着产品快速进入家长和孩童的视野 -BAC009S0902W0333 每天的销量让你感受到儿童电话手表的火爆 -BAC009S0902W0334 对于如此火爆的市市场需求 -BAC009S0902W0335 来自广西的苏女士说家长对孩子安全的关心 -BAC009S0902W0336 是电话手表今年大受欢迎的主要原因 -BAC009S0902W0337 在电话手表出现之前 -BAC009S0902W0338 据悉他正在积极进修表演准备进入演艺圈 -BAC009S0902W0339 近日日本媒体曝出惊人消息 -BAC009S0902W0340 称高桥大辅可能在一段时间里出柜 -BAC009S0902W0341 公开自己的同性恋者身份 -BAC009S0902W0342 恐怕又要传来不少女粉丝心碎的声音了 -BAC009S0902W0343 高桥大辅堪称日本花样滑冰男单领域的领军人物 -BAC009S0902W0344 在他的职业生涯里曾在二零一零年拿到世锦赛金牌 -BAC009S0902W0345 温哥华冬奥会拿到铜牌 -BAC009S0902W0346 一二年总决赛拿到金牌 -BAC009S0902W0347 还曾经两次拿到了四大洲锦标赛的男单冠军 -BAC009S0902W0348 表示未来会进入演艺圈发展 -BAC009S0902W0349 颜值颇高的他今年四月远赴美国纽约 -BAC009S0902W0350 高桥大辅丝毫不加掩饰 -BAC009S0902W0351 他经常在社交网站公开美食等照片 -BAC009S0902W0352 看起来在美国过得很开心的样子 -BAC009S0902W0353 过去一直背负着日本花滑界的重压 -BAC009S0902W0354 终于得到了释放的样子 -BAC009S0902W0355 他每周二三天来学校 -BAC009S0902W0356 还有记者爆料说居住在纽约的日本人透露 -BAC009S0902W0357 高桥在当地过着奢华享乐的生活 -BAC009S0902W0358 如果真的想学习的话 -BAC009S0902W0359 就不会刻意选择位于纽约闹市区的这所大学 -BAC009S0902W0360 图片中他们一行人面对镜头尽显搞怪天赋 -BAC009S0902W0361 高桥大辅则是噘着嘴做出索吻的动作 -BAC009S0902W0362 外界认为这是一种另有深意的暗示 -BAC009S0902W0363 而对于他的好友小林尊 -BAC009S0902W0364 被认为日本体育界的相关人士称 -BAC009S0902W0365 但多年来关于他的形婚 -BAC009S0902W0366 实际上是同性恋者的传闻一直未停过 -BAC009S0902W0367 和澳洲鱼雷索普一样 -BAC009S0902W0368 高桥大辅因为其比赛风格的妖娆多变 -BAC009S0902W0369 多年来围绕其性取向的争论一直没有停息 -BAC009S0902W0370 退役前高桥大辅曾与花滑女神浅田真央传出恋情 -BAC009S0902W0371 身为上司而且已婚有儿女的桥本被指责涉嫌性侵 -BAC009S0902W0372 不过两位当事人双双否认性侵的说法 -BAC009S0902W0373 如今和小林尊出双入对 -BAC009S0902W0374 高调参加同性恋者的年度盛事 -BAC009S0902W0375 
有可靠消息称高桥很可能在近期正式宣布出柜 -BAC009S0902W0376 此消息一出迅速引发外界强烈关注 -BAC009S0902W0377 日本网友也是众说纷纭一点儿也不吃惊 -BAC009S0902W0378 看他在冰场上搔首弄姿地表现 -BAC009S0902W0379 高桥大辅应该是他的新欢 -BAC009S0902W0380 难怪他能接受年过半百的桥本的索吻 -BAC009S0902W0381 许多为高桥痴迷多年的女粉丝肯定深受打击 -BAC009S0902W0382 作为日本的花滑王子 -BAC009S0902W0383 这么多年一直要压抑自己的性取向 -BAC009S0902W0384 挺不容易的支持他追属属于自己的真正幸福 -BAC009S0902W0385 据美联社十日报道 -BAC009S0902W0386 一些参赛选手赛后感到胃部不适 -BAC009S0902W0387 而队医怀疑这或许与比赛地水污染有关 -BAC009S0902W0388 美国队官员不排除他们的队员因食物或饮水而生病 -BAC009S0902W0389 近来有关里约水污染问题备受关注 -BAC009S0902W0390 美联社公布的一项独立水质检测显示 -BAC009S0902W0391 在奥运会赛艇和铁人三项公开水域等比赛地 -BAC009S0902W0392 也存在高危病毒危险 -BAC009S0902W0393 该湖区也将是明年奥运会赛艇比赛地 -BAC009S0902W0394 比污染严重的瓜内巴拉湾相比 -BAC009S0902W0395 赛艇比赛所在湖区的水污染问题近年来得到改善 -BAC009S0902W0396 但是上周公布的水质检测显示 -BAC009S0902W0397 湖区水污染仍旧十分严重 -BAC009S0902W0398 在本次赛艇测试赛期间 -BAC009S0902W0399 一些参赛选手也向新华社记者表示 -BAC009S0902W0400 比赛地的湖水比较浑浊 -BAC009S0902W0401 但还是担心水质问题 -BAC009S0902W0402 来自中国的赛艇选手崔帅豪说 -BAC009S0902W0403 比赛地水不是太干净 -BAC009S0902W0404 他自己还将出任影片的男主角 -BAC009S0902W0405 忙碌成本可想而知 -BAC009S0902W0406 外媒发布了更令人兴奋的消息 -BAC009S0902W0407 将在本届美国电影学会影展中进行秘密放映 -BAC009S0902W0408 对方是二十五岁的人妻名模泰舒培 -BAC009S0902W0409 搜狐娱乐讯七月十五日 -BAC009S0902W0410 陈冠希前女友嫩模黄榕在香港书展出席写真宣传活动 -BAC009S0902W0411 身穿白色抹胸的她大秀性感好身材 -BAC009S0902W0412 谈及前男友陈冠希近日被指外貌衰老了不少 -BAC009S0902W0413 黄榕坦言可能他做了太多运动 -BAC009S0902W0414 搜狐娱乐讯日前 -BAC009S0902W0415 众星云集上海出席某商家的开业活动 -BAC009S0902W0416 由潮男陈冠希打头阵 -BAC009S0902W0417 更云集了罗中旭前任 -BAC009S0902W0418 黄宗泽绯闻女友等女星 -BAC009S0902W0419 现场气氛火爆 -BAC009S0902W0420 粉丝们一度失控 -BAC009S0902W0421 陈冠希坦言认为陈奕迅是k歌之王 -BAC009S0902W0422 但由于风格不同 -BAC009S0902W0423 新专辑音乐方面还是坚持做自己 -BAC009S0902W0424 搜狐娱乐讯九月五日 -BAC009S0902W0425 一怒之下把大叔身份证扔在地上 -BAC009S0902W0426 二人发生姓肢体冲突 -BAC009S0902W0427 此视频曝光后 -BAC009S0902W0428 网友纷纷力挺陈冠希 -BAC009S0902W0429 温兆伦许飞欧弟等明星也通过微博表示支持力挺 -BAC009S0902W0430 搜狐娱乐讯北京时间六月二十四日消息 -BAC009S0902W0431 渔船凶案嫌疑借发动机声将同船同事依次杀害 -BAC009S0902W0432 渔船海上爆炸沉没四名渔民漂流三天获救 -BAC009S0902W0433 昨天上午七点五零分 -BAC009S0902W0434 目前正在根据海事部门的要求开往盐城大分港 -BAC009S0902W0435 准备将获救的四人送上岸边医院救治 -BAC009S0902W0436 渔船海上被撞翻仅一人逃生同伴求救却无能为力 -BAC009S0902W0437 出事的渔船被拖到韩榆石桥海边 -BAC009S0902W0438 渔船被其他船撞翻六人死海事部门悬赏五万寻肇事者 -BAC009S0902W0439 快报讯通讯员李欢乐记者王晓宇八月二六日 -BAC009S0902W0440 船上八名船员六人不幸遇难 -BAC009S0902W0441 只有一名船员得以逃生 -BAC009S0902W0442 渝武高速武胜段发生追尾事故已造成六死九伤 -BAC009S0902W0443 记者从广安消防部门获悉 -BAC009S0902W0444 大客车的车头和车身损毁严重 -BAC009S0902W0445 车辆载有数十名乘客 -BAC009S0902W0446 截至九点四零分消防人员撤离时 -BAC009S0902W0447 已造成六人死亡九人受伤 -BAC009S0902W0448 目前记者正赶往武胜县人民医院 -BAC009S0902W0449 渝蓉高速四川段计划明年通车被称最拖沓高速 -BAC009S0902W0450 渝蓉高速四川段因烂尾被称为最拖沓高速 -BAC009S0902W0451 渝蓉高速四川段资金断裂烂尾已修了六年 -BAC009S0902W0452 渣土车右转弯骑车男童被卷入车轮下不幸身亡 -BAC009S0902W0453 肇事车及损伤严重的自行车报料人供图 -BAC009S0902W0454 渣土车挂倒电动车致一死一伤肇事车主逃逸 -BAC009S0902W0455 蚌飞市发生一起惨剧 -BAC009S0902W0456 一对男女骑电动车在通过一个十字路口时 -BAC009S0902W0457 被同方向行驶的一辆渣土车挂倒 -BAC009S0902W0458 骑电动车男子当场死亡 -BAC009S0902W0459 但渣土车司机肇事后不仅没有下车救援 -BAC009S0902W0460 目前当地警方正在追查这名司机 -BAC009S0902W0461 渣土车撞进路边民房女子抱小孩幸运逃生 -BAC009S0902W0462 山水湾小区斜对面的一处工地旁 -BAC009S0902W0463 肇事的大卡车车头仍然卡在工房内 -BAC009S0902W0464 图记者陈斌潇湘晨报长沙讯一零月一一日下午 -BAC009S0902W0465 长沙县湘龙西路一处十字路口 -BAC009S0902W0466 一辆红色的卡车和一辆黄色的渣土车发生碰撞 -BAC009S0902W0467 黄色渣土车一头撞进了路边的工房 -BAC009S0902W0468 被撞废的奔驰昨日二二时左右 -BAC009S0902W0469 省城政务区习友路与怀宁路交叉口 -BAC009S0902W0470 一辆渣土车突然冲向逆向车道 -BAC009S0902W0471 连续撞了五辆小轿车最终才停了下来 -BAC009S0902W0472 其中一辆奔驰轿车被撞出近一零米远 -BAC009S0902W0473 渤海一渔船沉没船上一六人落水一二人失踪 -BAC009S0902W0474 唐山乐亭一船队在渤海与一山东渔船发生纠纷 -BAC009S0902W0475 导致唐山一渔船沉没 -BAC009S0902W0476 但因海上风大浪急影响救援 -BAC009S0902W0477 目前仍未发现失踪船员 -BAC009S0902W0478 渤海垃圾成堆变死海 -BAC009S0902W0479 与韩国西海相连的中国渤海湾由于垃圾堆积 -BAC009S0902W0480 有人忧虑渤海湾的污染会直接影响到韩国西部海域 -BAC009S0902W0481 
渤海失事河北籍渔船已致四人遇难仍有八人失踪 -BAC009S0902W0482 又在船仓内发现四名船员遗体 -BAC009S0902W0483 目前仍有八名失踪人员下落不明 -BAC009S0902W0484 渤海湾溢油事故赔偿案宣判康菲公司被判赔一六八万 -BAC009S0902W0485 温岭倒塌厂房系违章建筑涉事负责人已被控制 -BAC009S0902W0486 据新华社电七月四日一六时许 -BAC009S0902W0487 浙江温岭市大溪镇发生鞋厂厂房倒塌事故 -BAC009S0902W0488 共造成一四人死亡三三人受伤 -BAC009S0902W0489 事故厂房系违章建筑 -BAC009S0902W0490 此前已被列入拆除范围 -BAC009S0902W0491 涉事两企业负责人均已被控制 -BAC009S0902W0492 温岭医院助理殴打女病人五年后提拔为副院长 -BAC009S0902W0493 法晚深度即时记者杜雯雯实习生张明明近日 -BAC009S0902W0494 此关于滕灵方此后晋升为副院长一事 -BAC009S0902W0495 该医院党委书记杨幼萍向晚报记者表示 -BAC009S0903W0121 进而选择此类方式购房 -BAC009S0903W0122 另外要处理公积金异地使用的问题 -BAC009S0903W0123 这对于目前一线城市来说很紧要 -BAC009S0903W0124 很多人受限购政策的影响 -BAC009S0903W0125 难以在周边城市用公积金购房 -BAC009S0903W0126 导致公积金资源闲置的问题出现 -BAC009S0903W0127 中新网房产频道 -BAC009S0903W0128 随着广州住房公积金贷款政策的调整实施 -BAC009S0903W0129 北上广深四个一线城市已经全部放开公积金房贷业 -BAC009S0903W0130 公积金新政加速楼市库存消化至搜狐财经 -BAC009S0903W0131 住建部等三部委联合发文 -BAC009S0903W0132 再次降低公积金贷款的门槛 -BAC009S0903W0133 还清首套房公积金贷款 -BAC009S0903W0134 再次申请公积金贷款购买第二套房的 -BAC009S0903W0135 该政策延续了去年新政以来 -BAC009S0903W0136 也延续了公积金担当扶持楼市主力军的政策选择 -BAC009S0903W0137 从去年三部委发文 -BAC009S0903W0138 公积金对楼市的扶持力度不断加大 -BAC009S0903W0139 相继有一百多个城市出台了公积金新政 -BAC009S0903W0140 公积金贷款利率也数次下调 -BAC009S0903W0141 二套还清十首套比例降至五成 -BAC009S0903W0142 与新政相比 -BAC009S0903W0143 目前公积金政策已经与去年等同了 -BAC009S0903W0144 此次公积金政策大力度调整 -BAC009S0903W0145 主要目的是通过激励改善型住房需求 -BAC009S0903W0146 实现三四线城市去库存 -BAC009S0903W0147 尽管全国商品房销售面积持续回升 -BAC009S0903W0148 但库存压力却难以缓减 -BAC009S0903W0149 全国商品房待售面积比七月末增加了五百万平方米 -BAC009S0903W0150 比去年底增加了七万平方米 -BAC009S0903W0151 库存逆势攀升的根本原因在于供求错配 -BAC009S0903W0152 推动全国成交面积止跌反弹 -BAC009S0903W0153 但供应和库存却主要集中在七个三四线城市 -BAC009S0903W0154 且待售库存单套面积较大 -BAC009S0903W0155 无论是降低二套房公积金首付比例 -BAC009S0903W0156 还是不再区分普通和非普通住房 -BAC009S0903W0157 都意在有针对性地加大三四线城市楼市库存消化力度 -BAC009S0903W0158 只有楼市库存真正消化了 -BAC009S0903W0159 才能提振开发商拿地和开工的积极性 -BAC009S0903W0160 在公积金利率已降至历史低位 -BAC009S0903W0161 站在金九银十即将来临的起点上 -BAC009S0903W0162 再次降低公积金贷款首付比例 -BAC009S0903W0163 目的也是为了夯实楼市回升的基础 -BAC009S0903W0164 尽管去年新政以来 -BAC009S0903W0165 楼市持续三个季度回升 -BAC009S0903W0166 回升势头有转弱的迹象 -BAC009S0903W0167 首先是重点城市楼市成交回落趋势明显 -BAC009S0903W0168 领头羊一线城市分别下降百分之一和百分之七 -BAC009S0903W0169 而重点城市的供应也在七月份下滑了百分之七 -BAC009S0903W0170 五月份更是增加了七百万平方米 -BAC009S0903W0171 银行房贷额度开始紧张 -BAC009S0903W0172 首套房贷利润优惠也开始减少 -BAC009S0903W0173 近期人民币贬值叠加资本外流预期 -BAC009S0903W0174 资金面紧张对楼市的影响开始显现 -BAC009S0903W0175 市场对金九银十的预期也开始谨慎起来 -BAC009S0903W0176 除了去库存和夯实楼市回升基础外 -BAC009S0903W0177 此次公积金政策调整 -BAC009S0903W0178 也在于全面落实分类调控因城施策 -BAC009S0903W0179 纠偏政策一刀切的负面影响 -BAC009S0903W0180 去年新政以来 -BAC009S0903W0181 松绑二套房贷认定标准降低二套房贷首付比例 -BAC009S0903W0182 以及营业税免征期 -BAC009S0903W0183 第一次在公积金上提出差别对待 -BAC009S0903W0184 包括上海广州厦门南京在内的重点城市 -BAC009S0903W0185 以及前几次公积金新政的实施 -BAC009S0903W0186 公积金可贷额度受到严重冲击 -BAC009S0903W0187 着力解决一批影响现代农业发展全局的重大科技问题 -BAC009S0903W0188 加快农业技术引进消化吸收再创新步伐 -BAC009S0903W0189 加强农业科技领域国际合作 -BAC009S0903W0190 调整优化农业科研布局 -BAC009S0903W0191 加强农业科研基地和重点实验室建设 -BAC009S0903W0192 完善农业科技创新体系和现代农业产业技术体系 -BAC009S0903W0193 启动实施农业科技创新能力建设工程 -BAC009S0903W0194 组建一批产业技术创新战略联盟和国家农业科技园区 -BAC009S0903W0195 完善农业科技评价机制 -BAC009S0903W0196 激发农业科技创新活力 -BAC009S0903W0197 大力发展现代农作物种业 -BAC009S0903W0198 实施好转基因生物新品种培育重大专项 -BAC009S0903W0199 加快发展生物育种战略性新兴产业 -BAC009S0903W0200 加快农业新品种新技术转化应用 -BAC009S0903W0201 加强小麦一喷三防喷施叶面肥 -BAC009S0903W0202 加快牲畜水产遗传改良进程 -BAC009S0903W0203 创新农业技术推广机制 -BAC009S0903W0204 大规模开展高产创建 -BAC009S0903W0205 在有条件地区实行整乡整县场推进 -BAC009S0903W0206 力争实现优势产区和主要品种全复盖 -BAC009S0903W0207 壮大农业农村人才队伍 -BAC009S0903W0208 以实施现代农业人才支撑计划为抓手 -BAC009S0903W0209 加大农村劳动力培训阳光工程实施力度 -BAC009S0903W0210 大力发展农业职业培养 -BAC009S0903W0211 加快技能型人才培养 -BAC009S0903W0212 支持高校毕业生和各类优秀人才投身现代农业建设 
-BAC009S0903W0213 鼓励外出务工农农民带技术带资金回乡创业 -BAC009S0903W0214 改善农业基础设备和装备条件 -BAC009S0903W0215 大规模开展高标准农田建设 -BAC009S0903W0216 按照统筹规划分工协作集中投入连片推进的思想 -BAC009S0903W0217 大规模改造中低产田 -BAC009S0903W0218 建设旱涝保收高标准农田 -BAC009S0903W0219 加快大中型灌区排灌泵站配套改造 -BAC009S0903W0220 大力开展小型农田水利建设 -BAC009S0903W0221 增加农田有效灌溉面积 -BAC009S0903W0222 加强新增千亿斤粮食生产能力规划的田间工程建设 -BAC009S0903W0223 完善机耕道农田防护林等设施 -BAC009S0903W0224 推广土壤有机质提升测土配方施肥等培肥地力技术 -BAC009S0903W0225 完善高标准农田建后管护支持政策和制度 -BAC009S0903W0226 延长各类设施使用年限 -BAC009S0903W0227 确保农田综合生产能力长期持续稳定提升 -BAC009S0903W0228 改善养殖业生产条件 -BAC009S0903W0229 加快实施生禽良种工程 -BAC009S0903W0230 支持生禽规模化养殖场小区开展标准化改造和建设 -BAC009S0903W0231 加快草原围栏棚圈和牧区水利建设 -BAC009S0903W0232 配套发展节水高效灌溉词草基地 -BAC009S0903W0233 健全水产良良种体系 -BAC009S0903W0234 开展池塘标准化改造 -BAC009S0903W0235 建设水产健康养殖示范场 -BAC009S0903W0236 加强渔港和渔政执法能力建设 -BAC009S0903W0237 全面落实农机具购置补贴各项管理制度和规定 -BAC009S0903W0238 加快推进水稻栽插收获和玉米收获机械化 -BAC009S0903W0239 重点突破棉花油菜甘蔗收获机械化瓶颈 -BAC009S0903W0240 大力发展高效植保机器 -BAC009S0903W0241 积极推进养殖业园艺业农产品初加工机械化 -BAC009S0903W0242 加快实施保护性耕作工程 -BAC009S0903W0243 提高大型农机具和农药化肥农膜等农资生产水平 -BAC009S0903W0244 加强农业防灾减灾能力建设 -BAC009S0903W0245 提高防汛抗旱减灾能力 -BAC009S0903W0246 加强种子饲草料等急救灾物资储备调运条件建设 -BAC009S0903W0247 推广相应的生产技术和防灾减灾措施大力推进农业标准化 -BAC009S0903W0248 以农兽药残留标准为重点 -BAC009S0903W0249 加快健全农业标准体系 -BAC009S0903W0250 以园艺产品生产品水产品等为重点 -BAC009S0903W0251 推行统一的标准操作规程和技术规范 -BAC009S0903W0252 加强国家级农业标准化整建制推进示范县场建设 -BAC009S0903W0253 市场占有率为百分之五 -BAC009S0903W0254 二零一四年三星期累计销售超过一百二十万块智能手表 -BAC009S0903W0255 这个数据不及苹果的一个季度 -BAC009S0903W0256 因此不能表示苹果没有新的业务增长点 -BAC009S0903W0257 本报记者纪佳鹏北京报道北京时间八月十二日 -BAC009S0903W0258 作为科技股领头羊的苹果股价当天下挫百分之二 -BAC009S0903W0259 十二月二日路透社报道 -BAC009S0903W0260 苹果股票每分钟交易量已超过六百七十万股 -BAC009S0903W0261 这种巨大且异乎寻常的抛售量 -BAC009S0903W0262 瞬间将苹果估价拉低了至少百分之六 -BAC009S0903W0263 使其市值分秒间蒸发近四百亿美元 -BAC009S0903W0264 成为苹果近三个月以来股价下跌最严重的一次 -BAC009S0903W0265 苹果股价一度每分钟跌幅已破百分之三 -BAC009S0903W0266 每股报价报收于一百一十一点二七美元 -BAC009S0903W0267 报收于每股一百一十五点四五美元 -BAC009S0903W0268 对于造成此次异常闪崩的原因目前尚未公布 -BAC009S0903W0269 此举或与摩根士丹利下调苹果股票持股比例有关 -BAC009S0903W0270 同时将苹果持股比例由百分之四下调至百分之三 -BAC009S0903W0271 并建议客户减少对该股票在投资组合中的占比 -BAC009S0903W0272 高频交易也与此次闪崩事件逃脱不了干系 -BAC009S0903W0273 高频交易一直饱受诟病 -BAC009S0903W0274 美国股市九点五十起 -BAC009S0903W0275 超过三百馀种不同类别股票均出现不正常股价波动 -BAC009S0903W0276 当出现此类价格变化时 -BAC009S0903W0277 通常只是算法交易造成的影响 -BAC009S0903W0278 也就是所说的流动性蒸发事实上 -BAC009S0903W0279 流动性从未得到足够的重视 -BAC009S0903W0280 我们当下的股市在流动性方面也表现得支离破碎 -BAC009S0903W0281 苹果领头的股价闪崩原因可能比想象中的更为复杂 -BAC009S0903W0282 现在就下结论将原因推给高频交易 -BAC009S0903W0283 这种做法很容易误导客服 -BAC009S0903W0284 阿里巴巴当日股价下跌一点百分之四 -BAC009S0903W0285 谷歌十点五八分股价也出现一点百分之七十九的最大跌幅 -BAC009S0903W0286 苹果股价闪崩只是正常股票套利的表现 -BAC009S0903W0287 苹果股价相较十月份低点已经上涨约百分之二十五 -BAC009S0903W0288 纳斯达克在此期间只涨了百分之十 -BAC009S0903W0289 选择套现或也是情理之中 -BAC009S0903W0290 每股下滑三点八八美元报收于一点一十五点零五美元 -BAC009S0903W0292 苹果股票每分钟交易量已超过六十七万股 -BAC009S0903W0293 这不仅创下苹果公司自二零一四年 -BAC009S0903W0294 苹果背后那行字应该在每个中国人心里搜狐科技 -BAC009S0903W0295 翻译过来就是加利福尼亚苹果公司设计 -BAC009S0903W0296 按说这只是一个客观表述 -BAC009S0903W0297 对于谋求转型发展怀揣创新型国家梦想的中国来说 -BAC009S0903W0298 这行字值得我们深思 -BAC009S0903W0299 众多跨国品牌在中国都有生产基地 -BAC009S0903W0300 像苹果这样在产品上强调在本国设计的很少 -BAC009S0903W0301 这样的做法当然是企业行为 -BAC009S0903W0302 这行字对于我们来说 -BAC009S0903W0303 很多家长都考虑给孩子配置具有定位功能的智能手机 -BAC009S0903W0304 智能手机特定的上网和游戏功能 -BAC009S0903W0305 注定了它强烈的娱乐性 -BAC009S0903W0306 给孩子配置智能手机 -BAC009S0903W0307 担心会直接影响孩子正常的学习 -BAC009S0903W0308 儿童电话手表除了通话定位等功能外 -BAC009S0903W0309 还针对性的设置了上课禁用等功能 -BAC009S0903W0310 孩子带到学校既不会让孩子分心 -BAC009S0903W0311 又可以让家长了解孩子的位置 -BAC009S0903W0312 是很多父母迫切需要的 -BAC009S0903W0313 对于小天才电话手表上课禁用功能 -BAC009S0903W0314 相关人员表示为了方便和孩子保持联系 -BAC009S0903W0315 之前很多家长会给孩子买手机 -BAC009S0903W0316 影响学习虽说功能手机可以阻止孩子玩游戏 
-BAC009S0903W0317 儿童电话手表正好解决了这两个问题 -BAC009S0903W0318 家长随时和孩子保持联系 -BAC009S0903W0319 我就给自己的孩子也买了一个呢 -BAC009S0903W0320 失孤等影片的上映 -BAC009S0903W0321 也将儿童人身安全的话题推向了妙论的风口浪尖 -BAC009S0903W0322 儿童电话手表的诞生 -BAC009S0903W0323 为孩子多了一份强有力的保障 -BAC009S0903W0324 电话手表正是瞄准了这一需求 -BAC009S0903W0325 加上随身携带的便捷性和流畅的操作体验 -BAC009S0903W0326 在手机平板电脑之外 -BAC009S0903W0327 开扩了一个新的市场 -BAC009S0903W0328 现在三百六十腾讯等大公司都涉足了这一领域 -BAC009S0903W0329 自今年六月电话手表行业兴起起来 -BAC009S0903W0330 整体行业出货量应该不断突破 -BAC009S0903W0331 并将成为新兴的销售热点 -BAC009S0903W0332 科技创新带动了电话手表行业 -BAC009S0903W0333 其实儿童电话手表的火 -BAC009S0903W0334 是火在行业的科技创新 -BAC009S0903W0335 随着国家在科技创新方面的投入和关注度的增加 -BAC009S0903W0336 新兴行业对于创新的热情也不断增加 -BAC009S0903W0337 我们小天才电话手表就是不断创新的成果 -BAC009S0903W0338 意大利选手弗菜戈也说我们在来里约之前 -BAC009S0903W0339 看到了有关这里水污染的报道 -BAC009S0903W0340 对这里的水质比较关心 -BAC009S0903W0341 这个湖虽然没有漂浮的垃圾 -BAC009S0903W0342 但湖水很脏也很浑浊 -BAC009S0903W0343 里约奥组委此前表示 -BAC009S0903W0344 运动员的健康是他们关注的头等大事 -BAC009S0903W0345 无论帆船赛艇还是公开水域 -BAC009S0903W0346 在奥运期间水质都可以保证运动员的健康 -BAC009S0903W0347 二零一五年九月十二日星期六十一点 -BAC009S0903W0348 开幕式举行了庄严的入场仪式 -BAC009S0903W0349 裁判员队伍和参赛代表队依次入场亮相 -BAC009S0903W0350 裁判员代表和运动员代表进行了宣誓 -BAC009S0903W0351 曾春蕾和刘晓彤向各参赛队赠送了签名排球 -BAC009S0903W0352 北京市体育局副局长孙学才宣布比赛开幕 -BAC009S0903W0353 响应北京市振兴三大球战略的号召 -BAC009S0903W0354 促进北京排球事业发展 -BAC009S0903W0355 丰富北京市业馀排球群体活动 -BAC009S0903W0356 激发广大群众对排球的热情 -BAC009S0903W0357 为将其打造成具有影响力的群众性品牌赛事 -BAC009S0903W0358 在社会主义核心价值观的指引下 -BAC009S0903W0359 突出弘扬北京排球文化 -BAC009S0903W0360 组委会在部门设置上调整了人员分工 -BAC009S0903W0361 组委会工作机构共分为四部一室 -BAC009S0903W0362 并且全部采用有经验的工作人员参与竞赛组织工作 -BAC009S0903W0363 在制定竞赛规程方面严格遵循规范化专业化原则 -BAC009S0903W0364 不仅能够将业馀排球与职业排球严格地区分开 -BAC009S0903W0365 而且满足了绝大多数业馀排球爱好者的参赛需求 -BAC009S0903W0366 充分做到公平公正公开 -BAC009S0903W0367 其中国际级裁判员两名 -BAC009S0903W0368 结合业馀排球特点做出细微调整制定而成 -BAC009S0903W0369 营造出良好的比赛氛围 -BAC009S0903W0370 悬挂于场馆醒目位置 -BAC009S0903W0371 增强参赛者的荣誉感与积极性的同时 -BAC009S0903W0372 进一步提升了比赛品质 -BAC009S0903W0373 要将北京市业馀排球联赛打造成群众性品牌赛事 -BAC009S0903W0374 离不开广大媒体的支持 -BAC009S0903W0375 组委会特意举办隆重的开幕式 -BAC009S0903W0376 并邀请京城排球界全部媒体参加报道 -BAC009S0903W0377 并制作了精美的秩序册发给媒体及参赛队 -BAC009S0903W0379 以大球套小球为设计理念 -BAC009S0903W0380 为振兴三大球贡献自己的一份力量 -BAC009S0903W0381 他们的造型由排球的五个经典动作组成 -BAC009S0903W0382 分别是发接传垫扣 -BAC009S0903W0383 颜色则是由代表着运动精神的奥运五环色组成 -BAC009S0903W0384 来自全国各地的业馀排球爱好者纷纷前来踊跃报名 -BAC009S0903W0385 半个月的报名期限未到 -BAC009S0903W0386 二四个参赛名额就已经全部报满 -BAC009S0903W0387 共有三百二十三名业馀排球爱好者报名参加比赛 -BAC009S0903W0388 其中年龄最小的年仅十四岁 -BAC009S0903W0389 最大的已经年过半百 -BAC009S0903W0390 另外还有两名来自加拿大和美国的外籍华侨 -BAC009S0903W0391 由此可见北京市业馀排球联赛的影响力与号召力 -BAC009S0903W0392 在参赛的二四支队伍中 -BAC009S0903W0393 有一些临时组建的球队 -BAC009S0903W0394 但大部分都是常年活跃在业馀排球圈里成熟队球 -BAC009S0903W0395 而且多次参加过业馀排球比赛 -BAC009S0903W0396 相信有这些高水平业馀排球队的参与 -BAC009S0903W0397 这一届北京市业馀排球联赛一定会精彩纷呈 -BAC009S0903W0398 为期五天的比赛全部结束后 -BAC009S0903W0399 将举行隆重的颁奖仪式 -BAC009S0903W0400 从四分之一决赛开始每场比赛评选出一名优秀运动员 -BAC009S0903W0401 为参加联赛的吸引力 -BAC009S0903W0402 提升参赛队的积极性 -BAC009S0903W0403 组委会提高了前三名的含金量 -BAC009S0903W0404 这也是该片首次亮相大荧幕 -BAC009S0903W0405 影片的正式公映要到圣诞节当天 -BAC009S0903W0406 但本月评论界就可以知道该片的真实成色 -BAC009S0903W0407 曾在二零一零年获得空前成功 -BAC009S0903W0408 据香港媒体报道 -BAC009S0903W0409 因参演剧集殭而与陈嘉宝及赖慰玲成为好姐妹 -BAC009S0903W0410 众人一起为寿寿星女庆生 -BAC009S0903W0411 陈嘉宝昨天六月二十三日将大合照上传个人主页 -BAC009S0903W0412 除了看见陈嘉宝及赖慰玲外 -BAC009S0903W0413 亮点正是与陈凯琳互相了解中郑嘉颖也有出席 -BAC009S0903W0414 并做陈凯琳背后的男人 -BAC009S0903W0415 网友纷纷将焦点转移到这对情侣身上 -BAC009S0903W0416 中新网七月二十八日电据香港明报消息 -BAC009S0903W0417 陈凯琳田心妮等出席新剧开机机仪式 -BAC009S0903W0418 谈及此前她曾到横店探班郑嘉颖 -BAC009S0903W0419 因为新剧的厂景和外景推迟了 -BAC009S0903W0420 才有时间去探班 -BAC009S0903W0421 在当地逗留了三四天 -BAC009S0903W0422 自己也有带剧本去看 -BAC009S0903W0423 搜狐娱乐讯北京时间十月二十六日消息 -BAC009S0903W0424 据香港媒体报导 
-BAC009S0903W0425 昨晚张保仔播映大结局故演员齐集饭局以及庆祝 -BAC009S0903W0426 陈展鹏风骚到场 -BAC009S0903W0427 他要赶进厂开工 -BAC009S0903W0428 因此开香槟后要先离场 -BAC009S0903W0429 一直传他跟洪永城不和 -BAC009S0903W0430 两人在台下分枱坐欠交流 -BAC009S0903W0431 公安局的决定书说不对他做出行政处罚 -BAC009S0903W0432 我们才按照正常程序给他转为副院长一职的 -BAC009S0903W0433 温岭鞋厂倒塌事故已一四人遇难鞋厂老板被控制 -BAC009S0903W0434 新京报快讯记者杨锋七月四日下午四时零八分 -BAC009S0903W0435 浙江台州温岭市一一零指挥中心接警称 -BAC009S0903W0436 新京报记者从温岭市政府新闻办获悉 -BAC009S0903W0437 早前通报的五名失联人员已全部找到 -BAC009S0903W0438 死亡人数上升至一四人 -BAC009S0903W0439 涉事企业老板已被警方控制 -BAC009S0903W0440 温州二零位面包师制出二五米蛋糕或申报吉尼斯纪录 -BAC009S0903W0441 前往温州龙湾万达广场游玩的市民 -BAC009S0903W0442 无不发出这样的惊叹 -BAC009S0903W0443 一糕点店派出二零位面包师傅 -BAC009S0903W0444 耗时一四个小时打造的二米五超长蛋糕 -BAC009S0903W0445 吸引众多市民驻足观看 -BAC009S0903W0446 温州二名已婚男为争美女驾豪车互撞四个回合 -BAC009S0903W0447 车子被撞得破烂不堪七月四日凌晨 -BAC009S0903W0448 宝马奔驰连续四次相撞 -BAC009S0903W0449 两车驾驶员一度下车大打出手 -BAC009S0903W0450 起因是为了一名年轻的刘姓美女 -BAC009S0903W0451 经保险公司初步估算 -BAC009S0903W0452 两车损失高达三四十万元 -BAC009S0903W0453 温州七人涉嫌百倍抬杠非法经营期货三二亿被批货 -BAC009S0903W0454 浙江温州一公司安装虚拟交易系统 -BAC009S0903W0455 以一一零倍的杠杆吸引社会公众投资 -BAC009S0903W0456 非法经营期货金额共计人民币三二亿元 -BAC009S0903W0457 七名犯罪嫌疑人因涉嫌非法经营罪被批准逮捕 -BAC009S0903W0458 温州城管掌掴女清洁工已被停职检查 -BAC009S0903W0459 温州天价馒头续店方称顾客要狭索赔三条中华烟 -BAC009S0903W0460 荞麦窝窝头一零月二零日 -BAC009S0903W0461 网络上一张永嘉桥头国际饭店的结帐单十分引人注目 -BAC009S0903W0462 菜单显示该饭店的荞麦窝窝头卖三八元一个 -BAC009S0903W0463 三零馀位食客吃了四五个窝窝头 -BAC009S0903W0464 发现事情并没有这么简单 -BAC009S0903W0465 温州火锅先生后续涉案者父亲写公开道歉信 -BAC009S0903W0466 温州网八月二十七日讯记者项锐见习记者黄梦思 -BAC009S0903W0467 温州一七月大女童接种疫苗抽搐省疾控专家调查 -BAC009S0903W0468 金报讯记者蓝莹九月一一日上午 -BAC009S0903W0469 随即被送到儿童医院进行救治 -BAC009S0903W0470 经过连续三天的抢救 -BAC009S0903W0471 孩子仍处于昏迷阶段 -BAC009S0903W0472 省市区三三级疾控部门专家已介入调查 -BAC009S0903W0473 温州一中学门口氢气罐爆炸卖气球摊贩不治身亡 -BAC009S0903W0474 温州一住持被免政协委员遭准儿媳举报娶妻开路虎 -BAC009S0903W0475 关于中国嵩山少林寺方丈齐永信的举报风波尚未停歇 -BAC009S0903W0476 因准儿媳的举报跌下神坛 -BAC009S0903W0477 位于温州苍南龙港镇水门村的一个仓库发生火灾 -BAC009S0903W0478 记者从消防部门处了解到 -BAC009S0903W0479 这里存放着乙酯和工业酒精等化工品 -BAC009S0903W0480 温州一夫妻非法集资五亿丈夫将赃款送给情妇洗钱 -BAC009S0903W0481 温州一女士洗澡被已婚男多次偷窥二年后才报警 -BAC009S0903W0482 温州网讯洗澡被偷窥却一忍再忍 -BAC009S0903W0483 但愿望总是照不进现实 -BAC009S0903W0484 称一名男子时常偷窥自己洗澡 -BAC009S0903W0485 且时间已长达两年多了 -BAC009S0903W0486 温州一家六口吃毒蘑菇身亡女婴拒吃面躲过死神 -BAC009S0903W0487 温州一家六口误食毒菌五人已死亡 -BAC009S0903W0488 温州永嘉县桥下镇吴山村的潘老伯一家六口 -BAC009S0903W0489 半个月前因误食有毒野生菌中毒 -BAC009S0903W0490 至七月一六日中午已有五人去世 -BAC009S0903W0491 潘老伯二六岁的外孙还在医院重症监护室治疗 -BAC009S0903W0492 仍处于深度昏迷状态 -BAC009S0903W0493 温州一村主任台风夜转移群众其妻子遇难 -BAC009S0903W0494 石柱村泥石流塌方现场 -BAC009S0903W0495 温州一男子在医院放置假炸弹被判处有期徒刑一年 -BAC009S0904W0121 为了解决额度荒的问题 -BAC009S0904W0122 近期广州和南京已经开始启动公转商贷款贴息模式 -BAC009S0904W0123 即由商业银行向市民发放执行公积金利率的贷款 -BAC009S0904W0124 公积金贷款与商业贷款之间的利息差额 -BAC009S0904W0125 由公积金中心向银行支付 -BAC009S0904W0126 重点城市公积金贷额款度也有限制 -BAC009S0904W0127 比如北京和上海家庭最高贷款额度均为一百万元 -BAC009S0904W0128 广州和深圳分别为五万元和七万元 -BAC009S0904W0129 在近期房价持续回升的背景下 -BAC009S0904W0130 多数二线城市和三四线城市 -BAC009S0904W0131 公积金贷款上限能够复盖单套房总价 -BAC009S0904W0132 这些城市公积金贷款买房的比例也比较高 -BAC009S0904W0133 此次政策调整也有较好的针对性 -BAC009S0904W0134 包括此次三部委发布公积金新政 -BAC009S0904W0135 再结合近期限外政策全面松绑 -BAC009S0904W0136 具有实时性合理性和较强的针对性 -BAC009S0904W0137 有助于发挥内需在稳增长中的积极作用 -BAC009S0904W0138 住建部等三部委联合发文 -BAC009S0904W0139 再次降低公积金贷款的门槛 -BAC009S0904W0140 还清首套房公积金贷款 -BAC009S0904W0141 在公积金贷款额度上调后一个月内 -BAC009S0904W0142 北京公积金贷款成交量上涨百分之五 -BAC009S0904W0143 中小户型住房去化速度明显加快 -BAC009S0904W0144 北京公积金贷款额度提高 -BAC009S0904W0145 虽有助于使刚需购房者长期受益 -BAC009S0904W0146 但仍存在七万最高贷款额申请难等落地问题 -BAC009S0904W0147 公积金政策放宽对楼市成交的短期刺激作用将难以持续 -BAC009S0904W0148 各地公积金政策步入频繁调整期 -BAC009S0904W0149 一向严格收紧购房政策的北京也加入此阵营 -BAC009S0904W0150 据中新网房产频道梳理 -BAC009S0904W0151 北京针对公积金的调整次数便达到五次 -BAC009S0904W0152 美丽北京大型绿色公益品牌项目 
-BAC009S0904W0153 调整公积金年度缴存上下限和缴存比例 -BAC009S0904W0154 放宽公积金贷款二套房认定标准 -BAC009S0904W0155 将公积金贷款利率下调 -BAC009S0904W0156 公积金政策的调整从未这么频繁 -BAC009S0904W0157 从一系列公积金政策看来 -BAC009S0904W0158 扶持刚需客群已经成为北京房地产调控的主要方向 -BAC009S0904W0159 未来政策层面将继续保持宽松 -BAC009S0904W0160 在上海易居房地产研究院研究员严跃进看来 -BAC009S0904W0161 这一系列公积金政策的放宽 -BAC009S0904W0162 与目前房地产救市的市场导向相吻合 -BAC009S0904W0163 盘活各地公积金资源 -BAC009S0904W0164 年初选择使用公积金贷款的购房者占比环比增多 -BAC009S0904W0165 伟嘉安捷提供数据显示 -BAC009S0904W0166 七月北京公积金贷款成交量提升了百分之五 -BAC009S0904W0167 贷款需求将在下个月继续释放 -BAC009S0904W0168 北京七月楼市的成交情况 -BAC009S0904W0169 也佐证了公积金政策放宽刺激作用的显现 -BAC009S0904W0170 在总成交中占比环比增加五个百分点 -BAC009S0904W0171 且低于七十平米的小户型住房成交明显上升 -BAC009S0904W0172 北京调整首套房公积金贷款最高额度 -BAC009S0904W0173 伟业我爱我家集团副总裁胡景晖分析 -BAC009S0904W0174 刚需人群观望心理正逐步消散 -BAC009S0904W0175 开始加速进入新房市场 -BAC009S0904W0176 公积金政策的放宽对二手房市场也产生了影响 -BAC009S0904W0177 据伟业我爱我家市场研究院测算 -BAC009S0904W0178 在过去六个月中处于高点 -BAC009S0904W0179 虽然刚需购房者入市积极性有所提升 -BAC009S0904W0180 但不少业内人士认为 -BAC009S0904W0181 这一刺激作用并不会长时间延续 -BAC009S0904W0182 上调公积金贷款额度对市场的刺激是短期的 -BAC009S0904W0183 公积金短期拉动的购房需求有限 -BAC009S0904W0184 更多是原本计划购房的客群享受到了政策利好 -BAC009S0904W0185 原本短期内不考虑购房的客群 -BAC009S0904W0186 在这一政策出台后匆忙购房 -BAC009S0904W0187 加强农产品质量安全监管 -BAC009S0904W0188 建立协调配合检打联动联防联控应急处置机制 -BAC009S0904W0189 实行农产品产地安全分级管理 -BAC009S0904W0190 推动农产品生产加工和流通企业建立诚信制度 -BAC009S0904W0191 提高农业产业化和规模化经营水平 -BAC009S0904W0192 推进农业产业化经营跨越式发展 -BAC009S0904W0193 制定扶持农业产业化龙头企业发展的综合性政策 -BAC009S0904W0194 启动实施农业产业化经营跨越发展行动 -BAC009S0904W0195 按照扶优扶大扶强的原则 -BAC009S0904W0196 依托农产品加工物流等各类农业园区 -BAC009S0904W0197 选建一批农业产业化示范基地 -BAC009S0904W0198 推进龙头企业集群发展 -BAC009S0904W0199 引导龙头企业采取兼并重组参股收购等方式 -BAC009S0904W0200 支持龙头企业跨区域经营 -BAC009S0904W0201 提升产品研发精深加工技术水平和装备能力 -BAC009S0904W0202 鼓励龙头企业采取参股合作等方式 -BAC009S0904W0203 与农户建立紧密型利益联联结关系 -BAC009S0904W0204 强化农民专业合作社组织带动能力 -BAC009S0904W0205 广泛开展示范社建设行动 -BAC009S0904W0206 加大合作社经营管理人员培训培养力度 -BAC009S0904W0207 加强合作社辅导员队伍建设 -BAC009S0904W0208 支持农民专业合作社参加农产品展示展销活动 -BAC009S0904W0209 建立稳定的产销关系 -BAC009S0904W0210 鼓励农民专业合作社开展信用合作 -BAC009S0904W0211 在自愿基础上组建联合社 -BAC009S0904W0212 提高生产经营和市场开拓能力 -BAC009S0904W0213 扶持合作社建设农产品仓储冷藏初加工等设施 -BAC009S0904W0214 发展多种形式的适度规模经营 -BAC009S0904W0215 在依法自愿有偿和加强服务基础上 -BAC009S0904W0216 完善土地承包经营权流转市场 -BAC009S0904W0217 发展多种形式的规模化专业化生产经营 -BAC009S0904W0218 引导土地承包经营权向生产和经营能手集中 -BAC009S0904W0219 大力培育和发展种养大户家庭农牧场 -BAC009S0904W0220 实施一村一品强村富民工程 -BAC009S0904W0221 大力发展农业社会化服务 -BAC009S0904W0222 增强农业公益性服务能力 -BAC009S0904W0223 加快基层农技推广体系改革和建施 -BAC009S0904W0224 健全公益性农业技术推广服务体系 -BAC009S0904W0225 加强农业有害生物监测预警和防控能力建设 -BAC009S0904W0226 加强农业资源和生态环境保护 -BAC009S0904W0227 继续实行最严格的耕地保护制度 -BAC009S0904W0228 确保耕地保有量保持在十亿亩 -BAC009S0904W0229 基本农田不低于十亿亩 -BAC009S0904W0230 科学保护和合理利用水资源 -BAC009S0904W0231 大力发展节水增效农业 -BAC009S0904W0232 继续建设国家级旱作农业示范区 -BAC009S0904W0233 坚持基本草原保护制度 -BAC009S0904W0234 推行禁牧休牧和划区轮牧 -BAC009S0904W0235 实施草原保护重大工程 -BAC009S0904W0236 加大水生生物资源养护力度 -BAC009S0904W0237 强化水生生态修复和建设 -BAC009S0904W0238 加强畜禽遗传资源和农业野生植物资源保护 -BAC009S0904W0239 加强农业生态环境治理 -BAC009S0904W0240 鼓励使用生物农药高效低毒低残留农药和有机肥料 -BAC009S0904W0241 回收再利用农膜和农药包装物 -BAC009S0904W0242 加快规模养殖场粪污处理利用 -BAC009S0904W0243 治理和控制农业面源污染 -BAC009S0904W0244 培育门类丰富层次齐用的综合利用产业 -BAC009S0904W0245 建立秸秆禁烧和综合利用的长效机制 -BAC009S0904W0246 继续实施农村沼气工程 -BAC009S0904W0247 大力推进农村清洁工程建设 -BAC009S0904W0248 清洁水源田园和家园 -BAC009S0904W0249 大力推进农业节能减排 -BAC009S0904W0250 树立绿色低碳发展理念 -BAC009S0904W0251 积极发展资源节约型和环境友好型农业 -BAC009S0904W0252 淘汰报废高耗能老旧农业机械 -BAC009S0904W0253 应该也是个提醒中国再也不能仅仅满足于组装了 -BAC009S0904W0254 我们在科技创新方面的进步非常显着 -BAC009S0904W0255 一项项领先世界的科技成果 -BAC009S0904W0256 不断刷新中国创造的精度高度深度 -BAC009S0904W0257 成为一个个响亮的中国品牌 -BAC009S0904W0258 我们的自主创新能力还不够强 -BAC009S0904W0259 
与世界先进水平相比还有明显差距 -BAC009S0904W0260 特别是企业自主创新方面 -BAC009S0904W0261 具有重大影响的科技产品还不是很多 -BAC009S0904W0262 与世界第二经济大国的地位还不相称 -BAC009S0904W0263 希望中国品牌在国际市场的知名度和影响力越来越大 -BAC009S0904W0264 中国人从来不缺乏创新创造的基因 -BAC009S0904W0265 创新是中华民族最鲜明的禀赋 -BAC009S0904W0266 我们完全有理由树立创新自信 -BAC009S0904W0267 上一次工业革命我们落在了西方发达国家后面很远 -BAC009S0904W0268 面对以网络和数字技术为标志的信息技术发展 -BAC009S0904W0269 我们迎来了赶超发达国家的难得机遇 -BAC009S0904W0270 我国拥有近一四亿人口 -BAC009S0904W0271 手机网民近五点六亿 -BAC009S0904W0272 这样的规模没有任何一个国家可以比拟 -BAC009S0904W0273 他们的消费需求是拉动创新创业的巨大牵引力 -BAC009S0904W0274 规模超大的人才群体更是创新创造无与伦比的重要资源 -BAC009S0904W0275 我国经济发展进入新常态 -BAC009S0904W0276 双目标不仅包括保持中高速增长 -BAC009S0904W0277 还包括迈向中高端水平 -BAC009S0904W0278 我国的经济处在爬坡过坎的重要关口 -BAC009S0904W0279 我们也许不用像以前那样为了追求某个数字赶紧赶慢了 -BAC009S0904W0280 但松一口气的想法是没有出路的 -BAC009S0904W0281 恰恰更需要我们有所作为 -BAC009S0904W0282 就是在创新驱动上下功夫 -BAC009S0904W0283 在转型发展上下功夫 -BAC009S0904W0284 不断提高技术创新对经济发展的贡献率 -BAC009S0904W0285 如果说过去的这些年 -BAC009S0904W0286 我们成为世界工厂是不可逾越的发展阶段 -BAC009S0904W0287 那么未来的五年十年二十年 -BAC009S0904W0288 我们肯定不能再沾沾自喜于世界工厂 -BAC009S0904W0289 也不能一直被贴上中国组装的标签 -BAC009S0904W0290 长期处在产业链的末端 -BAC009S0904W0291 期待着越来越多中国设计的产品不断涌现并享誉国际 -BAC009S0904W0292 未必印在每个产品上 -BAC009S0904W0293 但应刻在每个中国企业家甚至每个中国人心里 -BAC009S0904W0295 中国经营网注有国外媒体报道称 -BAC009S0904W0296 苹果市场价值达到七千亿美元刚刚过去几个月 -BAC009S0904W0297 已经有股票经纪公司预测 -BAC009S0904W0298 那么苹果能突破一万亿大关吗 -BAC009S0904W0299 苹果公司上次发布全新产品是在五年以前 -BAC009S0904W0301 苹果的目标股价也开始相应地上涨 -BAC009S0904W0302 苹果的市值可能将突破万亿美元 -BAC009S0904W0303 现在市面上的电话手表功能最主要有两个通话和定位 -BAC009S0904W0304 儿童电话手表还推出了其他更多人性化的创新功能 -BAC009S0904W0305 对手表的大力普及也起到了至关重要的作用 -BAC009S0904W0306 以小天才电话手表为例 -BAC009S0904W0307 除了能和手机一样接打电话 -BAC009S0904W0308 做到全方位亲子沟通 -BAC009S0904W0310 击掌成为加好友等功能也一应俱全 -BAC009S0904W0311 电话手表就相当于一部简化的智能手机 -BAC009S0904W0312 主要在于将通信和定位的模块大大缩小到方寸之间 -BAC009S0904W0313 置入只有手机几分之一大小的手表表盘 -BAC009S0904W0314 还要保证与手机一样的通话质量呢 -BAC009S0904W0315 这是摆在行业面前最大的技术难题 -BAC009S0904W0316 小天才产品负责人表示 -BAC009S0904W0317 以小天才电话手表为例 -BAC009S0904W0318 公司超百位研发人员历经半年多时间 -BAC009S0904W0319 投入巨资研究经费攻关 -BAC009S0904W0320 最后找到芬兰的高级技术团队 -BAC009S0904W0321 才解决电话手表的内线内置问题 -BAC009S0904W0322 对于这种突破性的天线内置方案 -BAC009S0904W0323 我们进行了极为严谨的测试 -BAC009S0904W0324 确保信号与手机相当才真正投放市场 -BAC009S0904W0325 对于创新成果的实证和检验 -BAC009S0904W0326 电话手表对儿童安全吗 -BAC009S0904W0327 儿童电话手表的辐射对儿童的健康安全是否存在隐患呢 -BAC009S0904W0328 这种说法到底有无科学依据呢 -BAC009S0904W0329 关于手机等产品的辐射问题 -BAC009S0904W0330 任何家用电器只要通电就会产生电磁辐射 -BAC009S0904W0331 大到空调电视机电脑微波炉加湿器 -BAC009S0904W0332 小到吹风机充电器甚至接线板都会产生电磁辐射 -BAC009S0904W0333 虽然电磁辐射无处不在 -BAC009S0904W0334 并非所有的电磁辐射都会对人体产生危害 -BAC009S0904W0335 中国电力科学研究院高级工程师邬雄表示 -BAC009S0904W0336 比如阳光也是一种电磁辐射 -BAC009S0904W0337 根据国际非电离辐射防护委员会制定的标准 -BAC009S0904W0338 北京市业馀排球联赛未来每年都将举办一届 -BAC009S0904W0339 并且会逐渐扩大比赛规模 -BAC009S0904W0340 筹备时间和比赛周期都将延长 -BAC009S0904W0341 参赛队伍数量也会有所提升 -BAC009S0904W0342 明年北京市业馀排球联赛将在中国排球协会备案 -BAC009S0904W0343 北京市排球协会与天津排协已经初步达成合作意向 -BAC009S0904W0344 今后北京与天津两地可能会联合办赛 -BAC009S0904W0345 通过冠军赛季后赛垫场赛等形式 -BAC009S0904W0346 通过未来几年的发展 -BAC009S0904W0347 影响力强的全国性比赛 -BAC009S0904W0348 高清图女排凯旋郎平受热捧 -BAC009S0904W0349 时隔一二年重夺世界杯冠军的中国女排 -BAC009S0904W0350 新队长曾春蕾揭秘了角色转变前后的幕后故事 -BAC009S0904W0351 并且介绍自己是如何通过实战调整状态而渐入佳境的 -BAC009S0904W0352 后两轮死磕俄罗斯和日本更是有红了眼的感觉 -BAC009S0904W0353 回忆起当时临危受命接班队长一职的情况 -BAC009S0904W0354 曾春蕾介绍是在中国女排出发的前一天 -BAC009S0904W0355 主教练郎平训练结束后通知她的 -BAC009S0904W0356 当时确实没有什么心理准备 -BAC009S0904W0357 虽然知道惠若琪的心脏不太好 -BAC009S0904W0358 但是也不好过问太多 -BAC009S0904W0359 结果等到的消息是她不能去世界杯 -BAC009S0904W0360 其实在二零一四年女排大奖赛的总决赛 -BAC009S0904W0361 曾春蕾就曾经临时客串过队长职务 -BAC009S0904W0362 不过和这次在世界杯当队长相比压力明显不同 -BAC009S0904W0363 这位北京姑娘直言在三大赛当队长的感觉很特殊 -BAC009S0904W0364 是心智上的一个考验 -BAC009S0904W0365 刚开始无谓的心理压力很大 -BAC009S0904W0366 
甚至在头一场的比赛还影响到自己的技术发挥 -BAC009S0904W0367 好在队友们相互弥补得非常出色 -BAC009S0904W0368 曾春蕾通过自我调节而让竞技状态渐入佳境 -BAC009S0904W0369 在保障好技术稳定发挥的同时 -BAC009S0904W0370 还能够在情绪上带动队友 -BAC009S0904W0371 谈及当队长的责任感 -BAC009S0904W0372 曾春蕾认为中国女排的困难体现在伤病多 -BAC009S0904W0373 需要不停地告诫自己要淡定下来 -BAC009S0904W0374 毕竟她本人是经历过伦敦奥运会的 -BAC009S0904W0375 当队长的一举一动都会带来情绪上影响 -BAC009S0904W0376 因此一个眼神一个动作 -BAC009S0904W0377 都要给队友们传递乐观和放松的讯号 -BAC009S0904W0378 曾春蕾一记五米线的调整攻打得非常漂亮 -BAC009S0904W0379 评价自己发挥的最好一场其实就是本场比赛 -BAC009S0904W0380 因为和高手过招有种红了眼的感觉 -BAC009S0904W0381 个别球更是像释放怒火一般 -BAC009S0904W0382 桎梏挣脱开了就敢于发挥 -BAC009S0904W0383 由于中国女排的前期准备特别充分 -BAC009S0904W0384 这在曾春蕾看来打俄罗斯很有底 -BAC009S0904W0385 发挥也很从容和淡定 -BAC009S0904W0386 曾春蕾坦言打关键分的状态很忘我 -BAC009S0904W0387 打日本从来都不需要动员 -BAC009S0904W0388 队友彼此之间需要相互鼓励 -BAC009S0904W0389 但更多的是落实在技术环节的细腻方面 -BAC009S0904W0390 因为想要捧起来冠军奖杯的欲望太强烈 -BAC009S0904W0391 直通里约奥运会的目标也近在咫尺 -BAC009S0904W0392 身为大队员就会去提醒大家 -BAC009S0904W0393 将去年输球的原因作为教训反思 -BAC009S0904W0394 对垒日本女排有这样一个小细节 -BAC009S0904W0395 曾春蕾在刘晓彤一传失误后直接说我来 -BAC009S0904W0396 表明队长角色转换完成得还不错 -BAC009S0904W0397 自言就应该去承担更多的任务 -BAC009S0904W0398 曾春蕾保持着清醒的头脑 -BAC009S0904W0399 深知世界杯夺冠是对过去努力的肯定 -BAC009S0904W0400 但更多的是看到了自己的不足 -BAC009S0904W0401 也知道了未来需要努力的方向 -BAC009S0904W0402 视频中国三比一大胜俄罗斯独占女排世界杯榜首 -BAC009S0904W0403 日本二零一五女排世界杯单循环赛战至第十轮 -BAC009S0904W0404 不仅在全球收回十亿美元票房 -BAC009S0904W0405 且获得奥斯卡最佳动画片大奖 -BAC009S0904W0406 皮克斯终于有了拍摄玩具总动员四的计划 -BAC009S0904W0407 这部正在酝酿中的续集敲定了导演 -BAC009S0904W0408 但上台祝酒时都会交足戏 -BAC009S0904W0409 洪永城还主动跟陈展鹏碰杯 -BAC009S0904W0411 她自言最近在拍戏 -BAC009S0904W0412 戏中的角色常常以性感打扮示人 -BAC009S0904W0413 所以自己也很喜欢性感打扮 -BAC009S0904W0414 问及男友郑嘉颖会不会介意这么性感 -BAC009S0904W0416 这个程度是美的 -BAC009S0904W0417 他应该也是喜欢 -BAC009S0904W0418 问及最近是否有跟男友见面 -BAC009S0904W0420 自己也有一段时间没有跟他见面了 -BAC009S0904W0421 两人都是依赖电话沟通 -BAC009S0904W0422 自己也很期待九月中旬和男友见面 -BAC009S0904W0423 并大呼我自己也非常期待他回来 -BAC009S0904W0424 因为很久了很想念他 -BAC009S0904W0425 问及见面后两人怎样庆祝 -BAC009S0904W0426 她表示应该是吃吃饭看电影之类的 -BAC009S0904W0428 是否会请教男友拍戏上的问题 -BAC009S0904W0429 她透露有些不懂的会问男友郑嘉颖 -BAC009S0904W0430 对方给了她很大的帮助 -BAC009S0904W0431 图自网络温州网讯有网友爆料 -BAC009S0904W0432 温州瑞安一驾考考生在科目三考试中突然晕了过去 -BAC009S0904W0433 送到医院时已没有呼吸 -BAC009S0904W0434 现场图温都讯今天下午四时许 -BAC009S0904W0435 看来温州市区电梯也该大整修了 -BAC009S0904W0436 温州一网友造谣苏迪罗登陆期间水库崩塌被拘 -BAC009S0904W0437 澎湃新闻八月一零日从浙江温州平阳警方获悉 -BAC009S0904W0438 因在台风苏迪罗登陆期间在网络散布水库崩塌谣言 -BAC009S0904W0439 温州一路虎店隐瞒新车维修史被判赔三一四万 -BAC009S0904W0440 温州新力虎汽车销售公司展示台 -BAC009S0904W0441 温州一酒店窝头三八元一个三盘消费一七一零元 -BAC009S0904W0442 温州一闲置地块填满垃圾臭味浓烈附近居民不敢开窗 -BAC009S0904W0443 小区外的空置地上填满垃圾近日 -BAC009S0904W0444 与小区只有一河之隔的东边 -BAC009S0904W0445 因惠民路南段从去年开通后 -BAC009S0904W0446 对一块闲置地监管没有跟上 -BAC009S0904W0447 近一年来每天晚上有垃圾倒在这块闲置地上 -BAC009S0904W0448 垃圾刺鼻的臭味害得住户们连窗户都不敢打开 -BAC009S0904W0449 此前温州政协委员连续两届提出要求整治垃圾污染问题 -BAC009S0904W0450 日前本报记者前往实地调查了解 -BAC009S0904W0451 温州三学生为庆生爬上浙江第一高楼玩自拍 -BAC009S0904W0452 再上到楼顶一座高约四零米的铁塔上 -BAC009S0904W0453 并在铁塔上借助自拍杆合影 -BAC009S0904W0454 一则长达一分五七秒的视频在网络上热传 -BAC009S0904W0455 有网友称之为青春任性 -BAC009S0904W0456 温州三家熟食店摊主被捕为求卖相好添加日落黄 -BAC009S0904W0457 本报讯记者范跃红通讯员瓯文为了卖相好 -BAC009S0904W0458 温州两女孩溺水救护车因车多路堵错过救援时间 -BAC009S0904W0459 温州两男子为争女人驾奔驰宝马街头四次对撞 -BAC009S0904W0460 瑞安市商业街和联中路交叉口 -BAC009S0904W0461 一辆宝马和一辆奔驰四次相撞 -BAC009S0904W0462 两车损失高达数十万元 -BAC009S0904W0463 温州企业家卖房建养老院捐给当地却被闲置三年 -BAC009S0904W0464 浙江温州乐清七五岁的企业家虞一杰退休之后 -BAC009S0904W0465 拿出了自己全部的积蓄 -BAC009S0904W0466 还卖了自己在杭州和乐清的房子 -BAC009S0904W0467 但是养老院建成至今已经有三年了 -BAC009S0904W0468 那原因到底在哪里呢 -BAC009S0904W0469 温州体育局官员逼女教练陪酒当地纪委介入调查 -BAC009S0904W0470 以给编制五险一金等为借口 -BAC009S0904W0471 诱逼女教练陪他喝酒吃饭唱歌 -BAC009S0904W0472 并贴出多张聊天记录截图 -BAC009S0904W0473 立即引起众多网友关注 -BAC009S0904W0474 温州六旬老人辗转各地看公厕一二年还债七六万元 -BAC009S0904W0475 
温州网讯我不想死后给后人说闲话 -BAC009S0904W0476 省吃俭用也要把该还的钱尽力还掉 -BAC009S0904W0477 让借给我钱的好心人 -BAC009S0904W0478 这是富林愚老人发自内心的一句话 -BAC009S0904W0479 温州农贸市场现注胶虾业内人称为增加重量 -BAC009S0904W0480 虾里有明显的胶状物质图片来源网友微信日前 -BAC009S0904W0481 回家后发现大虾体内竟然被注射了不明胶状物 -BAC009S0904W0482 瑞安市市场监管局玉海所介入调查 -BAC009S0904W0483 当事水产摊贩已退还郑女士一零零元购虾款 -BAC009S0904W0484 温州化工仓库起火殃及附近河流大量死鱼漂河面 -BAC009S0904W0485 图为几天前村民拍到的河面 -BAC009S0904W0486 温州医生夫妇贩婴被批捕女儿欲捐款替父赎罪 -BAC009S0904W0487 温州医生涉贩卖儿童谎称活婴是死婴骗父母放弃 -BAC009S0904W0488 参考消息网九月二五日报道新报称 -BAC009S0904W0489 继陕西富平妇产科医生张淑侠因贩卖婴儿被判刑之后 -BAC009S0904W0490 中国再现医生涉嫌拐卖婴儿的案例 -BAC009S0904W0491 一对来自浙江温州的医生夫妇涉案被捕 -BAC009S0904W0492 温州瑞安市发生一起违停女司机故意伤害交通协警案件 -BAC009S0904W0493 温州城管协管员掌掴女清洁工被停职 -BAC009S0904W0494 该段视频时长仅有六秒 -BAC009S0904W0495 一名路人疾步上前将男子拦开 -BAC009S0905W0121 公积金贷款额度的提高 -BAC009S0905W0122 确实降低了刚需人群购房成本 -BAC009S0905W0123 对房地产市场的利好影响将是长期的 -BAC009S0905W0124 而这一落地难题也会影响其对刚需市场的支持效力 -BAC009S0905W0125 这一公积金新政实际上仍然存在很多门槛 -BAC009S0905W0126 在住房公积金贷款的申请过程中 -BAC009S0905W0127 有些要与房企具体项目挂钩 -BAC009S0905W0128 在公积金贷款额度上调后一个月内 -BAC009S0905W0129 北京公积金贷款成交量上涨百分之五 -BAC009S0905W0130 金融市场总体平稳鲁指冲高回落 -BAC009S0905W0131 但专家预计短期央行仍可能会积极维稳 -BAC009S0905W0132 汇率较大概率维持双向 -BAC009S0905W0133 相关公司股票走势招商银行 -BAC009S0905W0134 降准降息或再掀收益率的下降潮 -BAC009S0905W0135 双降加上广州公积金贷款新政落地 -BAC009S0905W0136 上周末成为潜在买家们争相咨询看楼的时机 -BAC009S0905W0137 期待岁末能有更多利好出现 -BAC009S0905W0138 第四季度二手住宅成交量将环比增幅在百分之七以内 -BAC009S0905W0139 价格要到明年初才出现上涨 -BAC009S0905W0140 广州日报讯记者林琳上周五 -BAC009S0905W0141 再加上广州公积金贷款新政终于落地 -BAC009S0905W0142 一系列利好消息影响下的首个周末 -BAC009S0905W0143 买家积极咨询看楼 -BAC009S0905W0144 降息消息传出后首日 -BAC009S0905W0145 地铺门店咨询量与七月同期相比约有百分之七左右的增幅 -BAC009S0905W0146 满堂红链家市场研究部高级经理周峰透露 -BAC009S0905W0147 店均电话咨询量比上一个周末增加十一百分之左右 -BAC009S0905W0148 看楼量对比上一周末大概增加百分之七左右 -BAC009S0905W0149 不过他认为这种增幅并不算太明显 -BAC009S0905W0150 搜房网广州二手房电商集团市场部总监罗来平发现 -BAC009S0905W0151 市场上约有两成业主反价 -BAC009S0905W0152 一个天河区的中介人士告诉记者 -BAC009S0905W0153 市民对连续多次降息已经麻木了 -BAC009S0905W0154 公积金贷款新政出台 -BAC009S0905W0155 市场不可能那么快有反应 -BAC009S0905W0156 七月广州二手楼市交投升温的态势已相当明确 -BAC009S0905W0157 按照这一趋势发展下去 -BAC009S0905W0158 再加上央行降息以及公积金新政等利好的叠加效应 -BAC009S0905W0159 有望进一步激活买家在接近年底这段时间的入市积极性 -BAC009S0905W0160 据阳光家缘网站公布数据统计 -BAC009S0905W0161 广州二手住宅市场七月的网签量已达一千套 -BAC009S0905W0162 广州二手住宅市场网签量达一千套 -BAC009S0905W0163 超过五月七千套的水平 -BAC009S0905W0164 目前市场上的低价房源已基本消耗完毕 -BAC009S0905W0165 广州二手房迎来新一轮涨价潮 -BAC009S0905W0166 搜房网广州二手房统计中心数据显示 -BAC009S0905W0167 广州五月二手房均价为一千元每平方米 -BAC009S0905W0168 比月初增长了一百元每平方米 -BAC009S0905W0169 因此判断随着利好政策的实施和成交量的增加 -BAC009S0905W0170 今年的房价还会有上升空间 -BAC009S0905W0171 广州还是在执行严厉的限购政策 -BAC009S0905W0172 我预计市场成交量会有所增加 -BAC009S0905W0173 但增加的幅度不会太大 -BAC009S0905W0174 他预测今年剩馀的两个月中 -BAC009S0905W0175 昨日人民币汇率小幅走弱 -BAC009S0905W0176 人民币中间价 -BAC009S0905W0177 美丽北京大型绿色公益品牌项目 -BAC009S0905W0178 在岸人民币兑美元收盘下跌百分之一 -BAC009S0905W0179 双降后首日在岸人民币由弱转强 -BAC009S0905W0180 人民币成交额减少百分之一 -BAC009S0905W0181 报七千亿美元 -BAC009S0905W0182 上周五的双降政策让市场担忧 -BAC009S0905W0183 投金或在经济增长速度放缓形势下加速外流 -BAC009S0905W0184 投资者担心这将加重人民币所面临的压力 -BAC009S0905W0185 就在上周五双降公布之后 -BAC009S0905W0186 招商银行同业金融部高级分析师刘东亮指出 -BAC009S0905W0187 加快老旧渔船更新改造 -BAC009S0905W0188 不断增强农业可持续发展能力 -BAC009S0905W0189 创建国家现代农业示范区 -BAC009S0905W0190 加大示范区建设力度 -BAC009S0905W0191 加大示范目建设投入力度 -BAC009S0905W0192 努力打造现代农业发展的典型和样板 -BAC009S0905W0193 发挥示范区引领作用 -BAC009S0905W0194 通过产业拉动技术辐射和人员培训等 -BAC009S0905W0195 带动周边地区现代农业加快发展 -BAC009S0905W0196 引导各地鉴借示范区发展现代农业的好做法和好经验 -BAC009S0905W0197 推动创建不同层次特色鲜明的现代农业示范区 -BAC009S0905W0198 按照分类指导突出重点梯次推进的思路 -BAC009S0905W0199 以七区二十三带农业战略格局为核心 -BAC009S0905W0200 着力建设重点推进率先实现和稳步发展三类区域 -BAC009S0905W0201 引领全国现代农业加快发展 -BAC009S0905W0202 重点推进区域 -BAC009S0905W0203 农业生产技术较为成熟 -BAC009S0905W0204 农业生产条件具有良好基础 
-BAC009S0905W0205 承担着主要农产品供给保证的主体功能 -BAC009S0905W0206 加快推进该区域现代农业建设 -BAC009S0905W0207 事关全国农业现代化进程和国家粮食安全大局 -BAC009S0905W0208 继续发挥该区域粮食安全基础保障作用 -BAC009S0905W0209 调动各方发展粮食生产积极性 -BAC009S0905W0210 以建设小麦玉米水稻大豆优势产业带为重点 -BAC009S0905W0211 深入开展粮食稳定增产行动 -BAC009S0905W0212 加强农田水利和高标准农田建设 -BAC009S0905W0213 提高农机装备和作业水平 -BAC009S0905W0214 大力开展高产创建和科技指导服务 -BAC009S0905W0215 推广防灾减灾增产关键技术 -BAC009S0905W0216 加快选育应用优良品种 -BAC009S0905W0217 大幅度提升粮食综合生产能力和现代化生产水平 -BAC009S0905W0218 大力发展粮食精深加工及仓储物流业 -BAC009S0905W0219 完善粮食仓储运输设备 -BAC009S0905W0220 引导龙头企业向优势产区集聚 -BAC009S0905W0221 提高粮食生产综合效益 -BAC009S0905W0222 其他主要农产品优势区 -BAC009S0905W0223 以及蔬菜蚕卓等农产品生产的主体区域 -BAC009S0905W0224 以建设区域内各类农产品优势产业带为重点 -BAC009S0905W0225 提高资源利用率和加工转化率 -BAC009S0905W0226 继续巩固棉油糖水果和蔬菜等产品供给保证地位 -BAC009S0905W0227 着力强化技术装备支撑 -BAC009S0905W0228 提高现代化生产水平 -BAC009S0905W0229 强化出口水产品生产基地功能 -BAC009S0905W0230 加快现代养殖业发展 -BAC009S0905W0231 率先实现区域 -BAC009S0905W0232 该区域交通区位市场和人力资源优势明显 -BAC009S0905W0233 资本技术等现代化生产要素集约化程度高 -BAC009S0905W0234 加快该区域现代农业建设 -BAC009S0905W0235 对于引领全国现代农业加快发展具有重要意义 -BAC009S0905W0236 东部沿海先导农业区 -BAC009S0905W0237 大力发展资本技术密集型农业 -BAC009S0905W0238 保持耕地面积不减少 -BAC009S0905W0239 探索企业化集团化发展模式 -BAC009S0905W0240 大力推进标准化生产和集约化经营 -BAC009S0905W0241 提高信息化优质化和品牌化水平 -BAC009S0905W0242 提升产品的科技含量和附加值 -BAC009S0905W0243 大城市郊区多功能农业区 -BAC009S0905W0244 主要指沿海地区以外的直辖市省会城市等大城市郊区 -BAC009S0905W0245 统筹推进新一轮菜篮子工程建设 -BAC009S0905W0246 合理确定大城市郊区菜篮子产品生产用地保有数量 -BAC009S0905W0247 提高大城市菜篮子产品的自给率 -BAC009S0905W0248 在稳定城市副食品供应保证能力的基础上 -BAC009S0905W0249 全面推进机械化标准化品牌化产业化发展 -BAC009S0905W0250 加快农田基础设备和现代农业装备建设 -BAC009S0905W0251 着力建设国家商品粮供给重点保证区 -BAC009S0905W0252 提升垦区现代农业发展水平 -BAC009S0905W0253 业界首次开始认真讨论苹果市值晋升万亿大关的潜力 -BAC009S0905W0254 苹果股票价格创下历史新高 -BAC009S0905W0255 苹果市值超过七千亿美元 -BAC009S0905W0256 如果按照每股一二七美元的股价来算 -BAC009S0905W0257 那么苹果市价约为七四四十亿美元 -BAC009S0905W0258 这一价格也是目前华尔街给出的最高估值 -BAC009S0905W0261 随着四克网络的在中国的展开 -BAC009S0905W0262 苹果对电动汽车表现出的浓厚兴趣 -BAC009S0905W0263 也能够给股票市场来带更多兴奋 -BAC009S0905W0264 苹果将继续向股东返还现金 -BAC009S0905W0265 四月份或将采取更多的举动 -BAC009S0905W0266 这些力量的结合将会推动苹果的市盈率大幅上正 -BAC009S0905W0267 苹果公司的市价将突破一万亿美金大关 -BAC009S0905W0268 这只是最乐观的估计 -BAC009S0905W0269 苹果在成长为万亿美元市场的巨无霸之前 -BAC009S0905W0270 还有很多阻碍要解决 -BAC009S0905W0271 先是价格昂贵功能鸡肋的特点遭到一众业内人士吐槽 -BAC009S0905W0273 屏幕良品率仅在百分之三十至百分之四十之间 -BAC009S0905W0274 苹果公司现在已将约三百万的原始订单削减了一半 -BAC009S0905W0275 准备和特斯拉一较高下 -BAC009S0905W0276 但相对于传统的汽车制造工业 -BAC009S0905W0277 苹果作为消费数码产品的公司是否具备造车能力 -BAC009S0905W0279 目前大部分华尔街分析师们都对苹果的未来保持乐观 -BAC009S0905W0280 仅有三点百分之四的分析师建议卖出 -BAC009S0905W0281 中国经营网注有国外媒体报道称 -BAC009S0905W0282 苹果市场价值达到七千亿美元刚刚过去几个月 -BAC009S0905W0283 已经有股票经纪公司预测 -BAC009S0905W0284 苹果能否摆脱王者魔咒 -BAC009S0905W0285 苹果晋身道指固属众望所归 -BAC009S0905W0287 而苹果得以顺利跻身道指 -BAC009S0905W0288 亦拜股份去年六月一拆七所赐 -BAC009S0905W0289 却完全不足以彰显编制机构与时并进 -BAC009S0905W0290 苹果固然不会因此而升格 -BAC009S0905W0292 毕竟还有许多人的心愿 -BAC009S0905W0293 老毕于跟苹果押注太阳能一文问过大家 -BAC009S0905W0294 苹果股价在说不准的时间内有望上升三成 -BAC009S0905W0295 是否能令捧场客心满意足 -BAC009S0905W0296 问题焦点若是太阳能 -BAC009S0905W0297 诸位自然不会满足于前面提及的潜在回报 -BAC009S0905W0298 这家市值离万亿美元不远的股王 -BAC009S0905W0299 难不成真能第三期发育 -BAC009S0905W0300 读者若信经济学人 -BAC009S0905W0302 若定苹果第三期发育的立场已呼之欲出 -BAC009S0905W0303 手机辐射的比吸收率最高限值为二瓦特每千克 -BAC009S0905W0304 我国的标准和国际差不多 -BAC009S0905W0306 对生活中的电磁辐射进行了全面健康风险评估 -BAC009S0905W0307 不存在实际健康问题 -BAC009S0905W0308 辐射吸收率在国家的安全标准范围之内 -BAC009S0905W0309 电话手表的辐射主要来自天线 -BAC009S0905W0310 包括外置天线和内置天线 -BAC009S0905W0311 正规厂家生产的电话手表辐射一般符合国家标准 -BAC009S0905W0312 以小天才电话手表为例 -BAC009S0905W0313 根据权威机构检测报告显示 -BAC009S0905W0314 小天才电话手表辐射远小于国家标准二瓦特每千克 -BAC009S0905W0315 只要辐射值小于或等于国家标准值 -BAC009S0905W0316 就是符合国家标准的 -BAC009S0905W0317 小天才负责人介绍说 -BAC009S0905W0318 手机是直接贴着耳朵使用 
-BAC009S0905W0319 而电话手表通话时离头部还有一百零一百一十五厘米的距离 -BAC009S0905W0320 可见电话手表的辐射比手机还小 -BAC009S0905W0321 不排除有一些杂牌的电话手表辐射会超标 -BAC009S0905W0322 建议家长通过正规渠道购买正规厂家生产的产品 -BAC009S0905W0323 电话手表应如何选购 -BAC009S0905W0324 关于儿童电话手表应该如何选购 -BAC009S0905W0325 也是众多家长特别想了解的 -BAC009S0905W0326 除了之前提到的关于辐射的测试报告外 -BAC009S0905W0327 专家提醒相关的产品认证也是消费者必须要关注的 -BAC009S0905W0328 所有在中国境内销售及使用的无线电组件产品 -BAC009S0905W0329 必须取得无线电型号的核准认证 -BAC009S0905W0330 没有该认证的产品属于违法产品 -BAC009S0905W0331 未获得进网许可证的 -BAC009S0905W0332 不得接入公用电信网使用和在国内销售 -BAC009S0905W0333 小天才电话手表等国内几个大品牌都有 -BAC009S0905W0334 这也是选购电话手表要注意关注的 -BAC009S0905W0335 很多家长都在给孩子购置各种学习用 -BAC009S0905W0336 网络安全漏洞挡道车联网阴霾笼罩搜狐科技 -BAC009S0905W0337 对频频的骚扰电话显得无可奈何 -BAC009S0905W0338 由郎平挂帅的中国女排在名古屋赛区 -BAC009S0905W0339 提升战绩为九胜一负反超至榜首位置 -BAC009S0905W0340 只要在明天的最后一战中赢下东道主日本 -BAC009S0905W0341 高清女排力擒俄罗斯夺冠占主动众将喜极而泣 -BAC009S0905W0342 能够赢得比赛真的很开心 -BAC009S0905W0343 对手给我们制造了非常多的困难 -BAC009S0905W0344 我和队友们一起团结努力克服了这些困难 -BAC009S0905W0345 在今天的比赛中曾春蕾首发出场 -BAC009S0905W0346 凭借十三分位列本队和扣球榜第二位 -BAC009S0905W0347 而主教练郎平则在全面性方面对大家做了更多要求 -BAC009S0905W0348 说到今天获胜的原因 -BAC009S0905W0349 作为队长出席新闻发布会的曾春蕾提到了凝聚力三个字 -BAC009S0905W0350 凝聚力一直都是中国女排的传统 -BAC009S0905W0351 它都是女排精神的一部分 -BAC009S0905W0352 当队伍遇到一些困难的时候 -BAC009S0905W0353 我们不需要教练要求就会团结在一起 -BAC009S0905W0354 像这种无形的向心力是在队伍中一直存在的 -BAC009S0905W0355 在今天的比赛中中国女排始终相互鼓励相互扶持 -BAC009S0905W0356 在几度遇险的情况下顽强咬住 -BAC009S0905W0357 无论年轻队员还是老队员都可能在比赛中出现起伏 -BAC009S0905W0358 我们要做的就是相互弥补 -BAC009S0905W0359 今天作为队长我更多是在精神层面上提醒大家 -BAC009S0905W0360 而在技术上年轻队员也弥补了我的不足 -BAC009S0905W0361 这是我们每个人都应该做的 -BAC009S0905W0362 如果能够战而胜之的话 -BAC009S0905W0363 明天还剩最后一场比赛 -BAC009S0905W0364 对我们来讲最重要的就是兢兢业业 -BAC009S0905W0365 大家回去之后将马上投入到对日本的准备中 -BAC009S0905W0366 明天比赛里我们会冷静下来落实到细节 -BAC009S0905W0367 一分分和对手拼到最后 -BAC009S0905W0368 北京时间明天晚间十八点 -BAC009S0905W0369 中国女排将应战日本队 -BAC009S0905W0370 搜狐体育郭健文 -BAC009S0905W0371 女排三零阿根廷朱婷复出扣杀状态神勇 -BAC009S0905W0372 搜狐体育郭健九月一日发自日本冈山今天下午 -BAC009S0905W0373 二零一五年第十二届女排世界杯单循环赛战至第八轮 -BAC009S0905W0374 从而将战绩提升为七胜一负积二十一分 -BAC009S0905W0375 本场比赛朱婷复出担任首发主攻并当选为当场最佳 -BAC009S0905W0376 虽然在比赛中没有得到出场机会 -BAC009S0905W0377 但曾春蕾赛后还是以队长身份出席了新闻发布会 -BAC009S0905W0378 很开心赢得今天的比赛 -BAC009S0905W0379 队伍凭借稳定的整体发挥获得了三零的胜利 -BAC009S0905W0380 曾春蕾表示阿根廷是一支拥有良好防守能力的球队 -BAC009S0905W0381 这一点也值得中国女排学习 -BAC009S0905W0382 中国女排队长坦言不仅是后面的几场比赛 -BAC009S0905W0383 每场较量对球队都很关键 -BAC009S0905W0384 我们球员要做的就是立足于自己 -BAC009S0905W0385 争取把自身水平发挥出来 -BAC009S0905W0386 至于其他球队的比赛结果 -BAC009S0905W0387 阿根廷队队长索萨认为 -BAC009S0905W0388 中国队的快速打法给自己的球队制造了很大的麻烦 -BAC009S0905W0389 像她们这样的亚洲对手速度很快 -BAC009S0905W0390 对我们来说比赛很困难 -BAC009S0905W0391 还有三场非常重要的比赛 -BAC009S0905W0392 希望得到想要的结果 -BAC009S0905W0393 对阵中国这样的球队是非常困难的 -BAC009S0905W0394 令我满意的是球队能够以一个积极的态度进行比赛 -BAC009S0905W0395 以前接触比较多的巴西队速度也很快 -BAC009S0905W0396 我们应该多和亚洲球队比赛来适应这样的打法 -BAC009S0905W0397 接下来中国女排将转战名古屋 -BAC009S0905W0398 从九月四日起迎接多米尼加俄罗斯和日本的挑战 -BAC009S0905W0399 搜狐体育郭健文 -BAC009S0905W0400 广州日报社记者许胚日前 -BAC009S0905W0401 英国人保拉拉德克利夫公开了自己的血液检测结果 -BAC009S0905W0402 以此证明自己并没有使用过违禁药物 -BAC009S0905W0403 在英国议会关于血液兴奋剂的听证会中 -BAC009S0905W0404 将出任玩具总动员四的导演 -BAC009S0905W0405 影片将在二零一七年登陆全国 -BAC009S0905W0406 来源时光网昨日 -BAC009S0905W0407 在英格兰多塞特群的波维顿坦克博物馆 -BAC009S0905W0408 至于有传拍台庆剧很容易获奖 -BAC009S0905W0410 她笑称我不想说我没有信心 -BAC009S0905W0411 很多演员都非常棒 -BAC009S0905W0412 搜狐娱乐讯北京时间七月二十日消息 -BAC009S0905W0413 据香港媒体报导 -BAC009S0905W0417 不到几个月的时间已爱得如此火热了 -BAC009S0905W0418 两人不想恋情变得高调 -BAC009S0905W0419 却多次被身边的人将他们的行踪暴露出来 -BAC009S0905W0420 两人被传媒追问恋情时都要求给予空间 -BAC009S0905W0421 看来他们需要身边的朋友保密他们的行踪 -BAC009S0905W0422 这样做反而更实际 -BAC009S0905W0423 搜狐娱乐讯北京时间六月三十日消息 -BAC009S0905W0424 据香港媒体报道 -BAC009S0905W0425 陈凯琳的心被郑嘉颖成功俘虏 -BAC009S0905W0426 更是郑嘉颖愿意公开承认的女友 
-BAC009S0905W0427 不过二人因给陈嘉宝把生日合照在网上公开才泄露恋情 -BAC009S0905W0428 对此陈凯琳没有怪责陈嘉宝 -BAC009S0905W0429 觉得对方只是分享生日上的喜悦 -BAC009S0905W0430 陈凯琳之前说没交过男友 -BAC009S0905W0431 温州鹿城区宣传部官微做出回应 -BAC009S0905W0432 称涉事男子为某街道协管员 -BAC009S0905W0433 其发现清洁工保洁不到位 -BAC009S0905W0434 因此与清洁工引发争执 -BAC009S0905W0435 进一步导致肢体冲突 -BAC009S0905W0436 目前该协管已经停职 -BAC009S0905W0437 温州多地商户拉横幅求降租导购不少店亏本经营 -BAC009S0905W0438 东越花苑不少商铺都关门转租记者谢国林摄 -BAC009S0905W0439 温州大妈年逾半百冒充女儿成功骗婚多名小鲜肉 -BAC009S0905W0440 该女子已经行骗多地 -BAC009S0905W0441 她一直假冒的林某竟是她的女儿 -BAC009S0905W0442 而且她还是已婚身份 -BAC009S0905W0443 凭着远比真实年龄看起来要年经许多的容貌 -BAC009S0905W0444 雷某一直在河北邢台衡水等地干着游走骗婚的勾当 -BAC009S0905W0445 温州天价窝头事件背后顾客要持持赔三条中华 -BAC009S0905W0446 网络上一张永嘉桥头国际饭店的结帐单十分引人注目 -BAC009S0905W0447 菜单显示该饭店的荞麦窝窝头卖三八元一个 -BAC009S0905W0448 三零馀位食客吃了四五个窝窝头 -BAC009S0905W0449 发现事情并没有这么简单 -BAC009S0905W0450 温州女协管员侮辱环卫工行尸走肉已辞职 -BAC009S0905W0451 温州女协管员发伪辱性文字环卫节一群行尸走肉 -BAC009S0905W0452 温州女婴打疫苗后口吐白沫抽搐昏迷 -BAC009S0905W0453 温州网讯在温医大附属育英儿童医院的重监护室里 -BAC009S0905W0454 才七个月大的女童腾腾化名已昏迷了两天时间 -BAC009S0905W0455 随即被送到儿童医院进行抢救 -BAC009S0905W0456 区市省三级疾控部门专家已介入调查 -BAC009S0905W0457 温州家庭误食毒蘑菇后续小女儿已确诊脑死亡 -BAC009S0905W0458 温州少年峡谷失踪续二零万馀元赔偿款执行到位 -BAC009S0905W0459 金报讯记者蓝莹还记得小温吗 -BAC009S0905W0460 二零一三六二三 -BAC009S0905W0461 温州一四岁少年小温迷失莒溪大峡谷 -BAC009S0905W0462 浙江省史上规模最大的户外救援行动开始了 -BAC009S0905W0463 经过长达四个月的搜救 -BAC009S0905W0464 最终在峡谷上游的石头夹缝下 -BAC009S0905W0465 发现小温残缺的遗骸 -BAC009S0905W0466 温州市场现胶注虾业内不仅增重卖相更好 -BAC009S0905W0467 温州市民郑女士在农贸市场购买了三只大虾 -BAC009S0905W0468 回家后发现大虾体内居然被注射了不明胶状物 -BAC009S0905W0469 生活经验让郑女士起了疑心 -BAC009S0905W0470 她将几只虾的图片通过微博发布 -BAC009S0905W0471 迅速引起了网友以及当地监管部门的关注 -BAC009S0905W0472 温州市域铁路将成为全国第一条城市交通铁路 -BAC009S0905W0475 温州市治堵办的负责人表示 -BAC009S0905W0477 温州开水浇头服务员被批捕涉嫌故意伤害罪 -BAC009S0905W0478 京华时报讯昨天下午 -BAC009S0905W0479 浙江温州鹿城区检察院通报九月六日 -BAC009S0905W0480 开水淋顾客的火锅店服务员朱某被依法批准逮捕 -BAC009S0905W0481 温州惊现注胶虾续苍南再查六公斤注胶大虾 -BAC009S0905W0482 温州一菜场惊现注胶虾追踪 -BAC009S0905W0483 温州昆明出现注胶虾产地均指向广东湛江 -BAC009S0905W0484 浙江温州市一位市民一零零元买回三只斑节虾 -BAC009S0905W0485 在虾体内发现疑似胶状物质七月二十一日 -BAC009S0905W0486 云南昆明市同样发现类似注胶虾 -BAC009S0905W0487 国内两地出现注胶虾踪迹 -BAC009S0905W0488 且产地均指向广东省湛江市 -BAC009S0905W0489 温州景山花木市场发生大火火势已得到基本控制 -BAC009S0905W0490 温州服务员向顾客头上泼开水继而已被批捕 -BAC009S0905W0491 今天九月八日下午 -BAC009S0905W0492 因火锅加水问题与顾客发生争执 -BAC009S0905W0493 为泄愤将开水淋到顾客头上 -BAC009S0905W0494 并将其摁倒在地殴打 -BAC009S0905W0495 火锅店服务员朱某被温州市鹿城区检察院依法批准逮捕 -BAC009S0906W0121 双降会令市场看贬人民币的情绪持续 -BAC009S0906W0122 人民币未来贬值压力依然较大 -BAC009S0906W0123 预计短期央行仍可能会积极维稳 -BAC009S0906W0124 汇率较大概率维持双向波动 -BAC009S0906W0125 公积金松绑接棒释压房价下跌动力趋缓至搜狐财经 -BAC009S0906W0126 上海南昌等城市近期继续松绑了公积金贷款政策 -BAC009S0906W0127 而南昌除了放松首套房界定标准 -BAC009S0906W0128 还降低了首套房公积金首付比例 -BAC009S0906W0129 公积金贷款首付款比例不低于百分之七 -BAC009S0906W0130 上海易居研究院研究院严跃进认为 -BAC009S0906W0131 存销比已经见顶回落 -BAC009S0906W0132 房价下跌压力将趋于缓解 -BAC009S0906W0133 公积金大力度松绑相关商业银行信贷政策 -BAC009S0906W0134 各地对公积金贷款的松绑力度更大 -BAC009S0906W0135 江苏省对省级机关住房公积金政策做出了调整 -BAC009S0906W0136 昆明市住房公积金管理中心出台三项公积金新政 -BAC009S0906W0137 上海市公积金管理中心公布公积金新政 -BAC009S0906W0138 有一套住房并已结清公积金贷款 -BAC009S0906W0139 再次申请公积金贷款购房的 -BAC009S0906W0140 参照首套房贷款政策 -BAC009S0906W0141 中原地产市场研究部统计数据显示截至目前 -BAC009S0906W0142 二套执行认贷不认房 -BAC009S0906W0143 二套首付降比百分之七 -BAC009S0906W0144 南京武汉市放宽第二套房公积金贷款门槛 -BAC009S0906W0145 扬州杭州成都无锡等地 -BAC009S0906W0146 已有一套住房并结清贷款馀额的家庭 -BAC009S0906W0147 再购房执行首套房贷款政策 -BAC009S0906W0148 中原地产分析师张大伟认为 -BAC009S0906W0149 公积金是地方政府可以直接通过政策调整动用的资金 -BAC009S0906W0150 用公积金政策刺激市场是地方政府最习惯的举措 -BAC009S0906W0151 对购房者心理影响也非常大 -BAC009S0906W0152 由于公积金贷款利率相当于市场贷款利率的七折 -BAC009S0906W0153 对需求拉动作用比较大 -BAC009S0906W0154 上海作为一线城市代表 -BAC009S0906W0155 对房地产市场的心理影响比较大 -BAC009S0906W0156 预计还有其他城市将发布同类型松绑政策 -BAC009S0906W0157 房价下跌压力缓解各地救市政策不断 
-BAC009S0906W0158 房企促销力度也在加大 -BAC009S0906W0159 各城市库存压力正在减小 -BAC009S0906W0160 房价下跌压力趋于缓解 -BAC009S0906W0161 上海易居房地产研究院数据显示 -BAC009S0906W0162 同比增长百分之七 -BAC009S0906W0163 这是今年五月份以来库存环比增幅最小的一次 -BAC009S0906W0164 环比增长百分之七 -BAC009S0906W0165 同比减小百分之七 -BAC009S0906W0166 五月份的供求关系是今年前五个月最均衡的一次 -BAC009S0906W0167 存销比见顶的态势基本确立 -BAC009S0906W0168 五个城市新建商品住宅存销比为七个月 -BAC009S0906W0169 该存销比数值为七个月 -BAC009S0906W0170 这直接利好去库存目标的实现 -BAC009S0906W0171 二到五个城市的总体水平看 -BAC009S0906W0172 库存去化周期依然偏大 -BAC009S0906W0173 说明各城市涨价的时机还不成熟 -BAC009S0906W0174 一至七月份大多数城市还是会采取积极降价的策略 -BAC009S0906W0175 房价未来可能会略微有下跌 -BAC009S0906W0176 一线城市由于需求面大 -BAC009S0906W0177 未来住宅价格会企稳回升 -BAC009S0906W0178 一些库存量较大的三四线城市 -BAC009S0906W0179 房价继续下行的可能性仍然比较大 -BAC009S0906W0180 同策咨询研究部总监张宏伟认为 -BAC009S0906W0181 月度市场成交量开始出现环比回升 -BAC009S0906W0182 市场去库存的速度在适度提高 -BAC009S0906W0183 从一线城市及存销比在七个月以下的城市来看 -BAC009S0906W0184 市场基本面有可能会率先好转 -BAC009S0906W0185 年底将出现翘尾行情 -BAC009S0906W0186 但年底出现翘尾行情并不代表楼市已经回暖 -BAC009S0906W0187 示范带动周边地区发展 -BAC009S0906W0188 并在农业走出去方面发挥重要作用 -BAC009S0906W0189 稳步发展区域 -BAC009S0906W0190 主要指草原生态经济区 -BAC009S0906W0191 包括北方干旱半干旱草原地区和青藏高原草原地区 -BAC009S0906W0192 加快该地区域现代农业建设 -BAC009S0906W0193 对于保障全国生态安全具有不可代替的战略作用 -BAC009S0906W0194 牢固树立生产生态有机结合生态优先的基本方针 -BAC009S0906W0195 加强草原生态环境保护和建设 -BAC009S0906W0196 稳步推进退牧还草和游牧民定居工程 -BAC009S0906W0197 加强以节水灌溉饲草地为重点的牧区水利建设 -BAC009S0906W0198 建立草原增加碳汇和生态补偿机制 -BAC009S0906W0199 转变畜牧业发展方式 -BAC009S0906W0200 优化生产布局和畜群结构 -BAC009S0906W0201 提高科学饲养和经营水平 -BAC009S0906W0202 加强农牧互补牧养结合 -BAC009S0906W0203 以最急需最关键最薄弱的环节和领域为重点 -BAC009S0906W0204 组织实施一批重大工程 -BAC009S0906W0205 全面分实现代农业发展的物质基础 -BAC009S0906W0206 一旱涝保收高标准农田建设工程 -BAC009S0906W0207 落实土壤改良地力培肥等措施 -BAC009S0906W0208 加快先进适用耕作技术推广应用 -BAC009S0906W0209 新建旱涝保收高标准农田四亿亩 -BAC009S0906W0210 新增千亿斤粮食生产能力建设工程 -BAC009S0906W0211 棉油糖生产基地建设工程 -BAC009S0906W0212 加强新疆黄淮海地区长江流域棉花生产基地建设 -BAC009S0906W0213 支持南方甘蔗和北方甜菜生产基地建设 -BAC009S0906W0214 着力改善田间基础设施良种科研繁育设施等生产条件 -BAC009S0906W0215 新一轮菜篮子建设工程 -BAC009S0906W0216 加强园艺作物标准园建设 -BAC009S0906W0217 引导建设优质农产品物流配送中心 -BAC009S0906W0218 发展农产品电子商务 -BAC009S0906W0219 健全农作物种质资源和畜禽遗传资源保存体系 -BAC009S0906W0220 建设动植物基因信息库 -BAC009S0906W0221 建立转基因生物安全保障体系 -BAC009S0906W0222 建设国家级农作物育制种基地 -BAC009S0906W0223 完善农作物品种试验和种子检测设施条件 -BAC009S0906W0224 建设水产遗传育种中心和原良种场 -BAC009S0906W0225 渔政渔港建设工程 -BAC009S0906W0226 建设一批大型渔政船 -BAC009S0906W0227 加强渔政基地和管理信息系统建设 -BAC009S0906W0228 动植物保护工程 -BAC009S0906W0229 健全六级动物疫病防控体系 -BAC009S0906W0230 健全兽药质量安全监管和动物防疫技术支撑体系 -BAC009S0906W0231 建设四级农作物病虫疫情监测防控体系 -BAC009S0906W0232 完善监测防控监管等设施设备 -BAC009S0906W0233 农产品质量安全检验检测能力建设工程 -BAC009S0906W0234 改扩建检验检测实验室 -BAC009S0906W0235 建设部级水产品质量安全研究中心 -BAC009S0906W0236 补充建设一批部级专业质检中心 -BAC009S0906W0237 构建全国农产品质量安全监测信息预警平台 -BAC009S0906W0238 乡镇农业公共服务能力建设工程 -BAC009S0906W0239 农业机械化推进工程 -BAC009S0906W0240 加大对秸秆机械化还田和收集打捆机具配套的支持力度 -BAC009S0906W0241 完善农业气象等方面的航空站和作业起降点基础设施 -BAC009S0906W0242 扶持农机服务组织发展 -BAC009S0906W0243 农业信息化建设工程 -BAC009S0906W0244 开展农业物物联网应用示范 -BAC009S0906W0245 加大天然草原退牧还草工程实施力度 -BAC009S0906W0246 加强京津风沙源区草地治理 -BAC009S0906W0247 继续加强三江源等地区草原生态建设 -BAC009S0906W0248 开展草原自然保护区建设和南方草地综合治理 -BAC009S0906W0249 加快实施游牧民定居工程 -BAC009S0906W0250 人工种草五亿亩 -BAC009S0906W0251 新型农村人才培养工程 -BAC009S0906W0252 必须从我国国情和农业发展实际出发 -BAC009S0906W0253 亦不可能跟自然规律抗衡 -BAC009S0906W0254 无止境地重复过去十年的惊人增长 -BAC009S0906W0256 企业于某个领域称王称霸的一刻 -BAC009S0906W0257 往往就是公司陷入灾难的开始 -BAC009S0906W0258 市场给予它的估值却异常克制 -BAC009S0906W0259 以二零一五年度每股八点五美元的盈利预测为准 -BAC009S0906W0260 苹果市盈率仅一五倍 -BAC009S0906W0261 莫说跟其他创意十足的科技股相提并论 -BAC009S0906W0262 比之大市亦有所不如 -BAC009S0906W0263 苹果早晚将步之前过气股王的后尘 -BAC009S0906W0265 不同意的地方多于同意 -BAC009S0906W0266 从随身听到智能电话 -BAC009S0906W0267 苹果的拿手好戏是把市场上原霸主拉下马 
-BAC009S0906W0268 确认消费者喜新厌旧后 -BAC009S0906W0269 快速建立以苹果产品服务为核心的生态系统 -BAC009S0906W0270 透过不断的更新换代 -BAC009S0906W0271 索尼黑莓以至诺基亚 -BAC009S0906W0272 在最风光的时候看不见来自颠复者的威胁 -BAC009S0906W0273 从不可一世到遭对手边缘化 -BAC009S0906W0274 消费者贪新忘旧虽亦可能适用于苹果 -BAC009S0906W0277 对投资者大有参考价值 -BAC009S0906W0278 一九八三至二零零五年 -BAC009S0906W0279 标普五百指数市值冠军宝座 -BAC009S0906W0281 四大天王平均累计回报高达一千二百分之八十二 -BAC009S0906W0282 四倍于标指同期的三十百分之二 -BAC009S0906W0283 四大天王平均回报仅一百分之二十五 -BAC009S0906W0284 明显跑输标普五百指数的一百分之九十九 -BAC009S0906W0286 销售也总有饱和的一天 -BAC009S0906W0287 苹果能否第三期发育 -BAC009S0906W0290 从市场始终不愿给予苹果较高估值可见 -BAC009S0906W0291 管理层眼光得再高一点 -BAC009S0906W0292 苹果有意进军汽车产业 -BAC009S0906W0294 老毕对此说甚有保留 -BAC009S0906W0295 而库克若真有此意 -BAC009S0906W0297 汽车是苹果下一个颠复目标 -BAC009S0906W0298 马斯克乃商界新一代万人迷 -BAC009S0906W0299 人气不逊乔布斯在世之时 -BAC009S0906W0300 三藩市纪事报指此君曾与库克碰头 -BAC009S0906W0302 越多人讲往往越难成事 -BAC009S0906W0303 有黑客在网络上兜售车主信息 -BAC009S0906W0304 雪铁龙车主信息泄露规模或超十万条 -BAC009S0906W0305 该平台上显示的漏洞状态是 -BAC009S0906W0306 漏洞已通知厂商但厂商忽略该漏洞 -BAC009S0906W0307 该公司内部相关人士回应称 -BAC009S0906W0308 东风雪铁龙的客户数据存放在专业数据库中 -BAC009S0906W0309 对数据库设有监控及记录 -BAC009S0906W0310 对用户信息做足了保密工作 -BAC009S0906W0311 有业内人士分析指出 -BAC009S0906W0312 车企在信息安全方面的投入不足已经越来越成为其软肋 -BAC009S0906W0313 其中近一半的漏洞都可能造成网站用户的信息泄露 -BAC009S0906W0314 背后涉及到百万车主的信息安全 -BAC009S0906W0315 而绝大多数漏洞状态都是未联系到厂商或厂商忽略 -BAC009S0906W0316 汽车这个行业缺乏成熟的网络安全管理体系 -BAC009S0906W0317 网络运营人员的安全素质有待提高 -BAC009S0906W0318 很多车企网站是外包给第三方公司开发的 -BAC009S0906W0319 没有交付信息安全公司进行评估 -BAC009S0906W0320 因此更有可能留下信息安全风险 -BAC009S0906W0321 用户隐私遭泄露的问题日益突出 -BAC009S0906W0322 如果许多传统制造行业中的企业一样 -BAC009S0906W0323 车企诚待转化互联网思维以及加强互联网安全管控等 -BAC009S0906W0324 要跟上互联网发展的步伐不太容易 -BAC009S0906W0325 随着互联网快速发展 -BAC009S0906W0326 这类专业人才往往集中在互联网企业 -BAC009S0906W0327 而车企相对缺乏这类人才 -BAC009S0906W0328 网络安全管理体系方面投资非常大 -BAC009S0906W0329 涉及人才软件硬件服务以及管理等方面 -BAC009S0906W0330 互联网企业也是一步步投入不断完善 -BAC009S0906W0331 不同行业在网络安全方面投入比例不一 -BAC009S0906W0332 预计汽车行业在网络安全方面投入往往较少 -BAC009S0906W0333 一些车企为了节约成本 -BAC009S0906W0334 往往将数据库服务器都放在公网上 -BAC009S0906W0335 这样很容易被黑客攻破 -BAC009S0906W0336 一旦发现系统有漏洞 -BAC009S0906W0337 将及时采取主动或被动措施 -BAC009S0906W0338 拉德克利夫认为自己被暗指有问题 -BAC009S0906W0339 但检测结果的异常并不能就证实运动员使用违禁药物 -BAC009S0906W0340 因为导致这项数值波动的原因有很多 -BAC009S0906W0341 包括高原训练或身体过度消耗后立刻接受检测 -BAC009S0906W0342 因此我请求世界反兴奋剂机构回顾前后所有的数据 -BAC009S0906W0343 盘点昆仑决二零一五五大飙血之战搜狐体育 -BAC009S0906W0344 无疑是擂台上最能引爆肾上腺素的震撼一幕 -BAC009S0906W0345 令拳迷记忆犹新的飙血之战不计其数 -BAC009S0906W0346 而这些战斗也成为了圈内久聊不厌的经典谈资 -BAC009S0906W0347 在数百场真枪实弹的巅峰对决中 -BAC009S0906W0348 不乏诸多脍炙人口的飙血之战 -BAC009S0906W0349 十月二十八日与三十一日 -BAC009S0906W0350 下面小编将盘点本年度迄今为止昆仑决五大惨烈血战 -BAC009S0906W0353 北京时间七月二十八日晚 -BAC009S0906W0354 这场对决的惨烈程度超出了所有人的想象 -BAC009S0906W0355 比赛开始后仅仅十馀秒 -BAC009S0906W0356 播求的头部便被对方的肘击割破 -BAC009S0906W0357 打出一道深深的血口 -BAC009S0906W0358 伴随着双方激战的火爆升级 -BAC009S0906W0359 播求头部的伤口进一步扩大 -BAC009S0906W0360 几乎全部被鲜血复盖的半边身体令人触目惊心 -BAC009S0906W0361 双方的肘击对轰场面接连上演 -BAC009S0906W0362 哈亚的肘击刁钻狠辣 -BAC009S0906W0363 直肘反肘交替使用 -BAC009S0906W0364 令人防不胜防播求的肘击则更具王者霸气 -BAC009S0906W0365 以大刀阔斧的摆肘砸肘为主 -BAC009S0906W0366 凶悍直接大开大合 -BAC009S0906W0367 加之其半身浴血的黝黑健美体魄 -BAC009S0906W0368 颇似从地狱中走出的修罗帝王 -BAC009S0906W0369 播求久负盛名的扫腿与冲膝技术开始发威 -BAC009S0906W0370 令对手不再敢贸然近身 -BAC009S0906W0371 不得不暂停比赛进行处理 -BAC009S0906W0372 双方均向对手发起了不遗馀力的猛攻 -BAC009S0906W0373 这场史诗级的双王血战在两大强者最后的对决中 -BAC009S0906W0374 迎来了结束铃声的敲响 -BAC009S0906W0375 哈立以争议性的点数优势宣告获胜 -BAC009S0906W0376 浑身是血的播求由于头部三处动脉破裂失血过多 -BAC009S0906W0377 被立刻送往医院接受紧急输血治疗 -BAC009S0906W0378 也被送往医院进行抢救 -BAC009S0906W0379 对于任何一个目睹了整场比赛过程的人来讲 -BAC009S0906W0380 这场史诗级惊天血战中没有失败者 -BAC009S0906W0383 二零一五年六月七日 -BAC009S0906W0384 昆仑决雄霸山城在重庆江南体育馆重装上阵 -BAC009S0906W0385 面对身高臂展明显占优的对手 -BAC009S0906W0386 雅桑克莱并没有采取矮个子拳手惯用的闪击式打法 
-BAC009S0906W0387 而是王气十足地向对手进行正面逼近 -BAC009S0906W0388 雅桑克莱的优势继续在扩大 -BAC009S0906W0389 标志性的扫腿重击力道沉猛的后手重拳纷纷呼啸而出 -BAC009S0906W0390 在其左扫腿无情踢击之下 -BAC009S0906W0391 祖耶夫的右肋很快便被踢出大片鲜红的淤血斑痕 -BAC009S0906W0392 经验丰富的雅桑克莱开始刻意放缓节奏 -BAC009S0906W0393 对已是强弩之末的对手进行消耗 -BAC009S0906W0394 此时的祖耶夫右眼已经肿胀得完全封闭 -BAC009S0906W0395 只能依靠顽强的意志进行支撑 -BAC009S0906W0396 雅桑克莱的组合拳将祖耶夫重重击倒然而 -BAC009S0906W0397 意志力惊人的白俄罗斯特种兵被没有就此放弃 -BAC009S0906W0398 顽强的意志力博得了对手以及全场观众致意 -BAC009S0906W0399 比赛在两名王者最后的对决中 -BAC009S0906W0400 比赛结果已经无需裁判的裁定 -BAC009S0906W0401 但看两人比赛后的面部状况 -BAC009S0906W0402 夺得了自己在昆仑拳坛上的第二场重要胜利 -BAC009S0906W0404 布拉德皮特新片狂怒接受了宣传媒体拍照 -BAC009S0906W0405 我们可以清晰看到皮特的结婚戒指 -BAC009S0906W0406 今天确定了上映日期二零一七年四月十七日 -BAC009S0906W0407 这是后年春季档的一个黄金上映期 -BAC009S0906W0408 看来郑嘉颖是她的初恋 -BAC009S0906W0409 问到他们在法国拍戏定情的细节 -BAC009S0906W0410 陈凯琳也拒绝回答 -BAC009S0906W0411 但就希望外界多给予他们发展空间 -BAC009S0906W0412 搜狐娱乐讯据香港媒体报道 -BAC009S0906W0413 早前有传媒更拍到陈凯琳直上嘉颖住所短聚 -BAC009S0906W0414 父女恋纸包不住火 -BAC009S0906W0415 两人于异国拍外景晨夕相对 -BAC009S0906W0416 感情一日千里 -BAC009S0906W0418 陈势安两天一夜没洗澡刷牙 -BAC009S0906W0419 猛嗑薄荷喉糖 -BAC009S0906W0420 搜狐娱乐讯据台湾媒体报道 -BAC009S0906W0421 香港女星吴君如与导演陈可辛爱情长跑十八年 -BAC009S0906W0422 虽然没有注册结婚 -BAC009S0906W0423 但两人关系比一般夫妻更加紧密 -BAC009S0906W0424 她日前被媒体目击与陈可辛在大街上逛街血拼 -BAC009S0906W0425 且沿途有说有笑 -BAC009S0906W0426 一路上都十指紧扣 -BAC009S0906W0427 甜蜜恩爱的模样彷彿热恋中的情侣 -BAC009S0906W0428 搜狐娱乐讯据香港媒体报导 -BAC009S0906W0429 一直邀请陈善之担任经理人 -BAC009S0906W0430 并兼任李嘉欣经理人及处理旗下其他艺人的合约事宜 -BAC009S0906W0431 执法人员将王靖苏押解回温州 -BAC009S0906W0432 温州水库沉车案现男女腐尸女方事发前行为古怪 -BAC009S0906W0433 温州沙城街道一民房发生火灾已造成四人死亡 -BAC009S0906W0434 温州沙城街道一民房今晨发生火灾已造成四人死亡 -BAC009S0906W0436 沙城街道七五村永安路一二五号一民房发生火灾 -BAC009S0906W0437 一时一零分火势完全扑灭 -BAC009S0906W0438 火灾造成四人死亡一人受伤 -BAC009S0906W0439 伤者目前在解放军第一一八医院进行治疗 -BAC009S0906W0441 温州惊现最牛菜场温州的状元农贸市场 -BAC009S0906W0443 买菜用支付宝扫码付钱 -BAC009S0906W0444 听说过段时间还要上场智能秤 -BAC009S0906W0445 用智能秤称重将自动生成二维码 -BAC009S0906W0446 用支付宝扫一下就能付款 -BAC009S0906W0447 温州美女学霸将赴非洲支教教当地小学生汉语 -BAC009S0906W0448 麻丽贤等一七位志愿者将远赴非洲支教 -BAC009S0906W0449 温州老人卖房筹四八零零万建养老院赠政府遭闲置 -BAC009S0906W0450 为了实现退休后能建一座养老机构 -BAC009S0906W0451 为更多的老人安度往年的心愿 -BAC009S0906W0452 浙江温州一老人拿出全部积蓄并卖掉两套房子 -BAC009S0906W0453 筹款四八零零万经六年建成养老院 -BAC009S0906W0454 捐给当地慈善部门后却遭闲置三年 -BAC009S0906W0455 温州苍南县看守所民警宿舍楼起火无人员伤亡 -BAC009S0906W0456 八月三日上午一一时左右 -BAC009S0906W0457 温州苍南县看守所一宿舍起火 -BAC009S0906W0458 该市苍南县公安局直属县看守所突发火情 -BAC009S0906W0459 所内民警宿舍楼突发大火 -BAC009S0906W0460 在看守所干警及消防人员的扑救下火势很快被扑灭 -BAC009S0906W0461 温州话到底有多难懂 -BAC009S0906W0463 大家对温州话难懂这事儿略有耳闻 -BAC009S0906W0464 一直被认为是全中国最难学习的方言之一 -BAC009S0906W0465 温州贩卖婴儿大案女医生假称婴儿已死然后卖掉 -BAC009S0906W0466 警方先后解救了一六名婴儿 -BAC009S0906W0467 有六个被送往苍南福利院 -BAC009S0906W0468 图为其中一名被解救的孩子 -BAC009S0906W0469 温州集资诈骗案犯汇给情人四千万小三被诉 -BAC009S0906W0470 二九岁的章某被控洗钱一二二万元 -BAC009S0906W0471 温州鞋业总经理遭追杀凶手行凶过程中被打死 -BAC009S0906W0472 陆续有人从乐清赶往平阳法院 -BAC009S0906W0473 平阳法院内外已聚集了三零零多人等待开庭 -BAC009S0906W0474 温州首家支付宝菜市场启动一周很多摊主不会用 -BAC009S0906W0475 状元农贸市场内挂着支付宝的宣传牌 -BAC009S0906W0476 温州高三男生坠楼身亡事发前无异常刚从家返校 -BAC009S0906W0477 龙湾永强中学一名高三男生从宿舍楼五楼楼顶坠楼身亡 -BAC009S0906W0478 永强中学校长也是坠楼学生的语文老师 -BAC009S0906W0479 印象里他性格是比较开朗的 -BAC009S0906W0480 没有发现近期有异常变化目前 -BAC009S0906W0481 龙湾警方已对此事展开调查 -BAC009S0906W0482 温州高速公路大米遭抢续五名涉案人员已落网 -BAC009S0906W0483 白花花的大米洒了一地 -BAC009S0906W0484 引来周边大批村民哄抢 -BAC009S0906W0485 一场考验道德与良知的大米保卫战悄然打响 -BAC009S0906W0486 温州鹿城警方发布通报称 -BAC009S0906W0487 五名涉嫌参与抢米的犯罪嫌疑人先后被抓获并拘留 -BAC009S0906W0488 民警仍在对其馀涉事人员进行调查 -BAC009S0906W0489 温州高速车祸九二包大米遭哄抢续带头者被拘 -BAC009S0906W0490 一辆货车在金丽温高速温州段发生事故 -BAC009S0906W0491 涉案的其中两名嫌疑人陈某女 -BAC009S0906W0492 永嘉县人谢某女 -BAC009S0906W0493 永嘉县人已被鹿城警方依法行政拘留 -BAC009S0906W0494 港京航班六名乘客推撞地勤四人被判九至一一天监禁 -BAC009S0907W0121 也不代表开发商资金面已经不再紧张 -BAC009S0907W0122 
背后可能蕴含着开发商更多的窘境 -BAC009S0907W0123 本世纪网至本世纪经济报道 -BAC009S0907W0124 上海南昌等城市近期继续松绑了公积金贷款政策 -BAC009S0907W0125 而南昌除了放松首套房界定标准 -BAC009S0907W0126 还降低了首套房公积金首付 -BAC009S0907W0127 国家住房银行箭在弦上 -BAC009S0907W0128 住建部官员发表文章指出 -BAC009S0907W0129 以住房公积金制度为基础 -BAC009S0907W0130 设立国家住房银行条件已经基本成熟 -BAC009S0907W0131 国家住房银行是否箭在弦上 -BAC009S0907W0132 其成立需具备哪些条件 -BAC009S0907W0133 以住房公积金制度为基础 -BAC009S0907W0134 设立政策性住宅金融机构 -BAC009S0907W0135 此机构即是住房银行 -BAC009S0907W0136 设立住房银行的条件已基本成熟 -BAC009S0907W0137 改进住房公积金提取使用监管机制 -BAC009S0907W0138 全国住房公积金七万亿元 -BAC009S0907W0139 住房维修资金约七亿元 -BAC009S0907W0140 如允许每年发行专项金融债券七万亿元 -BAC009S0907W0141 今年资金规模接近七万亿元 -BAC009S0907W0142 明年预计达到七万亿元 -BAC009S0907W0143 可基本满足首套和改善性自住住房的低息贷款需求 -BAC009S0907W0144 三是已有人员和机构 -BAC009S0907W0145 全国共有管理中心一百个 -BAC009S0907W0146 业务网点一千个 -BAC009S0907W0147 从业人员五万人 -BAC009S0907W0148 可充分利用这些机构网点和人员 -BAC009S0907W0149 组建国家住房银行分行和支行 -BAC009S0907W0150 对各地分支机构实行垂直管理 -BAC009S0907W0151 全国住房公积金贷款风险准备金已接近一百亿元 -BAC009S0907W0152 其中五亿元为超额拨备 -BAC009S0907W0153 可转化为住房银行资本金 -BAC009S0907W0154 设立住房银行好处多多 -BAC009S0907W0155 提高家庭购房能力 -BAC009S0907W0156 通过国家住房银行提供低息贷款 -BAC009S0907W0157 可以解决贷款难和贷款贵问题 -BAC009S0907W0158 有效提高家庭购房能力 -BAC009S0907W0159 完善宏观调控机制 -BAC009S0907W0160 可以有效解决商业银行顺周期操作问题 -BAC009S0907W0161 避免房地产市场大起大落 -BAC009S0907W0162 拓展货币政策操作空间 -BAC009S0907W0163 为利率市场化改革创造条件 -BAC009S0907W0164 促进新型城镇化发展 -BAC009S0907W0165 将农民工纳入住房公积金制度 -BAC009S0907W0166 积累在城镇购房首期付款 -BAC009S0907W0167 再由国家住房银行提供低息贷款 -BAC009S0907W0168 后续还款用住房公积金支付 -BAC009S0907W0169 将有效缓解购房能力不足矛盾 -BAC009S0907W0170 提升新型城镇化质量和效益 -BAC009S0907W0171 改进住房公积金管理 -BAC009S0907W0172 根源是体制机制存在弊端 -BAC009S0907W0173 通过设立国家住房银行 -BAC009S0907W0174 可以有效提高资金管理集约化专业化和精细化水平 -BAC009S0907W0175 充分发挥住房公积金作用 -BAC009S0907W0176 住建部官员发表文章指出 -BAC009S0907W0177 以住房公积金制度为基础 -BAC009S0907W0178 设立国家住房银行条件已经基本成熟 -BAC009S0907W0179 国家住房银行是否箭在弦上 -BAC009S0907W0180 其成立需具备哪些条件 -BAC009S0907W0181 备受刚需购房者关注的公积金政策也频繁迎来调整 -BAC009S0907W0182 北京市管国管住房公积金中心先后发布通知 -BAC009S0907W0183 贷款最高额度由五万元升至七万元 -BAC009S0907W0184 公积金贷款总共可少缴利息三十馀万 -BAC009S0907W0185 是对过去住房公积金制度不作为方式的纠正 -BAC009S0907W0186 而随着各地公积金政策的调整 -BAC009S0907W0187 建立健全以工促农以城带乡的长效机制 -BAC009S0907W0188 为现代农业建设取得明显进展提供有力保障 -BAC009S0907W0189 建立农业投入稳定增长机制 -BAC009S0907W0190 按照总量持续增长比例稳步提高的要求 -BAC009S0907W0191 预算内固定资产投资要向重大农业农村建设项目倾斜 -BAC009S0907W0192 耕地占用税税率提高后 -BAC009S0907W0193 新增收入全部用于农业 -BAC009S0907W0194 积极推动土地出让收益用于高标准农田建设 -BAC009S0907W0195 充分发挥中国农业产业发展基金的引导作用 -BAC009S0907W0196 加快农村金融组织产品和服务创新 -BAC009S0907W0197 推动发展村镇银行等农村中小金融机构 -BAC009S0907W0198 引导金融机构发放农业中长期贷款 -BAC009S0907W0199 完善农民专业合作社管理方法 -BAC009S0907W0200 支持其开展信用合作 -BAC009S0907W0201 落实农民专业合作社和农村金融有关税收优惠政策 -BAC009S0907W0202 扶持农业信贷担保组织发展 -BAC009S0907W0203 扩大农村担保品范围 -BAC009S0907W0204 完善农业保险保费补贴政策 -BAC009S0907W0205 健全农业再保险体系 -BAC009S0907W0206 探索完善财政支持下的农业大灾风险分散机制 -BAC009S0907W0207 引导社会资本投入农业 -BAC009S0907W0208 各部门要主动服务三农 -BAC009S0907W0209 积极推动建立城乡要素平等交换关系 -BAC009S0907W0210 鼓励和促进工业与城市资源要素向农业农村配置 -BAC009S0907W0211 调动农民参与农业农村基础设施建设的积极性 -BAC009S0907W0212 通过组织动员和政策引导等多种途径 -BAC009S0907W0213 鼓励各种社会力量与乡村结对帮扶 -BAC009S0907W0214 参与农村产业发展和公共设施建设 -BAC009S0907W0215 努力形成多元化投入新格局 -BAC009S0907W0216 加大农业支持保护力度 -BAC009S0907W0217 坚持和完善农业补贴政策 -BAC009S0907W0218 建立农业补贴政策后评估机制 -BAC009S0907W0219 落实农资综合补贴动态调整机制 -BAC009S0907W0220 研究逐步扩大良种补贴品种和范围 -BAC009S0907W0221 扩大农机具购置补贴规模 -BAC009S0907W0222 加大农机化薄弱环节生产机械补贴力度 -BAC009S0907W0223 加大动物强制免疫补贴力度 -BAC009S0907W0224 逐步完善农业生产关键技术应用与服务支持政策 -BAC009S0907W0225 大幅度增加农业防灾减灾稳产增产关键技术良法补助 -BAC009S0907W0226 坚持和完善渔用柴油补贴政策 -BAC009S0907W0227 继续实施农业种子种苗种畜种禽免税进口优惠政策 -BAC009S0907W0228 建立完善农业生产奖补制度 -BAC009S0907W0229 完善主产区利益补偿机制 -BAC009S0907W0230 提高中央财政对粮食油料生产大县转移支付水平 
-BAC009S0907W0231 继续加大对产粮大县生猪调出大县的奖励力度 -BAC009S0907W0232 规范粮食主产县涉农投资项目地方资金配套 -BAC009S0907W0233 全面取消主产区粮食风险基金地方资金配套 -BAC009S0907W0234 稳步提高粮食主产区县级人均财力水平 -BAC009S0907W0235 全面实施和完善草原生态保护补助奖励政策 -BAC009S0907W0236 扩大草原生态保护面源污染防控生态奖补范围和规模 -BAC009S0907W0237 探索实施生物农药低毒农药使用补助政策 -BAC009S0907W0238 研究建立高耗能老旧农业机械报废回收制度 -BAC009S0907W0239 探索实施报废更新补助 -BAC009S0907W0240 加大对农业科研和技术推广的支持力度 -BAC009S0907W0241 完善现代农业产业技术体系 -BAC009S0907W0242 选择部分农业科研院所予以稳定支持 -BAC009S0907W0243 按照种养规模和服务绩效安排工作经费 -BAC009S0907W0244 加大动物疫病防控经费投入 -BAC009S0907W0245 完善病死动物无害化处理补贴制度 -BAC009S0907W0246 建立和完善农作物病虫害专业化统防统治补助政策 -BAC009S0907W0247 继续向农民免费提供测土配方施肥服务 -BAC009S0907W0248 扩大土壤有机质提升项目实施范围和规模 -BAC009S0907W0249 继续加大农业农村人才培养力度 -BAC009S0907W0250 对大学生涉农创业按规定给予相关政策扶持 -BAC009S0907W0251 完善农产品市场调控机制 -BAC009S0907W0252 稳步提高稻谷小麦最低收购价 -BAC009S0907W0253 没有人提的往往才是真命天子 -BAC009S0907W0254 谁是苹果进军汽车市场的合作伙伴收购对象 -BAC009S0907W0258 这个问题存在于软件捆绑方式 -BAC009S0907W0259 它是软件集成的一种方式 -BAC009S0907W0261 他们很快提供了修复软件 -BAC009S0907W0262 不管是什么时候推出软件和开发一些超前的东西 -BAC009S0907W0263 避免不了出现一些漏洞 -BAC009S0907W0264 我们所做的就是发现漏洞后立即修复 -BAC009S0907W0265 在苹果发布靓丽的第四财季业绩报告后 -BAC009S0907W0266 乔斯维亚克就很少在公众场合露面 -BAC009S0907W0268 促使这家公司获得了创记录的第四财季盈利 -BAC009S0907W0269 苹果正在全力以赴出售尽可能多的智能手机 -BAC009S0907W0270 你必须保证自己了解稳态市场 -BAC009S0907W0271 而不仅仅是早期市场 -BAC009S0907W0272 大尺寸屏幕设备在亚洲很流行 -BAC009S0907W0273 但是在欧洲受欢迎度较低 -BAC009S0907W0274 美国市场刚好介于两者之间 -BAC009S0907W0275 目前这项服务已经达到了一个里程碑 -BAC009S0907W0277 有一百万张信用卡已被激活 -BAC009S0907W0278 其中就包括沃尔玛和百思买 -BAC009S0907W0279 这两家公司目前正在开发自己的移动支付系统 -BAC009S0907W0280 零售商最终都会向消费者妥协 -BAC009S0907W0281 想要成功的零售商将考虑消费者的利益 -BAC009S0907W0282 并接受消费者想要使用的支付方式 -BAC009S0907W0283 乔斯维亚克还谈及了苹果涉足可穿戴设备市场的问题 -BAC009S0907W0286 乔斯维亚克还为苹果平板电脑业务做了辩护 -BAC009S0907W0287 他拿出了数据作为证据截止目前 -BAC009S0907W0290 我们一直都在打造最好的产品 -BAC009S0907W0291 这次我们同样做到了 -BAC009S0907W0294 用户发现系统更新之后 -BAC009S0907W0295 心率测量记录没有之前那么频繁了 -BAC009S0907W0297 不过苹果官方很快澄清了这个事情 -BAC009S0907W0298 根据苹果官方的支持页面显示 -BAC009S0907W0300 不过更新后锻炼和运动手臂的时候不会记录心率 -BAC009S0907W0301 因此用户看到测量记录的记录要比之前少一些 -BAC009S0907W0302 不过这导致了很多新问题 -BAC009S0907W0303 在认证授权系统中对服务器设置权限管理 -BAC009S0907W0304 以及与经销商汽车垂直网站等签署保密协议等 -BAC009S0907W0305 这些措施在一定程度上将可防止用户数据泄露 -BAC009S0907W0306 除了投入大这一因素之外 -BAC009S0907W0307 往往对网络安全意识也不强 -BAC009S0907W0308 毕竟与互联网融合时间不长 -BAC009S0907W0309 上述网络安全人士称 -BAC009S0907W0310 乌云网合伙人邬迪接受第一财经日报记者采访时称 -BAC009S0907W0311 尽管网络安全目前投入成本大 -BAC009S0907W0312 又未直接产生经济效益 -BAC009S0907W0313 但到将来互联网时代 -BAC009S0907W0314 部分传统的车企或许还没有注意到这点 -BAC009S0907W0315 乌云上有不少因联网漏洞可导致车辆被控制 -BAC009S0907W0316 这将会导致行车安全问题 -BAC009S0907W0317 令车企烦恼的不仅是车主信息被泄露这一困扰 -BAC009S0907W0318 随着越来越多车企踊跃加入车联网浪潮中 -BAC009S0907W0319 信息安全隐患也随之而来 -BAC009S0907W0320 负责车辆网络安全问题 -BAC009S0907W0321 现在汽车与网络的联系越来越紧密 -BAC009S0907W0322 以后将能够与周围环境交流 -BAC009S0907W0323 如果车辆被黑客软件侵袭 -BAC009S0907W0324 车辆可能会发生严重的交通事故 -BAC009S0907W0325 比如现在的汽车一般采用了哪些新技术 -BAC009S0907W0326 其中十六家回复发函 -BAC009S0907W0327 在接受调查的这些公司中 -BAC009S0907W0328 有两家表示能够诊断或者反馈黑客入侵后的情况 -BAC009S0907W0329 有一家公司表示能够及时检测黑客入侵 -BAC009S0907W0330 像车上的信息娱乐系统和导航系统 -BAC009S0907W0331 很可能通过联网技术 -BAC009S0907W0332 被恶意软件或者黑客攻击 -BAC009S0907W0333 二十二二零一五 -BAC009S0907W0335 黑客可利用这些漏洞远程打开车门 -BAC009S0907W0336 宝马方面表示已经升级该数字系统 -BAC009S0907W0337 解决信息安全的问题 -BAC009S0907W0339 作为唯一能够入选五大飙血之战的女子比赛 -BAC009S0907W0340 正是得益于我国女子散打名将鄂美蝶的惊艳一击 -BAC009S0907W0341 在当天女子五二千克级自由搏击超级战中 -BAC009S0907W0342 鄂美蝶便毫无保留地将炮火轰向对手 -BAC009S0907W0343 三十三岁的大滨芳美在面对强大的火力下 -BAC009S0907W0344 比赛很快便呈向一边倒的局面第二回合 -BAC009S0907W0345 鄂美蝶继续将自己所学到的新搏击技能尽情展现 -BAC009S0907W0346 在一连串的拳腿风暴过后 -BAC009S0907W0347 终止时间定格在二分二十一秒 -BAC009S0907W0350 二零一五年四月十二 -BAC009S0907W0351 一场众星闪耀的群龙赛事震撼打响 -BAC009S0907W0352 作为此次赛事上唯一一场纯泰式规则的超级战 
-BAC009S0907W0353 两位气质迥异的选手展示出了全然不同的擂台风格 -BAC009S0907W0354 在前两局僵持不下的情况下 -BAC009S0907W0355 面对兵行诡道的波斯弯刀 -BAC009S0907W0356 张春雨选择了加强压迫对手的力度 -BAC009S0907W0357 此举却导致了一次擂台意外的发生 -BAC009S0907W0358 张春雨被对手的一记肘击打破了右侧眉弓 -BAC009S0907W0359 经过场上护理人员的医治后 -BAC009S0907W0360 张春雨在全场观众的喝彩声中再次投入比赛 -BAC009S0907W0361 并向对手发起了凶猛的反扑 -BAC009S0907W0362 双方刺刀见红式的对攻中 -BAC009S0907W0363 伊萨的眉弓同样被张春雨以牙还牙的肘法击破 -BAC009S0907W0364 全面引爆现场观众的激情 -BAC009S0907W0365 比赛在双方互不相让的对攻中落下了帷幕 -BAC009S0907W0366 但对于每一位观赛者来讲 -BAC009S0907W0367 能够亲眼见证这场火爆刺激的的精彩大战 -BAC009S0907W0368 远比单纯的胜负有意义得多 -BAC009S0907W0369 这是一场没有输家的经典比赛 -BAC009S0907W0372 二零一五年二月一日 -BAC009S0907W0373 昆仑决广州站在广州天河体育中心成功打响 -BAC009S0907W0374 多国大神级搏击王者论剑昆仑武道之巅 -BAC009S0907W0375 决赛一如期待般精彩绝伦 -BAC009S0907W0376 马刀抡击式的中距离组合拳法配合高位膝技 -BAC009S0907W0377 打得对手只有招架之功 -BAC009S0907W0378 便将对手的眼部击伤 -BAC009S0907W0379 严重影响卡尔泽塔的实现 -BAC009S0907W0380 令对手无奈放弃比赛 -BAC009S0907W0381 他在二零一五年昆仑决诸神之战决赛圈的表现 -BAC009S0907W0382 将成为无数武迷接下来最大的期待之一 -BAC009S0907W0383 谁也不知道会发生什么 -BAC009S0907W0384 这就是竞技体育的魅力 -BAC009S0907W0385 在昨晚的女子标枪决赛中 -BAC009S0907W0386 然而就是这最后一掷 -BAC009S0907W0387 在昨天比赛的第五投 -BAC009S0907W0388 吕会会在全场观众的加油助威声中爆发 -BAC009S0907W0389 倾尽全力将标枪掷到了六十六米一三 -BAC009S0907W0390 然而就在全场仅剩下莫利托一个人的最后一掷时 -BAC009S0907W0391 虽然留下了巨大的遗憾 -BAC009S0907W0392 不过这依然是吕会会的个人最好成绩 -BAC009S0907W0393 吕会会在走到混合区接受记者采访时止住了泪水 -BAC009S0907W0394 在大赛中投出这样好的成绩我自己都没有想到 -BAC009S0907W0395 其实比赛过程中我也没有多想 -BAC009S0907W0396 就是要一枪一枪地投 -BAC009S0907W0397 比成这样我其实已经很开心了 -BAC009S0907W0398 能在北京获得一枚奖牌我很骄傲 -BAC009S0907W0399 观众们的鼓励也给了我力量 -BAC009S0907W0400 我的泪水主要还是来自于喜悦 -BAC009S0907W0401 要说一点儿没有遗憾和失落是假的 -BAC009S0907W0402 但总的来说还是高兴多于遗憾 -BAC009S0907W0403 文本报记者刘艾林 -BAC009S0907W0404 去年美国队长二寒冬战士就曾在四月登陆 -BAC009S0907W0405 结果创造了相当可观的票房成绩 -BAC009S0907W0406 丛林之书则将在二零一六年四月十五日登场 -BAC009S0907W0408 两人合作长达二十年 -BAC009S0907W0409 不过天下无不散之筵席 -BAC009S0907W0410 原来陈善之最近已离开了百仕活 -BAC009S0907W0411 有传他离开是因黎明不满其在挽留艺人方面没有尽力 -BAC009S0907W0412 搜狐娱乐讯十月九日 -BAC009S0907W0413 表示决定辞职 -BAC009S0907W0414 不与无线续约 -BAC009S0907W0415 他感叹自己在无线十五年都没有机会 -BAC009S0907W0416 眼见后辈爬头 -BAC009S0907W0417 希望出去发展 -BAC009S0907W0418 他直言不想看见自己变作一潭死水 -BAC009S0907W0419 早前演出的舞台剧令他醒觉要出外寻找更多演出机会 -BAC009S0907W0420 因此决定出外闯 -BAC009S0907W0421 虽然未知去向 -BAC009S0907W0422 但坚信有我落脚的地方 -BAC009S0907W0423 我便会到那里 -BAC009S0907W0424 搜狐娱乐讯北京时间七月二十八日消息 -BAC009S0907W0425 据香港媒体报导 -BAC009S0907W0427 陈奕迅双手合十认真地向蛋糕许愿 -BAC009S0907W0428 搜狐娱乐讯据台湾媒体报道 -BAC009S0907W0429 港歌神陈奕迅出道近二十年 -BAC009S0907W0430 曾获美国时代杂志形容为影响香港乐坛风格的人物 -BAC009S0907W0431 并于当日被香港警方拘捕 -BAC009S0907W0432 警方以普通袭击罪对涉事乘客提起诉讼 -BAC009S0907W0433 其中四名被告分别判即时监禁九至一一天 -BAC009S0907W0434 一人被判罚款一五零零元 -BAC009S0907W0435 港京航班延误九小时六名内地乘客推撞地勤被捕 -BAC009S0907W0437 六名内地乘客与地勤发生肢体冲突 -BAC009S0907W0439 将被以普通袭击罪起诉 -BAC009S0907W0440 港商在台遭绑三八天获救后痛哭以为必死 -BAC009S0907W0441 黄煜坤被警方送到附近医院接受检查 -BAC009S0907W0442 惠州公安在金山河捞获一具无头无双手女尸 -BAC009S0907W0443 广东惠州惊爆港商杀情妇碎尸凶案 -BAC009S0907W0444 五零岁港商疑与其工厂的同龄女主管偷情多年 -BAC009S0907W0445 近日再度拒绝女方的逼婚后 -BAC009S0907W0446 遭追讨欠款和抚养费共四零万元人民币 -BAC009S0907W0447 港商疑恼羞成怒将她杀害 -BAC009S0907W0448 并肢解尸体分成多袋抛入河中 -BAC009S0907W0449 港商被骗牵出路边地下钱庄涉案资金四三零零亿 -BAC009S0907W0450 深圳警方查获的一个地下钱庄窝点 -BAC009S0907W0451 由普通商店作为掩护 -BAC009S0907W0452 该商店老板郑晓生红衣者涉嫌暗地里兑换外汇 -BAC009S0907W0453 替人向境外转移资金 -BAC009S0907W0454 港媒关注天价虾店停业破坏青岛形象 -BAC009S0907W0455 参考消息网一零月八日报道港媒称 -BAC009S0907W0456 备受关注的青岛三八元一只大虾事件有最新发展 -BAC009S0907W0457 并责令其立即改正价格违法行为 -BAC009S0907W0458 事发后派出所和物价局都互相踢皮球 -BAC009S0907W0459 批评职能部门没有将消费者放在第一位 -BAC009S0907W0460 港媒关注内地私人美术馆新富人群热衷分享藏品 -BAC009S0907W0461 参考消息网七月二九日报道港媒称 -BAC009S0907W0462 用来保存他们的藏品 -BAC009S0907W0463 其中一些人是近年来国际拍卖会上艺术品的最大买家 -BAC009S0907W0464 港媒关注浙江暖男医生手术室播动画片哄小女孩 -BAC009S0907W0465 参考消息网九月二二日报道港媒称 
-BAC009S0907W0466 网络上热传一组暖男医生哄小萝莉的温情照片 -BAC009S0907W0467 男医生为了安抚即将做手术的小女孩 -BAC009S0907W0468 将小女孩抱在腿上并播放手机中的动画片 -BAC009S0907W0469 港媒关注重雾霾重回华北罕见蓝天只持续两周 -BAC009S0907W0470 参考消息网九月一九日报道港媒称 -BAC009S0907W0471 随着严重雾霾卷土重来 -BAC009S0907W0472 港媒关注马云回应被逼捐花钱比挣钱难 -BAC009S0907W0474 企业应该做好的投资 -BAC009S0907W0475 盲目捐款没有益处 -BAC009S0907W0476 港媒曝水货客扮残疾人在轮椅中藏钻石月入八万 -BAC009S0907W0477 参考消息网七月二九日报道港媒称 -BAC009S0907W0478 香港海关严查水货客 -BAC009S0907W0479 水货集团看中轮椅人士收入不高 -BAC009S0907W0480 以高收入低风险和免缴税等好处利诱对方成为水货客 -BAC009S0907W0481 有走私奢侈品的人士月入高达八万港元 -BAC009S0907W0482 港媒盘点亚洲千禧一代十大富豪九人是中国人 -BAC009S0907W0483 参考消息网七月二二日报道 -BAC009S0907W0484 港媒称假沉香充斥内地多以化学香油制成 -BAC009S0907W0485 高仿沉香多以化学香精等制成 -BAC009S0907W0486 可比黄金的沉香价格每年倍增 -BAC009S0907W0487 港媒称内地中产人数猛增有助稳定企望渐进改革 -BAC009S0907W0488 一个国家稳定的社会结构呈橄榄形 -BAC009S0907W0489 而橄榄形结构是以中产为主的结构 -BAC009S0907W0490 中产阶级在一个国家的现代化中起着稳定作用 -BAC009S0907W0491 是社会稳定的主要力量 -BAC009S0907W0492 港媒称内地为国际市场修改动画片妖怪不能吃唐僧肉 -BAC009S0907W0493 参考消息网一一月一日报道港媒称 -BAC009S0907W0494 中国的动画工作室越来越看重海外市场 -BAC009S0907W0495 港媒称内地人不穷了为何仍爱抢学者抢习惯了 -BAC009S0908W0121 将进一步提振刚需购房者入市信心 -BAC009S0908W0122 加速今年楼市成交复苏回暖 -BAC009S0908W0123 公积金政策利好首套自住住房贷款需求的消息纷至沓来 -BAC009S0908W0124 并已实施 -BAC009S0908W0125 贷款额度上限调整为一百万元 -BAC009S0908W0126 购买一百平方米以上非政策性住房或第二套住房 -BAC009S0908W0127 贷款最高额度仍为一百万元 -BAC009S0908W0128 均规定贷款额度不再依据个人信用等级上浮 -BAC009S0908W0129 并对异地缴存住房公积金等政策作出调整 -BAC009S0908W0130 北京市公积金管理中心明确取消新建商品房评估 -BAC009S0908W0131 国管住房公积金中心则表示取消担保服务费 -BAC009S0908W0132 这一系列公积金门槛放低额度提高的调整 -BAC009S0908W0133 是对过去住房公积金制度不作为方式的纠正 -BAC009S0908W0134 即使去年十一月公积金贷款利率降至百分之七 -BAC009S0908W0135 很多人需要支付大额首付 -BAC009S0908W0136 使用公积金制度的作用和效果没有得到有效的发挥 -BAC009S0908W0137 此番公积金政策调整 -BAC009S0908W0138 将在诸多方面惠及刚需购房者 -BAC009S0908W0139 之前的公积金贷款额只有一百万 -BAC009S0908W0140 而最高额度提升至一百万后 -BAC009S0908W0141 大部分刚需购房者都可以选择公积金贷款 -BAC009S0908W0142 中原地产首席分析师张大伟分析 -BAC009S0908W0143 公积金额贷款额度升至一百万可以节省很多少利息 -BAC009S0908W0144 公积金贷款可少缴三十馀万 -BAC009S0908W0145 而额度最高一百万时 -BAC009S0908W0146 这一数值为二十馀万 -BAC009S0908W0147 这将使更多购房者具备买房支付能力 -BAC009S0908W0148 可以使用公积金贷款的购房者将起码增加百分之七 -BAC009S0908W0149 也将在一定程度上降低刚需购房者支付负担 -BAC009S0908W0150 公积金政策调整对于楼市成交刺激作用已初见瑞尔 -BAC009S0908W0151 链家地产市场研究部数据显示 -BAC009S0908W0152 北京市公积金额度提升后的元旦时期 -BAC009S0908W0153 近郊小户型楼盘及城区部分公房社区客户咨询量上升 -BAC009S0908W0154 而其房源多在一百平方米以下 -BAC009S0908W0155 中原地产市场研究部数据显示 -BAC009S0908W0156 以北京去年纯商品房成交结构为例 -BAC009S0908W0157 一百平均单套总价约一百万元左右 -BAC009S0908W0158 公积金贷款上限调整后 -BAC009S0908W0159 一百万元的贷款额度能满足大部分首套刚需的贷款需求 -BAC009S0908W0160 链家地产市场研究部张旭表示 -BAC009S0908W0161 此番公积金贷款政策调整将进一步提升振刚需 -BAC009S0908W0162 促进楼市预期向好发展 -BAC009S0908W0163 去年已有不少城市对公积金政策进行放松调整 -BAC009S0908W0164 公积金政策调整对购房者心理层面影响较大 -BAC009S0908W0165 将加速今年楼市成交复苏回暖 -BAC009S0908W0166 备受刚需购房者关注的公积金政策也频繁迎来调整 -BAC009S0908W0167 北京市管国管住房公积金中心先后发布通知 -BAC009S0908W0168 通知指出除北上广深一线城市外 -BAC009S0908W0169 对拥有一套住房并已结清相应购房贷款的居民家庭 -BAC009S0908W0170 申请公积金购买第二套住房 -BAC009S0908W0171 最低首付款比例由百分之七降低至百分之五 -BAC009S0908W0172 公积金首付的再次降低实际影响有限 -BAC009S0908W0173 但对购房者预期有积极响应 -BAC009S0908W0174 这将有利于稳定房地产市场 -BAC009S0908W0175 进而对稳定中国经济有正面作用 -BAC009S0908W0176 为进一步完善住房公积金个人住房贷款政策 -BAC009S0908W0177 对拥有一套住房并已结清相应购房贷款的居民家庭 -BAC009S0908W0178 最低首付款比例由百分之七降低至百分之五 -BAC009S0908W0179 该政策对于一线城市并不强制执行 -BAC009S0908W0180 而是北京上海广州深圳可在国家统一政策基础上 -BAC009S0908W0181 易居研究院智库中心研究总监严跃进认为 -BAC009S0908W0182 此次住建部财政部和中央联合发文 -BAC009S0908W0183 反映出政策层面较大的刺激力度 -BAC009S0908W0184 这是自去年以来除降息外 -BAC009S0908W0185 相关部门对公积金贷款政策的第三次放松 -BAC009S0908W0186 美丽北京大型绿色公益品牌项目 -BAC009S0908W0187 完善玉米大豆油菜籽棉花等农产品临时收储政策 -BAC009S0908W0188 完善主要农产品吞吐和调节机制 -BAC009S0908W0189 健全重要农产品储备制度 -BAC009S0908W0190 发挥骨干企业稳定市场的作用 -BAC009S0908W0191 完善生猪棉花食糖边销茶等调控预案 -BAC009S0908W0192 制定鲜活农产品调控办法 -BAC009S0908W0193 
探索建立目标价格为核心的反周期补贴制度 -BAC009S0908W0194 加强农业科技交流合作 -BAC009S0908W0195 提高农业利用外资水平 -BAC009S0908W0196 继续用好国外优惠贷款和赠款 -BAC009S0908W0197 加大先进适用技术装备的引进消化和吸收力度 -BAC009S0908W0198 强化多双边和区域农业磋商谈判和贸易促进 -BAC009S0908W0199 做好涉农国际贸易规定制动工作 -BAC009S0908W0200 进一步强化贸易促进公共服务能力 -BAC009S0908W0201 积极推动优势农产品出口 -BAC009S0908W0202 积极应对国际贸易摩擦 -BAC009S0908W0203 支持行业协会办企业维护合法权益 -BAC009S0908W0204 进一步完善农业产业损害监测预警机制 -BAC009S0908W0205 运用符合世界贸易组织规定的相关措施 -BAC009S0908W0206 灵活有效调控农业产品进出口 -BAC009S0908W0207 积极推动种业农垦等方面改革 -BAC009S0908W0208 发展农村服务业和乡村企业 -BAC009S0908W0209 制定农村二三产业加快发展的鼓励政策 -BAC009S0908W0210 落实和完善有关税收政策 -BAC009S0908W0211 统筹城乡基础设施建和公共服务 -BAC009S0908W0212 逐步建立城乡统一的公共服务制度 -BAC009S0908W0213 积极稳妥推进户籍制度改革 -BAC009S0908W0214 推进省直接管理县市财政体制改革 -BAC009S0908W0215 优先将农业大县纳入改革范围 -BAC009S0908W0216 强化农业法制保障 -BAC009S0908W0217 坚持米袋子省长负责制和菜篮子市长负责制 -BAC009S0908W0218 全面落实耕地和基本农田保护领导干部离任审计制度 -BAC009S0908W0219 各有关部门和地方各级人民政府要围绕规划目标任务 -BAC009S0908W0220 研究落实各项强农惠农富农政策 -BAC009S0908W0221 统筹协调推动重大工程的实施 -BAC009S0908W0222 努力开创我国农业现代化发展新局面 -BAC009S0908W0223 农业农村信息化十二五规划 -BAC009S0908W0224 关于印发十二五规划的通知 -BAC009S0908W0225 中国老龄十二五规划 -BAC009S0908W0226 新农村十二五发展规划 -BAC009S0908W0227 国家林业十二五规划 -BAC009S0908W0228 十二五医药发展规划 -BAC009S0908W0229 老龄事业十二五规划 -BAC009S0908W0230 国务院总理温家宝五日主持召开国务院常务会议 -BAC009S0908W0231 再次听取全国民用核设施综合安全检查情况汇报 -BAC009S0908W0232 核电重启的曙光越来越近 -BAC009S0908W0233 国务院二零一一年五月 -BAC009S0908W0234 相关公司股票走势国海证券 -BAC009S0908W0235 决定对全国核设施进行安全检查 -BAC009S0908W0236 有关部门组织核安全地震海洋等方面专家 -BAC009S0908W0237 用五个多月时间对全国七十台运行在建核电机组 -BAC009S0908W0238 以及所有民用研究堆和核燃燃料循环设施等 -BAC009S0908W0239 进行了综合安全检查 -BAC009S0908W0240 形成了新形势下我国核电发展的建议阶段研究报告 -BAC009S0908W0241 国务院常务会议听取了综合安全检查情况汇报 -BAC009S0908W0242 对进一步深入检查及落实整改措施作了部署 -BAC009S0908W0243 核安全法规标准体系与国际接轨 -BAC009S0908W0244 具备一定的严重事故预防和缓解能力 -BAC009S0908W0245 部分核电厂未制定实施严重事故预防和缓解规程 -BAC009S0908W0246 海啸问题评估和应付基础比较薄弱等 -BAC009S0908W0247 有关部门和企业迅速组织整改 -BAC009S0908W0248 目前已取得阶段性成效 -BAC009S0908W0249 基本原则是预防为主纵深防御 -BAC009S0908W0250 新老并重防结结合 -BAC009S0908W0251 依靠科技持续改进 -BAC009S0908W0252 坚持法治严格监管 -BAC009S0908W0253 比如用户抱怨升级之后设施无法像以前那样工作了 -BAC009S0908W0254 甚至还不如原来的一点零版本系统好用 -BAC009S0908W0255 苹果此举是为了节约用电量 -BAC009S0908W0256 有人给出了解决方法 -BAC009S0908W0258 强制不断的心率测量 -BAC009S0908W0259 只是这种情况下心率传感器会每隔十秒进行一次 -BAC009S0908W0261 苹果表这么火爆微软也该出智能手表吗 -BAC009S0908W0262 刚开始微软因谨慎起见 -BAC009S0908W0264 最近才开始向其他市场推广销售 -BAC009S0908W0265 在谷歌与苹果相机推出智能手表后 -BAC009S0908W0266 微软目前仍局限于健身手环领域 -BAC009S0908W0267 但它的确算不上是智能手表 -BAC009S0908W0268 拥有内部存储空间与完整的应用平台 -BAC009S0908W0269 支持开发者为其编写应用 -BAC009S0908W0270 但它对开发者来说限制太多 -BAC009S0908W0272 微软正在向外界推广一次编写 -BAC009S0908W0273 跨设备使用的通用应用 -BAC009S0908W0274 但至今唯独没有提升智能手表平台 -BAC009S0908W0275 具体如下方视频介绍所示 -BAC009S0908W0277 刚开始微软因谨慎起见 -BAC009S0908W0280 原告当地时间周二在法庭上表示 -BAC009S0908W0281 苹果通过发布不必要的软件升级包 -BAC009S0908W0283 一起针对苹果的集体反垄断案两名原告的律师称 -BAC009S0908W0284 由于苹果要打压竞争对手 -BAC009S0908W0286 但却损害了消费者的利益 -BAC009S0908W0287 这次庭审将持续九天时间 -BAC009S0908W0288 给一桩近十年之久的诉讼一个定论 -BAC009S0908W0292 不过这些政策现在已经被废除 -BAC009S0908W0293 苹果打压了市场竞争 -BAC009S0908W0297 苹果担忧这会蚕食其市场份额 -BAC009S0908W0298 生态链中插入其他公司产品会造成问题 -BAC009S0908W0299 这会危及用户体验和产品质量 -BAC009S0908W0301 价格要么下降要么维持不变 -BAC009S0908W0302 苹果没有危害消费者利益 -BAC009S0908W0303 对频频的骚扰电话显示无可奈何 -BAC009S0908W0304 有黑客在网络上兜售车主信 -BAC009S0908W0305 美的摆稳棋局过冬搜狐科技 -BAC009S0908W0306 白电行业将进入最惨烈的一年 -BAC009S0908W0307 昔日巨头格力美的海尔也将沉浮于其中 -BAC009S0908W0308 从本年度第一份季报来看 -BAC009S0908W0309 三巨头中的格力海尔均出现不同程度下滑 -BAC009S0908W0310 实现净利营收双增长 -BAC009S0908W0311 美的吸取了当年大跃进的教训 -BAC009S0908W0312 一位买家电的朋友晒出一张销量清单 -BAC009S0908W0313 他担心自己马上就要被辞退了 -BAC009S0908W0314 发改委约谈各大空调企业的高管 -BAC009S0908W0315 媒体采访的电话打到各空调企业的市场负责人那里 -BAC009S0908W0316 各公司市场部都在卖场忙活 
-BAC009S0908W0317 今年的促销从三月份就启动了 -BAC009S0908W0318 一位商场场内部人士称 -BAC009S0908W0319 注定是白色家电行业最惨烈的一年 -BAC009S0908W0320 现实的残酷落到报表上 -BAC009S0908W0321 是白电上市企业今年的一季报几乎全部沦陷 -BAC009S0908W0322 两大龙头企业格力和海尔 -BAC009S0908W0323 格力电器一季报营收为二百四十五亿元 -BAC009S0908W0324 同比去年降零点六百分之六 -BAC009S0908W0325 净利润为二十七点七五亿元 -BAC009S0908W0326 同比上升百分之二十三点零六 -BAC009S0908W0327 上一次是金融危机期间的二零零九一季度 -BAC009S0908W0328 另一白电巨头青岛海尔 -BAC009S0908W0329 一季度营收为二十一八点七亿元 -BAC009S0908W0330 净利润为九点七亿元 -BAC009S0908W0331 同比增百分之十三点一一 -BAC009S0908W0332 海信科龙和惠而浦则是营收增 -BAC009S0908W0333 海信科龙一季报营收为六十四点三亿元 -BAC009S0908W0334 净利润出现百分之一的下滑 -BAC009S0908W0335 净利出现七点三百分之一的降幅 -BAC009S0908W0337 实现营收净利双增长 -BAC009S0908W0338 十多天压抑的情感终于爆发 -BAC009S0908W0339 女排姑娘们在日本的最后一夜 -BAC009S0908W0340 大家才安安稳稳地睡了一觉 -BAC009S0908W0341 如果要数一下中国女排谁最红 -BAC009S0908W0342 张晓雅的人气肯定在前三名 -BAC009S0908W0343 她以最帅国手走红网络 -BAC009S0908W0344 网友大呼她帅过林丹 -BAC009S0908W0345 这位英气十足的九零后很有人缘 -BAC009S0908W0346 张晓雅最大的优点是有想法 -BAC009S0908W0347 张晓雅这个娃娃训练很自觉 -BAC009S0908W0348 在球场上的思路比较清楚 -BAC009S0908W0349 是一个在球场上有想法的球员 -BAC009S0908W0350 这个娃娃打球时很有思想 -BAC009S0908W0351 中国最帅的竞走冠军陈定将亮相苏州吴中 -BAC009S0908W0352 一九九二年八月五日出生于云南省保山市龙陵县 -BAC009S0908W0353 这个二十三岁的云南小伙子 -BAC009S0908W0354 取得瑞士卢加诺竞走挑战赛男子二十公里竞走银牌 -BAC009S0908W0355 夺得国际田联竞走世界杯男子二十公里竞走银牌 -BAC009S0908W0356 参加全国竞走大奖赛暨世锦赛选拔赛 -BAC009S0908W0357 以一小时二十一分十一秒成绩获铜牌 -BAC009S0908W0358 并取得世锦赛参赛资格 -BAC009S0908W0359 仰泳选手在比赛中 -BAC009S0908W0360 本次比赛使用最新的仰泳出发壁架 -BAC009S0908W0361 帮助仰泳运动员改善自己的出发技术 -BAC009S0908W0362 欧米茄计时管理委员会成员彼得许尔泽勒介绍说 -BAC009S0908W0363 可以帮助他们在出发时增加自己距离水面的高度 -BAC009S0908W0364 可以防止运动员出发时手部滑落 -BAC009S0908W0365 得到了仰泳选手的广泛好评 -BAC009S0908W0366 这是它第一次在游泳世界杯上亮相 -BAC009S0908W0367 也为背后的关键技术提供开发支持 -BAC009S0908W0368 从而确保高度精准地记录竞赛成绩 -BAC009S0908W0369 新科世界冠军宁泽涛领衔中国队出战 -BAC009S0908W0370 身材傲人颜值爆表的她魅力席卷整个亚洲 -BAC009S0908W0373 现年十八岁的莎宾娜身高达一百八十二厘米 -BAC009S0908W0374 腿长足足十二厘米 -BAC009S0908W0375 去年在亚青赛上亮相后 -BAC009S0908W0376 瞬间成为各国媒体的焦点 -BAC009S0908W0377 成为宅男心目中的排球女神 -BAC009S0908W0378 莎宾娜也凭借兼具清纯和性感气质的漂亮外形走红日本 -BAC009S0908W0379 甚至有日本的大牌经纪公司希望与其签约 -BAC009S0908W0380 做客日本电视台的新闻节目 -BAC009S0908W0381 不少媒体追问她是否有男朋友 -BAC009S0908W0382 莎宾娜透露目前单身理想型是喜欢运动 -BAC009S0908W0383 身材高挑并且不抽烟喝酒的男生 -BAC009S0908W0384 当下想把注意力集中在打球上 -BAC009S0908W0385 暂时不考虑恋爱的问题 -BAC009S0908W0386 这一单身宣言更加激发了日本粉丝对她的痴迷 -BAC009S0908W0387 希望可以见到她本人 -BAC009S0908W0388 该球队在官方博客上 -BAC009S0908W0389 但喜欢欧美音乐爱吃西红柿意大利面 -BAC009S0908W0391 看好她成为日本排球的新女神 -BAC009S0908W0392 美貌和实力并存的选手太稀罕了 -BAC009S0908W0393 莎宾娜已经在今年八月秘密抵达日本 -BAC009S0908W0395 她的母亲在采访中表示莎宾娜为了提升自己的实力 -BAC009S0908W0396 以哈萨克斯坦排协特派选手的方式加盟日本的球队 -BAC009S0908W0397 日本的排球训练是出了名的严厉 -BAC009S0908W0398 对此莎宾娜已经做好了吃苦的心理准备 -BAC009S0908W0399 家人和哈排协也表明了全力支持她的态度 -BAC009S0908W0400 不仅在各国网络社区和比赛中表现活跃 -BAC009S0908W0401 也成为哈萨克斯坦的宣传大使 -BAC009S0908W0402 日本排球界的人士指出 -BAC009S0908W0404 但是由于加朵要为蝙蝠侠大战超人忙碌 -BAC009S0908W0405 档期遇到了不可调和的冲突 -BAC009S0908W0406 因此不得不放弃宾虚的演出 -BAC009S0908W0407 这对她来说也是一个巨大的遗憾 -BAC009S0908W0408 私底下对歌迷亲切和善 -BAC009S0908W0409 最近人在大陆举行巡回演唱会的他 -BAC009S0908W0410 却被曝出在机场大发飙 -BAC009S0908W0411 有网友则晒出当天现场情况 -BAC009S0908W0412 搜狐娱乐讯据香港媒体报道 -BAC009S0908W0413 分享入行二十年的感受 -BAC009S0908W0414 陈奕迅坦言自己一直有情绪病 -BAC009S0908W0415 而且是一个爱哭鬼 -BAC009S0908W0416 常常在看电影和新闻时流泪 -BAC009S0908W0417 不开心时会找太太徐濠所倾诉 -BAC009S0908W0418 搜狐娱乐讯据香港媒体报道 -BAC009S0908W0419 陈奕迅在香港出席品牌活动 -BAC009S0908W0420 现场他透露道近日忙于内地巡演 -BAC009S0908W0421 对于天津爆炸时间 -BAC009S0908W0422 他表示感到伤痛 -BAC009S0908W0423 又透露去年曾在天津举办演唱会 -BAC009S0908W0424 希望送上歌曲今日为受害者打气 -BAC009S0908W0425 也祝福伤者早日康复 -BAC009S0908W0426 搜狐娱乐讯四月三十日 -BAC009S0908W0427 称这二人总是可以把自己逗笑 -BAC009S0908W0428 照片中二人坐在沙发上 -BAC009S0908W0429 谢霆锋戴着帽子 -BAC009S0908W0430 穿着白背心黑色短裤 -BAC009S0908W0431 数万只黄色小鸡散落路上 -BAC009S0908W0432 
村民蜂拥而至捉小鸡 -BAC009S0908W0433 香港明报参考消息网八月二十九日报道港媒称 -BAC009S0908W0434 近日又出现疯抢水果捡漏等事 -BAC009S0908W0435 有内地学者分析背后心态 -BAC009S0908W0436 是因为国民抢习惯了 -BAC009S0908W0437 港媒称马云向浙江商人发出警告永远不要行贿 -BAC009S0908W0438 港媒评助学达人性侵女童案加强监管是关键 -BAC009S0908W0439 资料图王杰图片来源于网络 -BAC009S0908W0440 港媒评中国游客全球爆买旅游幼稚病 -BAC009S0908W0441 参考消息网一零月八日报道国庆长假结束 -BAC009S0908W0442 媒体再次盘点长假期间的各种热点新闻 -BAC009S0908W0443 其中一组中国旅游购物者全面攻陷日本的图片 -BAC009S0908W0444 多家媒体就这组图片中的场景和现象作出评论 -BAC009S0908W0445 并提出多种思考和提示 -BAC009S0908W0446 比如就中国游客热衷日本药品 -BAC009S0908W0447 歧视中国药企改进质量提高信誉改善用户体验 -BAC009S0908W0448 以便提高药品竞争力等等 -BAC009S0908W0449 港媒道士下山被批引发网友广泛讨论 -BAC009S0908W0450 参考消息网七月二零日报道 -BAC009S0908W0451 港媒上海成为亚洲奢华生活最昂贵的城市 -BAC009S0908W0452 参考消息网一零月二九日报道港媒称 -BAC009S0908W0453 上海已成为全亚洲奢华生活最昂贵的城市 -BAC009S0908W0454 垫底的是印度城市孟买 -BAC009S0908W0455 港媒东莞工地连续两次坍塌路面似被吸入地底 -BAC009S0908W0456 东莞常平一地盘两日两度地陷 -BAC009S0908W0457 网上流传的视频可见 -BAC009S0908W0458 地面在几秒内迅速塌陷成一个大坑 -BAC009S0908W0459 恐影响旁边大厦的基地 -BAC009S0908W0460 网络图片参考消息网八月一四日报道港媒称 -BAC009S0908W0461 一三日上午一零时许 -BAC009S0908W0462 东莞常平住宅大厦联邦花园旁边发生大面积地陷 -BAC009S0908W0463 面积达逾三零零平方米 -BAC009S0908W0464 造成一名井下工人死亡 -BAC009S0908W0465 该地盘曾发生地陷事故 -BAC009S0908W0466 现场流出的短片显示 -BAC009S0908W0467 每一次塌陷的区域前已有一个大坑 -BAC009S0908W0468 港媒中国人启动营养革命养生书籍热卖 -BAC009S0908W0469 参考消息网八月一五日报道 -BAC009S0908W0470 港媒中国出现多中心大都市郊区需要更多移民 -BAC009S0908W0471 参考消息网八月二五日报道 -BAC009S0908W0472 港媒中国发布金牌月嫂标准实用性遭质疑 -BAC009S0908W0473 参考消息网七月八日报道 -BAC009S0908W0474 港媒中国城市告别血汗工厂经济转型见成效 -BAC009S0908W0475 参考消息网八月一二日报道 -BAC009S0908W0476 港媒中国患者年底可在线上美国医生咨询病情 -BAC009S0908W0477 参考消息网九月二五日报道港媒称 -BAC009S0908W0478 在中国某个在线医疗平台增设一项新服务之后 -BAC009S0908W0479 中国正在逐步拥抱智能技术和数字至上创业精神 -BAC009S0908W0480 港媒中国成访日第一大客源国还会持续增加 -BAC009S0908W0481 参考消息网八月二日报道外媒称 -BAC009S0908W0482 访日外国游客突破千万 -BAC009S0908W0483 其中上半年中国访日游客接近翻倍 -BAC009S0908W0484 超过韩国成为访日最大客源国 -BAC009S0908W0485 更是扭转日本旅游赤字 -BAC009S0908W0486 港媒中国科学家研究蜈蚣毒液发现新止痛药 -BAC009S0908W0487 蜈蚣资料图参考消息网一零月二二日报道中国科学家称 -BAC009S0908W0488 港媒中式教学不可复制中国学生在哪都能拿高分 -BAC009S0908W0489 参考消息网九月二三日报道港媒称 -BAC009S0908W0490 宣传的重点是中国教育和英国教育之战 -BAC009S0908W0491 港媒中秋赏月航班受热捧部分靠窗座位售罄 -BAC009S0908W0492 参考消息网九月一三日报道港媒称 -BAC009S0908W0493 很多人都已为赏月做准备 -BAC009S0908W0494 如果对一般登高赏月仍未满足 -BAC009S0908W0495 可以考虑一下空中赏月 -BAC009S0912W0121 房地产相关领域问题频发 -BAC009S0912W0122 东地产财经周刊新一年度审计工作报告出炉 -BAC009S0912W0123 审计署审计长刘家义受国务院委托 -BAC009S0912W0124 土地相关的审查成为重点之一 -BAC009S0912W0125 刘家义在报告中指出 -BAC009S0912W0126 共审计二十个省本级和二百个市 -BAC009S0912W0127 二零零八年至二零一五年 -BAC009S0912W0128 这些地区批准建设用地二百万公顷 -BAC009S0912W0129 取得土地出让收入十三万亿元 -BAC009S0912W0130 支出十二万亿元 -BAC009S0912W0131 为经济社会发展提供了重要基础和支持 -BAC009S0912W0132 土地出入收入累计结馀五千亿元 -BAC009S0912W0133 主要是土地出让收入少征三千亿元 -BAC009S0912W0134 一些地方和单位少支付补偿一亿元 -BAC009S0912W0135 编造虚假资料等套取或骗取补偿一亿元 -BAC009S0912W0136 一些地方土地出让收支核算不够规范 -BAC009S0912W0137 减免或返还土地出让收入一亿元 -BAC009S0912W0138 建设用地方面也暴露了不少问题 -BAC009S0912W0139 违规以租代征改变规划条件等用地一万公顷 -BAC009S0912W0140 有一个突破土地或城市规划 -BAC009S0912W0141 还有一个违规扩区一万公顷 -BAC009S0912W0142 虚增耕地质量不达标的分别占百分之十和百分之三十 -BAC009S0912W0143 整治资金被挤占挪用等一亿元 -BAC009S0912W0144 纠正违法用地一万起 -BAC009S0912W0145 制定完善制度一百多项 -BAC009S0912W0146 审计已向有关部门移送重大违法违纪问题三百起 -BAC009S0912W0147 各级政府安排财政资金一亿元 -BAC009S0912W0148 为安居工程建设提供了资金保障 -BAC009S0912W0149 还有一亿元被套取或用于弥补经费不足等 -BAC009S0912W0150 有关地方追回资金或补贴一亿元 -BAC009S0912W0151 清理收回住房二十套 -BAC009S0912W0152 取消一万户家庭的保障资格 -BAC009S0912W0153 审计已向有关部门移送重大违法违纪问题三十起 -BAC009S0912W0154 在对央企的审计也发现了不少问题 -BAC009S0912W0155 中粮集团违规投资四亿元对原培训中心进行改扩建 -BAC009S0912W0156 受土地开发政策和土地规划限制未开发建设 -BAC009S0912W0157 六年土地收入十三万度审计报告中 -BAC009S0912W0158 房地产相关领域问题频发 -BAC009S0912W0159 东地产财经周度审计工作报告出炉 -BAC009S0912W0160 审计署审计长刘家义受国务院委托 -BAC009S0912W0161 羊年置业小调查的调查结果截图 -BAC009S0912W0162 
二初楼市迎来多项利好政策 -BAC009S0912W0163 在多项政策的支持下 -BAC009S0912W0164 今年楼市将走向何方 -BAC009S0912W0165 中新网房产频道推置业小调查 -BAC009S0912W0167 十位网友参与了本次调查 -BAC009S0912W0168 在参与调查的网友中 -BAC009S0912W0169 约六成网友看涨全国的商品房价格 -BAC009S0912W0170 万科获选性价比最高的房企 -BAC009S0912W0171 房价的一涨一跌都牵动着购房者的神经 -BAC009S0912W0172 百分之五的网友认为房价将普遍上涨 -BAC009S0912W0173 百分之五的网友认为房价将普遍下跌 -BAC009S0912W0174 百分之五的网友认为房价走势不好判断 -BAC009S0912W0175 作为楼市政策的风向标 -BAC009S0912W0176 二全国两会或将楼市基调 -BAC009S0912W0177 国务院总理李克强在二政府工作报告中表示 -BAC009S0912W0178 支持居民自住和改善住房需求 -BAC009S0912W0179 促进房地产市场平稳健康发展 -BAC009S0912W0180 这也从宏观层面明确了政府对于房地产市场的态度 -BAC009S0912W0181 在今年两会是否会开启新一轮楼市调控这个问题上 -BAC009S0912W0182 中新网的调查结果显示 -BAC009S0912W0183 百分之五的网友认为不会 -BAC009S0912W0184 百分之五的网友认为会 -BAC009S0912W0185 百分之五的网友认为不好说 -BAC009S0912W0186 楼市政策也深刻影响着房地产行业的走向 -BAC009S0912W0187 抓紧做好故调查处理工作 -BAC009S0912W0188 督促责任单位彻底排查溢油风险点 -BAC009S0912W0189 并重新编报海洋环境影响报告书 -BAC009S0912W0190 彻底查明事故原因 -BAC009S0912W0191 查清事故造成的危害及损失 -BAC009S0912W0192 维护受损各方合法权益 -BAC009S0912W0193 立即部署开展海洋石油勘探开发安全生产检查 -BAC009S0912W0194 全面加强海洋环境监视监测和监督管理 -BAC009S0912W0195 全面准确及时发布事故处置相关信息 -BAC009S0912W0196 抓紧研究完善海洋环境保护的法律法规 -BAC009S0912W0197 入海污染物排放总量下降 -BAC009S0912W0198 力争渤海近岸海域水质总体改善 -BAC009S0912W0199 优化产业结构与布局 -BAC009S0912W0200 切实改变沿海地区重化工比重过大过于集中的状况 -BAC009S0912W0201 严格控制新上石化项目 -BAC009S0912W0202 禁止在可能造成生态严重失衡的地方进行围填海活动 -BAC009S0912W0203 有效控制陆海污染源 -BAC009S0912W0204 坚持海陆统筹河海兼顾 -BAC009S0912W0205 加强入海河流综合治理 -BAC009S0912W0206 合理布局入海排污口 -BAC009S0912W0207 制定更加严格的地方水污染排放标准 -BAC009S0912W0208 努力保护和修复渤海生态系统 -BAC009S0912W0209 加强用水总量控制与调度管理 -BAC009S0912W0210 改善河口和近岸海域生态环境 -BAC009S0912W0211 加强海陆过渡区生态建设 -BAC009S0912W0212 逐步恢复湿地生态功能 -BAC009S0912W0213 在海洋环境敏感区关键区等划定生态红线 -BAC009S0912W0214 有效防范海洋环境灾害 -BAC009S0912W0215 建立渤海海洋环境预警机制和突发事件应对机制 -BAC009S0912W0216 修订完善相关应急预案 -BAC009S0912W0217 强化地方政府和企业的主体意识法制意识 -BAC009S0912W0218 落实海洋环境保护责任 -BAC009S0912W0219 提高公众参与渤海环境保护的积极性和主动性 -BAC009S0912W0220 建立公开透明的信息发布机制 -BAC009S0912W0221 会议讨论进一步加强环境保护工作的意见 -BAC009S0912W0222 强调必须把污染治理和生态保护摆在更加重要的位置 -BAC009S0912W0223 切实解决损害公众健康影响科学发展的突发环境问题 -BAC009S0912W0224 落实节能减排各项任务 -BAC009S0912W0225 凡依法应当进行环评的建设规划和项目 -BAC009S0912W0226 都要严格履行环评程序 -BAC009S0912W0227 环评过程要公开透明 -BAC009S0912W0228 充分征求专家和社会公众意见 -BAC009S0912W0229 要依法追究管理部门责任企业及有关人员的责任 -BAC009S0912W0230 切实加强重金属污染防治 -BAC009S0912W0231 对重点地区行业和企业 -BAC009S0912W0232 妥善处理重金属污染历史遗留问题和突发污染事件 -BAC009S0912W0233 保障人民群众生命健康安全 -BAC009S0912W0234 严格化学品环境管理 -BAC009S0912W0235 对化学品项目布局进行梳理评估 -BAC009S0912W0236 对化学品生产经营企业进行环境隐患排查 -BAC009S0912W0237 对海洋江河湖泊沿岸化工企业进行集中综合整治 -BAC009S0912W0238 落实环境监管责任和安全保障措施 -BAC009S0912W0239 提高化学品生产的环境准入门槛 -BAC009S0912W0240 加强农村环境保护 -BAC009S0912W0241 集中整治存在突出环境问题的村庄和集镇 -BAC009S0912W0242 重点治理农村土壤饮用水水源地污染 -BAC009S0912W0243 推动环保基础设施和服务向农村延伸 -BAC009S0912W0244 引导和帮助农民科学处理垃圾和污水 -BAC009S0912W0245 科学使用农药化肥和农膜 -BAC009S0912W0246 严格农村工矿企业环境监管 -BAC009S0912W0247 坚决防止污染向农村转移 -BAC009S0912W0248 加快建设环境监测预警体系 -BAC009S0912W0249 完善环境事件应急机制 -BAC009S0912W0250 完善环境法律政策体系 -BAC009S0912W0251 针对近期各种环境事件暴露出的问题 -BAC009S0912W0252 抓紧制定和修订相关法律法规 -BAC009S0912W0253 毛利率也只有百分之十四 -BAC009S0912W0254 由此可见苹果现在的业务确实比汽车行业更加赚钱 -BAC009S0912W0256 他表示他肯定会与苹果展开合作 -BAC009S0912W0257 苹果公司一直在秘密从事电汽汽车的研发 -BAC009S0912W0258 并且计划最早在二零二零年推出生产首款车型 -BAC009S0912W0259 苹果已为汽车项目招募了数百名员工 -BAC009S0912W0260 包括电池和机器人技术领域的专家 -BAC009S0912W0261 苹果涉足汽车行业并不是一个好主意 -BAC009S0912W0264 除了苹果上周公布的选定合作伙伴 -BAC009S0912W0269 将会在设备发售稳定的推出与更新 -BAC009S0912W0278 此款健康设备将延迟到明年推出 -BAC009S0912W0279 根据知情人士获得的安吉拉录音手稿 -BAC009S0912W0280 安吉拉要求零售店员工养精蓄锐 -BAC009S0912W0281 为即将到来的购物季 -BAC009S0912W0282 以及中国的春节做准备 -BAC009S0912W0285 后有消息称该款产品将于今年的情人节推出 -BAC009S0912W0286 不过目前看来不大可能 -BAC009S0912W0287 
因为春季的计算方式是从三月二十日到六月三十日 -BAC009S0912W0290 有报道称苹果计划在二零一四年秋季推出其可穿戴设备 -BAC009S0912W0291 该产品将延迟到二零一五年发布 -BAC009S0912W0292 纷至沓来的报道显示 -BAC009S0912W0293 电池的技术难题最终导致了它的延迟推出 -BAC009S0912W0299 包括更换不同尺寸型号和不同的表带 -BAC009S0912W0302 这将是苹果零售店采用的全新模式 -BAC009S0912W0303 如果融入移动互联的新时代 -BAC009S0912W0304 我们凭什么征战全世界 -BAC009S0912W0305 在前段时间的上海家电展上 -BAC009S0912W0306 美的集团总裁方洪波提出了上述三个问题 -BAC009S0912W0307 这是当前所有中国家电企业 -BAC009S0912W0308 都必须要回答的问题 -BAC009S0912W0309 如果不回答这三个课题 -BAC009S0912W0310 企业所有的目标都是空洞的 -BAC009S0912W0311 得出这个结论来自于方洪波对当前形势的判断 -BAC009S0912W0312 中国家电企业现在正面临前所未有的挑战 -BAC009S0912W0313 过去三十年高速发展的前提条件没有了 -BAC009S0912W0316 在全世界的产业格局看 -BAC009S0912W0317 全世界排列的二加三格局 -BAC009S0912W0319 这样一个全球白电的格局短期内是难以撬动的 -BAC009S0912W0320 时代力量正在颠复着家电行业 -BAC009S0912W0321 移动互联以前改变的是软的层面 -BAC009S0912W0322 比如流程的缩短平台化的应用 -BAC009S0912W0323 转型升级应该在十年前就开始了 -BAC009S0912W0324 中国家电企业在世界产业链地位弱小 -BAC009S0912W0325 跟世界产业的差距不是在缩小 -BAC009S0912W0326 这是目前我们中国家电企业面临的具体挑战 -BAC009S0912W0327 这些挑战来自于四面八方 -BAC009S0912W0328 未来给我们的机会和空间是有限的 -BAC009S0912W0330 白电行业将进入最惨烈的一年 -BAC009S0912W0331 昔日巨头格力美的海尔也将沉浮于其中 -BAC009S0912W0332 从本年度第一份季报来看 -BAC009S0912W0333 三巨头中的格力海尔均出现不同程度 -BAC009S0912W0335 下称美的内部的组织架构二点一五年加大了调整力度 -BAC009S0912W0336 七月成立了美的部品事业部 -BAC009S0912W0337 威灵电机将有可能兼并美芝压缩机 -BAC009S0912W0338 这一切并不是说说而已 -BAC009S0912W0339 而是要明确落实在数字上 -BAC009S0912W0340 控制在六零微克立方米左右 -BAC009S0912W0341 这与市民的期望和城市发展的愿景也是一致的 -BAC009S0912W0342 二零一七年二零二二年 -BAC009S0912W0343 我们还将继续加大污染防治力度 -BAC009S0912W0344 这一点对于京津冀一带的居民来说是才最重要的 -BAC009S0912W0345 因为每个人都需要呼吸 -BAC009S0912W0346 场馆建设一简约而不简单 -BAC009S0912W0347 二零二二年北京冬奥会计划使用一二个比赛场馆 -BAC009S0912W0348 总体上以节俭办赛为原则进行规划建设和改造使用 -BAC009S0912W0349 充分利用北京奥运后的丰富遗产 -BAC009S0912W0350 仅有三个场馆需要新建 -BAC009S0912W0351 分别是位于北京市区的国家速滑馆和延庆的二个雪场 -BAC009S0912W0352 其馀场馆改建后均可满足赛事需要 -BAC009S0912W0353 既免去了不必要的花费 -BAC009S0912W0354 每个场馆又高端大气上档次 -BAC009S0912W0355 真可谓是简约而不简单啊 -BAC009S0912W0356 花样滑冰短道速滑项目在首都体育馆进行 -BAC009S0912W0357 冰壶项目在水立方进行 -BAC009S0912W0358 计划明年就将开始动工 -BAC009S0912W0359 速滑馆建成后将设置四百米滑道 -BAC009S0912W0360 设有座位一万两千个 -BAC009S0912W0361 在冬奥会举办之前这里将为专业队伍训练提供场地 -BAC009S0912W0362 我们的奥运健儿将在此努力备战 -BAC009S0912W0363 成为市民体验冰上运动的乐园 -BAC009S0912W0364 张家口市的崇礼县从每年十一月初到第二年四月初 -BAC009S0912W0365 崇礼县发展较成熟的万龙滑雪场和云顶滑雪场 -BAC009S0912W0366 加上一个仍在建的太舞四季滑雪场 -BAC009S0912W0367 均已被纳入二零二二年冬奥会的规划场馆 -BAC009S0912W0368 万龙和云顶滑雪场都将根据赛事要求进行改造和扩建 -BAC009S0912W0369 小海坨山是位于延庆境内的海坨山主峰 -BAC009S0912W0370 此地春秋冬三季有雪 -BAC009S0912W0371 滑雪期从十一月下旬到次年三月中旬 -BAC009S0912W0372 这里常年吸引着众多登山探险运动爱好者 -BAC009S0912W0373 拥有高山滑雪要求的八百米落差 -BAC009S0912W0374 非常适合修建高山雪场 -BAC009S0912W0375 将依托现有山体地形修建临时场地设施 -BAC009S0912W0376 用作雪车雪橇大项和滑雪大项中的高山滑雪比赛场地 -BAC009S0912W0377 全民冰雪季奥运健儿助力 -BAC009S0912W0378 早在申办北京冬奥会的时候 -BAC009S0912W0379 很多人都看到了新的奥运商机 -BAC009S0912W0380 会投资建设一些冰雪主题乐园和冬季项目体验场所 -BAC009S0912W0381 在全民健身成为国家战略的大背景下 -BAC009S0912W0382 观赛便利不出国门看奥运 -BAC009S0912W0383 以往想要见识奥运级别的比赛 -BAC009S0912W0384 冰雪爱好者不得不选择出国 -BAC009S0912W0385 高昂的交通和住宿成本让很多人望而却步 -BAC009S0912W0386 如今在家门口就可以实现这个愿望了 -BAC009S0912W0387 交通住宿花费大大降低 -BAC009S0912W0388 让我们能够来一次说走就走的冬奥之行 -BAC009S0912W0389 在主场为中国健儿加油 -BAC009S0912W0390 该是一件多幸福的事啊 -BAC009S0912W0392 责任编辑冯浩 -BAC009S0912W0393 十月十八日早上九点 -BAC009S0912W0394 各地跑步爱好者齐聚一堂 -BAC009S0912W0395 共同享受奔跑带来的乐趣 -BAC009S0912W0396 经历过北京站和上海站两次比赛 -BAC009S0912W0397 本次沈阳站赛场迎来了许多熟悉的面孔 -BAC009S0912W0398 尤为引人瞩目的莫过于李子成 -BAC009S0912W0399 他更是以三十分十七秒一举夺得奔跑中国三连冠 -BAC009S0912W0400 而十公里女子组由刘庆红以三十四分十秒夺得冠军 -BAC009S0912W0401 海信一汽大众等知名企业和品牌也依旧亮相赛场 -BAC009S0912W0402 以不同方式助力本次比赛胜利进行 -BAC009S0912W0403 近四千名跑步爱好者和其家人朋友齐聚于此 -BAC009S0912W0404 全球范围内的创收达到十一点八亿美元 -BAC009S0912W0405 亚当桑德勒成功卫冕 -BAC009S0912W0406 约翰尼德普紧随其后 -BAC009S0912W0407 但是出于预算考虑 
-BAC009S0912W0408 陈奕迅隔空发表爱的宣言也是啊 -BAC009S0912W0409 例如出入帮忙开门拉椅子 -BAC009S0912W0410 新京报报道思维发散表情与肢体语言丰富 -BAC009S0912W0411 对于疯癫陈奕迅所长的这些设定歌迷早已习惯了 -BAC009S0912W0412 在凭借专辑米闪成为新一轮金曲歌王后 -BAC009S0912W0414 朱祖儿操刀灰色调封面 -BAC009S0912W0415 袁两半一人歌词包办 -BAC009S0912W0416 处于寻找状态中的挣扎 -BAC009S0912W0417 然而准备中三个字卸掉了他的纠结 -BAC009S0912W0418 二十九日晚间举办媒体听歌会 -BAC009S0912W0419 现场试听无条件人生马拉松等六首歌曲 -BAC009S0912W0420 终站是好友谢霆锋的创作 -BAC009S0912W0422 花了三年时间才得到这首歌 -BAC009S0912W0423 被问是否感觉到谢霆锋与王菲恋爱的甜蜜 -BAC009S0912W0428 十九点二十六分 -BAC009S0912W0429 好友陈妍希晒与潘玮柏搞怪合影为他庆生 -BAC009S0912W0430 称潘玮柏生日快乐 -BAC009S0912W0431 港富豪被绑涉及两岸三地绑匪要求赎金用比特币 -BAC009S0912W0432 日前遭人绑架并勒索七零零零万港元 -BAC009S0912W0433 台港警方追查一个月 -BAC009S0912W0434 二十七日深夜终于在云林县一家废弃空屋中救出了黄立坤 -BAC009S0912W0435 获救第一句话就是我以为我活不了了 -BAC009S0912W0436 港报评上海迪尼士不意味着香港迪尼士的没落 -BAC009S0912W0437 参考消息网七月二八日报道 -BAC009S0912W0438 港报内地医院仍控制处方药销售电商盼网售解禁 -BAC009S0912W0439 参考消息网九月一七日报道港媒称 -BAC009S0912W0440 自从中国内地的第一家网上药店一零年前开张以来 -BAC009S0912W0441 大量资本已投入医药企业中 -BAC009S0912W0442 希望能从中国内地日益老龄化的一三亿人口中受益 -BAC009S0912W0443 港校两名内地生酒后街头野战当事人被起底 -BAC009S0912W0444 南都讯记者王睦广发自香港今年四月初 -BAC009S0912W0445 被拍下短片冠以野战之名在网上疯传 -BAC009S0912W0446 二人早前被香港警方以有违公德罪落案起诉 -BAC009S0912W0447 事件中的女方昨日被判一二个月感化令 -BAC009S0912W0448 男方则将于下月庭审 -BAC009S0912W0449 港珠澳大桥又起漂移风波可能进一步影响工期 -BAC009S0912W0450 其人工岛被指移动六七米 -BAC009S0912W0451 这个意外可能进一步影响工期 -BAC009S0912W0452 游乐场大章鱼甩飞游客母亲落地时紧抱儿子 -BAC009S0912W0453 在空中以高速自转带给游客惊险刺激的体验 -BAC009S0912W0454 背部撞断了游乐场场边的三根不锈钢护栏 -BAC009S0912W0455 游学夏令营的无奈花豪华团价格吃喝难保 -BAC009S0912W0457 游客三亚海滩赏月后留二九吨垃圾三百人连夜清理 -BAC009S0912W0458 当海滩上如潮的人群散去 -BAC009S0912W0459 留下的却是被随手丢弃的垃圾 -BAC009S0912W0460 虽然海滩上设置了众多垃圾桶 -BAC009S0912W0461 但赏月人群还是乱扔垃圾 -BAC009S0912W0462 从二八日凌晨四点半至六点半这整整二个小时里 -BAC009S0912W0463 游客三亚游泳致终身残疾向旅行社索赔一九六万 -BAC009S0912W0464 成都男子张呈亮化名旅行时到三亚海滩游泳 -BAC009S0912W0465 下海后却突然失去意识 -BAC009S0912W0466 送医后被查出颈部脊髓损伤 -BAC009S0912W0467 张先生在青羊法院提起诉讼 -BAC009S0912W0468 此案正在进一步审理之中 -BAC009S0912W0469 游客下桥拍照踩死植物水杉栈道仙境拉铁丝网 -BAC009S0912W0470 当植物恢复正常生长后铁丝网将拆除 -BAC009S0912W0471 游客不满小孩超高补票与景区工作人员群殴 -BAC009S0912W0472 一段游客暴打景区员工的视频开始在网上发酵 -BAC009S0912W0473 某景点大门处多名游客与身着穿服的工作人员大打出手 -BAC009S0912W0474 游客乌鲁木齐吃自助被罚二四零零元工商部门介入 -BAC009S0912W0475 剩下了一二零零克食物 -BAC009S0912W0476 被餐厅罚款二四零零元 -BAC009S0912W0477 餐厅返还了游客的二四零零元 -BAC009S0912W0478 物价部门工商部门已介入调查 -BAC009S0912W0479 游客偷走雷峰塔砖块想供奉起来做药给老人喝 -BAC009S0912W0480 游客入住药店被收二零元马桶使用费消协可举报 -BAC009S0912W0481 住酒店还要交二零元马桶费 -BAC009S0912W0482 南京市民张女士化姓去无锡旅游时 -BAC009S0912W0483 通过网站团购了无锡江南丹青度假酒店一间套房 -BAC009S0912W0484 退房结账时却被告知扣了二零元马桶使用费 -BAC009S0912W0485 这让张女士哭笑不得 -BAC009S0912W0486 酒店方承诺退还二零元马桶使用费 -BAC009S0912W0487 酒店行为属于乱收费 -BAC009S0912W0488 消费者可以直接向物价部门和旅游部门举报 -BAC009S0912W0489 现代快报记者赵书伶 -BAC009S0912W0490 游客再曝日照点海鲜太少被围殴当地警方证实 -BAC009S0912W0491 网友先在微博中陈述了悲惨遭遇 -BAC009S0912W0492 据称是当事人之一在派出所通过一个亲戚的微博发的 -BAC009S0912W0493 游客北京游两天遭引导消费近二万元 -BAC009S0912W0494 京华时报讯记者武红利与家人来京旅游 -BAC009S0912W0495 王女士与旅行社签订四天五晚的旅行合同 -BAC009S0913W0121 在最希望国家实施的调控政策这一问题上 -BAC009S0913W0122 有百分之五的网友选择了提高公积金贷款额度 -BAC009S0913W0123 百分之五的网友选择了房贷利率打折优惠 -BAC009S0913W0124 百分之五的网友倾向于房产税的开征 -BAC009S0913W0125 百分之五的网友希望放开一线城市的限购政策 -BAC009S0913W0126 网友的置业目的为首套房自住的占到了百分之六十 -BAC009S0913W0127 改善型二套房比例比约为百分之五 -BAC009S0913W0128 三套以上投资性购房占百分之五 -BAC009S0913W0129 其他目的的占比为百分之五 -BAC009S0913W0130 在商品房性价比的选择上 -BAC009S0913W0131 万科以百分之五的票数获选性价比最高的房企 -BAC009S0913W0132 绿地保利万达分列性价比最高房企的二三四名 -BAC009S0913W0133 选择恒大世茂富力的网友均不足百分之十 -BAC009S0913W0134 有百分之五的网友选择了其他房企 -BAC009S0913W0135 二初楼市迎来多项利好政策 -BAC009S0913W0136 在多项政策的支持下 -BAC009S0913W0137 支持新产业新业态 -BAC009S0913W0138 集中释放用地政策红利 -BAC009S0913W0139 在加大新供用地保障力度方面 -BAC009S0913W0140 新产业发展快地用地集约求且需求大的地区 -BAC009S0913W0141 在鼓励盘活利用现有用地方面 -BAC009S0913W0142 
意见提出对制造业迈向中高端的企业用地 -BAC009S0913W0143 生产性科技及高技术服务业发展用地 -BAC009S0913W0144 建设创业创新平台用地 -BAC009S0913W0145 互联网行动计划实实施用地实行过渡期政策 -BAC009S0913W0146 按新用途新权利类型市场价办理用地手续 -BAC009S0913W0147 支持新产业新业态 -BAC009S0913W0148 由国土资源部联合国家 -BAC009S0913W0149 正式放松外贸外资投资我国房地产相关规定 -BAC009S0913W0150 我国对房地产的行政干预政策陆续退出 -BAC009S0913W0151 放松限外是必然趋势 -BAC009S0913W0152 此举将有利于市场信心的培养 -BAC009S0913W0153 并利好一二线城市的中高端住宅 -BAC009S0913W0154 对于外商投资房地产企业注册资本与投资总额比例 -BAC009S0913W0155 对于实施住房限购政策的城市 -BAC009S0913W0156 境外个人购房应当符合当地政策规定 -BAC009S0913W0157 上海易居研究所副院长杨红旭表示 -BAC009S0913W0158 外资管制放松是大势所趋 -BAC009S0913W0159 随着我国行政干预政策的陆续退出 -BAC009S0913W0160 此前为限制外资炒房 -BAC009S0913W0161 我国出台了一系列限外令 -BAC009S0913W0162 二的向境外投资方出售国内资产征税规定 -BAC009S0913W0163 国家外汇局出台过规定 -BAC009S0913W0164 国家发改委也发出过通知 -BAC009S0913W0165 对于提供给外籍人士的个人住房按揭贷款的外债需求 -BAC009S0913W0166 不予安排中长期外债额度 -BAC009S0913W0167 房地产被视作保值升值的投资标的被炒作 -BAC009S0913W0168 但目前的形势早已改变 -BAC009S0913W0169 此前外资购房主要集中在一线城市和几个热点二线城市 -BAC009S0913W0170 而现在这类城市房价已经很高 -BAC009S0913W0171 即使限外令放开也不会出现外资大规模买房 -BAC009S0913W0172 中原地产市场总监张大伟认为 -BAC009S0913W0173 放松对外限制利用利好一二线城市中高端物业 -BAC009S0913W0174 对于外商房企的注册资本金降低要求 -BAC009S0913W0175 也有利于部分企业的资金周转 -BAC009S0913W0176 正式放松外资投资我国房地产相关规定 -BAC009S0913W0177 允许境外机构在境内设 -BAC009S0913W0178 六部委松绑楼市限外令 -BAC009S0913W0179 外资在华房地产投资购房限制被松绑 -BAC009S0913W0180 允许机构和个人在中国购房 -BAC009S0913W0181 中房指数研究所院长陈晟表示 -BAC009S0913W0182 此举对促进外企在华投资房地产有积极作用 -BAC009S0913W0183 相关公司股票走势鄂尔多斯 -BAC009S0913W0184 内地产投资比例有限 -BAC009S0913W0185 此项政策对中国楼市影响有限 -BAC009S0913W0186 对于实施住房限购政策的城市 -BAC009S0913W0187 为环境保护提供更加完备有效的法制保障 -BAC009S0913W0188 进一步完善环境政策 -BAC009S0913W0189 健全环境执法调协调机制 -BAC009S0913W0190 国务院国资委力挺国企 -BAC009S0913W0191 具备条件的要积极引进战略投资者 -BAC009S0913W0192 推进主营业务整体上市 -BAC009S0913W0193 国资委接二连三对此表态或意味着国企将迎来上市高峰 -BAC009S0913W0194 使国有资本更多地向重要行业和关键领域集中 -BAC009S0913W0195 向具有优势的行业集中 -BAC009S0913W0196 向大企业大集团集中 -BAC009S0913W0197 要吸收民间资本参与国有企业改制重组 -BAC009S0913W0198 发展混合所有制经济 -BAC009S0913W0199 发挥国有大企业引领带动作用 -BAC009S0913W0200 促进各种所有制企业共同发展 -BAC009S0913W0201 至二零一一年六月底 -BAC009S0913W0202 中央企业控股境外上市公司 -BAC009S0913W0203 国资委还将支持企业走出去 -BAC009S0913W0204 逐步实现战略运营管管理全球化 -BAC009S0913W0205 应当经国务院国资委核准 -BAC009S0913W0206 办法五月一日起实施 -BAC009S0913W0207 国务院国资委对央企境外投资的管理法规在逐渐完善 -BAC009S0913W0208 中央企业在境外从事非主业投资 -BAC009S0913W0209 需要向国务院国资委报送申请核准非主业投资的请示 -BAC009S0913W0210 对非主业投资项目的有关决策文件 -BAC009S0913W0211 项目可行性研究报告尽职调查等相关文件 -BAC009S0913W0212 办法还特别提出一些建议 -BAC009S0913W0213 国务院国资委将指导中央企业之间加强境外投资合作 -BAC009S0913W0214 中央走出去的步伐正趋加快 -BAC009S0913W0215 央企在境外含港澳地区营收 -BAC009S0913W0216 利润总额较大 -BAC009S0913W0217 同比较上年同期分别增长百分之三十和百分之二十八 -BAC009S0913W0218 涨幅远超央企整体水平 -BAC009S0913W0219 一方面很多央企已制定了海外战略 -BAC009S0913W0220 一些国家经济出现大的波动 -BAC009S0913W0221 而社会罢工劳资纠纷也时有发生 -BAC009S0913W0222 央企在境外投资面临的问题很多 -BAC009S0913W0223 目前央企境外投资仍处在初级阶段 -BAC009S0913W0224 制定和发布办法 -BAC009S0913W0225 是为了进一步建立健全境外国有资产管理制度 -BAC009S0913W0226 切实加强央企境外投资监管 -BAC009S0913W0227 确保境外国有资产保值增值 -BAC009S0913W0228 更好地适应了新形势的需要 -BAC009S0913W0229 国务院多举措力挺农产品流通 -BAC009S0913W0230 免征蔬菜流通环节的增值税 -BAC009S0913W0231 提出完善农产品流通税收政策 -BAC009S0913W0232 免征蔬菜流通环节增值税加强金融支持 -BAC009S0913W0233 相关公司股票走势农产品 -BAC009S0913W0234 各地要鼓励流通企业跨地区兼并重组和投资合作 -BAC009S0913W0235 以加强产销衔接为重点 -BAC009S0913W0236 加强鲜活农产品流通基础设施建设 -BAC009S0913W0237 创新鲜活农产品的流通模式 -BAC009S0913W0238 提高流通组织化程度 -BAC009S0913W0239 完善流通链条和市场布局 -BAC009S0913W0240 进一步减少流通环节 -BAC009S0913W0241 保障鲜活农产品市场供应和价格稳定 -BAC009S0913W0242 各地要依据城市总体规划和城市网点商业规划 -BAC009S0913W0243 鼓励流通企业跨地区兼并重组和投资合作 -BAC009S0913W0244 要大力推进产销衔接 -BAC009S0913W0245 完善市场监测预警和信息发布机制 -BAC009S0913W0246 建立健全重要农产品储备制度 -BAC009S0913W0247 完善农产品跨区调运调剂机制 -BAC009S0913W0248 各城市要根据消费需求和季节变化 -BAC009S0913W0249 合理确定耐贮蔬菜的流通动态库存数量 
-BAC009S0913W0250 加快鲜活农产品质量安全追溯体系建设 -BAC009S0913W0251 通过投资入股产权回购回租建公建配套等方式 -BAC009S0913W0252 发挥财政资金引导示范作用 -BAC009S0913W0254 这样可以加深苹果和消费者之间的关系 -BAC009S0913W0255 对未来的销量至关重要 -BAC009S0913W0258 为提高苹果零售商店的服务质量 -BAC009S0913W0259 该系统包含一套算法 -BAC009S0913W0260 有媒体援引知情人士消息称 -BAC009S0913W0261 苹果将引入这样一套顾客接待系统 -BAC009S0913W0262 前往苹果零售店的顾客将比餐厅订餐叫号还方便 -BAC009S0913W0263 苹果零售店实行先到先服务的原则 -BAC009S0913W0264 这样难免会出现某个客户的维修问题特别复杂 -BAC009S0913W0265 导致技术支持时间超过了预期分配时间 -BAC009S0913W0266 从而影响接下来的客户无法在指定时间点获得服务 -BAC009S0913W0267 新系统可根据难易程度进行排序 -BAC009S0913W0268 与现在的接待原则不同的是 -BAC009S0913W0269 此时客户可以选择离开苹果零售店 -BAC009S0913W0270 而当预订时间接近时 -BAC009S0913W0271 客户会再次收到短信提醒 -BAC009S0913W0272 客户回到苹果零售店后 -BAC009S0913W0274 以告知客户相关技术人员确切的空闲时间 -BAC009S0913W0275 以及在店内的具体位置 -BAC009S0913W0277 为提高苹果零售商店的服务质量 -BAC009S0913W0278 苹果靠什么颠复移动支付市场 -BAC009S0913W0279 苹果一口气召开了两次新品发布会 -BAC009S0913W0280 就在会场的凳子和垃圾尚未收拾干净的时候 -BAC009S0913W0281 全世界的报道已经蜂拥而至 -BAC009S0913W0282 失望中夹杂着嘲讽的情绪霸占了各模块的头条 -BAC009S0913W0283 科技经济社会金融全都是苹果的消息 -BAC009S0913W0284 害得汪峰也不敢随便表白了 -BAC009S0913W0285 而是统一地认为苹果开了有史以来最烂的发布会 -BAC009S0913W0286 他们推出的产品不仅非常鸡肋 -BAC009S0913W0288 就足以让专家们恶心七七四十九天了 -BAC009S0913W0289 但这些口水式的讨伐并没有影响苹果前进的脚步 -BAC009S0913W0291 证明了其向主流妥协的姿态 -BAC009S0913W0293 自二零零七年乔布斯重新发明手机开始 -BAC009S0913W0294 把它升级成为一款综合性智能终端之后 -BAC009S0913W0295 就开始潜移默化地渗透人类的生活 -BAC009S0913W0296 这种渗透犹如蜘蛛结网细菌繁殖病毒传播 -BAC009S0913W0297 悄无声息又经年累月 -BAC009S0913W0298 我们甚至都没有来得及反抗就被完全征服了 -BAC009S0913W0299 我根本无法想象每天在朋友圈上花两个小时的情景 -BAC009S0913W0300 但现在已经成为了习生活习惯 -BAC009S0913W0301 但新贵移动支付具绝对能更深层次地改变用户的生活 -BAC009S0913W0302 乃至颠复现有的经济形态和支付格局 -BAC009S0913W0303 第一财经日报记者七月十三日从美的内部获悉 -BAC009S0913W0304 已获任美的部品事业部的总裁 -BAC009S0913W0305 而威灵电器七月九日下午也公告透露 -BAC009S0913W0306 于一九九一年加盟美的集团 -BAC009S0913W0307 美芝压缩机已是全球最大空调压缩机企业 -BAC009S0913W0308 占全球空调压缩机市场三分之一的份额 -BAC009S0913W0309 美的将美芝压缩机威灵电机合并 -BAC009S0913W0310 将有助于两大部品业务的研发资源销售渠道共享 -BAC009S0913W0311 美的部品事业部建立后 -BAC009S0913W0312 将成立压缩机开发研究院和微电机开发研究院 -BAC009S0913W0313 以区域为中心建立客户经理负责制制造方面 -BAC009S0913W0314 负责统一管理原电机事业部的各工厂制造系统 -BAC009S0913W0315 原压缩机事业部各工厂保持不变 -BAC009S0913W0316 美的集团公关部的相关人士告诉第一财经日报记者 -BAC009S0913W0317 目前美芝与威灵的合并 -BAC009S0913W0318 仅处于美的集团内部管理架构调整的阶段 -BAC009S0913W0319 还没体现在香港上市公司威灵电器的业务层面 -BAC009S0913W0320 由于向为民已获任威灵电机的董事会主席 -BAC009S0913W0321 威灵电机今后兼并美芝压缩机 -BAC009S0913W0322 美芝压缩机是隶属于美的集团旗下的业务 -BAC009S0913W0323 由于美的集团本身就是威灵电机的大股东 -BAC009S0913W0324 即使今后美芝压缩机被威灵电器兼并 -BAC009S0913W0325 也对美的集团的总体业绩影响不大 -BAC009S0913W0326 而威灵电器二零一四年的营业额约九十二点七三亿港元 -BAC009S0913W0327 同比增长百分之四净利润六点七八亿港元 -BAC009S0913W0328 同比下跌十三六点百分之六 -BAC009S0913W0329 如果威灵电器兼并美芝压缩机 -BAC009S0913W0330 将有利于增加威灵电器的收入和利润 -BAC009S0913W0331 除了威灵电器与美芝压缩机合并成为美的部品事业部之外 -BAC009S0913W0332 美的最近还把洗碗机事业部合并到美的的厨房电器事业部 -BAC009S0913W0333 美的的洗碗机业务以外销为主 -BAC009S0913W0334 业物内士向记者分析说 -BAC009S0913W0335 被合并到美的的厨房电器事业部后 -BAC009S0913W0336 将有助于美的洗碗机开拓国内市场 -BAC009S0913W0337 破坏和颠复是互联网时代的特征 -BAC009S0913W0338 美丽的丁香湖公园成为跑步爱好者的狂欢圣地 -BAC009S0913W0339 剪纸皮影戏等特色节目更是吸引了一批批观众围观 -BAC009S0913W0340 跑友们积极的参与剪纸活动 -BAC009S0913W0341 亲身感受沈阳当地浓郁的民俗文化内蕴 -BAC009S0913W0342 许多跑友争先恐后穿上沈阳花棉袄拍照 -BAC009S0913W0343 并与亲朋好友分享这份快乐 -BAC009S0913W0344 而涂鸦墙上写满了跑友们的目标和愿望 -BAC009S0913W0345 伴随着专业啦啦队的加油声 -BAC009S0913W0346 跑友们在奔跑中国沈阳站的赛道上尽情的展示自己 -BAC009S0913W0347 赛道两边设置了许多专业摄像头 -BAC009S0913W0348 主办方试图记录每一个跑友挥洒激情的每一个瞬间 -BAC009S0913W0349 将这份快乐与跑对跑步的执着传递给身边的好友 -BAC009S0913W0351 同时带动当地人民的奔跑热情 -BAC009S0913W0354 更加多维度的助推跑步事业在中国的发展 -BAC009S0913W0355 服务广大跑步爱好者 -BAC009S0913W0356 奔跑中国系列竞跑赛事将转战广州 -BAC009S0913W0357 中新网成都九月十五日电付敬懿十五日 -BAC009S0913W0358 服务时间约为五十三万小时 -BAC009S0913W0359 自二零一四年十二月五日正式启动志愿者招募工作以来 -BAC009S0913W0360 因为本次赛事比赛周期长赛区跨度大 -BAC009S0913W0361 经过网络测试综合面试专业技能体能测试等环节 
-BAC009S0913W0362 机关企事业单位等社会志愿者三千一百名 -BAC009S0913W0363 为做好志愿者服务工作 -BAC009S0913W0364 邀请专家学者等三十馀人组成志愿者培训导师库 -BAC009S0913W0365 指导各赛区开展志愿服务培训 -BAC009S0913W0366 组委会设计了具有四川特色的志愿者服装 -BAC009S0913W0367 志愿者的那一抹绿并大家亲切地称呼为小青椒 -BAC009S0913W0368 随着赛会推进被越来越多的人所熟知 -BAC009S0913W0369 成为本届残运会志愿服务文化的重要部分 -BAC009S0913W0370 电子科大的小青椒早上六点起床 -BAC009S0913W0371 每天忙碌十三个小时 -BAC009S0913W0372 用他们真挚的微笑和运动员建立起心与心的连接 -BAC009S0913W0373 四川大学的手语志愿者要学习四千个手语动作 -BAC009S0913W0374 而他们熟练掌握的秘笈是一次又一次反复的训练和排练 -BAC009S0913W0375 小青椒用热情和真诚打动了每位运动员 -BAC009S0913W0376 他们每天手牵手肩并肩出入赛场 -BAC009S0913W0377 就像认识多年的朋友和兄弟姐妹一样 -BAC009S0913W0378 湖北运动员的家长给小青椒写来致谢信 -BAC009S0913W0379 也温暖和感动着志愿者 -BAC009S0913W0380 北京时间十月十日 -BAC009S0913W0381 根据韩国乒乓球协会的相关规定 -BAC009S0913W0382 根据国际乒联刚刚公布的最新一期世界排名 -BAC009S0913W0383 而排名第三位的李尚洙 -BAC009S0913W0384 将只参加奥运会团体赛的比赛 -BAC009S0913W0385 此次韩国男团派出了一老带二新的阵容 -BAC009S0913W0386 此次里约奥运会也将会是其第三次征战奥运会比赛 -BAC009S0913W0387 作为经验最为丰富的老大哥 -BAC009S0913W0388 他将尽全力带领队伍取得好成绩 -BAC009S0913W0389 在韩国男队中排名第四 -BAC009S0913W0390 女排三零阿根廷朱婷复出扣杀状态神勇 -BAC009S0913W0391 二零一五年第十二届女排世界杯战至第八轮 -BAC009S0913W0392 中国女排直落三周以三零取胜阿根廷拿到第七胜 -BAC009S0913W0393 早前意外崴伤脚踝的朱婷强势复出 -BAC009S0913W0394 斩获十五分冠全场并且拦网独得四分 -BAC009S0913W0395 伤愈复出找手感一传防守遇考验 -BAC009S0913W0396 本报讯记者李晖经过两天转场 -BAC009S0913W0397 中国女排昨天下午在冈山迎战古巴队 -BAC009S0913W0398 三局比分是二五比一九二五比十和二五比一四 -BAC009S0913W0399 中国女排从第三轮开始便被挤出了三甲 -BAC009S0913W0400 而东道主日本队紧追在中国队之后 -BAC009S0913W0401 若想保住进入前两名的资格 -BAC009S0913W0402 中国队在第二阶段的第三场比赛不仅要保全取九个积分 -BAC009S0913W0403 而且还要尽量在小分上取得优势 -BAC009S0913W0404 福斯只允许先拍一部 -BAC009S0913W0405 另一部要视独立日二的票房而定 -BAC009S0913W0406 影片的上映日期 -BAC009S0913W0407 也从原计划的二零一六年七月一日 -BAC009S0913W0408 潘玮柏以侧颜出镜 -BAC009S0913W0409 与陈妍希分别看向对方 -BAC009S0913W0410 可见两人友谊非同一般 -BAC009S0913W0411 搜狐娱乐据讯据香港媒体报道 -BAC009S0913W0412 陈妍希今天五月二日下午出席公益活动 -BAC009S0913W0413 小洋装更衬托出她的纤细身材 -BAC009S0913W0414 不过她出道以来身材一直是外界关注的焦点 -BAC009S0913W0415 陈妍希一直努力让自己的脸圆圆脸变瘦 -BAC009S0913W0416 今天她出席活动 -BAC009S0913W0417 对着镜头嘟嘴吐舌 -BAC009S0913W0418 当被问到对于被选为棉花糖女孩比较肉感的女生 -BAC009S0913W0419 她笑说我觉得蛮好的啊 -BAC009S0913W0420 搜狐娱乐讯日前 -BAC009S0913W0421 名为娱乐圈八卦的自然自媒体 -BAC009S0913W0422 曝出陈妍希拍戏时突然干呕 -BAC009S0913W0423 并推断其已怀孕 -BAC009S0913W0424 陈妍希公司官方账号发表微博辟谣 -BAC009S0913W0425 否认了陈妍希疑似怀孕的传闻 -BAC009S0913W0426 称陈妍希目前还在剧组拍戏 -BAC009S0913W0427 并感谢了各界对于陈妍希公开与陈晓恋情的祝福 -BAC009S0913W0428 陈妍希在微博发布跳绳视频 -BAC009S0913W0429 并写道每天早上二十零下 -BAC009S0913W0430 中午二十零下 -BAC009S0913W0431 北京地接旅行社有限公司负责人称 -BAC009S0913W0432 向乘客收取的船费属于应收项目 -BAC009S0913W0433 旅行社为了盈利设置购物环节 -BAC009S0913W0434 北京旅游服务热线反馈称 -BAC009S0913W0435 还有待职能部门进一步调查 -BAC009S0913W0436 游客参与不合理低价游将被罚专家怎么判断 -BAC009S0913W0437 关于低价游旅行团因强制购物产生的纠纷事件频出 -BAC009S0913W0438 甚至还出现了一些造成游人身伤害的悲剧 -BAC009S0913W0439 旅游法早已明令禁止 -BAC009S0913W0440 游客参与不合理低价游也将受到受处难执行 -BAC009S0913W0441 京汇佳律师事务所律师邱宝昌表示 -BAC009S0913W0442 消费者根本很难判断什么叫做不合理低价 -BAC009S0913W0444 园中园收费超景区大门票 -BAC009S0913W0445 游客在乌鲁木齐市吃自助餐浪费食物被罚二四零零元 -BAC009S0913W0446 剩下了一二零零克食物 -BAC009S0913W0447 被餐厅罚款二四零零元 -BAC009S0913W0448 此事昨日经网络曝光后 -BAC009S0913W0449 食客该不该如此浪费 -BAC009S0913W0450 餐厅有没有权力罚款 -BAC009S0913W0451 成为了网民争相讨论的话题 -BAC009S0913W0452 游客在公园躲雨遭雷击已脑死亡至今无人负责 -BAC009S0913W0453 信息时报讯记者周伟龙八月一零日下午 -BAC009S0913W0454 六名游客在海珠湖公园凉亭内躲雨 -BAC009S0913W0455 昨日记者从医院了解到 -BAC009S0913W0456 目前黄某已被诊断为脑死亡 -BAC009S0913W0457 记者回访海珠湖公园发现 -BAC009S0913W0458 出事凉亭依然呈现事发时的状态 -BAC009S0913W0459 一旦雷雨天游客在亭内出事 -BAC009S0913W0460 该告示不能成为园方免责的理由 -BAC009S0913W0461 游客在北京动物园内小树间拉吊床摇荡 -BAC009S0913W0462 却要承载一个成年人的体重 -BAC009S0913W0463 一家三口在两棵树间拉起了一张吊床 -BAC009S0913W0464 父亲和孩子轮流上去躺 -BAC009S0913W0465 躺进吊床的父亲还荡起吊床 -BAC009S0913W0466 游客在新加坡买祖母绿回国发现非纯天然 -BAC009S0913W0467 夏先生带太太跟团去新马泰旅游 -BAC009S0913W0468 在新加坡花费三万元购买了纯天然的祖母绿吊坠 -BAC009S0913W0469 
回国后经鉴定发现不是天纯天然的 -BAC009S0913W0470 游客在日照旅游吃海鲜太少被围殴警方都有错 -BAC009S0913W0471 大众网河南游客爆料在日照旅游团因吃海鲜太少被围殴 -BAC009S0913W0472 警方回应都有过错经警方调查 -BAC009S0913W0473 双方均有不同程度受伤 -BAC009S0913W0474 河南籍游客张某某手部受伤及表皮损伤 -BAC009S0913W0475 店主陈某头皮裂创二处 -BAC009S0913W0476 游客在济南景区水池许愿观赏莲被砸成马蜂窝 -BAC009S0913W0477 游客扔硬币许愿观赏莲被砸成马蜂窝 -BAC009S0913W0478 游客在百年老店买到发霉盐水鸭商家主动退款 -BAC009S0913W0479 谢女士购买的盐水鸭外包装 -BAC009S0913W0480 华商报讯记者杨德合买了两个肉夹馍 -BAC009S0913W0481 结果被店员搓走了二零零零元 -BAC009S0913W0482 尽管在民警的协助下 -BAC009S0913W0483 但这也让首次来到陕西游玩的孙女士感到憋屈 -BAC009S0913W0484 游客大铜缸刻字警察喊话故宫刻字者请自首 -BAC009S0913W0485 北京警方已介入调查 -BAC009S0913W0486 目前正在进行一步工作中 -BAC009S0913W0487 游客成都遇连环车祸近千人隧道里死里逃亡 -BAC009S0913W0488 图片由胡先生提供本报讯记者喻莉出门旅游 -BAC009S0913W0489 近千人在隧道里上演生死时速 -BAC009S0913W0490 武汉网友胡琦的一条短信微博引起众人关注 -BAC009S0913W0491 记者联系上胡先生才知虚惊一场 -BAC009S0913W0492 现场有人喊有车要爆炸 -BAC009S0913W0493 事后才了解他们遭遇的只是普通连环车祸 -BAC009S0913W0494 游客打车被找四张同号假钞官方疑遇克隆车 -BAC009S0913W0495 其在出行成都打车时被司机找了四张同号的二元零元假币 -BAC009S0914W0121 境外个人购买应当符合当地政策规定 -BAC009S0914W0122 外资在华房地产投资限制松绑已成大势 -BAC009S0914W0123 在限限制外商投资产业目录中 -BAC009S0914W0124 已经删除了此前针对外商投资房地产的全部限制类条款 -BAC009S0914W0125 放开外资购买房产限制 -BAC009S0914W0126 外资只可以购买商铺写字楼等物业 -BAC009S0914W0127 普通住宅很可能仍将限购 -BAC009S0914W0128 而就昨日六部委松绑楼市限外令的情况来看 -BAC009S0914W0129 对于中国楼市的影响不必过于乐观 -BAC009S0914W0130 取消限外令将促进外企在华投资房地产 -BAC009S0914W0131 对于中国楼市有一定积极作用 -BAC009S0914W0132 特别是在海外热钱有外流预期的情况下 -BAC009S0914W0133 继续限制外资投资中国房地产已经不合时宜 -BAC009S0914W0134 外资占国内地产投资比例有限 -BAC009S0914W0135 此项政策对中国楼市影响有限 -BAC009S0914W0136 中国房地产学会副会长陈国强也认为 -BAC009S0914W0137 外资购房主要集中在一线城市和几个热点二线城市 -BAC009S0914W0138 而现在这类城市的房价已经很高 -BAC009S0914W0139 即使限外令放开也不会出现大规模外资买房 -BAC009S0914W0140 正处于筑底回暖阶段 -BAC009S0914W0141 主要还是依靠中国国内企业投资 -BAC009S0914W0142 虽然一线城市房价已出现反弹 -BAC009S0914W0143 但包括鄂尔多斯温州等地的去库存还是非常困难 -BAC009S0914W0144 七月份各线城市房价分化仍然明显 -BAC009S0914W0145 目前整体的宏观经济还是比较困难的 -BAC009S0914W0146 房地产的投资增速目前不到五百分之 -BAC009S0914W0148 因此开发还要继续坚定的开工和拿地的信心 -BAC009S0914W0149 这种分化情况会更剧烈 -BAC009S0914W0150 但是整体回暖和好转态势已经确定 -BAC009S0914W0151 与前年差不多这种状态 -BAC009S0914W0152 外资在华房地产投资购房限制被松绑 -BAC009S0914W0153 允许机构和个人在中国购 -BAC009S0914W0154 六部委调整房地产市场外资准入和管理政策 -BAC009S0914W0155 为促进房地产市场平稳健康发展 -BAC009S0914W0156 一外商投资房地产企业注册资本与投资总额比例 -BAC009S0914W0157 对于实施住房限购政策的城市 -BAC009S0914W0158 境外个人购房应当符合当地政策规定 -BAC009S0914W0159 优化和改进外商投资房地产管理 -BAC009S0914W0160 除上述政策调整以外 -BAC009S0914W0161 为促进房地产市场平稳健康发展 -BAC009S0914W0162 以及在中国境内工作学习的境外个人 -BAC009S0914W0163 可以购买符合实际需要的自用自住商品房 -BAC009S0914W0164 外商投资房地产企业注册资本与投资总额比例 -BAC009S0914W0165 将依照中外合资经营企业的相关暂行规定 -BAC009S0914W0166 中新网八月二十七日电据商务部官网公布的文件显示 -BAC009S0914W0167 取消外商投资房地产企业 -BAC009S0914W0168 六部门出台新政楼市限外政策放松 -BAC009S0914W0169 这来外资进入我国房地产市场最宽松的政策 -BAC009S0914W0170 这份只有五百多字的通知印发于八月十九日 -BAC009S0914W0171 规定外商投资建立房地产企业 -BAC009S0914W0172 投资总额超过一千万美元含一千万美元的 -BAC009S0914W0173 注册资本金不得低于投资总额的百分之五十 -BAC009S0914W0174 外商投资房地产企业注册资本金未全部缴付的 -BAC009S0914W0175 未取得国有土地使用证的 -BAC009S0914W0176 或开发项目资本金未达到项目投资总额百分之五 -BAC009S0914W0177 不得办理境内境外贷款 -BAC009S0914W0178 外汇管理部门不予批准该企业的外汇借款结汇 -BAC009S0914W0179 不得购买非自用非自住商品房 -BAC009S0914W0180 港澳台地区居民和华侨因生活需要 -BAC009S0914W0181 可在境内限购一定面积的自住商品房 -BAC009S0914W0182 二到二 -BAC009S0914W0183 我国楼市正处在急速上升通道 -BAC009S0914W0184 大量外资希望进入我国市场 -BAC009S0914W0185 面对楼市中急剧增长的投资热情 -BAC009S0914W0186 对购买住房的数量也未做要求 -BAC009S0914W0187 带动和规范民间资本进入农产品流通领域 -BAC009S0914W0188 完善农产品流通税收政策 -BAC009S0914W0189 免征蔬菜流通环节增值税 -BAC009S0914W0190 加大涉农贷款投放力度 -BAC009S0914W0191 可按作价出资入股方式办理理用地手续 -BAC009S0914W0192 但禁止改变用途和性质 -BAC009S0914W0193 严厉打击农产品投机炒作 -BAC009S0914W0194 做好外资并购大型农产品批发市场的安全审查 -BAC009S0914W0195 严格执行鲜活农产品运输绿色通道政策 -BAC009S0914W0196 加快农产品流通标准体系建设 -BAC009S0914W0197 各地各部门加强组织领导 -BAC009S0914W0198 农产品产销对接的经验介绍 
-BAC009S0914W0199 农产品产销合作社简介 -BAC009S0914W0200 海南农产品流通现状 -BAC009S0914W0201 农产品流通加工标准化 -BAC009S0914W0202 中国对农产品流通政策 -BAC009S0914W0203 温家宝主持召开国务院常务会议 -BAC009S0914W0204 研究部署在城市优先发展公共交通 -BAC009S0914W0205 审议通过缺陷汽车产品召回管理条例草案 -BAC009S0914W0206 国务院总理温家宝主持召开国务院常务会议 -BAC009S0914W0207 研究部署在城市优先发展公共交通 -BAC009S0914W0208 审议通过缺陷汽车产品召回管理条例草案 -BAC009S0914W0209 为加快发展中等职业教育 -BAC009S0914W0210 自秋季学期起 -BAC009S0914W0211 多数城市公共交通出行比例偏低 -BAC009S0914W0212 为从根本上缓解交通拥堵出行不便环境污染等矛盾 -BAC009S0914W0213 必须树立公共交通优先发展理念 -BAC009S0914W0214 将公共交通放在城市交通发展的首要位置 -BAC009S0914W0215 加快构建以公共交通为主 -BAC009S0914W0216 同时改善步行自行车出行条件 -BAC009S0914W0217 城市综合交通体系规划应明确公共交通优先发展原则 -BAC009S0914W0218 城市公共交通规划要科学布局线线网 -BAC009S0914W0219 促进城市内外交通便利衔接和城乡公共交通一体化发展 -BAC009S0914W0220 加快基础设施建设 -BAC009S0914W0221 提升公共交通设施装备水平 -BAC009S0914W0222 提高公共交通舒适性 -BAC009S0914W0223 将其纳入旧城改造和新城建设规划 -BAC009S0914W0224 加强公共交通用地综合开开发 -BAC009S0914W0225 对新建公共交通设施用地的地上地下空间 -BAC009S0914W0226 按照市场化原则实施土地综合开发 -BAC009S0914W0227 收益用于公共交通基础设施建设和弥补运营亏损 -BAC009S0914W0228 加大政府投入 -BAC009S0914W0229 城市政府要将公共交通发展资金纳入公共财政体系 -BAC009S0914W0230 对城市公共交通企业实行税收优惠政策 -BAC009S0914W0231 落实对城市公共交通行业的成品油价格补贴政策 -BAC009S0914W0232 对城市轨道交通运营企业实行电价优惠 -BAC009S0914W0233 拓宽投资渠道 -BAC009S0914W0234 吸引和鼓励社会资金参与公共交通基础设施建设和运营 -BAC009S0914W0235 保障公交路权优先 -BAC009S0914W0236 增加划设城市公共交通优先车道 -BAC009S0914W0237 允许机场巴士校车班车使用公共交通优先车道 -BAC009S0914W0238 加强公共交通优先车道的监控和管理 -BAC009S0914W0239 健全安全管理制度 -BAC009S0914W0240 规范技术和产品标准 -BAC009S0914W0241 构建服务质量评价指标体系 -BAC009S0914W0242 规范公共交通重大决策程序 -BAC009S0914W0243 实行线网规划编制公示制度和运营价格听证制度 -BAC009S0914W0244 建立城市公共交通运营成本和服务质量信息公开制度 -BAC009S0914W0245 应当立即停止生产销售进口 -BAC009S0914W0246 由其生产者实施召回 -BAC009S0914W0247 并及时发布产品缺陷及信息 -BAC009S0914W0248 对实施召回的缺陷汽车产品 -BAC009S0914W0249 生产者应当及时采取措施消除缺陷 -BAC009S0914W0250 会议还研究了其他事项 -BAC009S0914W0251 国务院将对各类交易场所清理整顿 -BAC009S0914W0252 国务院近期将开展对各类交易场所的清理整顿工作 -BAC009S0914W0254 而且这也可以看作是苹果利用硬件优势 -BAC009S0914W0255 衍生出软件服务的又一重要举措 -BAC009S0914W0256 又如何和政府银行搞好关系 -BAC009S0914W0257 证明他们真得没有手机用户信息 -BAC009S0914W0258 苹果靠什么颠复移动支付 -BAC009S0914W0259 苹果推出的每一款新产品都不免要引发大讨论 -BAC009S0914W0260 才能显得像个知识分子 -BAC009S0914W0261 不仅树立了良好的品牌形象 -BAC009S0914W0262 也向全世界推广了一种趋之若鹜的文化 -BAC009S0914W0263 他们真得赚了很多钱 -BAC009S0914W0264 这些特质让库克基本上实现了财务自由 -BAC009S0914W0265 这对于一家巨型企业是非常难能可贵的 -BAC009S0914W0266 而土豪和穷鬼做生意的最大区别就是 -BAC009S0914W0267 而是会更加关注产品本身 -BAC009S0914W0268 以及是否能提升他们的历史地位 -BAC009S0914W0270 他们没有必要着急回本 -BAC009S0914W0271 更大的野心在于深刻变革人类的支付习惯 -BAC009S0914W0272 这种状态是苹果颠复现有市场格局的根基 -BAC009S0914W0273 除却土豪式的生意属性之外 -BAC009S0914W0276 系统会使用不同编码来转移用户凭据和支付数据 -BAC009S0914W0277 整个过程基于安全元素芯片 -BAC009S0914W0278 这种芯片不会直接发送用户敏感信息 -BAC009S0914W0279 而是将其转化成唯一的临时编码 -BAC009S0914W0280 可有效降低信息泄漏的风险其次 -BAC009S0914W0284 苹果积累了海量的绑定信息卡用户 -BAC009S0914W0285 这些资源的特点不单单是数目庞大 -BAC009S0914W0286 而且苹果最早一批的用户积累 -BAC009S0914W0287 囊括了大量的优质资源 -BAC009S0914W0288 甚至包括了一些明星意见领袖和政府官员 -BAC009S0914W0290 更是一种文化和习惯的推广者 -BAC009S0914W0291 柯振东入狱期间的囚服都能在淘宝上热卖 -BAC009S0914W0292 要是詹妮弗劳伦斯也能在微博上说这个应用不错 -BAC009S0914W0293 一定会有立竿见影的推广效果 -BAC009S0914W0294 也在所不惜的最后 -BAC009S0914W0296 早在九月九日发布会上 -BAC009S0914W0297 苹果就公布了合作伙伴 -BAC009S0914W0298 包括迪斯尼耐克麦当劳梅西百货公司等巨头企业 -BAC009S0914W0300 从这些零售商的等级来看 -BAC009S0914W0301 库克团队应当是花费了大量精力 -BAC009S0914W0302 移动支付肯定会有井喷式的发展 -BAC009S0914W0303 现阶段管理创新和组织再造比任何的创新都重要 -BAC009S0914W0304 美的美的在二零一四年三月正式发布智慧家庭战略 -BAC009S0914W0305 未来将搭建空气水营养等智能管家平台 -BAC009S0914W0306 事业部制一直是美的快速成长的法宝 -BAC009S0914W0307 一定程度上影响了资源整合的效率 -BAC009S0914W0308 美的已将风扇加湿器等空气类产品 -BAC009S0914W0309 归到家用空调事业部旗下 -BAC009S0914W0310 围绕几大智能管家平台 -BAC009S0914W0311 美的整合事业部精简组织架构 -BAC009S0914W0312 也是顺应互联网时代管理扁平化的趋势 -BAC009S0914W0314 下称美的内部的组织架构二零一五年加大了调整力度 -BAC009S0914W0316 
每日经济新闻记者从美的家用空调事业部了解到 -BAC009S0914W0317 自二零一一年事业部启动自动化升级至今的四年里 -BAC009S0914W0318 工人数量减少近一半 -BAC009S0914W0319 美的家用空调事业部制造副总裁乌守保对记者表示 -BAC009S0914W0320 到二零一八年美的空调营收到达一千亿元规划时 -BAC009S0914W0321 员工数量将减至两万人 -BAC009S0914W0322 虽然投入产生问题以及机器人后期运行维护等 -BAC009S0914W0323 都是家电企业自动化升级需要面临的挑战 -BAC009S0914W0324 自动化是未来唯一出路 -BAC009S0914W0325 四年来机器人代替人工近半 -BAC009S0914W0326 美的家用空调事业提出精品战略 -BAC009S0914W0327 机器人应用也进一步提速 -BAC009S0914W0328 二零一一年美的空调达到五百亿元营收规模时 -BAC009S0914W0329 工人数量超过五万以上 -BAC009S0914W0330 空调业务总营收接近七百亿元 -BAC009S0914W0331 工人数量已经缩减至二点六万人 -BAC009S0914W0332 除了在顺德工厂建成全自动遥控器生产线外 -BAC009S0914W0333 美的空调还在其他地区工厂建有三条全自动生产线 -BAC009S0914W0334 经过前几年自动化生产线升级改造 -BAC009S0914W0335 美的空调工厂的注塑车间 -BAC009S0914W0336 在无开灯照明的情况下也能正常稳定运行 -BAC009S0914W0337 钣金冲压已实现无人运行 -BAC009S0914W0338 而在昨天对阵古巴队的比赛中 -BAC009S0914W0339 中国队教练组还是做出了让朱婷继续休战的抉择 -BAC009S0914W0340 来自北汽女排的主攻手刘晓彤取代朱婷的位置首发出场 -BAC009S0914W0341 除了第一局在开局阶段古巴队一度领先外 -BAC009S0914W0342 比赛的节奏始终被中国队控制在手中 -BAC009S0914W0343 中国队直落三局零封对手 -BAC009S0914W0344 曾春蕾和张常宁均拿到十六分 -BAC009S0914W0345 俄罗斯美国和日本三队均零封对手 -BAC009S0914W0346 此轮战罢后积分榜前四名排位没有任何变化 -BAC009S0914W0347 俄罗斯队十七分居榜首 -BAC009S0914W0348 美国队十六分排第二 -BAC009S0914W0349 日本和中国同积十五分 -BAC009S0914W0350 日本以小分优势暂列第三位 -BAC009S0914W0351 中国队将迎战冈山赛区的第二个对手肯尼亚队 -BAC009S0914W0352 中国女排昨天下午在松本迎战韩国队 -BAC009S0914W0353 主攻手朱婷不慎扭伤脚踝后依然带伤奋战 -BAC009S0914W0354 最终中国队以三比一力战韩国队全取三分 -BAC009S0914W0355 中韩之战中国队首发再次变阵 -BAC009S0914W0356 二传丁霞和主攻刘晏含取代了沈静思和张常宁的位置 -BAC009S0914W0357 张常宁则取代曾春蕾站在接应的位置上 -BAC009S0914W0358 中国队迅速调整阵容 -BAC009S0914W0359 逐渐控制住了局面并连扳两局以二比一优先 -BAC009S0914W0360 关键的第四局一开始中国队便发生了意外 -BAC009S0914W0361 一脸痛苦的朱婷当即被换下场 -BAC009S0914W0362 失去了最稳定的得分手之后 -BAC009S0914W0363 中国队进攻火力明显减弱 -BAC009S0914W0364 而看到了希望的韩国队也趁机拼命反击 -BAC009S0914W0365 当打到一三比一四中国队落后一分时 -BAC009S0914W0366 在场下接受完队医高压包扎后的朱婷请命上场 -BAC009S0914W0367 虽然扣球落地后朱婷依然一瘸一拐 -BAC009S0914W0368 见此情景韩国队的信心受到了打击 -BAC009S0914W0369 尽管也一度以二一比一七领先四分之多 -BAC009S0914W0370 但朱婷与队友们合力打出了一波八比二的高潮 -BAC009S0914W0371 最终中国队以二五比二三拿下第四局 -BAC009S0914W0372 以三比一胜出拿到了宝贵的三个积分 -BAC009S0914W0373 俄俄罗斯队以全胜战绩列积分榜首位 -BAC009S0914W0374 日本与美国同积十十分暂列二三两位 -BAC009S0914W0375 中国和多米尼加同积九分排在第四和第五位 -BAC009S0914W0376 今天中国队将迎战第一阶段的最后一个对手秘鲁队 -BAC009S0914W0377 中国女排三十一日本四夺世界杯冠军直通里约奥运 -BAC009S0914W0378 女排三十一日本进军里约众将欢度欢庆 -BAC009S0914W0379 夺冠的同时摘得本次世界杯的冠军 -BAC009S0914W0380 同时拿到了明年里约奥运会的入场券 -BAC009S0914W0381 是全场得分最高的运动员 -BAC009S0914W0382 也让这位一九九四年出生的河南妹子 -BAC009S0914W0383 逐步成长为中国女排的新核心 -BAC009S0914W0385 在今年的亚锦赛夺冠后 -BAC009S0914W0386 关于中国队过于依赖朱婷的言论不少 -BAC009S0914W0387 本赛季调进张常宁就是郎平为朱婷解压的一个表现 -BAC009S0914W0388 加上惠若琪因伤缺席本届世界杯 -BAC009S0914W0389 张常宁的幼稚嫩显然还不能立即挑起大梁 -BAC009S0914W0390 这支女排的暴露性强攻基本上都是靠朱婷打 -BAC009S0914W0391 郎平也认为这样去打世界高水平的球队是不够的 -BAC009S0914W0392 在目前中国队的阵容中 -BAC009S0914W0393 霸气外露的朱婷是不可或缺的绝对核心 -BAC009S0914W0394 在队长惠若琪缺阵的情况下 -BAC009S0914W0395 她几乎担当起了场上进攻加振奋士气的主力作用 -BAC009S0914W0396 半决赛对阵俄罗斯的比赛中 -BAC009S0914W0397 朱婷全场夺得二十九分 -BAC009S0914W0398 在俄罗斯队的严密拦防下 -BAC009S0914W0399 进攻成功率达到百分之五十六点七六拦网 -BAC009S0914W0400 朱婷得到七分同样全队最高 -BAC009S0914W0401 作为一个主攻手非常不易 -BAC009S0914W0402 与几乎不接一传的科舍列娃相比 -BAC009S0914W0403 朱婷的任务更重效率更高 -BAC009S0914W0404 提前一周至二零一六年六月二十四日 -BAC009S0914W0405 避免和新木乃伊正面较量 -BAC009S0914W0406 来源时光网美国时间本周一 -BAC009S0914W0407 二十世纪福斯影业公布一批新片的档期 -BAC009S0914W0408 晚上二十零下 -BAC009S0914W0409 马甲线啊马甲线 -BAC009S0914W0410 力证自己没有怀孕 -BAC009S0914W0411 网友纷纷调侃道为了辟谣怀孕也是蛮拼的 -BAC009S0914W0412 哈哈哈第一次见人用这种方式证明自己没怀孕 -BAC009S0914W0413 搜狐娱乐讯九月六日 -BAC009S0914W0414 陈妍希晒出一组攀岩照 -BAC009S0914W0415 并称攀岩太难会晃 -BAC009S0914W0416 不抓紧会被撞到地上 -BAC009S0914W0417 希饭快来接住我 -BAC009S0914W0418 陈妍希穿着粉色上衣 -BAC009S0914W0419 头发随意披在脑后 -BAC009S0914W0420 手脚并用努力向往上爬 -BAC009S0914W0421 似乎已过了第三关 -BAC009S0914W0422 如此高难度的动作 
-BAC009S0914W0423 再次身体力行地辟谣怀孕传闻 -BAC009S0914W0424 搜狐娱乐讯近日频频传出陈晓向陈妍希求婚成功的消息 -BAC009S0914W0425 陈妍希回应现在真的很享受快乐恋爱的喜悦 -BAC009S0914W0426 有进一步消息一定会通知大家 -BAC009S0914W0427 中新网七月二十二日电据台湾东森新闻消息 -BAC009S0914W0428 陈妍希曾在新版神鵰侠侣中演小龙女 -BAC009S0914W0429 被网友调侃是小笼包 -BAC009S0914W0430 尽管她努力瘦身 -BAC009S0914W0431 当事网友疑遭遇克隆车 -BAC009S0914W0432 经调查核实相关情况 -BAC009S0914W0433 游客抢订冬奥运旅游团因遭遇订票难住房等 -BAC009S0914W0434 北京冬奥会刚刚申办成功 -BAC009S0914W0435 已经有游客迫不及待想去张家口看看了 -BAC009S0914W0436 游客摔断腿旅游社赔三成因旅游时未尽提示义务 -BAC009S0914W0437 游客日照海鲜店被打受伤警方称言语冲突引发互殴 -BAC009S0914W0438 京华时报讯记者卫张宁昨天上午 -BAC009S0914W0439 自己和家人因点的海鲜较少 -BAC009S0914W0440 并被店主及店员辱骂围殴 -BAC009S0914W0441 当时游客出言不逊在先 -BAC009S0914W0442 并未将游客脱光衣服殴打 -BAC009S0914W0443 日照市公安局官方发布消息 -BAC009S0914W0444 称事件系点餐过程中 -BAC009S0914W0445 双方发生语言冲突后进行互殴 -BAC009S0914W0446 已依法对双方进行处罚 -BAC009S0914W0447 游客晋吉岛乘船颠骨折诉旅社索赔二零馀万元 -BAC009S0914W0448 本来一家人出国旅游挺高兴的 -BAC009S0914W0449 可是我遇见这事还不够添堵的呢 -BAC009S0914W0450 崔先生带家人随团前往泰国晋吉岛游玩 -BAC009S0914W0451 导致崔先生腰部受伤 -BAC009S0914W0452 回国后被确诊为腰部骨折 -BAC009S0914W0453 将接团的两家旅行社起诉至法院 -BAC009S0914W0454 索赔各项损失共计二零馀万元 -BAC009S0914W0455 昌平法院开庭审理了这起案件 -BAC009S0914W0456 游客景区被忽悠八零零克石斛收费一二六零零元 -BAC009S0914W0457 滕女士在云南购买的石斛 -BAC009S0914W0458 游客武夷山就餐麝香肉结账要四八元一两 -BAC009S0914W0459 旅游点餐时与海鲜店主起争执互殴二人被行政拘留 -BAC009S0914W0460 新京报讯记者林斐然近日 -BAC009S0914W0461 有网友反映前往山东日照一海排档点海鲜时 -BAC009S0914W0462 该事件系游客点餐时嫌大排档太脏而引起口角纷 -BAC009S0914W0463 日照市公安局官方微博通报了这一事件的调查情况 -BAC009S0914W0464 双方因互殴均被行政拘留并处罚款 -BAC009S0914W0465 游客爬到峨眉山悬崖边石头上拍照 -BAC009S0914W0466 游客称点海鲜太少被当地媒体老板受伤更重 -BAC009S0914W0467 事情的真相完全不是这样的 -BAC009S0914W0468 大排档老板受伤更严重 -BAC009S0914W0469 起因也完全不是河南游客自己说的那样 -BAC009S0914W0470 希望警方尽快给出公平调查结果 -BAC009S0914W0471 游客称在山东日照只因点海鲜少全家遭殴打恐吓 -BAC009S0914W0472 并最新发微博表示当地警方已介入调查 -BAC009S0914W0473 游客突破八万人限流大关故宫首次提前禁止售票 -BAC009S0914W0474 新京报讯记者黄颖自七月六日进入暑期以来 -BAC009S0914W0475 故宫博物院接待的观众量也日益攀升 -BAC009S0914W0476 屡屡逼近八万人次的限流大关 -BAC009S0914W0477 故宫首次启动了起流起票限流措施 -BAC009S0914W0478 在馀票数量为售后现场关闭售票窗口 -BAC009S0914W0479 游客美签被废因访美停留太久称从没到过欧洲 -BAC009S0914W0480 而被美国海关移民官遣返 -BAC009S0914W0481 游客脚踩烈士铜像拍照四名当事人鞠躬道歉 -BAC009S0914W0482 四人鞠躬道歉据瓜沥人网 -BAC009S0914W0483 游客被黑导游拉进农家宴消费蘑菇炖鸡卖九零零元 -BAC009S0914W0484 其中一道蘑菇炖鸡收费近九零零元 -BAC009S0914W0485 看到该网友的曝光帖后 -BAC009S0914W0486 崂山景区勒令该农家宴停止停止营业 -BAC009S0914W0487 并索偿该游客全部损失 -BAC009S0914W0488 游客西安遭天价玛卡商家四零零零元一价合理 -BAC009S0914W0489 张先生购买的四零零元玛卡 -BAC009S0914W0490 内江人张先生在这次国庆期间 -BAC009S0914W0491 被导游介绍到一家购物点后 -BAC009S0914W0492 他被迫交了四零零元 -BAC009S0914W0493 这一斤玛卡其实价格只有一零零多元 -BAC009S0914W0494 一捧玛卡磨成粉景区商家要四零零零元 -BAC009S0914W0495 游客要退团张家界低价团导游称信不信你走走不了 -BAC009S0915W0121 从房地产的角度来看 -BAC009S0915W0122 这个政策的出台是希望刺激房地产投资 -BAC009S0915W0123 则是希望防止外资流出 -BAC009S0915W0124 国家统计局公布的数据显示 -BAC009S0915W0125 今年一到七月全国房地产开发投资五万亿元 -BAC009S0915W0126 增速比一到六月回落一个百分点 -BAC009S0915W0127 开发商投资增速处于不断下降的状态 -BAC009S0915W0128 市场开发也呈降温态势 -BAC009S0915W0129 此次出台的新政虽然放宽了条件 -BAC009S0915W0130 但对于实施住房限购政策的城市 -BAC009S0915W0131 境外个人购房依然需要符合当地政策规定 -BAC009S0915W0132 境外机构和个人在中国投资购买房地产的限制放松 -BAC009S0915W0133 兰州房地产市场回暖销量增加价格微涨 -BAC009S0915W0134 自二夏季开始 -BAC009S0915W0135 得益于一系列稳定房地产市场的措施 -BAC009S0915W0136 兰州房地产市场销量增加明显 -BAC009S0915W0137 一些楼盘新房价格出现微涨 -BAC009S0915W0138 较上月环比上涨百分之五 -BAC009S0915W0139 这也是该指数连续三个月出现上涨 -BAC009S0915W0140 而在多时间里 -BAC009S0915W0141 兰州新建住宅价格均呈现微降的态势 -BAC009S0915W0142 兰州楼市出现明显的区域分化 -BAC009S0915W0143 兰州市中心城区的一些楼盘 -BAC009S0915W0144 自今年初至今上涨幅度超过了十百分之 -BAC009S0915W0145 可由于中心城区楼盘数量稀少 -BAC009S0915W0146 在兰州雁滩区域的一家楼盘 -BAC009S0915W0147 而在兰州市新开楼盘集中的城郊区域 -BAC009S0915W0148 但房企调价幅度有限 -BAC009S0915W0149 由于商品房供应量充足 -BAC009S0915W0150 多个楼盘仍然采取的是低价走量的策略 -BAC009S0915W0151 在兰州市北岸由广东房企开发的一个大型楼盘里 -BAC009S0915W0152 但房价从七月至今上涨幅度仅为百分之二左右 -BAC009S0915W0153 
今兰州市商品房销售面积同比上涨超过百分之三十 -BAC009S0915W0154 商品房销售额同比上涨超过了百分之四十 -BAC009S0915W0155 许多刚性住房和改善型住房需求得到释放 -BAC009S0915W0156 兰州房地产市场存在持续上涨可能 -BAC009S0915W0157 但由于房地产市场供给仍然不仍然充足 -BAC009S0915W0158 自二夏季开始 -BAC009S0915W0159 得益于一系列稳定房地产市场的措施 -BAC009S0915W0160 兰州房地产市场销量增加明显 -BAC009S0915W0161 而且提供各项衍生的福利性服务 -BAC009S0915W0162 中新网十月二十一日前 -BAC009S0915W0163 北京又一家共享创办公平台落地丰台 -BAC009S0915W0164 借全国大众创业万众创新活动周启动之势 -BAC009S0915W0166 将生活社区与科技园区两种空间组织融合 -BAC009S0915W0167 作为美国新型共享式办公与创新环境的运营品牌 -BAC009S0915W0168 是国际上合作性办公品牌的代表 -BAC009S0915W0169 由此拉开了跨境共享创新生态平台化发展的新时代 -BAC009S0915W0170 而且提供各项行生的福利性服务 -BAC009S0915W0171 帮助创新创业者聚合各方面资源 -BAC009S0915W0172 旨在帮助小型企业降低运运营成本 -BAC009S0915W0174 从创业者真正的需求出发 -BAC009S0915W0175 石榴中心位于丰台区宋家庄交通枢纽商圈 -BAC009S0915W0176 可以北京四环内唯一的国际化共享办公园区 -BAC009S0915W0177 园区总建筑面积一万平方米 -BAC009S0915W0178 其中地上一万平方米 -BAC009S0915W0179 地下一万平方米 -BAC009S0915W0180 由二十二栋企业独栋和二栋二十层的五a级写字楼组成 -BAC009S0915W0181 而且提供各项行生的福利性服务 -BAC009S0915W0182 中新网十月二十日前 -BAC009S0915W0184 关于智能家居你必须懂的五件事 -BAC009S0915W0185 智能家居概念的炒作 -BAC009S0915W0186 这是自媒体时代的胜利 -BAC009S0915W0187 将明确政策界限和工作机制以知以及部门分工 -BAC009S0915W0188 证监会将协同有关部门落实相关工作 -BAC009S0915W0189 公共娱乐场所清理整顿 -BAC009S0915W0190 燃气经经营市场清理整顿 -BAC009S0915W0191 行业协会清理整顿报告 -BAC009S0915W0192 国务院已批准信贷资产证券化继续扩大试点 -BAC009S0915W0193 多方面原因造成今年上半年部分中小企业生产经营困难 -BAC009S0915W0194 但没有出现大范围趋势性的破产倒闭 -BAC009S0915W0195 部分中小企业国内生产成本有所提高 -BAC009S0915W0196 这主要有四方面原因 -BAC009S0915W0197 中小企业经营困难 -BAC009S0915W0198 既是信贷投放回归常态的体现 -BAC009S0915W0199 也是国家淘汰落后产能加快产业升级宏观政策的体现 -BAC009S0915W0200 对于中小企业的支持政策 -BAC009S0915W0201 国务院已经批准信贷资产证券化继续扩大试点 -BAC009S0915W0202 转化成由资产产生的现金流作担保可自由流通的证券 -BAC009S0915W0203 销售给资本市场投资者的一种融资方式 -BAC009S0915W0204 目前我国正在稳步开展中小企业信贷资产证券化试点 -BAC009S0915W0205 为加快发展银行间债券市场 -BAC009S0915W0206 对中小企业发行债务融资工具提供绿色通道 -BAC009S0915W0207 占非金融企业直接债务融资总额之比 -BAC009S0915W0208 有力地支持了中小企业的发展 -BAC009S0915W0209 积极指导支持和鼓励金融机构根据中小企业的特点 -BAC009S0915W0210 研发推出不同的金融创新产品和服务方式 -BAC009S0915W0211 吴显亭称将加强和证监会等相关部门的配合和协作 -BAC009S0915W0212 而针对浙江广东民间借贷丰沛的特点 -BAC009S0915W0213 一定程度上缓解了部分中小企业的融资困难 -BAC009S0915W0214 将在有效防范民间借贷的潜在风险的前提下 -BAC009S0915W0215 发挥好民间借贷在服务中小企业发展中的积极作用 -BAC009S0915W0216 要加强对民间借贷的合理引导 -BAC009S0915W0217 解决中小企业生产经营困难需靠多方面共同努力 -BAC009S0915W0218 听取对中央企业监督检查情况的汇报 -BAC009S0915W0219 中央企业要进一步深化改革 -BAC009S0915W0220 强化企业管理和风险管控 -BAC009S0915W0221 加强依法监管和制度建设 -BAC009S0915W0222 部分中央企业的结构调整还存在一些困难 -BAC009S0915W0223 资源环境面临较大压力有的企业管理水平不高 -BAC009S0915W0224 非主业投资存在不少经营风险 -BAC009S0915W0225 境外资产监管有待加强 -BAC009S0915W0226 中央企业实现营业总收入十六点八亿元 -BAC009S0915W0227 上交税金一万亿元 -BAC009S0915W0228 增长百分之三十净利润一千亿元 -BAC009S0915W0229 二零一一年一月至七月 -BAC009S0915W0230 实现营业总收入十一亿元 -BAC009S0915W0231 同比增加迅速上缴税金三亿元 -BAC009S0915W0232 增长非常迅速 -BAC009S0915W0233 进入世界五百强的企业增加 -BAC009S0915W0234 包括七座以下小客车及摩托车都被列入免费范范围 -BAC009S0915W0235 江苏省交通厅相关负责人昨日对记者表示 -BAC009S0915W0236 今年国庆小长假期间私家车主们就可以免费上路了 -BAC009S0915W0237 免费时段从节假日第一天开始 -BAC009S0915W0238 节假日最后一天结束 -BAC009S0915W0239 普通公路以车辆通过收费站收费车道的时间为准 -BAC009S0915W0240 高速公路以车辆驶离出口收费车车道的时间为准 -BAC009S0915W0241 允许在普通收费公路行驶的摩托车 -BAC009S0915W0242 各地机场高速公路是否实行免费通行 -BAC009S0915W0243 由各省区市人民政府决定 -BAC009S0915W0244 各地机场高速公路是否实行免费通行 -BAC009S0915W0245 由各省区市人民政府决定 -BAC009S0915W0246 比如南京机场高速一到节假日 -BAC009S0915W0247 是南京往南的重要通道 -BAC009S0915W0248 对于江苏的机场高速是否免费 -BAC009S0915W0249 省交通部门称目前未定 -BAC009S0915W0250 但有关负责人认为我想 -BAC009S0915W0251 机场高速最大可能还是免费 -BAC009S0915W0252 另一个让南京市民特别关心的是 -BAC009S0915W0253 或许到二零一六年的时候 -BAC009S0915W0254 零售店就再也卖不出去一个实体钱包了 -BAC009S0915W0255 催生着移动支付技术的大跃进 -BAC009S0915W0256 最关键的两个属性莫过于安全和便捷 -BAC009S0915W0257 而且它们两个之间是非常对立的关系 -BAC009S0915W0258 安全性的提升需要牺牲一定的便携性 -BAC009S0915W0259 究竟哪个特特性更加重要 -BAC009S0915W0260 这也影响着移动支付市场的总体进程和发展方向 
-BAC009S0915W0261 或许是受好莱坞艳照门的影响 -BAC009S0915W0263 重点强调了其安全性 -BAC009S0915W0264 最基本的逻辑就是我们不读取信息 -BAC009S0915W0265 牛师傅总说自己的面没有添加任何防腐剂 -BAC009S0915W0266 任何的电子行为都不免会留下痕迹 -BAC009S0915W0267 移动支付又会产生非常敏感的操作信息 -BAC009S0915W0268 蕴含着巨大商业价值 -BAC009S0915W0269 有哪家支付机构愿意心无旁续地放弃这些金子呢 -BAC009S0915W0270 安全真的是移动支付的第一属性吗 -BAC009S0915W0271 消费者对便捷性的要求可能会更高 -BAC009S0915W0272 按照国内消费者的习惯 -BAC009S0915W0273 他们通常会单独办一张银行卡来绑定移动支付系统 -BAC009S0915W0274 而不是拿着主卡到处刷 -BAC009S0915W0275 移动支付可调用的只能是消费者的小额度的钱财 -BAC009S0915W0276 一般不会给消费者带来巨大损失 -BAC009S0915W0277 消费者会在特定情况下牺牲安全性来提升支付的便捷性 -BAC009S0915W0278 她们宁愿可花五个小时讨论是否买一条裙子 -BAC009S0915W0279 也不愿意花五分钟重新输一定密码 -BAC009S0915W0281 大概十年前就有了这样的说法 -BAC009S0915W0283 也正是看中了中国消费者的消费潜力 -BAC009S0915W0284 华尔街才对阿里巴巴情有独钟 -BAC009S0915W0286 就让业界讨论它会带给中国移动支付市场怎样的影响 -BAC009S0915W0287 苹果要想在中国本土化 -BAC009S0915W0288 最大难点在于如何改变国内的消费习惯 -BAC009S0915W0289 如何说服四大银行一起与之愉快合作 -BAC009S0915W0290 如何重修与中国政府的良好关系 -BAC009S0915W0291 这对于苹果来说不是件容易的事儿 -BAC009S0915W0292 现在是不是也该长点心了吧 -BAC009S0915W0293 国内移动支付需主要有两股力量 -BAC009S0915W0295 前者有长时间的沉淀 -BAC009S0915W0296 银行们对此已深耕多年 -BAC009S0915W0297 而后者则是刚刚涌现的后起之秀 -BAC009S0915W0298 二零一四年春天打车软件补贴大战 -BAC009S0915W0299 两股力量基本上都有一统天下的野心 -BAC009S0915W0300 这三个优势能在短时间内颠复美国移动支付市场的格局 -BAC009S0915W0301 最终促使苹果成为主流标准但中国市场有其特殊性 -BAC009S0915W0302 首先银联和苹果的合作谈判不会顺利 -BAC009S0915W0303 今后所有空调产品还将实现联机运行 -BAC009S0915W0304 这台设备就不会开机运转 -BAC009S0915W0305 这个在美的空调的南沙工厂武汉工厂已全面试点 -BAC009S0915W0306 自动化制造是未来唯一的出路 -BAC009S0915W0307 未来的制造业方向要实现无人化 -BAC009S0915W0308 美的计划在二零一八年 -BAC009S0915W0309 将家用空调事业部员工工人数缩减至两万人 -BAC009S0915W0310 除了四轴或三轴机器人外 -BAC009S0915W0311 今年还将新增二百台 -BAC009S0915W0312 机器人维护成本是挑战 -BAC009S0915W0313 广东东莞顺德等城市已经掀起大量机器换人计划 -BAC009S0915W0314 家电企业机器人智造也正在加速进行 -BAC009S0915W0315 从美的海尔使用机器人操作来看来 -BAC009S0915W0316 机器换人确实能够大大降低企业的用工数量 -BAC009S0915W0317 实现自动化升级也没那么简单 -BAC009S0915W0318 美的集团对项目在一定年限内有投入产出的规定 -BAC009S0915W0319 这对我们来说是个很大的挑战 -BAC009S0915W0320 同时也卡住了自动化的投入 -BAC009S0915W0321 一定年限内的投入产出 -BAC009S0915W0322 我们必须要有衡量标准 -BAC009S0915W0323 美的不能因自动化生产增加制造成本而让用户买单 -BAC009S0915W0324 美的空调进行自动化升级 -BAC009S0915W0325 一定是为了降低制造成本 -BAC009S0915W0326 比如降低人工费用运作费用等 -BAC009S0915W0327 机器人后期维护运行成本及技术也是一个高门槛 -BAC009S0915W0328 因为机器人生产商派遣技术人员不可能长期驻起驻点企业 -BAC009S0915W0329 高工机器人董事长张小飞表示 -BAC009S0915W0330 家电企业自动化升级改造必须进行 -BAC009S0915W0331 但伴随一定的投资风险 -BAC009S0915W0332 除了后期技术维护能力外 -BAC009S0915W0333 对于国内家电企业而言 -BAC009S0915W0334 自动化生产线的柔性改造也是其面临的一大难题 -BAC009S0915W0335 空调产品越来越追求个性化 -BAC009S0915W0336 这需要通过机器人的柔性改变来对此进行处理 -BAC009S0915W0337 家电企业要建立数字化工厂才能真正提升生产效率 -BAC009S0915W0338 她的表现也更加全面 -BAC009S0915W0339 对阵俄罗斯的比赛中 -BAC009S0915W0340 在张常宁一度进行进攻受阻 -BAC009S0915W0341 刘晓彤替补上场打得缩手缩脚的情况下 -BAC009S0915W0342 不断地为中国女排得分 -BAC009S0915W0343 只要中国队需要有人挺身而出 -BAC009S0915W0344 朱婷在中韩之战中一度受伤 -BAC009S0915W0345 但她在中国队遇到困难的时候坚持带伤上阵 -BAC009S0915W0346 最终掠队拿下了比赛 -BAC009S0915W0347 在队长惠若琪因为身体原因无缘世界杯的情况下 -BAC009S0915W0348 朱婷就是中国女排的核心 -BAC009S0915W0349 朱婷再度扮演了场上头脑的角色 -BAC009S0915W0350 队员们也对于她在技术上和心理上都颇为依赖和信服 -BAC009S0915W0351 朱婷扣球拿下一百一十三分 -BAC009S0915W0352 总共贡献了一百四十一分 -BAC009S0915W0353 反超张常宁成为中国队的得分王 -BAC009S0915W0354 让朱婷最佳球员的身份和价值再度彰显 -BAC009S0915W0355 尚不足十八岁的她身高为一米八六 -BAC009S0915W0356 徐建德统领的中国青年队八战全胜夺得冠军 -BAC009S0915W0357 作为主力主攻的朱婷 -BAC009S0915W0358 从而被授予最有价值球员荣誉 -BAC009S0915W0360 当时身披八号战袍的她身高达到了一米九五公分 -BAC009S0915W0361 朱婷斩获了一六七分 -BAC009S0915W0362 与多米尼加的马丁内斯一起摘得最佳得分奖 -BAC009S0915W0363 随后还以百分之五十三点五六的得分率拿到了最佳进攻的大奖 -BAC009S0915W0364 朱婷荣膺最有价值球员 -BAC009S0915W0365 还与巴西队的加比一起入选最佳主攻 -BAC009S0915W0366 当年的整个世青赛上 -BAC009S0915W0367 中国队虽然如愿夺冠 -BAC009S0915W0368 朱婷却是唯一的硕果 -BAC009S0915W0369 去年六月下旬举行的中国国际精英赛北仑站 -BAC009S0915W0371 当时郎平率队三战全胜名列第一 -BAC009S0915W0372 朱婷两场比赛担任首发 
-BAC009S0915W0374 而在今年的香港站上 -BAC009S0915W0375 中国队三比二力克美国队收获分站赛九连胜 -BAC009S0915W0376 赛后主攻朱婷获最有价值球员和最受欢迎球员 -BAC009S0915W0377 主教练郎平获得最佳教练 -BAC009S0915W0378 三场比赛朱婷均有出色表现 -BAC009S0915W0379 朱婷共计拿下二十四分 -BAC009S0915W0380 第二场对阵日本也拿下全队第二高的十二分 -BAC009S0915W0381 获得二十三分荣誉全场得分王 -BAC009S0915W0382 在分站赛总得分榜上 -BAC009S0915W0383 朱婷以一百五十七分领先群芳 -BAC009S0915W0384 其中扣球拿到一百三十二分 -BAC009S0915W0385 扣球成功率五十四点百分之十高居榜首 -BAC009S0915W0386 人们首先会想到她的高度 -BAC009S0915W0387 其一米九五的身高三米二七的扣球高度 -BAC009S0915W0388 在比赛中确实非常有利 -BAC009S0915W0389 朱婷进攻相对比较简单 -BAC009S0915W0390 主要是四号位的高点强攻和六号位的后排进攻 -BAC009S0915W0391 四号位进攻以大斜线为主 -BAC009S0915W0392 她进攻的变化逐渐多了起来 -BAC009S0915W0393 首先是增加了二号位的进攻 -BAC009S0915W0394 即当自己轮转到前排二号位时 -BAC009S0915W0395 临时客串接应在二号位参与强攻 -BAC009S0915W0396 这样既丰富了自己也增加了全队的进攻变化 -BAC009S0915W0397 再就是四号位的进攻除了斜线 -BAC009S0915W0398 还增加了直线直线和斜线之间的所谓二直线 -BAC009S0915W0399 不时还施以非常巧妙的吊球 -BAC009S0915W0400 视频中国三一大胜俄罗斯独占女排世界杯榜首 -BAC009S0915W0401 日本二零一五女排世界杯单循环赛战至第十轮 -BAC009S0915W0402 由郎平挂帅的中国女排在名古屋赛区 -BAC009S0915W0403 提升战绩为九胜一负反超至榜首位置 -BAC009S0915W0404 上周在北美电影市场上遭遇票房惨剧 -BAC009S0915W0405 只以六百四十八万美元的进账排名第八 -BAC009S0915W0406 这部电影的失败并没有影响囧瑟夫的心情 -BAC009S0915W0407 将自导自演一部名为睡魔的科幻大片 -BAC009S0915W0408 让体重维持在四十五公斤左右 -BAC009S0915W0409 但网友的吐槽却一直没有停息 -BAC009S0915W0410 她在台湾出席活动 -BAC009S0915W0411 坦言刚开拍的一个月中 -BAC009S0915W0412 心情低落到崩溃大哭 -BAC009S0915W0413 甚至出现忧郁症状况 -BAC009S0915W0414 搜狐娱乐讯陈妍希传出和陈晓的恋情之后 -BAC009S0915W0415 二人一直鲜少回应 -BAC009S0915W0416 陈妍希回到台北代言悠游卡 -BAC009S0915W0417 外传她可能已经怀孕 -BAC009S0915W0418 但陈妍希在出席活动时 -BAC009S0915W0419 穿高跟鞋快步走 -BAC009S0915W0420 似乎也让传言不攻自破 -BAC009S0915W0421 贵州都市报十月二十九日报道据台湾媒体报道艺人陈妍希认爱小四岁的大陆小生陈晓 -BAC009S0915W0423 两人因合作神雕侠侣擦出爱火 -BAC009S0915W0424 恋情发展备受关注 -BAC009S0915W0425 更在日前爆出交往七个月准备闪婚 -BAC009S0915W0426 连男方在法国包游艇求婚的照片都被网友扒出 -BAC009S0915W0427 她坦承当时很惊喜很感动 -BAC009S0915W0428 男友受访时也首度大方松口确实已经进入求婚阶段 -BAC009S0915W0429 让粉丝听了又惊又喜 -BAC009S0915W0430 搜狐娱乐讯据台湾媒体报道 -BAC009S0915W0431 记者调查湖南张家界国家森林公园低价团问题 -BAC009S0915W0432 四零零元左右的低价两日游在当地非常普遍 -BAC009S0915W0433 这种低价游自称费用全包 -BAC009S0915W0434 原本自费项目变成必须交费项目 -BAC009S0915W0435 导游还诱骗游客加钱走特殊路线 -BAC009S0915W0436 面对游客质疑和退团要求 -BAC009S0915W0437 导游放言此树是我栽 -BAC009S0915W0438 你不可能一分钱不花 -BAC009S0915W0439 游客赴港游买瑞士表半个月停摆旅行社久拖不管 -BAC009S0915W0440 市民刘先生和江西环球国际旅行社的沟通协商再次失败 -BAC009S0915W0441 双方矛盾的焦点是一只瑞士名表 -BAC009S0915W0442 游客踩敦煌千年古城遗址拍照反问踩了会掉吗 -BAC009S0915W0443 现场图一零月五日下午 -BAC009S0915W0444 在甘肃敦煌大方盘城遗址 -BAC009S0915W0445 几位游客轮流翻越护栏 -BAC009S0915W0446 一位游客在拍照中说人家几千年都没有掉下来 -BAC009S0915W0447 踩一下就掉下来了 -BAC009S0915W0448 澎湃新闻在现场看到 -BAC009S0915W0449 遗址附近有多处警示牌写明严禁跨入保护区 -BAC009S0915W0450 游客进店未购物被导游嘲讽官方正在立案处理 -BAC009S0915W0451 游客铜缸刻字秀恩爱故宫已报警 -BAC009S0915W0452 法制晚报讯记者李洁今天傍晚 -BAC009S0915W0453 严厉谴责这一不文明应为 -BAC009S0915W0454 并称故宫博物院已就此事件向公安机关报案 -BAC009S0915W0455 游客青岛遭遇天价虾当地人最多几十元一斤 -BAC009S0915W0456 肖先生在上菜后高兴地拍下图片 -BAC009S0915W0457 当时他还不知道自己会被暗算 -BAC009S0915W0458 游客骑着明孝陵驮碑龟趺拍照市民大煞风景 -BAC009S0915W0459 游客骑在龟趺身上报料人供图 -BAC009S0915W0460 游戏主播花样作死声称天津是他炸的直播被抓游戏室老板因冲突开枪将人射伤致死 -BAC009S0915W0461 一五年后落网 -BAC009S0915W0462 贵港民警追凶未言弃嫌犯一五年后落法网 -BAC009S0915W0463 游戏平台称投千元可收百万数十民上当 -BAC009S0915W0464 信息时报讯记者周伟龙天上不会掉馅饼 -BAC009S0915W0465 数十名市民赶到越秀区一酒家维权 -BAC009S0915W0466 称他们曾在这里被人游说注册了一游戏平台的账户 -BAC009S0915W0467 花费几千元至上万元不等 -BAC009S0915W0468 原以为可以按照游戏规则定期分红提现 -BAC009S0915W0469 孰料从上月底开始平台关闭 -BAC009S0915W0470 随后众人一起到东山派出所报案 -BAC009S0915W0471 有待警方进一步调查 -BAC009S0915W0472 游戏网站频遭攻击每周交二零零零元保护费息事宁人 -BAC009S0915W0473 办案民警检查作案设备金华警方供图昨天 -BAC009S0915W0474 记者从金华市公安局获悉 -BAC009S0915W0475 仅半年就敲诈勒索了五七二万元 -BAC009S0915W0476 该案也被列为公安部督办大案 -BAC009S0915W0477 警方已抓获一五名犯罪嫌疑人 -BAC009S0915W0478 湖北一七二名教师转岗当保安其中有人曾是校长 -BAC009S0915W0479 一身保安制服的他准时站在校门口 -BAC009S0915W0480 手握电动栅栏遥控器 
-BAC009S0915W0481 眼睛警惕地注视着进出校门的车辆和学生 -BAC009S0915W0482 湖北一九岁女护士深夜遭抢劫杀害嫌疑嫌犯已落网 -BAC009S0915W0483 凶手被抓捕归案钟欣摄 -BAC009S0915W0484 湖北二五岁女子从未来例假基因检查是男身 -BAC009S0915W0485 家住汉阳的莎莎化名 -BAC009S0915W0486 近日在医院检查才发现 -BAC009S0915W0487 她的基因竟是个纯爷们 -BAC009S0915W0488 湖北三亿打造亚洲玫瑰基地多个种植园杂草丛生 -BAC009S0915W0489 湖北四名被捅法官脱离危险一女法官尚在哺乳期 -BAC009S0915W0490 经十堰市中级人民法院确认 -BAC009S0915W0491 四名法官系送达法律文书时被刺伤 -BAC009S0915W0492 目前均暂无生命危险 -BAC009S0915W0493 其中一女法官尚在哺乳期 -BAC009S0915W0494 湖北六零后求婚九零后被指责欠款六千万因诈骗取保候审 -BAC009S0915W0495 湖北黄石市一家商场前 -BAC009S0916W0121 真正落地的产品却非常地少 -BAC009S0916W0122 而落地后的产品与客户的期待甚远 -BAC009S0916W0123 这些状况每日均上演发生 -BAC009S0916W0124 大部分的创业者举步艰辛 -BAC009S0916W0125 钱烧完了东西出不来 -BAC009S0916W0126 创业者成了智慧时代的贡品 -BAC009S0916W0127 这不是这个时代的不公平 -BAC009S0916W0128 而是我们对这个时代了解的太少 -BAC009S0916W0129 如果我们懂得多一点智能家居产品市场的法则 -BAC009S0916W0130 我们的路也许会好走得多 -BAC009S0916W0131 一智能产品的安全 -BAC009S0916W0132 连接的最高代价就是安全问题 -BAC009S0916W0133 成千上万的产品通过无线连接 -BAC009S0916W0134 只要一个单品存在安全漏洞 -BAC009S0916W0135 整个系统的安全就会出现问题 -BAC009S0916W0136 产生非常可怕的结果 -BAC009S0916W0137 现阶段市场上落地的产品大多对安全的认知都存在缺陷 -BAC009S0916W0138 普遍认为现在的市场很小且还是单品 -BAC009S0916W0139 不用花那么大的成本去解决安全的问题 -BAC009S0916W0140 可大家必须明白一个道理 -BAC009S0916W0141 当大家习惯安全的问题留以后解决的时候 -BAC009S0916W0142 安全问题立即会成为你的内伤 -BAC009S0916W0143 但综观国内同类企业 -BAC009S0916W0144 以深圳智能锁业代表为例 -BAC009S0916W0145 在安全加解密认证等方面也做足了功夫 -BAC009S0916W0147 软件与硬件都做了深度的对接 -BAC009S0916W0148 把顾客个人资料全部归客户自己保管 -BAC009S0916W0149 企业不接触客户个人资料 -BAC009S0916W0150 许多企业都把取得顾客个人资料当作资本 -BAC009S0916W0151 这是智能家居行业的先例 -BAC009S0916W0152 必须具有高度习惯融合性和耐用性 -BAC009S0916W0153 这决不是八零九零的消费习惯这么单纯的问题 -BAC009S0916W0154 是每个家庭成员体验的统一 -BAC009S0916W0155 也就是每个成员综合体验的最大公约数 -BAC009S0916W0156 以情怀代替体验是非常错误的 -BAC009S0916W0157 产品的核心是客户的体验 -BAC009S0916W0158 顾客体验的核心是真善美 -BAC009S0916W0159 近来看到的许多创新型产品 -BAC009S0916W0160 可使用起来让人啼笑皆非 -BAC009S0916W0161 加解密的措施如同虚设 -BAC009S0916W0162 没有智慧手机的成员无法开门 -BAC009S0916W0163 这是一帮精英自恋情怀的产品 -BAC009S0916W0164 可美国的月亮总是比中国的亮 -BAC009S0916W0165 国内许多媒体或企业都在为其背书 -BAC009S0916W0166 而对国内比它更优秀的产品却集体失声 -BAC009S0916W0167 只要了解一点核桃锁信息的人都能第一时间感受到 -BAC009S0916W0168 一智能家居产品的销售渠道 -BAC009S0916W0169 你要懂既然不是电子产品不是易损品不是玩品 -BAC009S0916W0170 他是家居产品依托互联网技术升级的家居耐用品 -BAC009S0916W0171 这产品的换代周期会较长 -BAC009S0916W0172 购买的机会受时间的制约 -BAC009S0916W0173 而未来借助更多的互联网技术 -BAC009S0916W0174 产品的升级速度一定加快 -BAC009S0916W0175 而智能家居产品的特殊属性决定了销售渠道的模式 -BAC009S0916W0176 他不能按电子产品或传统居家产品的模式去销售 -BAC009S0916W0177 除了做好传统门店的体验销售电商平台销售外 -BAC009S0916W0178 希望智能家居产品企业在短期的高回报率也是不现实的 -BAC009S0916W0179 但可以肯定的他一定是最高成长的企行业 -BAC009S0916W0180 一大数据云计算不是你谈的 -BAC009S0916W0181 好像不谈你就不属于这个时代的人 -BAC009S0916W0182 作用大并不代表每个人 -BAC009S0916W0183 大数据云计算是非常烧钱的 -BAC009S0916W0184 不是一般的企业个人玩得起的 -BAC009S0916W0185 与其厌不其烦的谈论大数据云计算 -BAC009S0916W0186 不如做一款实实在在的好产品 -BAC009S0916W0187 但是却不在国家文件所指的收费公路范围内 -BAC009S0916W0188 而是一条市内快速路 -BAC009S0916W0189 对于这条特殊的隧道 -BAC009S0916W0190 省交通部门表示应该不会特殊 -BAC009S0916W0191 长江隧道估计也顶不住 -BAC009S0916W0192 对于提高重大节假日公路通行能力和服务水平 -BAC009S0916W0193 降低公众假日出行成本具有重要意义 -BAC009S0916W0194 具体工作将由各省区市政府负责统一组织实施 -BAC009S0916W0195 国务院及五部门并没有明确实施时间 -BAC009S0916W0196 着实让不少网友有些着急 -BAC009S0916W0197 免费新规究竟啥时能享受到 -BAC009S0916W0198 记者昨日第一时间从江苏省交通运输厅获悉 -BAC009S0916W0199 就国家方案我省还会进行再研究 -BAC009S0916W0200 具体执行时间由省政府定 -BAC009S0916W0201 今年国庆应该可以实施 -BAC009S0916W0202 可是通过收费站的车有大客车中型客车还有货车 -BAC009S0916W0203 到时候会不会乱成一锅粥 -BAC009S0916W0204 在国务院下发的文件中提及 -BAC009S0916W0205 为确保免费政策实施后车辆有序通行 -BAC009S0916W0206 各地区要对公路收费站现有车道进行全面调查 -BAC009S0916W0207 合理规划和利用现有收费车道和免费专用通道 -BAC009S0916W0208 确保过往车辆分类分车道有序通行 -BAC009S0916W0209 记者昨日从省交通部门了解到 -BAC009S0916W0210 这是一个比较复杂的问题 -BAC009S0916W0211 估计未来系统可能会改造 -BAC009S0916W0212 应该不会开免费车道 -BAC009S0916W0213 如果开了小车免费车道 
-BAC009S0916W0214 有大车或是货车误闯或者闯进去了就不好办了 -BAC009S0916W0215 有关负责人告诉记者 -BAC009S0916W0216 省里会对此进行专门研究讨论 -BAC009S0916W0217 看看山东之前是怎么做的记者了解到 -BAC009S0916W0218 面对上述这些问题山东是怎么免费放行的呢 -BAC009S0916W0219 免费期间收费员还是按照正常放行的 -BAC009S0916W0220 山东潍坊的一位李先生告诉记者 -BAC009S0916W0221 今年大年初一他开车去海南 -BAC009S0916W0222 一路上很多省份的高速公路收费站都是免费放行 -BAC009S0916W0223 到了出口车道再把通行卡收回去 -BAC009S0916W0224 由于山东免费放行的时间不在春运最高峰 -BAC009S0916W0225 大年初一路上都没什么车 -BAC009S0916W0226 所以倒也没产生收费站排队的现象 -BAC009S0916W0227 扩大到四个小长假之后 -BAC009S0916W0228 国务院批准银行系基金公司再扩容 -BAC009S0916W0229 本报记者蔡宗琦中国证券报记者获悉 -BAC009S0916W0230 公募基金管理业务有关工作 -BAC009S0916W0231 积极推动基金产品审核制度改革 -BAC009S0916W0232 鼓励更多资金投资资本市场 -BAC009S0916W0233 先后两批共八家商业银行设立或参股八家基金管理公司 -BAC009S0916W0234 试点基金管理公司发展态势良好 -BAC009S0916W0235 工商银行建设银行和交通银行为首批试点银行 -BAC009S0916W0236 增加机构投资者数量 -BAC009S0916W0237 促进基金行业规范发展 -BAC009S0916W0238 为商业银行探索跨业经营运作积累经验 -BAC009S0916W0239 此举可能将进一步推动金融混业经营 -BAC009S0916W0240 随着对商业银行设立基金管理公司门槛放宽 -BAC009S0916W0241 我国资本市场将迎来更多机构投资者 -BAC009S0916W0242 更加有利于价值投资理念形成 -BAC009S0916W0243 保险资产管理公司如符合有关规定 -BAC009S0916W0244 可以向有关金融监管部门申请 -BAC009S0916W0245 依法开展公募性质的资产管理业务 -BAC009S0916W0246 通知扩大保险资管公司业务范围 -BAC009S0916W0247 这体现出监管部门开放管理的思路 -BAC009S0916W0248 允许各类资产管理公司同台竞技 -BAC009S0916W0249 在遴选优质管理人提升保险资金投资收益率的同时 -BAC009S0916W0250 也通过机构间的竞争促进保险资管公司的转型发展 -BAC009S0916W0251 明确了参股基金管理公司股东 -BAC009S0916W0252 证监会新闻发言人邓给解释 -BAC009S0916W0253 中国的银行居垄断地位 -BAC009S0916W0254 作风向来无耻加强势苹果也因强势出名 -BAC009S0916W0255 可参照中国移动和苹果的合作传闻中 -BAC009S0916W0257 这估计很难让掉进钱眼儿的四大银行接受 -BAC009S0916W0260 但却鲜有人会像苹果的服务付费 -BAC009S0916W0261 更现实的的困难在于 -BAC009S0916W0263 粗估下来大概要七十亿 -BAC009S0916W0264 这还不包括改造过程中的渠道分食 -BAC009S0916W0265 以及给领导们的审批费用 -BAC009S0916W0266 从支付的大环境上看 -BAC009S0916W0268 它依旧要面对政府的刁难 -BAC009S0916W0270 政府失控的可不是什么隐私了 -BAC009S0916W0271 而是实实在在的金融命脉 -BAC009S0916W0272 慈禧太后就因乔致庸创办了票号 -BAC009S0916W0273 害怕其掌握国家金融命脉 -BAC009S0916W0274 而将他软禁十年之久 -BAC009S0916W0275 何况是一个来自美帝的小苹果呢 -BAC009S0916W0276 科幻星系康斯坦丁文 -BAC009S0916W0277 苹果一口气召开了两次新品发布会 -BAC009S0916W0278 就在会场的凳子和垃圾尚未收拾干净的时候 -BAC009S0916W0279 全世界的报道已经蜂拥而至 -BAC009S0916W0280 失望中夹杂着嘲讽的情绪霸占了各模块的头条 -BAC009S0916W0281 据华尔街日报网站报道 -BAC009S0916W0282 在自己全身心的努力和坚持之下 -BAC009S0916W0284 艾维本周四晚在旧金山现代艺术馆向大众表示 -BAC009S0916W0286 主要是因为社会对可穿戴智能手表的期望太高 -BAC009S0916W0287 手腕是配戴轻便型互动设备与休闲设备的理想之处 -BAC009S0916W0288 但不适合那些笨重的解读设备 -BAC009S0916W0289 艾维表示尽管苹果智能手表拥有诸多功能 -BAC009S0916W0290 这种产品的设计仍需考虑文化历史和未来等因素 -BAC009S0916W0291 艾维现为苹果主管设计业务的高级副总裁 -BAC009S0916W0292 帮助设计了苹果多项产品的外观和用户体验 -BAC009S0916W0294 苹果计划于明年初开始销售其智能手表 -BAC009S0916W0295 该公司于上个月简单地宣布了智能手表相关的情况 -BAC009S0916W0296 其将提供三种版本的智能手表 -BAC009S0916W0297 起步价为三百四九美元十 -BAC009S0916W0298 苹果没有透露更昂贵智能手表的具体售价 -BAC009S0916W0299 这些手表将配置不同的表带 -BAC009S0916W0300 以满足不同用户的需求 -BAC009S0916W0301 市场上还有诸多其他制造商也在尝试生产智能手表 -BAC009S0916W0302 但这些厂商的产品都难以进入主流 -BAC009S0916W0303 这是未来的必经之路 -BAC009S0916W0304 美的家用空调事业部总裁吴文新表示 -BAC009S0916W0305 每日经济新闻记者从美的家用空调事业部了解到 -BAC009S0916W0306 自二零一一年事业部启动自动化升级至今的四年里 -BAC009S0916W0307 工人数量减少近一半 -BAC009S0916W0308 美的家用空调事业部制造副总裁乌守保对记者 -BAC009S0916W0309 老板电器的新增量创新需求追求极致搜狐科技 -BAC009S0916W0310 质变中的世界工厂中国正在由中国制造向中国智造蜕变 -BAC009S0916W0311 如何借力拥抱互联网加这一全新变量 -BAC009S0916W0312 如何重新激活内部潜能 -BAC009S0916W0313 便是区别行业龙头企业经营智慧高低的关键时刻 -BAC009S0916W0314 身处传统白色家电领域中的重要一支到厨房电器 -BAC009S0916W0315 多年来保持奇高市占率的老板电器 -BAC009S0916W0316 在成名三十馀年后仍在竭力寻求业态的新鲜化和可能性 -BAC009S0916W0317 能否找到厨电行业下一个未知的增量 -BAC009S0916W0318 也成为老板电器和它的宿敌们能否领跑下半程的关键 -BAC009S0916W0319 阐述老板电器和内部创新外部国际化如何进行破题 -BAC009S0916W0320 老板电器如何看待公司的创新驱动 -BAC009S0916W0321 赵继宏老板电器做厨电已经三十多年了 -BAC009S0916W0322 作为企业理念和产品技术必须要走在时代的前面 -BAC009S0916W0323 现在中国的八十五后和九十后消费人群已经成为消费主体 -BAC009S0916W0324 他们需要的是智能厨房智能家居与家电 -BAC009S0916W0325 
公司为此研发并推出市场的智能产品非常贴近市场 -BAC009S0916W0326 围绕消费者消费者需要什么 -BAC009S0916W0327 我们开发什么的产品研发策略 -BAC009S0916W0328 除了产品功能必须不错之外 -BAC009S0916W0329 以保证持续长久的黏性互动 -BAC009S0916W0330 产品创新其实也是一个双向互动的过程 -BAC009S0916W0331 现在消费者的需求越来越个性化差异化 -BAC009S0916W0332 可以和我们的消费者有很多的互动并提供超值服务 -BAC009S0916W0333 这些都是和消费者增添黏性互动的方式 -BAC009S0916W0334 这个方向的创新以后还有更多的东西可以发挥作用 -BAC009S0916W0335 如今的智能家电更多意义上是智能加上互动 -BAC009S0916W0336 也就是老板电器总结的自动加互动 -BAC009S0916W0338 只要在明天的最后一战中赢下东道主日本 -BAC009S0916W0339 高清女排力擒俄罗斯夺冠占主动众将喜极而泣 -BAC009S0916W0340 今天大家打得都挺好的 -BAC009S0916W0341 我们是一条心在打团结作战 -BAC009S0916W0342 赛后主攻手朱婷对记者说 -BAC009S0916W0343 本场比赛朱婷三七次扣球得到二十一分 -BAC009S0916W0344 此外她还凭借拦网和发球分别拿到七分和一分 -BAC009S0916W0345 我觉得自己的脚伤已经完全恢复了 -BAC009S0916W0346 对弹跳没有什么影响 -BAC009S0916W0347 当在新闻发布会上被问及伤情的时候 -BAC009S0916W0348 在第四轮与韩国队的比赛中 -BAC009S0916W0349 朱婷在第四局比赛中意外崴脚 -BAC009S0916W0350 今天出色的数据也佐证了她身体的康复情况良好 -BAC009S0916W0351 作为队里年龄最大的球员 -BAC009S0916W0352 最终拦网和扣球均得到六这些分 -BAC009S0916W0353 位列球队发球榜首位和拦网榜的第二位 -BAC009S0916W0354 大家今天打得非常出色 -BAC009S0916W0355 能够在这个集体与可爱的队友一起拼杀 -BAC009S0916W0356 我感到非常骄傲和自豪 -BAC009S0916W0357 在赛后发布会上颜妮对记者说 -BAC009S0916W0358 在复盘与俄罗斯一战时 -BAC009S0916W0359 这场比赛前教练给我们布置了很多 -BAC009S0916W0360 作为就是我上场多去贯彻教练意图 -BAC009S0916W0361 颜妮坦言今天俄罗斯表现很好 -BAC009S0916W0362 我们两家有时候比较像 -BAC009S0916W0363 当被问及新老队员相互担当弥补的话题时 -BAC009S0916W0364 颜妮坦言自己的发挥也不是特别稳定 -BAC009S0916W0365 但有起伏应该是正常的 -BAC009S0916W0366 作为老队员我要多承担 -BAC009S0916W0367 用实际行动来弥补不足 -BAC009S0916W0368 搜狐体育郭健文 -BAC009S0916W0369 女排众将手举国旗敬夺冠 -BAC009S0916W0370 拿到了明年里约奥运会的入场券 -BAC009S0916W0371 在接受中央电视台记者采访时朱婷表示 -BAC009S0916W0372 全队上下面对了巨大困难 -BAC009S0916W0373 其中郎平主教练最为辛苦 -BAC009S0916W0374 今晚的比赛中朱婷独得二十七分 -BAC009S0916W0375 再度成为了比赛的得分王 -BAC009S0916W0377 但今天能拿冠军真的是发自肺腑的想哭 -BAC009S0916W0378 面对日本队的魔鬼主场 -BAC009S0916W0379 中国女排表示承受了巨大的压力 -BAC009S0916W0380 朱婷表示我想日本肯定也会拼我们 -BAC009S0916W0381 做了很多很多困难准备 -BAC009S0916W0382 如果输了就不太好说了 -BAC009S0916W0383 但是里面不是淡定的 -BAC009S0916W0384 中国队连续三位主力因伤缺战 -BAC009S0916W0385 大家可能觉得我们这支队伍很苦 -BAC009S0916W0386 但我觉得郎导是最苦的 -BAC009S0916W0387 朱婷表示其实我也想 -BAC009S0916W0388 女排三零阿根廷朱婷复出扣杀状态神勇 -BAC009S0916W0389 全场比赛的焦点是休战三场后重新登场的名将朱婷 -BAC009S0916W0390 拿下全场最高分的朱婷赛后表示 -BAC009S0916W0391 在八月二十六日中国队和韩国队的比赛中 -BAC009S0916W0392 朱婷崴脚之后带伤率队取胜 -BAC009S0916W0393 主教练郎平都没有派她出场 -BAC009S0916W0394 一日晚的中阿之战 -BAC009S0916W0395 重新以首发身份登场的朱婷迅速找回比赛的感觉 -BAC009S0916W0396 赛后被评为当场最佳球员 -BAC009S0916W0397 这也是她在本届世界杯上第二次获得全场最佳 -BAC009S0916W0398 在场上移动很好 -BAC009S0916W0399 朱婷在谈到大家关心的脚伤时说 -BAC009S0916W0400 在冈山的桃太郎体育馆 -BAC009S0916W0401 当地华人团体组织了不少球迷为中国队加油 -BAC009S0916W0402 这样的氛围让朱婷感觉像是主场一样 -BAC009S0916W0403 大家赢球比自己获得最佳还要高兴 -BAC009S0916W0404 这部电影从二零一三年就已经开始筹备了 -BAC009S0916W0405 前后打磨了两年时间才得以完成 -BAC009S0916W0406 与奥斯卡影帝本金斯利同时出现在海报中央 -BAC009S0916W0407 雷诺兹持枪的造型和他在冥界警局里的颇为相似 -BAC009S0916W0408 那些年女神陈妍希近来瘦身有成 -BAC009S0916W0409 不仅摆脱神雕侠侣时期的小笼包名号 -BAC009S0916W0410 日前在大陆真人秀节目秀出两条雪白大长腿 -BAC009S0916W0411 更让粉丝看了鼻血直流 -BAC009S0916W0412 只不过好景不常 -BAC009S0916W0413 她最近又被拍到崩坏实录 -BAC009S0916W0414 乱糟糟的马尾加上宽松衣服的村姑打扮 -BAC009S0916W0415 搜狐娱乐讯名为娱乐圈八卦的自媒体 -BAC009S0916W0416 曝出陈妍希拍戏时突然干呕 -BAC009S0916W0417 推断其已怀孕 -BAC009S0916W0418 应该是月初吧 -BAC009S0916W0419 小笼包身体有反应 -BAC009S0916W0420 她突然就干呕 -BAC009S0916W0421 陈妍希还去医院做了检查 -BAC009S0916W0422 她的团队对她更加关心了 -BAC009S0916W0423 中新网六月十六日电六月十六日是容祖儿的生日 -BAC009S0916W0424 陈妍希晒出与容祖儿合照 -BAC009S0916W0425 并送上真挚祝福 -BAC009S0916W0426 祝可爱的你 -BAC009S0916W0427 每一天都要快乐喔 -BAC009S0916W0428 中新网九月二十五日电据台湾东森新闻报道 -BAC009S0916W0429 陈晓与陈妍希承认恋情 -BAC009S0916W0430 获得粉丝祝福 -BAC009S0916W0431 湖北六小伙温州偷硬币称代表诸葛后人战刘伯温后人 -BAC009S0916W0432 专偷摇摇车里的硬币 -BAC009S0916W0433 运气好时一天能偷几千枚一元硬币 -BAC009S0916W0434 湖北六岁女童被继母虐打下阴撕裂警方已介入 -BAC009S0916W0435 湖北七人冒充福彩工作人员兜售中奖秘籍骗取三零零万 
-BAC009S0916W0436 湖北省黄冈市公安局通报称 -BAC009S0916W0437 打掉一个以传授彩票中奖秘籍为名的特大电信诈骗团伙 -BAC009S0916W0438 破获电信诈骗案二三三起 -BAC009S0916W0439 涉案金额三零零多万元 -BAC009S0916W0440 湖北九岁女童遇害案告破凶手强奸不成推下窗外 -BAC009S0916W0441 湖北五道杠少年捐出二万元政府奖学金 -BAC009S0916W0442 学校里有些同学家里条件不好 -BAC009S0916W0443 但是想让更多需要帮助的水高学子感受到温暖 -BAC009S0916W0444 湖北卷人电梯设计不合理同型号已售四六四八部 -BAC009S0916W0445 事故电梯仍处于拆解状态 -BAC009S0916W0446 新华社记者梁建强摄 -BAC009S0916W0447 湖北吃人同型号电梯全国四六四八部分布三一省市 -BAC009S0916W0448 安良百货商场正常营业 -BAC009S0916W0449 但各楼层的自动扶梯均已关停供图新华 -BAC009S0916W0450 湖北吃人电梯品牌四年被曝光五次 -BAC009S0916W0452 湖北吞人电梯三月份刚检测合格 -BAC009S0916W0453 事故电梯出厂刚满一年 -BAC009S0916W0454 今年三月份经检验为合格 -BAC009S0916W0455 涉事厂家生产的电梯此前曾发生多起事故 -BAC009S0916W0456 目前湖北省质监局已要求全省暂停使用涉事厂家电梯 -BAC009S0916W0457 湖北咬人电梯厂家曾为盖板支架申请专利 -BAC009S0916W0458 湖北荆州吃人电梯盖板设计不合理供图 -BAC009S0916W0459 湖北电梯吃人定性为责任事故 -BAC009S0916W0460 看过湖北电梯吃人视频的不少上海年轻人 -BAC009S0916W0461 在经过商场自动扶梯时会选择跳过视频中的跳板 -BAC009S0916W0462 晨报记者张佳琪晨报讯昨晚九时三零分 -BAC009S0916W0463 湖北省荆州市安监局召开安良百货电梯事故情报通报会 -BAC009S0916W0464 此次事故调查组组长荆州市安监局局长陈观鑫通报称 -BAC009S0916W0465 初步认定这是一起安全生产责任事故 -BAC009S0916W0466 湖北电梯吃人调查报告电梯厂商及商场负主责 -BAC009S0916W0467 二零一五七二六 -BAC009S0916W0468 湖北荆州市安良百货公司事发手扶电梯已被关闭检修 -BAC009S0916W0470 申龙电梯和安良百货公司应对事故负主要责任 -BAC009S0916W0471 湖北飞踢女居民车道办主任被停职 -BAC009S0916W0472 网曝视频截图当街飞踢女群众 -BAC009S0916W0473 大喊我一脚方言 -BAC009S0916W0474 同踹死你的街道办主任 -BAC009S0916W0475 湖北一中学教师体罚学生致重伤被判刑三年 -BAC009S0916W0476 用右脚踢向董某左腹部 -BAC009S0916W0477 董某某所受损伤程度属二重伤二级 -BAC009S0916W0478 残疾等级为六级残疾 -BAC009S0916W0479 梁某某被一审法院以故意伤害罪判处有期徒刑三年 -BAC009S0916W0480 湖北一中学班长失踪坠亡教学楼四小时去向成谜 -BAC009S0916W0481 新洲一名高中新生因为没去教室上晚自习 -BAC009S0916W0482 老师发现后和学生一起寻找 -BAC009S0916W0483 直至晚上一零时左右 -BAC009S0916W0484 一名老师才发现学生坠楼摔落在教学楼前 -BAC009S0916W0485 今日二二日晨凌晨 -BAC009S0916W0486 这名一五岁的花季少年最终送医救治无效死亡 -BAC009S0916W0487 湖北一传销头目归国投案骗取群众资金数亿元 -BAC009S0916W0488 湖北一骗取群众资金数亿元的传销头目近日归国投案 -BAC009S0916W0489 湖北一公司以员工名义贷款数十员工负债千万 -BAC009S0916W0490 阳逻一家公司以数十名员工的名义 -BAC009S0916W0491 向一家金融公司贷款一千多万元 -BAC009S0916W0492 公司承诺贷款本息都由公司负责偿还 -BAC009S0916W0493 公司却遇到了资金困难 -BAC009S0916W0494 存在无法如期还贷的风险 -BAC009S0916W0495 这令被贷款的员工们寝食难安 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/gpu/tensorrt/model_repo_stateful_trt/decoder/1/.gitkeep b/models/audio/speech_recognition/conformer/igie/tools/__init__.py similarity index 100% rename from models/audio/speech_recognition/conformer/igie/wenet/runtime/gpu/tensorrt/model_repo_stateful_trt/decoder/1/.gitkeep rename to models/audio/speech_recognition/conformer/igie/tools/__init__.py diff --git a/models/audio/speech_recognition/conformer/igie/compute_cer.py b/models/audio/speech_recognition/conformer/igie/tools/compute_cer.py similarity index 82% rename from models/audio/speech_recognition/conformer/igie/compute_cer.py rename to models/audio/speech_recognition/conformer/igie/tools/compute_cer.py index 6a7b381e6ebc6ff950226677ce34e25f4b1f4947..a5db08979f4d31a4a2ac9e4ceb0d122537690aac 100644 --- a/models/audio/speech_recognition/conformer/igie/compute_cer.py +++ b/models/audio/speech_recognition/conformer/igie/tools/compute_cer.py @@ -1,22 +1,10 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + import sys import unicodedata import codecs -import argparse remove_tag = True spacelist = [' ', '\t', '\r', '\n'] @@ -278,16 +266,101 @@ def default_cluster(word) : return 'Other' return unicode_names[0] -def get_acc(ref_file, hyp_file): +def usage() : + print("compute-wer.py : compute word error rate (WER) \ + and align recognition results and references.") + print(" usage : python compute-wer.py [--cs={0,1}] \ + [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ + [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") + +if __name__ == '__main__': + if len(sys.argv) == 1 : + usage() + sys.exit(0) calculator = Calculator() cluster_file = '' ignore_words = set() tochar = False - verbose = 0 + verbose = 1 padding_symbol = ' ' case_sensitive = False max_words_per_line = sys.maxsize split = None + while len(sys.argv) > 3: + a = '--maxw=' + if sys.argv[1].startswith(a): + b = sys.argv[1][len(a):] + del sys.argv[1] + max_words_per_line = int(b) + continue + a = '--rt=' + if sys.argv[1].startswith(a): + b = sys.argv[1][len(a):].lower() + del sys.argv[1] + remove_tag = (b == 'true') or (b != '0') + continue + a = '--cs=' + if sys.argv[1].startswith(a): + b = sys.argv[1][len(a):].lower() + del sys.argv[1] + case_sensitive = (b == 'true') or (b != '0') + continue + a = '--cluster=' + if sys.argv[1].startswith(a): + cluster_file = sys.argv[1][len(a):] + del sys.argv[1] + continue + a = '--splitfile=' + if sys.argv[1].startswith(a): + split_file = sys.argv[1][len(a):] + del sys.argv[1] + split = dict() + with codecs.open(split_file, 'r', 'utf-8') as fh: + for line in fh: # line in unicode + words = line.strip().split() + if len(words) >= 2: + split[words[0]] = words[1:] + continue + a = '--ig=' + if sys.argv[1].startswith(a): + ignore_file = sys.argv[1][len(a):] + del sys.argv[1] + with codecs.open(ignore_file, 'r', 'utf-8') as fh: + for line in fh: # line in unicode + line = line.strip() + if len(line) > 0: + ignore_words.add(line) + continue + a = '--char=' + if sys.argv[1].startswith(a): + b = sys.argv[1][len(a):].lower() + del sys.argv[1] + tochar = (b == 'true') or (b != '0') + continue + a = '--v=' + if sys.argv[1].startswith(a): + b = sys.argv[1][len(a):].lower() + del sys.argv[1] + verbose = 0 + try: + verbose = int(b) + except Exception: + if b == 'true' or b != '0': + verbose = 1 + continue + a = '--padding-symbol=' + if sys.argv[1].startswith(a): + b = sys.argv[1][len(a):].lower() + del sys.argv[1] + if b == 'space': + padding_symbol = ' ' + elif b == 'underline': + padding_symbol = '_' + continue + if True or sys.argv[1].startswith('-'): + # ignore invalid switch + del sys.argv[1] + continue if not case_sensitive: ig = set([w.upper() for w in ignore_words]) @@ -296,6 +369,8 @@ def get_acc(ref_file, hyp_file): default_clusters = {} default_words = {} + ref_file = sys.argv[1] + hyp_file = sys.argv[2] rec_set = {} if split and not case_sensitive: newsplit = dict() @@ -394,13 +469,18 @@ def get_acc(ref_file, hyp_file): lab1 = lab2 rec1 = rec2 + if verbose: + 
print('===================================================' + '========================') + print() + result = calculator.overall() if result['all'] != 0 : wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] else : wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') + print('Overall -> wer %4.2f %% Corr %4.2f %%' % (wer, result['cor']*100/result['all']), end=' ') print('N=%d C=%d S=%d D=%d I=%d' % (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) @@ -447,7 +527,6 @@ def get_acc(ref_file, hyp_file): # general terms, like WEATHER / CAR / ... else : cluster.append(token) - - acc = 100.0 - round(wer, 2) - - return acc \ No newline at end of file + print() + print('=======================================' + '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/tools/filter_scp.pl similarity index 100% rename from models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/filter_scp.pl rename to models/audio/speech_recognition/conformer/igie/tools/filter_scp.pl diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/tools/make_raw_list.py similarity index 100% rename from models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/make_raw_list.py rename to models/audio/speech_recognition/conformer/igie/tools/make_raw_list.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/tools/make_shard_list.py similarity index 91% rename from models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/make_shard_list.py rename to models/audio/speech_recognition/conformer/igie/tools/make_shard_list.py index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..fcd4bcd7d62ba933cf27c34fc02e18371a6b10a6 100644 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/make_shard_list.py +++ b/models/audio/speech_recognition/conformer/igie/tools/make_shard_list.py @@ -66,16 +66,8 @@ def write_tar_file(data_list, # resample if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) + audio = torchaudio.transforms.Resample( + sample_rate, resample)(audio) ts = time.time() f = io.BytesIO() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/tools/text2token.py similarity index 100% rename from models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/text2token.py rename to models/audio/speech_recognition/conformer/igie/tools/text2token.py diff --git a/models/audio/speech_recognition/conformer/igie/train.yaml b/models/audio/speech_recognition/conformer/igie/train.yaml deleted file mode 100644 index e1224b6931bb8e16dbe1f34b638779bbb72d2149..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/train.yaml +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 
2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -accum_grad: 4 -cmvn_file: exp/conformer/global_cmvn -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 -decoder: transformer -decoder_conf: - attention_heads: 4 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 6 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 -encoder: conformer -encoder_conf: - activation_type: swish - attention_dropout_rate: 0.0 - attention_heads: 4 - cnn_module_kernel: 15 - dropout_rate: 0.1 - input_layer: conv2d - linear_units: 2048 - normalize_before: true - num_blocks: 12 - output_size: 256 - pos_enc_layer_type: rel_pos - positional_dropout_rate: 0.1 - selfattention_layer_type: rel_selfattn - use_cnn_module: true -grad_clip: 5 -input_dim: 80 -is_json_cmvn: true -log_interval: 100 -max_epoch: 240 -model_conf: - ctc_weight: 0.3 - length_normalized_loss: false - lsm_weight: 0.1 -optim: adam -optim_conf: - lr: 0.002 -output_dim: 4233 -scheduler: warmuplr -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/.clang-format b/models/audio/speech_recognition/conformer/igie/wenet/.clang-format deleted file mode 100644 index 29333f52be4f383d3a7e1fa8b4cd3680ca007a3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/.clang-format +++ /dev/null @@ -1,94 +0,0 @@ ---- -Language: Cpp -# BasedOnStyle: Google -AccessModifierOffset: -1 -AlignAfterOpenBracket: Align -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: true -AlignOperands: true -AlignTrailingComments: true -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: All -AllowShortIfStatementsOnASingleLine: true -AllowShortLoopsOnASingleLine: true -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: true -AlwaysBreakTemplateDeclarations: true -BinPackArguments: true -BinPackParameters: true -BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - BeforeCatch: false - BeforeElse: false - IndentBraces: false -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Attach 
-BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false -BreakAfterJavaFieldAnnotations: false -BreakStringLiterals: true -ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' -ConstructorInitializerAllOnOneLineOrOnePerLine: true -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DisableFormat: false -ExperimentalAutoDetectBinPacking: false -ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] -IncludeCategories: - - Regex: '^<.*\.h>' - Priority: 1 - - Regex: '^<.*' - Priority: 2 - - Regex: '.*' - Priority: 3 -IncludeIsMainRegex: '([-_](test|unittest))?$' -IndentCaseLabels: true -IndentWidth: 2 -IndentWrappedFunctionNames: false -JavaScriptQuotes: Leave -JavaScriptWrapImports: true -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBlockIndentWidth: 2 -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: false -PenaltyBreakBeforeFirstCallParameter: 1 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 200 -PointerAlignment: Left -ReflowComments: true -SortIncludes: true -SpaceAfterCStyleCast: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 2 -SpacesInAngles: false -SpacesInContainerLiterals: true -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Auto -TabWidth: 8 -UseTab: Never -... - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/.flake8 b/models/audio/speech_recognition/conformer/igie/wenet/.flake8 deleted file mode 100644 index 34aa3e3843d2cb87028da016d6d688b77359b2b4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/.flake8 +++ /dev/null @@ -1,15 +0,0 @@ -[flake8] -select = B,C,E,F,P,T4,W,B9 -max-line-length = 80 -# C408 ignored because we like the dict keyword argument syntax -# E501 is not flexible enough, we're using B950 instead -ignore = - E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303, - # shebang has extra meaning in fbcode lints, so I think it's not worth trying - # to line this up with executable bit - EXE001, EXE002, - # these ignores are from flake8-bugbear; please fix! - B007,B008,B905 - # these ignores are from flake8-comprehensions; please fix! - C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415 -exclude = compute-wer.py,kaldi_io.py,__torch__,docs/conf.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/CODE_OF_CONDUCT.md b/models/audio/speech_recognition/conformer/igie/wenet/CODE_OF_CONDUCT.md deleted file mode 100644 index 66c2a4cafb77b81f9d8f7e65a485b841a6a347a9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,76 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to making participation in our project and -our community a harassment-free experience for everyone, regardless of age, body -size, disability, ethnicity, sex characteristics, gender identity and expression, -level of experience, education, socio-economic status, nationality, personal -appearance, race, religion, or sexual identity and orientation. 
- -## Our Standards - -Examples of behavior that contributes to creating a positive environment -include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or - advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic - address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable -behavior and are expected to take appropriate and fair corrective action in -response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or -reject comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct, or to ban temporarily or -permanently any contributor for other behaviors that they deem inappropriate, -threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. Examples of -representing a project or community include using an official project e-mail -address, posting via an official social media account, or acting as an appointed -representative at an online or offline event. Representation of a project may be -further defined and clarified by project maintainers. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the project team at mikelei@mobvoi.com. All -complaints will be reviewed and investigated and will result in a response that -is deemed necessary and appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an incident. -Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html - -[homepage]: https://www.contributor-covenant.org - -For answers to common questions about this code of conduct, see -https://www.contributor-covenant.org/faq diff --git a/models/audio/speech_recognition/conformer/igie/wenet/CONTRIBUTING.md b/models/audio/speech_recognition/conformer/igie/wenet/CONTRIBUTING.md deleted file mode 100644 index cb56befc95b19d428b7953851caa69a622f5fb88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/CONTRIBUTING.md +++ /dev/null @@ -1,36 +0,0 @@ -# Contributing guidelines - -## Pre-commit tidy/linting hook - -You'll need to install flake8 first. - -`pip3 install flake8==3.8.2` - -We use flake8 to perform additional formatting and semantic checking of code. 
-We provide a pre-commit git hook for performing these checks, before a commit -is created: - -```bash -ln -s ../../tools/git-pre-commit .git/hooks/pre-commit -``` - -You have to execute above command in wenet project root directory. -After that, each commit will be checked by flake8. - -If you do not set pre-commit, just run `flake8` in wenet project root directory -and fix all the problems. - -## Github checks - -After a pull request is submitted, some checks will run to check your code style. - -Below is an example where some checks fail. - -![github checks](docs/images/checks.png) - -You need to click the details to see the detailed info like the example below. - -![github checks](docs/images/check_detail.png) - -You have to fix all style problems according to the detailed info. - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/CPPLINT.cfg b/models/audio/speech_recognition/conformer/igie/wenet/CPPLINT.cfg deleted file mode 100644 index d3c898441efaec14fcd356efbefaa0ef3e237b57..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/CPPLINT.cfg +++ /dev/null @@ -1,2 +0,0 @@ -root=runtime/core -filter=-build/c++11 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/LICENSE b/models/audio/speech_recognition/conformer/igie/wenet/LICENSE deleted file mode 100644 index 261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/README.md b/models/audio/speech_recognition/conformer/igie/wenet/README.md deleted file mode 100644 index 0afabf07bb2d2c97e060afadbb862bcd060c32d5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/README.md +++ /dev/null @@ -1,119 +0,0 @@ -# WeNet - -[![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) -[![Python-Version](https://img.shields.io/badge/Python-3.7%7C3.8-brightgreen)](https://github.com/wenet-e2e/wenet) - -[**Roadmap**](https://github.com/wenet-e2e/wenet/issues/1683) -| [**Docs**](https://wenet-e2e.github.io/wenet) -| [**Papers**](https://wenet-e2e.github.io/wenet/papers.html) -| [**Runtime (x86)**](https://github.com/wenet-e2e/wenet/tree/main/runtime/libtorch) -| [**Runtime (android)**](https://github.com/wenet-e2e/wenet/tree/main/runtime/android) -| [**Pretrained Models**](docs/pretrained_models.md) -| [**HuggingFace**](https://huggingface.co/spaces/wenet/wenet_demo) - -**We** share neural **Net** together. - -The main motivation of WeNet is to close the gap between research and production end-to-end (E2E) speech recognition models, -to reduce the effort of productionizing E2E models, and to explore better E2E models for production. - -## :fire: News - -* 2022.12: Horizon X3 pi BPU, see https://github.com/wenet-e2e/wenet/pull/1597, Kunlun Core XPU, see https://github.com/wenet-e2e/wenet/pull/1455, Raspberry Pi, see https://github.com/wenet-e2e/wenet/pull/1477, IOS, see https://github.com/wenet-e2e/wenet/pull/1549. -* 2022.11: TrimTail paper released, see https://arxiv.org/pdf/2211.00522.pdf -* 2022.10: Squeezeformer is supported, see https://github.com/wenet-e2e/wenet/pull/1447. -* 2022.07: RNN-T is supported now, see [rnnt](https://github.com/wenet-e2e/wenet/tree/main/examples/aishell/rnnt) for benchmark. - -## Highlights - -* **Production first and production ready**: The core design principle of WeNet. WeNet provides full stack solutions for speech recognition. - * *Unified solution for streaming and non-streaming ASR*: [U2++ framework](https://arxiv.org/pdf/2203.15455.pdf)--develop, train, and deploy only once. - * *Runtime solution*: built-in server [x86](https://github.com/wenet-e2e/wenet/tree/main/runtime/libtorch) and on-device [android](https://github.com/wenet-e2e/wenet/tree/main/runtime/android) runtime solution. - * *Model exporting solution*: built-in solution to export model to LibTorch/ONNX for inference. - * *LM solution*: built-in production-level [LM solution](docs/lm.md). - * *Other production solutions*: built-in contextual biasing, time stamp, endpoint, and n-best solutions. - -* **Accurate**: WeNet achieves SOTA results on a lot of public speech datasets. -* **Light weight**: WeNet is easy to install, easy to use, well designed, and well documented. - -## Performance Benchmark - -Please see `examples/$dataset/s0/README.md` for benchmark on different speech datasets. - -## Installation(Python Only) - -If you just want to use WeNet as a python package for speech recognition application, -just install it by `pip`, please note python 3.6+ is required. -``` sh -pip3 install wenetruntime -``` - -And please see [doc](runtime/binding/python/README.md) for usage. 
- - -## Installation(Training and Developing) - -- Clone the repo -``` sh -git clone https://github.com/wenet-e2e/wenet.git -``` - -- Install Conda: please see https://docs.conda.io/en/latest/miniconda.html -- Create Conda env: - -``` sh -conda create -n wenet python=3.8 -conda activate wenet -pip3 install -r requirements.txt -conda install pytorch=1.10.0 torchvision torchaudio=0.10.0 cudatoolkit=11.1 -c pytorch -c conda-forge -``` - -- Optionally, if you want to use x86 runtime or language model(LM), -you have to build the runtime as follows. Otherwise, you can just ignore this step. - -``` sh -# runtime build requires cmake 3.14 or above -cd runtime/libtorch -mkdir build && cd build && cmake -DGRAPH_TOOLS=ON .. && cmake --build . -``` - -## Discussion & Communication - -Please visit [Discussions](https://github.com/wenet-e2e/wenet/discussions) for further discussion. - -For Chinese users, you can aslo scan the QR code on the left to follow our offical account of WeNet. -We created a WeChat group for better discussion and quicker response. -Please scan the personal QR code on the right, and the guy is responsible for inviting you to the chat group. - -If you can not access the QR image, please access it on [gitee](https://gitee.com/robin1001/qr/tree/master). - -| | | -| ---- | ---- | - -Or you can directly discuss on [Github Issues](https://github.com/wenet-e2e/wenet/issues). - -## Acknowledge - -1. We borrowed a lot of code from [ESPnet](https://github.com/espnet/espnet) for transformer based modeling. -2. We borrowed a lot of code from [Kaldi](http://kaldi-asr.org/) for WFST based decoding for LM integration. -3. We referred [EESEN](https://github.com/srvk/eesen) for building TLG based graph for LM integration. -4. We referred to [OpenTransformer](https://github.com/ZhengkunTian/OpenTransformer/) for python batch inference of e2e models. - -## Citations - -``` bibtex -@inproceedings{yao2021wenet, - title={WeNet: Production oriented Streaming and Non-streaming End-to-End Speech Recognition Toolkit}, - author={Yao, Zhuoyuan and Wu, Di and Wang, Xiong and Zhang, Binbin and Yu, Fan and Yang, Chao and Peng, Zhendong and Chen, Xiaoyu and Xie, Lei and Lei, Xin}, - booktitle={Proc. 
Interspeech}, - year={2021}, - address={Brno, Czech Republic }, - organization={IEEE} -} - -@article{zhang2022wenet, - title={WeNet 2.0: More Productive End-to-End Speech Recognition Toolkit}, - author={Zhang, Binbin and Wu, Di and Peng, Zhendong and Song, Xingchen and Yao, Zhuoyuan and Lv, Hang and Xie, Lei and Yang, Chao and Pan, Fuping and Niu, Jianwei}, - journal={arXiv preprint arXiv:2203.15455}, - year={2022} -} -``` diff --git a/models/audio/speech_recognition/conformer/igie/wenet/README_CN.md b/models/audio/speech_recognition/conformer/igie/wenet/README_CN.md deleted file mode 100644 index dc7254512f5dad1dfe191fb486a0c5a6c7255bfd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/README_CN.md +++ /dev/null @@ -1,129 +0,0 @@ -# WeNet - -[**English version**](https://github.com/wenet-e2e/wenet/tree/main/README.md) - -[![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) -[![Python-Version](https://img.shields.io/badge/Python-3.7%7C3.8-brightgreen)](https://github.com/wenet-e2e/wenet) - -[**文档**](https://wenet-e2e.github.io/wenet/) -| [**训练模型教程 1**](https://wenet.org.cn/wenet/tutorial_librispeech.html) -| [**训练模型教程 2**](https://wenet.org.cn/wenet/tutorial_aishell.html) -| [**WeNet 论文**](https://wenet-e2e.github.io/wenet/papers.html) -| [**x86 识别服务**](https://github.com/wenet-e2e/wenet/tree/main/runtime/libtorch) -| [**android 本地识别**](https://github.com/wenet-e2e/wenet/tree/main/runtime/android) - - - -## 核心功能 - -WeNet 是一款面向工业落地应用的语音识别工具包,提供了从语音识别模型的训练到部署的一条龙服务,其主要特点如下: - -* 使用 conformer 网络结构和 CTC/attention loss 联合优化方法,统一的流式/非流式语音识别方案,具有业界一流的识别效果。 -* 提供云上和端上直接部署的方案,最小化模型训练和产品落地之间的工程工作。 -* 框架简洁,模型训练部分完全基于 pytorch 生态,不依赖于 kaldi 等复杂的工具。 -* 详细的注释和文档,非常适合用于学习端到端语音识别的基础知识和实现细节。 -* 支持时间戳,对齐,端点检测,语言模型等相关功能。 - - -## 1分钟 Demo - -**使用预训练模型和 docker 进行语音识别,1分钟(如果网速够快)搭建一个语音识别系统** - -下载官方提供的预训练模型,并启动 docker 服务,加载模型,提供 websocket 协议的语音识别服务。 - -``` sh -wget https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell2/20210618_u2pp_conformer_libtorch.tar.gz -tar -xf 20210618_u2pp_conformer_libtorch.tar.gz -model_dir=$PWD/20210618_u2pp_conformer_libtorch -docker run --rm -it -p 10086:10086 -v $model_dir:/home/wenet/model wenetorg/wenet-mini:latest bash /home/run.sh -``` - -**实时识别** - -使用浏览器打开文件`wenet/runtime/libtorch/web/templates/index.html`,在 `WebSocket URL` 中填入 `ws://127.0.0.1:10086` (若在windows下通过wsl2运行docker, 则使用`ws://localhost:10086`) , 允许浏览器弹出的请求使用麦克风,即可通过麦克风进行实时语音识别。 - -![Runtime web](/docs/images/runtime_web.png) - - -## 训练语音识别模型 - -**配置环境** - -``` sh -git clone https://github.com/wenet-e2e/wenet.git -``` - -- 安装 Conda: https://docs.conda.io/en/latest/miniconda.html -- 创建 Conda 环境: - -``` sh -conda create -n wenet python=3.8 -conda activate wenet -pip3 install -r requirements.txt -conda install pytorch=1.10.0 torchvision torchaudio=0.10.0 cudatoolkit=11.1 -c pytorch -c conda-forge -``` - -**训练模型** - -使用中文 Aishell-1 数据集训练模型 -``` -cd examples/aishell/s0/ -bash run.sh --stage -1 -``` - -细节请阅读 [**训练模型教程**](https://wenet-e2e.github.io/wenet/tutorial_aishell.html) - - -### 新手常见问题 - -1. 请使用具有gpu的机器。确保cuda和torch都已经安装。wenet也支持cpu训练,但是速度非常很慢。 -2. 请使用支持bash的环境。windows的默认cmd是不支持bash的。 -3. run.sh脚本里,`export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"`, 改为自己要用的GPU id,比如你的机器有4张GPU卡,4张卡都用来训练,改为 `export CUDA_VISIBLE_DEVICES="0,1,2,3"` -4. run.sh脚本里,`data=/export/data/asr-data/OpenSLR/33/`设置为自己的目录。请使用绝对路径而不要用相对路径。 -5. 
如果继续训练出错,请先删除实验目录下的 ddp_init文件再试一试。 - - -## 技术支持 - -欢迎在 [Github Issues](https://github.com/wenet-e2e/wenet/issues) 中提交问题。 - -欢迎扫二维码加入微信讨论群,如果群人数较多,请添加右侧个人微信入群。 - -| | | -| ---- | ---- | - -## 致谢 - -WeNet 借鉴了一些优秀的开源项目,包括 - -1. Transformer 建模 [ESPnet](https://github.com/espnet/espnet) -2. WFST 解码 [Kaldi](http://kaldi-asr.org/) -3. TLG 构图 [EESEN](https://github.com/srvk/eesen) -4. Python Batch 推理 [OpenTransformer](https://github.com/ZhengkunTian/OpenTransformer/) - -## 引用 - -``` bibtex -@inproceedings{yao2021wenet, - title={WeNet: Production oriented Streaming and Non-streaming End-to-End Speech Recognition Toolkit}, - author={Yao, Zhuoyuan and Wu, Di and Wang, Xiong and Zhang, Binbin and Yu, Fan and Yang, Chao and Peng, Zhendong and Chen, Xiaoyu and Xie, Lei and Lei, Xin}, - booktitle={Proc. Interspeech}, - year={2021}, - address={Brno, Czech Republic }, - organization={IEEE} -} - -@article{zhang2020unified, - title={Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition}, - author={Zhang, Binbin and Wu, Di and Yao, Zhuoyuan and Wang, Xiong and Yu, Fan and Yang, Chao and Guo, Liyong and Hu, Yaguang and Xie, Lei and Lei, Xin}, - journal={arXiv preprint arXiv:2012.05481}, - year={2020} -} - -@article{wu2021u2++, - title={U2++: Unified Two-pass Bidirectional End-to-end Model for Speech Recognition}, - author={Wu, Di and Zhang, Binbin and Yang, Chao and Peng, Zhendong and Xia, Wenjing and Chen, Xiaoyu and Lei, Xin}, - journal={arXiv preprint arXiv:2106.05642}, - year={2021} -} -``` diff --git a/models/audio/speech_recognition/conformer/igie/wenet/ROADMAP.md b/models/audio/speech_recognition/conformer/igie/wenet/ROADMAP.md deleted file mode 100644 index 4d44b2ab6548b8a74cc20baf0371674839f68cb6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/ROADMAP.md +++ /dev/null @@ -1,49 +0,0 @@ -# WeNet Roadmap - -This roadmap for WeNet. -WeNet is a community-driven project and we love your feedback and proposals on where we should be heading. - -Please open up [issues](https://github.com/wenet-e2e/wenet/issues/) or -[discussion](https://github.com/wenet-e2e/wenet/discussions) on github to write your proposal. -Feel free to volunteer yourself if you are interested in trying out some items(they do not have to be on the list). - - -## WeNet 3.0 (2023.06) - -- [x] ONNX support, see https://github.com/wenet-e2e/wenet/pull/1103 -- [x] RNN-T support, see https://github.com/wenet-e2e/wenet/pull/1261 -- [ ] Self training, streaming -- [ ] Light weight, low latency, on-device model exploration - - [x] TrimTail, see https://github.com/wenet-e2e/wenet/pull/1487/, [paper link](https://arxiv.org/pdf/2211.00522.pdf) -- [ ] Audio-Visual speech recognition -- [ ] OS or Hardware Platforms - - [x] IOS, https://github.com/wenet-e2e/wenet/pull/1549 - - [x] Raspberry Pi, see https://github.com/wenet-e2e/wenet/pull/1477 - - [ ] Harmony OS -- [ ] ASIC XPU - - [x] Horizon X3 pi, BPU, see https://github.com/wenet-e2e/wenet/pull/1597 - - [x] Kunlun XPU, see https://github.com/wenet-e2e/wenet/pull/1455 -- [x] Public Model Hub Support - - [x] HuggingFace, see https://huggingface.co/spaces/wenet/wenet_demo - - [x] ModelScope, see https://modelscope.cn/models/wenet/u2pp_conformer-asr-cn-16k-online/summary - - [x] [Vosk](https://github.com/alphacep/vosk-api/) like models and API for developers. 
- - [x] Models(Chinese/English/Japanese/Korean/French/German/Spanish/Portuguese) - - [x] Chinese - - [x] English - - [x] API(python/c/c++/go/java) - - [x] python - -## WeNet 2.0 (2022.06) - -- [x] U2++ framework for better accuracy -- [x] n-gram + WFST language model solution -- [x] Context biasing(hotword) solution -- [x] Very big data training support with UIO -- [x] More dataset support, including WenetSpeech, GigaSpeech, HKUST and so on. - -## WeNet 1.0 (2021.02) - -- [x] Streaming solution(U2 framework) -- [x] Production runtime solution with `TorchScript` training and `LibTorch` inference. -- [x] Unified streaming and non-streaming model(U2 framework) - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/dataset.py similarity index 86% rename from models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/dataset.py rename to models/audio/speech_recognition/conformer/igie/wenet/dataset.py index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..88a8cd15aec2277a36358883b25e929b179165e8 100644 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/dataset.py +++ b/models/audio/speech_recognition/conformer/igie/wenet/dataset.py @@ -18,8 +18,8 @@ import torch import torch.distributed as dist from torch.utils.data import IterableDataset -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists +import wenet.processor as processor +from wenet.file_utils import read_lists class Processor(IterableDataset): @@ -156,27 +156,13 @@ def Dataset(data_type, if speed_perturb: dataset = Processor(dataset, processor.speed_perturb) - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) + fbank_conf = conf.get('fbank_conf', {}) + dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) if spec_aug: spec_aug_conf = conf.get('spec_aug_conf', {}) dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) if shuffle: shuffle_conf = conf.get('shuffle_conf', {}) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/Makefile b/models/audio/speech_recognition/conformer/igie/wenet/docs/Makefile deleted file mode 100644 index a025a1f0c74dfe301edb4403f42fde65eb204aa5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SPHINXPROJ = Wenet -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". 
-help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/UIO.md b/models/audio/speech_recognition/conformer/igie/wenet/docs/UIO.md deleted file mode 100644 index dd2555694075893bb29d68018c1b656728ed6aab..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/UIO.md +++ /dev/null @@ -1,177 +0,0 @@ -# UIO for WeNet - -In order to support the model training of industrial tens of millions of hours of speech dataset, the data processing -method UIO (Unified IO) has been updated in WeNet. The document will introduce UIO from the following sections: -Necessity of upgrading IO mothod, System design of UIO, Validation experiments, Usage of UIO, Q&A. - -## Necessity of upgrading IO mothod -The old IO method in WeNet is based on Pytorch's native Dataset. During training, it need to load all training audio -paths and correspondingly labels into the memory at one time, then randomly read data. In the case of industrial-grade -ultra-large-scale data (egs: more than 50,000 hours or 50 million or more audio), this method will cause the training -to fail to run for two reasons: -- Out of memory(OOM): The physical memory of the general machine is difficult to load the training data at one time. -- Slow down reading performance: In the case that the large-scale data memory cannot be used as a file cache, the training -data reading speed is greatly reduced. - -## System design of UIO -### Overall design -Inspired by the following industrial methods(egs: [webdataset](https://github.com/webdataset/webdataset), [TFRecord](https://www.tensorflow.org/tutorials/load_data/tfrecord)), -WeNet redesigned the IO method. Its core idea is to make the audio and labels of multiple small data(such as 1000 pieces), -into compressed packets (tar) and read them based on the IterableDataset of Pytorch. The advantages of this method is: -- Only the index information of the compressed package needs to be maintained in memory, which greatly saves memory and -solves the problem of OOM. -- The on-the-fly decompression is performed in the memory, and the data in the same compressed package is read in -sequence, which solves the problem of slow random reading performance. Different compressed packets can be read randomly -to ensure the global randomness of data. - -The new IO method takes into account both small datasets and large datasets, and provides two data reading methods. -We call it UIO. The overall design of UIO is shown in the figure below: - -![UIO System Design](./images/UIO_system.png) - -Some necessary explanations about the above figure: -- Small IO(raw) supports small datasets, which we call ``raw`` mode. This mode only supports local file reading. -The required documents must be sorted into Kaldi style file: wav.scp and text.(It's the same as before) -- Big IO(shared) supports large datasets, which we call ``shard`` mode. This mode can support both local file -reading and network cloud storage file reading. The required files must be sorted into compressed packages. Audio (wav) -and label (txt) are stored in a single compressed package in sequence. 
-
-### Chain IO
-Inspired by TFRecord's chain IO, UIO also adopts a chain implementation. In practical use, chain IO is more flexible,
-easier to extend and easier to debug. A TFRecord IO example is as follows:
-```python
-def read_dataset(filename, batch_size):
-    dataset = tf.data.TFRecordDataset(filename)
-    dataset = dataset.map(_parse_image_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
-    dataset = dataset.shuffle(500)
-    dataset = dataset.batch(batch_size, drop_remainder=True)
-    dataset = dataset.repeat()
-    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
-    return dataset
-```
-Following TFRecord IO, the UIO dataflow in WeNet is designed as shown in the figure below:
-
-![UIO dataflow](./images/UIO_dataflow.png)
-
-It includes the following modules:
-- tokenize module: converts the label into the specified modeling unit (e.g., char or BPE).
-
-- filter module: filters out unqualified training data.
-
-- resample module: optionally resamples the training data.
-
-- compute_fbank module: fbank feature extraction.
-
-- spec_augmentation module: feature augmentation.
-
-- shuffle module: shuffles data locally.
-
-- sort module: sorts data locally.
-
-- batch module: organizes multiple samples into a batch.
-
-- padding module: pads the data within the same batch.
-
-In addition, there are several parameters to note. First, the ``shuffle buffer`` and ``sort buffer`` sizes:
-* ``Shuffle buffer``: shuffles the data. It is recommended that this buffer be larger than the number of samples
-contained in a single shard, so that each shuffle effectively mixes data across two shards, which increases the
-randomness of the data (e.g., if each shard contains 1000 samples, you can set the shuffle buffer to 1500).
-* ``Sort buffer``: sorts the data by the number of frames. This operation is very important and can greatly
-improve the training speed.
-
-Second, ``prefetch``:
-``Prefetch`` is used in the PyTorch ``DataLoader`` to pre-read data, at the granularity of the final training batches.
-The default value is 2, i.e., two batches are pre-read by default. In the UIO design, because of the buffers described
-above, the pre-read data may already be sitting in a buffer, so no real pre-read happens; the buffer is only refilled
-on the fly once its data runs out during a later step, and at that moment training blocks on reading data. In short,
-when prefetch is very small, training will occasionally block on IO, so you can set a larger prefetch to avoid this
-problem. (A simplified sketch of this chained-processor design is given after the validation experiments below.)
-
-
-## Validation experiments
-So far, we have verified the accuracy of UIO on the aishell (200 hours) and wenetspeech (10000 hours) datasets.
-### Aishell (``raw`` vs ``shard``)
-
-|IO|CER|
-|:---|:---|
-|Old|4.61|
-|UIO(``Raw``)|4.63|
-|UIO(``Shard``)|4.67|
-
-### WenetSpeech (``shard``)
-
-![UIO WenetSpeech](./images/UIO_wenetspeech_cer.png)
-
-WeNet and ESPnet use a similar model structure and parameter configuration and achieve a similar recognition rate,
-which confirms the correctness of UIO in WeNet. During training we also observed an overall GPU utilization of
-80%-90% with UIO, indicating that the IO reading efficiency is very high.
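-
-As a rough companion to the chain design above, the following is a simplified sketch of the Processor/buffer idea in
-plain PyTorch terms. It is not the actual implementation in WeNet's dataset/processor code, and the buffer size and
-``prefetch_factor`` shown are example values rather than recommendations.
-
-```python
-# Simplified sketch: chained IterableDataset processors with a buffered shuffle.
-import random
-from torch.utils.data import IterableDataset, DataLoader
-
-class Processor(IterableDataset):
-    """Wrap a source dataset with a generator-style transform, one stage per wrapper."""
-    def __init__(self, source, fn, **kwargs):
-        self.source, self.fn, self.kwargs = source, fn, kwargs
-
-    def __iter__(self):
-        # fn consumes an iterator of samples and yields transformed samples
-        return self.fn(iter(self.source), **self.kwargs)
-
-def shuffle(data, buffer_size=1500):
-    """Buffered shuffle: hold up to buffer_size samples, then yield them in random order."""
-    buf = []
-    for sample in data:
-        buf.append(sample)
-        if len(buf) >= buffer_size:
-            random.shuffle(buf)
-            yield from buf
-            buf = []
-    random.shuffle(buf)
-    yield from buf
-
-# Chained usage (raw_dataset and tokenize are placeholders):
-# dataset = Processor(Processor(raw_dataset, tokenize), shuffle, buffer_size=1500)
-# loader = DataLoader(dataset, batch_size=None, num_workers=2, prefetch_factor=4)
-```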
- -## Usage of UIO -For detailed usage of UIO, please refer to the aishell dataset example: -https://github.com/wenet-e2e/wenet/blob/main/examples/aishell/s0/run.sh -At present, all datasets in WeNet have used UIO as the default data preparation. - -There are three parameters related to UIO in the training script train.py: -- ``train_data``(``cv_data``/``test_data``): data.list -- ``data_type``: raw/shard -- ``symbol_table``: specify modeling unit - -For example: -```shell -python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --train_data $feat_dir/$train_set/data.list \ - --cv_data $feat_dir/dev/data.list \ - ... -``` -If data_type is ``raw``, the format of data.list is as follows: -``` -{"key": "BAC009S0002W0122", "wav": "/export/data/asr-data/OpenSLR/33/data_aishell/wav/train/S0002/BAC009S0002W0122.wav", "txt": "而对楼市成交抑制作用最大的限购"} -{"key": "BAC009S0002W0123", "wav": "/export/data/asr-data/OpenSLR/33/data_aishell/wav/train/S0002/BAC009S0002W0123.wav", "txt": "也成为地方政府的眼中钉"} -{"key": "BAC009S0002W0124", "wav": "/export/data/asr-data/OpenSLR/33/data_aishell/wav/train/S0002/BAC009S0002W0124.wav", "txt": "自六月底呼和浩特市率先宣布取消限购后"} -``` -Each line is a json serialized string, which contains three fields: ``key``, ``wav`` and ``txt``. - -If data_type is ``shard``, the format of data.list is as follows: -``` -# [option 1: local] -/export/maryland/binbinzhang/code/wenet/examples/aishell/s3/raw_wav/train/shards/shards_000000000.tar.gz -/export/maryland/binbinzhang/code/wenet/examples/aishell/s3/raw_wav/train/shards/shards_000000001.tar.gz -/export/maryland/binbinzhang/code/wenet/examples/aishell/s3/raw_wav/train/shards/shards_000000002.tar.gz - -# [option 2: network(egs: OSS)] -https://examplebucket.oss-cn-hangzhou.aliyuncs.com/exampledir/1.tar.gz -https://examplebucket.oss-cn-hangzhou.aliyuncs.com/exampledir/2.tar.gz -``` - -## Q&A -Q1: How to operate distributed partition of training data? - -A: According to rank and num_workers can segment the data. for example: -```python -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - data = list(range(len(data))) - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data -``` - -Q2: How to deal with unbalanced data? - -A: Use model.join() to handle the imbalance of data allocated on each rank. Please refer [this](https://pytorch.org/tutorials/advanced/generic_join.html#how-does-join-work). \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/conf.py b/models/audio/speech_recognition/conformer/igie/wenet/docs/conf.py deleted file mode 100644 index 49abc10713f4caa97c2d076352a11b7b121b7c29..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/conf.py +++ /dev/null @@ -1,71 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. 
For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - - -# -- Project information ----------------------------------------------------- - -project = 'Wenet' -copyright = '2020, wenet-team' -author = 'wenet-team' - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "nbsphinx", - "sphinx.ext.autodoc", - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - "sphinx.ext.mathjax", - "sphinx.ext.todo", - # "sphinxarg.ext", - "sphinx_markdown_tables", - 'recommonmark', - 'sphinx_rtd_theme', -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -source_suffix = { - '.rst': 'restructuredtext', - '.txt': 'markdown', - '.md': 'markdown', -} - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# html_theme = 'alabaster' -html_theme = "sphinx_rtd_theme" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/context.md b/models/audio/speech_recognition/conformer/igie/wenet/docs/context.md deleted file mode 100644 index 881d119ea05e4a591d42cba0d4efd123fc56a80f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/context.md +++ /dev/null @@ -1,138 +0,0 @@ -## Context Biasing - -In the practical application of ASR, the recognition effect of commonly used words is better, but for some unique words, the recognition accuracy may be low. Contextual biasing is the problem of injecting prior knowledge into an ASR system during inference, for example a user’s favorite songs, contacts, apps or location. Conventional ASR systems perform contextual biasing by building an n-gram finite state transducer (FST) from a list of biasing phrases, which is composed on-the-fly with the decoder graph during decoding. This helps to bias the recognition result towards the n-grams contained in the contextual FST, and thus improves accuracy in certain scenarios. - -In WeNet, we compute the biasing scores $P_C(\mathbf y)$, which are interpolated with the base model $P(\mathbf y|\mathbf x)$ using shallow-fusion during beam search, including CTC prefix beam search and CTC WFST beam search. 
- -$$ -\mathbf y^*=\mathrm{arg\,max\,log}P(\mathbf y|\mathbf x)+\lambda\,\mathrm{log}\,P_C(\mathbf y) -$$ - -where, $\lambda$ is a tunable hyperparameter controlling how much the contextual LM influences the overall model score during beam search. - -### Context Graph - -If we want to improve the score of the word "cat", and the biasing score $\lambda\,\mathrm{log}\,P_C(\mathbf y)$ of each character is 0.25. The context graph can be constructed as follow: - -![context graph](images/context_graph.png) - -In the decoding process, when the corresponding prefix is matched, the corresponding score reward will be obtained. In order to avoid artificially boosting prefixes which match early on but do not match the entire phrase, we add a special failure arc which removes the boosted score. - -WeNet records only one state for each prefix, to easily determine the boundary of the matched hot word. That is, only one hot word can be matched at the same time, and only after the hot word matching succeeds or fails can other hot words start matching. - -``` c++ -int ContextGraph::GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary) { - int next_state = 0; - // Traverse the arcs of current state. - for (fst::ArcIterator aiter(*graph_, cur_state); !aiter.Done(); - aiter.Next()) { - const fst::StdArc& arc = aiter.Value(); - if (arc.ilabel == 0) { - // Record the score of the backoff arc. It might will be covered. - *score = arc.weight.Value(); - } else if (arc.ilabel == word_id) { - // If they match, record the next state and the score. - next_state = arc.nextstate; - *score = arc.weight.Value(); - // Check whether is the boundary of the hot word. - if (cur_state == 0) { - *is_start_boundary = true; - } - if (graph_->Final(arc.nextstate) == fst::StdArc::Weight::One()) { - *is_end_boundary = true; - } - break; - } - } - return next_state; -} -``` - -### CTC Prefix Beam Search - -In the process of CTC prefix beam search, each prefix needs to record the hot word matching information. After appending the current output character, if the prefix changes, call the above function `GetNextState` to update the state and score of the hot word. If it is the start or end of a hot word, it is also necessary to record the position, which are used to insert the start tag and end tag in the result, such as: "The \cat\ is in the bag". - -### CTC WFST Beam Search - -WeNet adopts the Lattice Faster Online Decoder from Kaldi for WFST beam search. We have to modify the `lattice-faster-decoder.cc` to support context biasing. - -WFST beam search decodes in the TLG graph according to the CTC outputs. If we bias the input label of the TLG, we need to compose the context graph with the Token graph. Finally, we decide to bias TLG's output towards the contextual fst. 
We need to modify the `ProcessEmitting` and `ProcessNonemitting` functions as follow: - -```c++ -Elem *e_next = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, NULL); -// NULL: no change indicator needed - -// ========== Context code BEGIN =========== -bool is_start_boundary = false; -bool is_end_boundary = false; -float context_score = 0; -if (context_graph_) { - if (arc.olabel == 0) { - e_next->val->context_state = tok->context_state; - } else { - e_next->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } -} -// ========== Context code END ========== - -// Add ForwardLink from tok to next_tok (put on head of list -// tok->links) -tok->links = new ForwardLinkT(e_next->val, arc.ilabel, arc.olabel, - graph_cost, ac_cost, is_start_boundary, - is_end_boundary, tok->links); -tok->links->context_score = context_score; -``` - -### Pruning - -The backoff arc will return the accumulated scores to a single ForwardLink. It leads to the cost of that ForwardLink is too large. We have to remove the cost returned by backoff arc before pruning. - -```c++ -void LatticeFasterDecoderTpl::PruneForwardLinks( - int32 frame_plus_one, bool *extra_costs_changed, bool *links_pruned, - BaseFloat delta) { - ... - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); // difference in brackets is >= 0 - // ========== Context code BEGIN =========== - // graph_cost contains the score of hot word - // link->context_score < 0 means the hot word of the link is returned from backoff arc - if (link->context_score < 0) { - link_extra_cost += link->context_score; - } - // ========== Context code END ========== - // link_exta_cost is the difference in score between the best paths - // through link source state and through link destination state -``` - -### Usage - -1. Specify the `--context_path` to a text file. - - Each line of the file contains a context. - - Each context can be split into words with the symbol_table of the ASR model (It means there is no oov in the context). -2. Specify the `--context_score`, the reward of each word in the context. 
- -```bash -cd /home/wenet/runtime/libtorch -export GLOG_logtostderr=1 -export GLOG_v=2 -wav_path=docker_resource/test.wav -context_path=docker_resource/context.txt -model_dir=docker_resource/model -./build/decoder_main \ - --chunk_size -1 \ - --wav_path $wav_path \ - --model_path $model_dir/final.zip \ - --context_path $context_path \ - --context_score 3 \ - --unit_path $model_dir/units.txt 2>&1 | tee log.txt -``` - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/UIO_dataflow.png b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/UIO_dataflow.png deleted file mode 100644 index dce9f0128b8a6fc610f2d8dc8ec5cface0b1553a..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/UIO_dataflow.png and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/UIO_system.png b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/UIO_system.png deleted file mode 100644 index 1861daf61c6265b08e60fb4661f17654823ad0e4..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/UIO_system.png and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/UIO_wenetspeech_cer.png b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/UIO_wenetspeech_cer.png deleted file mode 100644 index 223e9ccf9422aa4fe567a27c1778d2f61c0c610c..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/UIO_wenetspeech_cer.png and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/check_detail.png b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/check_detail.png deleted file mode 100644 index 45c08be40bfd530d8bddfb091aa7d6340afa1bf2..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/check_detail.png and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/checks.png b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/checks.png deleted file mode 100644 index 14f0124875170bff4d4328f9b3295081db85473e..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/checks.png and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/context_graph.png b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/context_graph.png deleted file mode 100644 index c89cd37f7b92168733aeba538a5260ed25ca39d6..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/context_graph.png and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/lm_system.png b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/lm_system.png deleted file mode 100644 index 76b428d8297b5c4c36d26831aff6692a42318a84..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/lm_system.png and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/runtime_android.gif b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/runtime_android.gif deleted file mode 100644 index 
ab060b4f45cf580c332e41d146c620ec0b212a0f..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/runtime_android.gif and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/runtime_server.gif b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/runtime_server.gif deleted file mode 100644 index f418c3db1c67c90b4cfa6e57bc5e1542c88c5323..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/runtime_server.gif and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/runtime_web.png b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/runtime_web.png deleted file mode 100644 index 1788eadc425643496ef16ac36579a297d7d54ba1..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/runtime_web.png and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/subsampling_overalp.gif b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/subsampling_overalp.gif deleted file mode 100644 index 8bffe67315bd89cc4fbfda2c4505d8918914dce6..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/subsampling_overalp.gif and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/u2.gif b/models/audio/speech_recognition/conformer/igie/wenet/docs/images/u2.gif deleted file mode 100644 index a4a62e8bd1fc385be31040acefd57816469d361a..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/docs/images/u2.gif and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/index.rst b/models/audio/speech_recognition/conformer/igie/wenet/docs/index.rst deleted file mode 100644 index 9eb920f3c46dc5a13db285d45843c4e8b2303028..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/index.rst +++ /dev/null @@ -1,33 +0,0 @@ -.. Wenet documentation master file, created by - sphinx-quickstart on Thu Dec 3 11:43:53 2020. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to Wenet's documentation! -================================= - - -Wenet is an tansformer-based end-to-end ASR toolkit. - -.. toctree:: - :maxdepth: 1 - :caption: Tutorial: - - ./python_binding.md - ./papers.md - ./tutorial_librispeech.md - ./tutorial_aishell.md - ./pretrained_models.md - ./lm.md - ./context.md - ./runtime.md - ./jit_in_wenet.md - ./UIO.md - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/jit_in_wenet.md b/models/audio/speech_recognition/conformer/igie/wenet/docs/jit_in_wenet.md deleted file mode 100644 index 650090d9e6f6f19f0081df912f750eee3820a713..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/jit_in_wenet.md +++ /dev/null @@ -1,31 +0,0 @@ -# JIT in WeNet - -We want that our PyTorch model can be directly exported by torch.jit.script method, -which is essential for deploying the model to production. - -See the following resource for how to deploy PyTorch models in production in details. 
-- [INTRODUCTION TO TORCHSCRIPT](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html)
-- [TORCHSCRIPT LANGUAGE REFERENCE](https://pytorch.org/docs/stable/jit_language_reference.html#language-reference)
-- [LOADING A TORCHSCRIPT MODEL IN C++](https://pytorch.org/tutorials/advanced/cpp_export.html)
-- [TorchScript and PyTorch JIT | Deep Dive](https://www.youtube.com/watch?v=2awmrMRf0dA&t=574s)
-- [Research to Production: PyTorch JIT/TorchScript Updates](https://www.youtube.com/watch?v=St3gdHJzic0)
-
-To ensure this, we try to export the model before the training stage.
-If the export fails, we should modify the training code to satisfy the export requirements.
-
-``` python
-# See in wenet/bin/train.py
-script_model = torch.jit.script(model)
-script_model.save(os.path.join(args.model_dir, 'init.zip'))
-```
-
-Two principles should be taken into consideration when we contribute Python code
-to WeNet, especially for subclasses of torch.nn.Module and their forward functions.
-
-1. Know what is allowed and what is disallowed.
-    - [Torch and Tensor Unsupported Attributes](https://pytorch.org/docs/master/jit_unsupported.html#jit-unsupported)
-    - [Python Language Reference Coverage](https://pytorch.org/docs/master/jit_python_reference.html#python-language-reference)
-
-2. Try to use explicit typing as much as possible. You can also do type checking
-enforced by typeguard; see https://typeguard.readthedocs.io/en/latest/userguide.html for details.
-
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/lm.md b/models/audio/speech_recognition/conformer/igie/wenet/docs/lm.md
deleted file mode 100644
index 12acce16f7f5fcb9d5e8e61993eb6164396849b7..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/docs/lm.md
+++ /dev/null
@@ -1,106 +0,0 @@
-# LM for WeNet
-
-WeNet uses an n-gram based statistical language model and the WFST framework to support custom language models.
-The LM is only supported in the WeNet runtime.
-
-## Motivation
-
-Why an n-gram based LM? This may be the first question many people ask.
-Now that RNN- and Transformer-based LMs are in full swing, why does WeNet go backward?
-The reason is simple: productivity.
-The n-gram-based language model has mature and complete training tools,
-can be trained on any amount of corpus, trains very fast, is easy to hotfix,
-and has a wide range of mature applications in real products.
-
-Why WFST? This may be the second question many people ask.
-Both industry and research have been working hard to move away from traditional speech recognition,
-especially its complex decoding technology, so why does WeNet go back to it?
-The reason is also very simple: productivity.
-WFST is a standard and powerful tool in traditional speech recognition.
-Based on this solution, we have mature and complete bug-fix and product solutions;
-for example, we can use the WFST replace operation for class-based personalization such as contact recognition.
-
-Therefore, just like WeNet's design goal "Production first and Production Ready",
-LM support in WeNet also puts productivity first,
-and it draws on many productive tools and solutions accumulated in traditional speech recognition.
-The differences from traditional speech recognition are:
-
-1. The training in WeNet is purely end to end.
-2. As described below, the LM is optional in decoding; you can choose whether to use it according to your needs and application scenarios.
-
-
-## System Design
-
-The whole system is shown in the picture below. There are two ways to generate the N-best.
-
-![LM System Design](./images/lm_system.png)
-
-1. Without LM, we use CTC prefix beam search to generate the N-best.
-2. With LM, we use CTC WFST search to generate the N-best; CTC WFST search is the traditional WFST-based decoder.
-
-There are two main parts to the CTC WFST based search.
-
-The first is building the decoding graph, which composes the model unit T, the lexicon L and the language model G into one unified graph TLG, in which:
-1. T is the model unit used in E2E training. Typically it is a char in Chinese, and a char or BPE token in English.
-2. L is the lexicon, and the lexicon is very simple: we just split a word into its modeling-unit sequence.
-For example, the word "我们" is split into two chars "我 们", and the word "APPLE" is split into five letters "A P P L E".
-There are no phonemes, and there is no need to design pronunciations by hand.
-3. G is the language model, namely the n-gram compiled into the standard WFST representation.
-
-The second is the decoder, which is the same as the traditional decoder and uses the standard Viterbi beam search algorithm in decoding.
-
-## Implementation
-
-WeNet draws on the decoder and related tools in Kaldi to support LM and WFST based decoding.
-For ease of use and to keep WeNet self-contained, we directly migrated the decoding-related code in Kaldi to [this directory](https://github.com/wenet-e2e/wenet/tree/main/runtime/core/kaldi) in the WeNet runtime,
-and modified and organized it according to the following principles:
-1. To minimize changes, the migrated code keeps the same directory structure as the original.
-2. We use GLOG to replace the log system in Kaldi.
-3. We modify the code format to meet the lint requirements of the code style in WeNet.
-
-The core code is https://github.com/wenet-e2e/wenet/blob/main/runtime/core/decoder/ctc_wfst_beam_search.cc,
-which wraps the LatticeFasterDecoder in Kaldi, and we use blank frame skipping to speed up decoding.
-
-In addition, WeNet also migrated the related tools for building the decoding graph,
-such as arpa2fst, fstdeterminizestar, fsttablecompose and fstminimizeencoded.
-So all the LM-related tools are built in and can be used out of the box.
-
-
-## Results
-
-We get a consistent gain (3%~10%) on different datasets,
-including aishell, aishell2, and librispeech;
-please go to the corresponding dataset example for the details.
-
-## How to use?
-
-Here is an example from aishell, which shows how to prepare the dictionary, how to train the LM,
-how to build the graph, and how to decode with the runtime.
- -``` sh -# 7.1 Prepare dict -unit_file=$dict -mkdir -p data/local/dict -cp $unit_file data/local/dict/units.txt -tools/fst/prepare_dict.py $unit_file ${data}/resource_aishell/lexicon.txt \ - data/local/dict/lexicon.txt -# 7.2 Train lm -lm=data/local/lm -mkdir -p $lm -tools/filter_scp.pl data/train/text \ - $data/data_aishell/transcript/aishell_transcript_v0.8.txt > $lm/text -local/aishell_train_lms.sh -# 7.3 Build decoding TLG -tools/fst/compile_lexicon_token_fst.sh \ - data/local/dict data/local/tmp data/local/lang -tools/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1; -# 7.4 Decoding with runtime -./tools/decode.sh --nj 16 \ - --beam 15.0 --lattice_beam 7.5 --max_active 7000 \ - --blank_skip_thresh 0.98 --ctc_weight 0.5 --rescoring_weight 1.0 \ - --fst_path data/lang_test/TLG.fst \ - --dict_path data/lang_test/words.txt \ - data/test/wav.scp data/test/text $dir/final.zip \ - data/lang_test/units.txt $dir/lm_with_runtime -``` diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/make.bat b/models/audio/speech_recognition/conformer/igie/wenet/docs/make.bat deleted file mode 100644 index a42274a63310b8672adb4eb1bbd2c170cdc7684a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=. -set BUILDDIR=_build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/papers.md b/models/audio/speech_recognition/conformer/igie/wenet/docs/papers.md deleted file mode 100644 index f006314f31324594e729c48ef7df8b0bd5c51afa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/papers.md +++ /dev/null @@ -1,5 +0,0 @@ -## Papers - -* [WeNet: Production Oriented Streaming and Non-streaming End-to-End Speech Recognition Toolkit](https://arxiv.org/pdf/2102.01547.pdf), accepted by InterSpeech 2021. -* [WeNet 2.0: More Productive End-to-End Speech Recognition Toolkit](https://arxiv.org/pdf/2203.15455.pdf), accepted by InterSpeech 2022. - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/pretrained_models.en.md b/models/audio/speech_recognition/conformer/igie/wenet/docs/pretrained_models.en.md deleted file mode 100644 index 1aaac5a1760650d2150decd644ed4ec30433e520..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/pretrained_models.en.md +++ /dev/null @@ -1,26 +0,0 @@ -# Pretrained Models in WeNet - -## Model Types -We provide two types of pretrained model in WeNet to facilitate users with different requirements. - -1. 
**Checkpoint Model**, with suffix **.pt**, the model trained and saved as checkpoint by WeNet python code, you can reproduce our published result with it, or you can use it as checkpoint to continue. - -2. **Runtime Model**, with suffix **.zip**, you can directly use `runtime model` in our [x86](https://github.com/wenet-e2e/wenet/tree/main/runtime/libtorch) or [android](https://github.com/wenet-e2e/wenet/tree/main/runtime/android) runtime, the `runtime model` is export by Pytorch JIT on the `checkpoint model`. And the runtime models has been quantized to reduce the model size and network traffic. - -## Model License - -The pretrained model in WeNet follows the license of it's corresponding dataset. -For example, the pretrained model on LibriSpeech follows `CC BY 4.0`, since it is used as license of the LibriSpeech dataset, see http://openslr.org/12/. - -## Model List - -Here is a list of the pretrained models on different datasets. The model structure, model size, and download link are given. - -| Datasets | Languages | Checkpoint Model | Runtime Model | Contributor | -|--- |--- |--- |--- |--- | -| [aishell](../examples/aishell/s0/README.md) | CN | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell/20210601_u2%2B%2B_conformer_exp.tar.gz) | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell/20210601_u2%2B%2B_conformer_libtorch.tar.gz) | | -| [aishell2](../examples/aishell2/s0/README.md) | CN | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell2/20210618_u2pp_conformer_exp.tar.gz) | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell2/20210618_u2pp_conformer_libtorch.tar.gz) | | -| [gigaspeech](../examples/gigaspeech/s0/README.md) | EN | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/gigaspeech/gigaspeech_u2pp_conformer_exp.tar.gz) | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/gigaspeech/gigaspeech_u2pp_conformer_libtorch.tar.gz) | | -| [librispeech](../examples/librispeech/s0/README.md) | EN | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/librispeech/20210610_u2pp_conformer_exp.tar.gz) | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/librispeech/20210610_u2pp_conformer_libtorch.tar.gz) | | -| [multi_cn](../examples/multi_cn/s0/README.md) | CN | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/multi_cn/20210815_unified_conformer_exp.tar.gz) | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/multi_cn/20210815_unified_conformer_libtorch.tar.gz) | | -| [wenetspeech](../examples/wenetspeech/s0/README.md) | CN | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/wenetspeech/wenetspeech_u2pp_conformer_exp.tar.gz) | [Conformer](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/wenetspeech/wenetspeech_u2pp_conformer_libtorch.tar.gz) | | diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/pretrained_models.md b/models/audio/speech_recognition/conformer/igie/wenet/docs/pretrained_models.md deleted file mode 100644 index ee4fc02e524163ef91221413e9a190b206aaa93f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/pretrained_models.md +++ /dev/null @@ -1,28 +0,0 @@ -# Pretrained Models in WeNet - -## Model Types -We provide two types of pretrained model in WeNet to facilitate users with different requirements. - -1. 
**Checkpoint Model**, with suffix **.pt**, the model trained and saved as checkpoint by WeNet python code, you can reproduce our published result with it, or you can use it as checkpoint to continue. - -2. **Runtime Model**, with suffix **.zip**, you can directly use `runtime model` in our [x86](https://github.com/wenet-e2e/wenet/tree/main/runtime/libtorch) or [android](https://github.com/wenet-e2e/wenet/tree/main/runtime/android) runtime, the `runtime model` is export by Pytorch JIT on the `checkpoint model`. And the runtime models has been quantized to reduce the model size and network traffic. - -## Model License - -The pretrained model in WeNet follows the license of it's corresponding dataset. -For example, the pretrained model on LibriSpeech follows `CC BY 4.0`, since it is used as license of the LibriSpeech dataset, see http://openslr.org/12/. - -## Model List - -Here is a list of the pretrained models on different datasets. - -For non-Chinese users, please visit [Pretrained Models(En)](./pretrained_models.en.md) to download. - -| Datasets | Languages | Checkpoint Model | Runtime Model | Contributor | -|--- |--- |--- |--- |--- | -| [aishell](../examples/aishell/s0/README.md) | CN | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | | -| [aishell2](../examples/aishell2/s0/README.md) | CN | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | | -| [gigaspeech](../examples/gigaspeech/s0/README.md) | EN | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | | -| [librispeech](../examples/librispeech/s0/README.md) | EN | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | | -| [multi_cn](../examples/multi_cn/s0/README.md) | CN | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | | -| [wenetspeech](../examples/wenetspeech/s0/README.md) | CN | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | [Conformer](https://docs.qq.com/form/page/DZnRkVHlnUk5QaFdC) | | diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/python_binding.md b/models/audio/speech_recognition/conformer/igie/wenet/docs/python_binding.md deleted file mode 100644 index b5a86ff853d4fa13540d2926706e0c1113eaee64..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/python_binding.md +++ /dev/null @@ -1,105 +0,0 @@ -# WeNet Python Binding - -This is a python binding of WeNet. - -WeNet is a production first and production ready end-to-end speech recognition toolkit. - -The best things of the binding are: - -1. Multiple languages supports, including English, Chinese. Other languages are in development. -2. Non-streaming and streaming API -3. N-best, contextual biasing, and timestamp supports, which are very important for speech productions. -4. Alignment support. You can get phone level alignments this tool, on developing. - -## Install - -Python 3.6+ is required. - -``` sh -pip3 install wenetruntime -``` - -## Usage - -Note: - -1. For macOS, wenetruntime packed `libtorch.so`, so we can't import torch and wenetruntime at the same time. -2. For Windows and Linux, wenetruntime depends on torch. Please install and import the same version `torch` as wenetruntime. 
- -### Non-streaming Usage - -``` python -import sys -import torch -import wenetruntime as wenet - -wav_file = sys.argv[1] -decoder = wenet.Decoder(lang='chs') -ans = decoder.decode_wav(wav_file) -print(ans) -``` - -You can also specify the following parameter in `wenet.Decoder` - -* `lang` (str): The language you used, `chs` for Chinese, and `en` for English. -* `model_dir` (str): is the `Runtime Model` directory, it contains the following files. - If not provided, official model for specific `lang` will be downloaded automatically. - - * `final.zip`: runtime TorchScript ASR model. - * `units.txt`: modeling units file - * `TLG.fst`: optional, it means decoding with LM when `TLG.fst` is given. - * `words.txt`: optional, word level symbol table for decoding with `TLG.fst` - - Please refer https://github.com/wenet-e2e/wenet/blob/main/docs/pretrained_models.md for the details of `Runtime Model`. - -* `nbest` (int): Output the top-n best result. -* `enable_timestamp` (bool): Whether to enable the word level timestamp. -* `context` (List[str]): a list of context biasing words. -* `context_score` (float): context bonus score. -* `continuous_decoding` (bool): Whether to enable continuous(long) decoding. - -For example: -``` python -decoder = wenet.Decoder(model_dir, - lang='chs', - nbest=5, - enable_timestamp=True, - context=['不忘初心', '牢记使命'], - context_score=3.0) -``` - -### Streaming Usage - -``` python -import sys -import torch -import wave -import wenetruntime as wenet - -test_wav = sys.argv[1] - -with wave.open(test_wav, 'rb') as fin: - assert fin.getnchannels() == 1 - wav = fin.readframes(fin.getnframes()) - -decoder = wenet.Decoder(lang='chs') -# We suppose the wav is 16k, 16bits, and decode every 0.5 seconds -interval = int(0.5 * 16000) * 2 -for i in range(0, len(wav), interval): - last = False if i + interval < len(wav) else True - chunk_wav = wav[i: min(i + interval, len(wav))] - ans = decoder.decode(chunk_wav, last) - print(ans) -``` - -You can use the same parameters as we introduced above to control the behavior of `wenet.Decoder` - - -## Build on Your Local Machine - -``` sh -git clone https://github.com/wenet-e2e/wenet.git -cd wenet/runtime/binding/python -python setup.py install -``` - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/runtime.md b/models/audio/speech_recognition/conformer/igie/wenet/docs/runtime.md deleted file mode 100644 index cf65e7505aaf44055479ed9f8371e2807558ae8d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/runtime.md +++ /dev/null @@ -1,69 +0,0 @@ -# Runtime for WeNet - -WeNet runtime uses [Unified Two Pass (U2)](https://arxiv.org/pdf/2102.01547.pdf) framework for inference. U2 has the following advantages: -* **Unified**: U2 unified the streaming and non-streaming model in a simple way, and our runtime is also unified. Therefore you can easily balance the latency and accuracy by changing chunk_size (described in the following section). -* **Accurate**: U2 achieves better accuracy by CTC joint training. -* **Fast**: Our runtime uses attention rescoring based decoding method described in U2, which is much faster than a traditional autoregressive beam search. -* **Other benefits**: In practice, we find U2 is more stable on long-form speech than standard transformer which usually fails or degrades a lot on long-form speech; and we can easily get the word-level time stamps by CTC spikes in U2. Both of these aspects are favored for industry adoption. 
- -## Platforms Supported - -The WeNet runtime supports the following platforms. - -* Server - * [x86](https://github.com/wenet-e2e/wenet/tree/main/runtime/libtorch) -* Device - * [android](https://github.com/wenet-e2e/wenet/tree/main/runtime/android) - -## Architecture and Implementation - -### Architecture - -The following picture shows how U2 works. - -![U2](images/u2.gif) - -When input is not finished, the input frames $x_t$ are fed into the *Shared Encoder* module frame by frame to get the encoder output $e_t$, then $e_t$ is transformed by the *CTC Activation* module (typically, it's just a linear transform with a log_softmax) to get the CTC prob $y_t$ at current frame, and $y_t$ is further used by the *CTC prefix beam search* module to generate n-best results at current time $t$, and the best result is used as partial result of the U2 system. - -When input is finished at time $T$, the n-best results from the *CTC prefix beam search* module and the encoder output $e_1, e_2, e_3, ..., e_T$ are fed into the *Attention Decoder* module, then the *Attention Decoder* module computes a score for every result. The result with the best score is selected as the final result of U2 system. - -We can group $C$ continuous frames $x_t, x_{t+1}, x_{t+C}$ as one chunk for the *Shared Encoder* module, and $C$ is called chunk_size in the U2 framework. The chunk_size will affect the attention computation in the *Shared Encoder* module. When chunk_size is infinite, it is a non-streaming case. The system gives the best accuracy with infinite latency. When chunk_size is limited (typically less than 1s), it is a streaming case. The system has limited latency and also gives promising accuracy. So the developer can balance the accuracy and latency and setting a proper chunk_size. - -### Interface Design - -We use LibTorch to implement U2 runtime in WeNet, and we export several interfaces in PyTorch python code -by @torch.jit.export (see [asr_model.py](https://github.com/wenet-e2e/wenet/tree/main/wenet/transformer/asr_model.py)), -which are required and used in C++ runtime in [torch_asr_model.cc](https://github.com/wenet-e2e/wenet/tree/main/runtime/libtorch/decoder/torch_asr_model.cc). -Here we just list the interface and give a brief introduction. - -| interface | description | -|----------------------------------|-----------------------------------------| -| subsampling_rate (args) | get the subsampling rate of the model | -| right_context (args) | get the right context of the model | -| sos_symbol (args) | get the sos symbol id of the model | -| eos_symbol (args) | get the eos symbol id of the model | -| forward_encoder_chunk (args) | used for the *Shared Encoder* module | -| ctc_activation (args) | used for the *CTC Activation* module | -| forward_attention_decoder (args) | used for the *Attention Decoder* module | - -### Cache in Details - -For streaming scenario, the *Shared Encoder* module works in an incremental way. The current chunk computation requries the inputs and outputs of all the history chunks. We implement the incremental computation by using caches. Overall, two types of cache are used in our runtime. - -* att_cache: the attention cache of the *Shared Encoder*(Conformer/Transformer) module. -* cnn_cache: the cnn cache of the *Shared Encoder*, which caches the left context for causal CNN computation in Conformer. 
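-
-As a rough illustration of how these two caches are threaded through chunk-by-chunk inference, here is a schematic
-sketch. The tensor shapes, argument names and the `forward_chunk` call shown here are simplified assumptions for
-illustration only; the authoritative version is in the files referenced in the next paragraph.
-
-```python
-# Schematic sketch of chunk-wise encoding with attention/CNN caches carried along.
-import torch
-
-def stream_encode(encoder, feats, chunk_size):
-    """feats: (batch, time, dim); processed chunk by chunk."""
-    att_cache = torch.zeros(0, 0, 0, 0)  # attention key/value cache, grows per chunk
-    cnn_cache = torch.zeros(0, 0, 0, 0)  # left-context cache for the causal CNN
-    offset, outputs = 0, []
-    for start in range(0, feats.size(1), chunk_size):
-        chunk = feats[:, start:start + chunk_size, :]
-        # every call consumes the caches from all previous chunks and returns updated ones
-        ys, att_cache, cnn_cache = encoder.forward_chunk(
-            chunk, offset, -1, att_cache, cnn_cache)
-        offset += ys.size(1)
-        outputs.append(ys)
-    return torch.cat(outputs, dim=1)
-```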
-
-Please see [encoder.py:forward_chunk()](https://github.com/wenet-e2e/wenet/tree/main/wenet/transformer/encoder.py) and [torch_asr_model.cc](https://github.com/wenet-e2e/wenet/tree/main/runtime/libtorch/decoder/torch_asr_model.cc) for the details of the caches.
-
-In practice, CNN is also used for subsampling, so we should handle the CNN cache in subsampling as well.
-However, the subsampling module contains different CNN layers with different left contexts, right contexts and strides, which makes it tricky to implement a CNN cache for subsampling directly.
-In our implementation, we simply overlap the inputs to avoid a subsampling CNN cache.
-It is simple and straightforward, with negligible additional cost, since the subsampling CNN only accounts for a very small fraction of the whole computation.
-The following picture shows how it works, where the blue color marks the overlap between the current inputs and the previous inputs.
-
-![Overlap input for Subsampling CNN](images/subsampling_overalp.gif)
-
-## References
-1. [Sequence Modeling With CTC](https://distill.pub/2017/ctc/)
-2. [First-Pass Large Vocabulary Continuous Speech Recognition using Bi-Directional Recurrent DNNs](https://arxiv.org/pdf/1408.2873.pdf)
-3. [Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition](https://arxiv.org/pdf/2012.05481.pdf)
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/tutorial_aishell.md b/models/audio/speech_recognition/conformer/igie/wenet/docs/tutorial_aishell.md
deleted file mode 100644
index ea4e80d32439bbce901a18724d81e1ccc37fd49f..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/docs/tutorial_aishell.md
+++ /dev/null
@@ -1,173 +0,0 @@
-## Tutorial on AIShell
-
-If you meet any problems when going through this tutorial, please feel free to ask in the GitHub [issues](https://github.com/mobvoi/wenet/issues). Thanks for any kind of feedback.
-
-### Setup environment
-
-Please follow [Installation](https://github.com/wenet-e2e/wenet#installation) to install WeNet.
-
-### First Experiment
-
-We provide a recipe `example/aishell/s0/run.sh` on the aishell-1 data.
-
-The recipe is simple, and we suggest you run each stage one by one manually and check the result to understand the whole process.
-
-```
-cd example/aishell/s0
-bash run.sh --stage -1 --stop_stage -1
-bash run.sh --stage 0 --stop_stage 0
-bash run.sh --stage 1 --stop_stage 1
-bash run.sh --stage 2 --stop_stage 2
-bash run.sh --stage 3 --stop_stage 3
-bash run.sh --stage 4 --stop_stage 4
-bash run.sh --stage 5 --stop_stage 5
-bash run.sh --stage 6 --stop_stage 6
-```
-
-You could also just run the whole script:
-```
-bash run.sh --stage -1 --stop_stage 6
-```
-
-
-#### Stage -1: Download data
-
-This stage downloads the aishell-1 data to the local path `$data`. This may take several hours. If you have already downloaded the data, please change the `$data` variable in `run.sh` and start from `--stage 0`.
-Please set an **absolute path** for `$data`, e.g.
`/home/username/asr-data/aishell/`
-
-#### Stage 0: Prepare Training data
-
-In this stage, `local/aishell_data_prep.sh` organizes the original aishell-1 data into two files:
-* **wav.scp** each line records two tab-separated columns: `wav_id` and `wav_path`
-* **text** each line records two tab-separated columns: `wav_id` and `text_label`
-
-**wav.scp**
-```
-BAC009S0002W0122 /export/data/asr-data/OpenSLR/33/data_aishell/wav/train/S0002/BAC009S0002W0122.wav
-BAC009S0002W0123 /export/data/asr-data/OpenSLR/33/data_aishell/wav/train/S0002/BAC009S0002W0123.wav
-BAC009S0002W0124 /export/data/asr-data/OpenSLR/33/data_aishell/wav/train/S0002/BAC009S0002W0124.wav
-BAC009S0002W0125 /export/data/asr-data/OpenSLR/33/data_aishell/wav/train/S0002/BAC009S0002W0125.wav
-...
-```
-
-**text**
-```
-BAC009S0002W0122 而对楼市成交抑制作用最大的限购
-BAC009S0002W0123 也成为地方政府的眼中钉
-BAC009S0002W0124 自六月底呼和浩特市率先宣布取消限购后
-BAC009S0002W0125 各地政府便纷纷跟进
-...
-```
-
-If you want to train using your customized data, just organize the data into two files, `wav.scp` and `text`, and start from `stage 1`.
-
-
-#### Stage 1: Extract optional cmvn features
-
-`example/aishell/s0` uses raw wav as input and uses [TorchAudio](https://pytorch.org/audio/stable/index.html) to extract the features just-in-time in the dataloader. So in this step we just copy the training wav.scp and text file into the `raw_wav/train/` dir.
-
-`tools/compute_cmvn_stats.py` is used to extract global cmvn (cepstral mean and variance normalization) statistics. These statistics will be used to normalize the acoustic features. Setting `cmvn=false` will skip this step.
-
-#### Stage 2: Generate label token dictionary
-
-The dict is a map between label tokens (we use characters for Aishell-1) and the integer indices.
-
-An example dict is as follows:
-```
-<blank> 0
-<unk> 1
-一 2
-丁 3
-...
-龚 4230
-龟 4231
-<sos/eos> 4232
-```
-
-* `<blank>` denotes the blank symbol for CTC.
-* `<unk>` denotes the unknown token; any out-of-vocabulary token will be mapped to it.
-* `<sos/eos>` denotes the start-of-speech and end-of-speech symbols for attention-based encoder decoder training, and they share the same id.
-
-#### Stage 3: Prepare WeNet data format
-
-This stage generates the WeNet required format file `data.list`. Each line in `data.list` is in JSON format and contains the following fields.
-
-1. `key`: key of the utterance
-2. `wav`: audio file path of the utterance
-3. `txt`: normalized transcription of the utterance; the transcription will be tokenized to the model units on the fly at the training stage.
-
-Here is an example of the `data.list`; please see the generated training feature file in `data/train/data.list`.
-
-```
-{"key": "BAC009S0002W0122", "wav": "/export/data/asr-data/OpenSLR/33//data_aishell/wav/train/S0002/BAC009S0002W0122.wav", "txt": "而对楼市成交抑制作用最大的限购"}
-{"key": "BAC009S0002W0123", "wav": "/export/data/asr-data/OpenSLR/33//data_aishell/wav/train/S0002/BAC009S0002W0123.wav", "txt": "也成为地方政府的眼中钉"}
-{"key": "BAC009S0002W0124", "wav": "/export/data/asr-data/OpenSLR/33//data_aishell/wav/train/S0002/BAC009S0002W0124.wav", "txt": "自六月底呼和浩特市率先宣布取消限购后"}
-```
-
-We also design another format for `data.list`, named `shard`, which is for big data training.
-Please see [gigaspeech](https://github.com/wenet-e2e/wenet/tree/main/examples/gigaspeech/s0)(10k hours) or
-[wenetspeech](https://github.com/wenet-e2e/wenet/tree/main/examples/wenetspeech/s0)(10k hours)
-for how to use the `shard` style `data.list` if you want to apply WeNet to big datasets (more than 5k hours).
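-
-Before moving on to training, here is a minimal sketch of how the raw-mode `data.list` above can be generated from
-`wav.scp` and `text`. WeNet ships a helper for this (`tools/make_raw_list.py`, used in the recipes); the sketch below
-only illustrates the JSON-lines format and is not that script.
-
-```python
-# Minimal sketch: join wav.scp and text into a raw-mode data.list (one JSON object per line).
-import json
-
-def make_raw_list(wav_scp, text_file, out_list):
-    texts = {}
-    with open(text_file, encoding="utf-8") as f:
-        for line in f:
-            key, txt = line.strip().split(maxsplit=1)
-            texts[key] = txt
-    with open(wav_scp, encoding="utf-8") as f_wav, \
-         open(out_list, "w", encoding="utf-8") as f_out:
-        for line in f_wav:
-            key, wav = line.strip().split(maxsplit=1)
-            # fields match the example above: key / wav / txt
-            obj = {"key": key, "wav": wav, "txt": texts[key]}
-            f_out.write(json.dumps(obj, ensure_ascii=False) + "\n")
-```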
- -#### Stage 4: Neural Network training - -The NN model is trained in this step. - -- Multi-GPU mode - -If using DDP mode for multi-GPU, we suggest using `dist_backend="nccl"`. If the NCCL does not work, try using `gloo` or use `torch==1.6.0` -Set the GPU ids in CUDA_VISIBLE_DEVICES. For example, set `export CUDA_VISIBLE_DEVICES="0,1,2,3,6,7"` to use card 0,1,2,3,6,7. - -- Resume training - -If your experiment is terminated after running several epochs for some reasons (e.g. the GPU is accidentally used by other people and is out-of-memory ), you could continue the training from a checkpoint model. Just find out the finished epoch in `exp/your_exp/`, set `checkpoint=exp/your_exp/$n.pt` and run the `run.sh --stage 4`. Then the training will continue from the $n+1.pt - -- Config - -The config of neural network structure, optimization parameter, loss parameters, and dataset can be set in a YAML format file. - -In `conf/`, we provide several models like transformer and conformer. see `conf/train_conformer.yaml` for reference. - -- Use Tensorboard - -The training takes several hours. The actual time depends on the number and type of your GPU cards. In an 8-card 2080 Ti machine, it takes about less than one day for 50 epochs. -You could use tensorboard to monitor the loss. - -``` -tensorboard --logdir tensorboard/$your_exp_name/ --port 12598 --bind_all -``` - -#### Stage 5: Recognize wav using the trained model - -This stage shows how to recognize a set of wavs into texts. It also shows how to do the model averaging. - -- Average model - -If `${average_checkpoint}` is set to `true`, the best `${average_num}` models on cross validation set will be averaged to generate a boosted model and used for recognition. - -- Decoding - -Recognition is also called decoding or inference. The function of the NN will be applied on the input acoustic feature sequence to output a sequence of text. - -Four decoding methods are provided in WeNet: - -* `ctc_greedy_search` : encoder + CTC greedy search -* `ctc_prefix_beam_search` : encoder + CTC prefix beam search -* `attention` : encoder + attention-based decoder decoding -* `attention_rescoring` : rescoring the ctc candidates from ctc prefix beam search with encoder output on attention-based decoder. - -In general, attention_rescoring is the best method. Please see [U2 paper](https://arxiv.org/pdf/2012.05481.pdf) for the details of these algorithms. - -`--beam_size` is a tunable parameter, a large beam size may get better results but also cause higher computation cost. - -`--batch_size` can be greater than 1 for "ctc_greedy_search" and "attention" decoding mode, and must be 1 for "ctc_prefix_beam_search" and "attention_rescoring" decoding mode. - -- WER evaluation - -`tools/compute-wer.py` will calculate the word (or char) error rate of the result. If you run the recipe without any change, you may get WER ~= 5%. - - -#### Stage 6: Export the trained model - -`wenet/bin/export_jit.py` will export the trained model using Libtorch. The exported model files can be easily used for inference in other programming languages such as C++. 
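-
-For reference, the export in this stage is the standard TorchScript save/load round trip; the sketch below is only an
-illustration of that mechanism (the path and the `model` variable are placeholders, not the recipe's actual code).
-
-```python
-# Hedged sketch of the TorchScript round trip behind Stage 6.
-import torch
-
-# Export side: script the trained nn.Module and save it as a .zip archive.
-# script_model = torch.jit.script(model)
-# script_model.save("exp/your_exp/final.zip")
-
-# Deployment side: the archive loads without the Python model definition,
-# from Python as below, or from C++ via torch::jit::load("final.zip").
-runtime_model = torch.jit.load("exp/your_exp/final.zip")
-runtime_model.eval()
-```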
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/docs/tutorial_librispeech.md b/models/audio/speech_recognition/conformer/igie/wenet/docs/tutorial_librispeech.md deleted file mode 100644 index 223f3b6a913def973a5ce3feb9b95d73ab9b491d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/docs/tutorial_librispeech.md +++ /dev/null @@ -1,436 +0,0 @@ -## Tutorial on LibriSpeech - -If you meet any problems when going through this tutorial, please feel free to ask in github [issues](https://github.com/mobvoi/wenet/issues). Thanks for any kind of feedback. - -### Setup environment - -Please follow [Installation](https://github.com/wenet-e2e/wenet#installation) to install WeNet. - -### First Experiment - -We provide a recipe `example/librispeech/s0/run.sh` on librispeech data. - -The recipe is simple and we suggest you run each stage one by one manually and check the result to understand the whole process. - -``` -cd example/librispeech/s0 -bash run.sh --stage -1 --stop_stage -1 -bash run.sh --stage 0 --stop_stage 0 -bash run.sh --stage 1 --stop_stage 1 -bash run.sh --stage 2 --stop_stage 2 -bash run.sh --stage 3 --stop_stage 3 -bash run.sh --stage 4 --stop_stage 4 -bash run.sh --stage 5 --stop_stage 5 -bash run.sh --stage 6 --stop_stage 6 -bash run.sh --stage 7 --stop_stage 7 -``` - -You could also just run the whole script -``` -bash run.sh --stage -1 --stop_stage 7 -``` - - -#### Stage -1: Download data - -``` sh -data_url=www.openslr.org/resources/12 -datadir=/export/data/en-asr-data/OpenSLR/ -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - echo "stage -1: Data Download" - for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do - local/download_and_untar.sh ${datadir} ${data_url} ${part} - done -fi -``` - -This stage downloads the librispeech data to the local path `$data`. This may take several hours. If you have already downloaded the data, please change the `$data` variable in `run.sh` and start from `--stage 0`. - -#### Stage 0: Prepare Training data - -``` sh -wave_data=data -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do - # use underscore-separated names in data directories. - local/data_prep_torchaudio.sh ${datadir}/LibriSpeech/${part} $wave_data/${part//-/_} - done -fi -``` - -In this stage, `local/data_prep_torchaudio.sh` organizes the original data into two files: -* **wav.scp** each line records two tab-separated columns : `wav_id` and `wav_path` -* **text** each line records two tab-separated columns : `wav_id` and `text_label` - -**wav.scp** -``` -1867-154075-0014 /export/data/en-asr-data/OpenSLR//LibriSpeech/train-clean-100/1867/154075/1867-154075-0014.flac -1970-26100-0022 /export/data/en-asr-data/OpenSLR//LibriSpeech/train-clean-100/1970/26100/1970-26100-0022.flac -... -``` - -**text** -``` -1867-154075-0014 YOU SHOW HIM THAT IT IS POSSIBLE -1970-26100-0022 DID YOU SEE HIM AT THAT TIME -... -``` - -If you want to train using your customized data, just organize the data into two files `wav.scp` and `text`, and start from `stage 1`. - - -#### Stage 1: Extract optinal cmvn features - -``` sh -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - ### Task dependent. You have to design training and dev sets by yourself. 
- ### But you can utilize Kaldi recipes in most cases - echo "stage 1: Feature Generation" - mkdir -p $wave_data/train_960 - # merge total training data - for set in train_clean_100 train_clean_360 train_other_500; do - for f in `ls $wave_data/$set`; do - cat $wave_data/$set/$f >> $wave_data/train_960/$f - done - done - mkdir -p $wave_data/dev - # merge total dev data - for set in dev_clean dev_other; do - for f in `ls $wave_data/$set`; do - cat $wave_data/$set/$f >> $wave_data/dev/$f - done - done - - tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \ - --in_scp $wave_data/$train_set/wav.scp \ - --out_cmvn $wave_data/$train_set/global_cmvn - -fi -``` - -The librispeech corpus contains 3 subsets for training, namely `train_clean_100`, `train_clean_360`, and `train_other_500`, -so we first merge them to get our final training data. - -`tools/compute_cmvn_stats.py` is used to extract global cmvn(cepstral mean and variance normalization) statistics. These statistics will be used to normalize the acoustic features. Setting `cmvn=false` will skip this step. - -#### Stage 2: Generate label token dictionary - -``` sh -dict=$wave_data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt -bpemodel=$wave_data/lang_char/${train_set}_${bpemode}${nbpe} -echo "dictionary: ${dict}" -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - ### Task dependent. You have to check non-linguistic symbols used in the corpus. - echo "stage 2: Dictionary and Json Data Preparation" - mkdir -p data/lang_char/ - - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - # we borrowed these code and scripts which are related bpe from ESPnet. - cut -f 2- -d" " $wave_data/${train_set}/text > $wave_data/lang_char/input.txt - tools/spm_train --input=$wave_data/lang_char/input.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000 - tools/spm_encode --model=${bpemodel}.model --output_format=piece < $wave_data/lang_char/input.txt | tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict} - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # - wc -l ${dict} -fi -``` - -The model unit of English e2e speech recognition system could be char or BPE(byte-pair-encoding). -Typically, BPE shows better result. So here we use BPE as model unit, -and the BPE is trained by [sentencepiece](https://github.com/google/sentencepiece) tool on the librispeech training data. - -The model unit is defined as a dict in WeNet, which maps the a BPE into integer index. -The librispeech dict is like: - -``` - 0 - 1 -' 2 -▁ 3 -A 4 -▁A 5 -AB 6 -▁AB 7 -▁YOU 4995 -▁YOUNG 4996 -▁YOUR 4997 -▁YOUTH 4998 -Z 4999 -ZZ 5000 - 5001 -``` - -* `` denotes the blank symbol for CTC. -* `` denotes the unknown token, any out-of-vocabulary tokens will be mapped into it. -* `` denotes start-of-speech and end-of-speech symbols for attention based encoder decoder training, and they shares the same id. - -#### Stage 3: Prepare WeNet data format - -``` sh -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - # Prepare wenet required data - echo "Prepare data, prepare required format" - for x in dev ${recog_set} $train_set ; do - tools/make_raw_list.py $wave_data/$x/wav.scp $wave_data/$x/text \ - $wave_data/$x/data.list - done - -fi -``` - -This stage generates the WeNet required format file `data.list`. Each line in `data.list` is in json format which contains the following fields. - -1. `key`: key of the utterance -2. 
`wav`: audio file path of the utterance -3. `txt`: normalized transcription of the utterance, the transcription will be tokenized to the model units on-the-fly at the training stage. - -Here is an example of the `data.list`, and please see the generated training feature file in `data/train/data.list`. - -``` -{"key": "1455-134435-0000", "wav": "/mnt/nfs/ptm1/open-data/LibriSpeech/train-clean-100/1455/134435/1455-134435-0000.flac", "txt": "THE GIRL WHO CAME INTO THE WORLD ON THAT NIGHT WHEN JESSE RAN THROUGH THE FIELDS CRYING TO GOD THAT HE BE GIVEN A SON HAD GROWN TO WOMANHOOD ON THE FARM"} -{"key": "1455-134435-0001", "wav": "/mnt/nfs/ptm1/open-data/LibriSpeech/train-clean-100/1455/134435/1455-134435-0001.flac", "txt": "AND WHEN NOT ANGRY SHE WAS OFTEN MOROSE AND SILENT IN WINESBURG IT WAS SAID THAT SHE DRANK HER HUSBAND THE BANKER"} -{"key": "1455-134435-0002", "wav": "/mnt/nfs/ptm1/open-data/LibriSpeech/train-clean-100/1455/134435/1455-134435-0002.flac", "txt": "BUT LOUISE COULD NOT BE MADE HAPPY SHE FLEW INTO HALF INSANE FITS OF TEMPER DURING WHICH SHE WAS SOMETIMES SILENT SOMETIMES NOISY AND QUARRELSOME SHE SWORE AND CRIED OUT IN HER ANGER SHE GOT A KNIFE FROM THE KITCHEN AND THREATENED HER HUSBAND'S LIFE"} -``` - -We aslo design another format for `data.list` named `shard` which is for big data training. -Please see [gigaspeech](https://github.com/wenet-e2e/wenet/tree/main/examples/gigaspeech/s0)(10k hours) or -[wenetspeech](https://github.com/wenet-e2e/wenet/tree/main/examples/wenetspeech/s0)(10k hours) -for how to use `shard` style `data.list` if you want to apply WeNet on big data set(more than 5k). - -#### Stage 4: Neural Network training - -``` sh -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - # Training - mkdir -p $dir - INIT_FILE=$dir/ddp_init - rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="nccl" - cmvn_opts= - $cmvn && cmvn_opts="--cmvn $wave_data/${train_set}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type raw \ - --symbol_table $dict \ - --train_data $wave_data/$train_set/data.list \ - --cv_data $wave_data/dev/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $num_gpus \ - --ddp.rank $i \ - --ddp.dist_backend $dist_backend \ - --num_workers 1 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi -``` - -The NN model is trained in this step. - -- Multi-GPU mode - -If using DDP mode for multi-GPU, we suggest using `dist_backend="nccl"`. If the NCCL does not work, try using `gloo` or use `torch==1.6.0` -Set the GPU ids in CUDA_VISIBLE_DEVICES. For example, set `export CUDA_VISIBLE_DEVICES="0,1,2,3,6,7"` to use card 0,1,2,3,6,7. - -- Resume training - -If your experiment is terminated after running several epochs for some reasons (e.g. the GPU is accidentally used by other people and is out-of-memory ), you could continue the training from a checkpoint model. 
Just find out the finished epoch in `exp/your_exp/`, set `checkpoint=exp/your_exp/$n.pt` and run the `run.sh --stage 4`. Then the training will continue from the $n+1.pt - -- Config - -The config of neural network structure, optimization parameter, loss parameters, and dataset can be set in a YAML format file. - -In `conf/`, we provide several models like transformer and conformer. see `conf/train_conformer.yaml` for reference. - -- Use Tensorboard - -The training takes several hours. The actual time depends on the number and type of your GPU cards. In an 8-card 2080 Ti machine, it takes about less than one day for 50 epochs. -You could use tensorboard to monitor the loss. - -``` -tensorboard --logdir tensorboard/$your_exp_name/ --port 12598 --bind_all -``` - -#### Stage 5: Recognize wav using the trained model - -``` sh -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then -# Test model, please specify the model you want to test by --checkpoint - cmvn_opts= - $cmvn && cmvn_opts="--cmvn data/${train_set}/global_cmvn" - # TODO, Add model average here - mkdir -p $dir/test - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size= - ctc_weight=0.5 - # Polling GPU id begin with index 0 - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - idx=0 - for test in $recog_set; do - for mode in ${decode_modes}; do - { - { - test_dir=$dir/${test}_${mode} - mkdir -p $test_dir - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$idx+1]) - python wenet/bin/recognize.py --gpu $gpu_id \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type raw \ - --test_data $wave_data/$test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --result_file $test_dir/text_bpe \ - --ctc_weight $ctc_weight \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - - tools/spm_decode --model=${bpemodel}.model --input_format=piece < $test_dir/text_bpe | sed -e "s/▁/ /g" > $test_dir/text - python tools/compute-wer.py --char=1 --v=1 \ - $wave_data/$test/text $test_dir/text > $test_dir/wer - } & - - ((idx+=1)) - if [ $idx -eq $num_gpus ]; then - idx=0 - fi - } - done - done - wait - -fi - -``` - -This stage shows how to recognize a set of wavs into texts. It also shows how to do the model averaging. - -- Average model - -If `${average_checkpoint}` is set to `true`, the best `${average_num}` models on cross validation set will be averaged to generate a boosted model and used for recognition. - -- Decoding - -Recognition is also called decoding or inference. The function of the NN will be applied on the input acoustic feature sequence to output a sequence of text. - -Four decoding methods are provided in WeNet: - -* `ctc_greedy_search` : encoder + CTC greedy search -* `ctc_prefix_beam_search` : encoder + CTC prefix beam search -* `attention` : encoder + attention-based decoder decoding -* `attention_rescoring` : rescoring the ctc candidates from ctc prefix beam search with encoder output on attention-based decoder. - -In general, attention_rescoring is the best method. Please see [U2 paper](https://arxiv.org/pdf/2012.05481.pdf) for the details of these algorithms. 
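Model averaging as described above amounts to an element-wise mean over the parameter tensors of the selected checkpoints. The sketch below assumes checkpoints saved as plain `state_dict` files; unlike `wenet/bin/average_model.py`, it does not pick the best epochs by cross-validation loss, and the file names in the usage comment are hypothetical.

```python
import torch

def average_checkpoints(paths):
    """Element-wise average of the parameter tensors stored in several .pt checkpoints."""
    avg = None
    for path in paths:
        state = torch.load(path, map_location="cpu")  # assumed to be a plain state_dict
        if avg is None:
            avg = {k: v.clone().float() for k, v in state.items()}
        else:
            for k in avg:
                avg[k] += state[k].float()
    return {k: v / len(paths) for k, v in avg.items()}

# hypothetical file names; the recipe writes checkpoints as exp/your_exp/<epoch>.pt
# averaged = average_checkpoints(["exp/your_exp/28.pt", "exp/your_exp/29.pt", "exp/your_exp/30.pt"])
# torch.save(averaged, "exp/your_exp/avg_3.pt")
```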
- -`--beam_size` is a tunable parameter, a large beam size may get better results but also cause higher computation cost. - -`--batch_size` can be greater than 1 for "ctc_greedy_search" and "attention" decoding mode, and must be 1 for "ctc_prefix_beam_search" and "attention_rescoring" decoding mode. - -- WER evaluation - -`tools/compute-wer.py` will calculate the word (or char) error rate of the result. - - -#### Stage 6(Optional): Export the trained model - -``` sh -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip -fi -``` - -`wenet/bin/export_jit.py` will export the trained model using Libtorch. -The exported model files can be easily used for C++ inference in our runtime. -It is required if you want to integrate language model(LM), as shown in Stage 7. - - -#### Stage 7(Optional): Add LM and test it with runtime - - - -``` sh -if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then - lm=data/local/lm - lexicon=data/local/dict/lexicon.txt - mkdir -p $lm - mkdir -p data/local/dict - - # 7.1 Download & format LM - which_lm=3-gram.pruned.1e-7.arpa.gz - if [ ! -e ${lm}/${which_lm} ]; then - wget http://www.openslr.org/resources/11/${which_lm} -P ${lm} - fi - echo "unzip lm($which_lm)..." - gunzip -k ${lm}/${which_lm} -c > ${lm}/lm.arpa - echo "Lm saved as ${lm}/lm.arpa" - - # 7.2 Prepare dict - unit_file=$dict - bpemodel=$bpemodel - # use $dir/words.txt (unit_file) and $dir/train_960_unigram5000 (bpemodel) - # if you download pretrained librispeech conformer model - cp $unit_file data/local/dict/units.txt - if [ ! -e ${lm}/librispeech-lexicon.txt ]; then - wget http://www.openslr.org/resources/11/librispeech-lexicon.txt -P ${lm} - fi - echo "build lexicon..." - tools/fst/prepare_dict.py $unit_file ${lm}/librispeech-lexicon.txt \ - $lexicon $bpemodel.model - echo "lexicon saved as '$lexicon'" - - # 7.3 Build decoding TLG - tools/fst/compile_lexicon_token_fst.sh \ - data/local/dict data/local/tmp data/local/lang - tools/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1; - - # 7.4 Decoding with runtime - fst_dir=data/lang_test - for test in ${recog_set}; do - ./tools/decode.sh --nj 6 \ - --beam 10.0 --lattice_beam 5 --max_active 7000 --blank_skip_thresh 0.98 \ - --ctc_weight 0.5 --rescoring_weight 1.0 --acoustic_scale 1.2 \ - --fst_path $fst_dir/TLG.fst \ - --dict_path $fst_dir/words.txt \ - data/$test/wav.scp data/$test/text $dir/final.zip $fst_dir/units.txt \ - $dir/lm_with_runtime_${test} - tail $dir/lm_with_runtime_${test}/wer - done -fi -``` - -LM is only supported in runtime, you have to build the runtime as shown in [Installation](https://github.com/wenet-e2e/wenet#installation), -and please refer [LM for WeNet](https://wenet-e2e.github.io/wenet/lm.html) for the details of LM design. - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/README.md deleted file mode 100644 index 75fb1e43a9398ee1826a617882029e09e25f3b93..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/README.md +++ /dev/null @@ -1,146 +0,0 @@ -# Recipe to run Noisy Student Training with LM filter in WeNet - -Noisy Student Training (NST) has recently demonstrated extremely strong performance in Automatic Speech Recognition (ASR). 
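The WER and CER numbers quoted in these recipes come down to a token-level edit distance between reference and hypothesis. Below is a minimal sketch of that computation, not the actual `tools/compute-wer.py` (which also handles tokenization, casing and report formatting); pass character lists instead of word lists to get CER.

```python
def error_rate(ref_tokens: list, hyp_tokens: list) -> float:
    """Levenshtein distance between token sequences divided by the reference length."""
    d = [[0] * (len(hyp_tokens) + 1) for _ in range(len(ref_tokens) + 1)]
    for i in range(len(ref_tokens) + 1):
        d[i][0] = i
    for j in range(len(hyp_tokens) + 1):
        d[0][j] = j
    for i in range(1, len(ref_tokens) + 1):
        for j in range(1, len(hyp_tokens) + 1):
            sub = d[i - 1][j - 1] + (ref_tokens[i - 1] != hyp_tokens[j - 1])
            d[i][j] = min(sub, d[i - 1][j] + 1, d[i][j - 1] + 1)
    return d[-1][-1] / max(len(ref_tokens), 1)

ref = "did you see him at that time".split()
hyp = "did you see him at the time".split()
print(error_rate(ref, hyp))  # one substitution out of 7 words ~= 0.143

# CER on characters (toy example)
print(error_rate(list("你好世界"), list("你好时节")))  # 2 substitutions out of 4 chars = 0.5
```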
- -Here, we provide a recipe to run NST with `LM filter` strategy using AISHELL-1 as supervised data and WenetSpeech as unsupervised data from [this paper](https://arxiv.org/abs/2211.04717), where hypotheses with and without Language Model are generated and CER differences between them are utilized as a filter threshold to improve the ASR performances of non-target domain datas. - -## Table of Contents - -- [Guideline](#guideline) - - [Data preparation](#data-preparation) - - [Initial supervised teacher](#initial-supervised-teacher) - - [Noisy student interations](#noisy-student-interations) -- [Performance Record](#performance-record) - - [Supervised baseline and standard NST](##supervised-baseline-and-standard-nst) - - [Supervised AISHELL-1 and unsupervised 1khr WenetSpeech](#supervised-aishell-1-and-unsupervised-1khr-wenetspeech) - - [Supervised AISHELL-2 and unsupervised 4khr WenetSpeech](#supervised-aishell-2-and-unsupervised-4khr-wenetspeech) -- [Citations](#citations) - -## Guideline - - -First, you have to prepare supervised and unsupervised data for NST. Then in stage 1 of `run.sh`, you will train an initial supervised teacher and generate pseudo labels for unsupervised data. -After that, you can run the noisy student training iteratively in stage 2. The whole pipeline is illustrated in the following picture. - -![plot](local/NST_plot.png) - -### Data preparation - -To run this recipe, you should follow the steps from [WeNet examples](https://github.com/wenet-e2e/wenet/tree/main/examples) to prepare [AISHELL1](https://github.com/wenet-e2e/wenet/tree/main/examples/aishell/s0) and [WenetSpeech](https://github.com/wenet-e2e/wenet/tree/main/examples/wenetspeech/s0) data. -We extract 1khr data from WenetSpeech and data should be prepared and stored in the following format: - -``` -data/ -├── train/ -├──── data_aishell.list -├──── wenet_1khr.list -├──── wav_dir/ -├──── utter_time.json (optional) -├── dev/ -└── test/ - -``` -- Files `*.list` contain paths for all the data shards for training. -- A Json file containing the audio length should be prepared as `utter_time.json` if you want to apply the `speaking rate` filter. -- A wav_dir contains all the audio data (id.wav) and labels (id.txt which is optional) for unsupervised data. - -### Initial supervised teacher - -To train an initial supervised teacher model, run the following command: - -```bash -bash run.sh --stage 1 --stop-stage 1 -``` - -Full arguments are listed below, you can check `run.sh` and `run_nst.sh` for more information about steps in each stage and their arguments. We used `num_split = 60` and generate shards with different cpu for the experiments in our paper which saved us lots of inference time and data shards generation time. - -```bash -bash run.sh --stage 1 --stop-stage 1 --dir exp/conformer_test_fully_supervised --supervised_data_list data_aishell.list --enable_nst 0 --num_split 1 --unsupervised_data_list wenet_1khr.list --dir_split wenet_split_60_test/ --job_num 0 --hypo_name hypothesis_nst0.txt --label 1 --wav_dir data/train/wenet_1k_untar/ --cer_hypo_dir wenet_cer_hypo --cer_label_dir wenet_cer_label --label_file label.txt --cer_hypo_threshold 10 --speak_rate_threshold 0 --utter_time_file utter_time.json --untar_dir data/train/wenet_1khr_untar/ --tar_dir data/train/wenet_1khr_tar/ --out_data_list data/train/wenet_1khr.list -``` -- `dir` contains the training parameters. -- `data_list` contains paths for the training data list. -- `supervised_data_list` contains paths for supervised data shards. 
-- `unsupervised_data_list`contains paths for unsupervised data shards which is used for inference. -- `dir_split` is the directory stores split unsupervised data for parallel computing. -- `out_data_list` is the pseudo label data list file path. -- `enable_nst` indicates whether we train with pseudo label and split data, for initial teacher we set it to 0. -- This recipe uses the default `num_split=1` while we strongly recommend use larger number to decrease the inference and shards generation time. -> **HINTS** If num_split is set to N larger than 1, you need to modify the script in step 4-8 in run_nst.sh to submit N tasks into your own clusters (such as slurm,ngc etc..). -> We strongly recommend to do so since inference and pseudo-data generation is time-consuming. - -### Noisy student interations - -After finishing the initial fully supervised baseline, we now have the mixed list contains both supervised and pseudo data which is `wenet_1khr_nst0.list`. -We will use it as the `data_list` in the training step and the `data_list` for next NST iteration will be generated. - -Here is an example command: - -```bash -bash run.sh --stage 2 --stop-stage 2 --iter_num 2 -``` - -Here we add extra argument `iter_num` for number of NST iterations. Intermediate files are named with `iter_num` as a suffix. -Please check the `run.sh` and `run_nst.sh` scripts for more information about each stage and their arguments. - -## Performance Record - -### Supervised baseline and standard NST -* Non-streaming conformer model with attention rescoring decoder. -* Without filter strategy, first iteration -* Feature info: using FBANK feature, dither, cmvn, online speed perturb -* Training info: lr 0.002, batch size 32, 8 gpu, acc_grad 4, 240 epochs, dither 0.1 -* Decoding info: ctc_weight 0.3, average_num 30 - - -| Supervised | Unsupervised | Test CER | -|--------------------------|--------------|----------| -| AISHELL-1 Only | ---- | 4.85 | -| AISHELL-1+WenetSpeech | ---- | 3.54 | -| AISHELL-1+AISHELL-2 | ---- | 1.01 | -| AISHELL-1 (standard NST) | WenetSpeech | 5.52 | - - - -### Supervised AISHELL-1 and unsupervised 1khr WenetSpeech -* Non-streaming conformer model with attention rescoring decoder. -* Feature info: using FBANK feature -* Training info: lr=0.002, batch_size=32, 8 GPUs, acc_grad=4, 120 epochs, dither=0.1 -* Decoding info: ctc_weight=0.3, average_num=30, pseudo_ratio=0.75 - -| # nst iteration | AISHELL-1 test CER | Pseudo CER| Filtered CER | Filtered hours | -|----------------|--------------------|-----------|--------------|----------------| -| 0 | 4.85 | 47.10 | 25.18 | 323 | -| 1 | 4.86 | 37.02 | 20.93 | 436 | -| 2 | 4.75 | 31.81 | 19.74 | 540 | -| 3 | 4.69 | 28.27 | 17.85 | 592 | -| 4 | 4.48 | 26.64 | 14.76 | 588 | -| 5 | 4.41 | 24.70 | 15.86 | 670 | -| 6 | 4.34 | 23.64 | 15.40 | 669 | -| 7 | 4.31 | 23.79 | 15.75 | 694 | - -### Supervised AISHELL-2 and unsupervised 4khr WenetSpeech -* Non-streaming conformer model with attention rescoring decoder. 
-* Feature info: using FBANK feature -* Training info: lr=0.002, batch_size=32, 8 GPUs, acc_grad=4, 120 epochs, dither=0.1 -* Decoding info: ctc_weight=0.3, average_num=30, pseudo_ratio=0.75 - -| # nst iteration | AISHELL-2 test CER | Pseudo CER | Filtered CER | Filtered hours | -|----------------|--------------------|------------|--------------|----------------| -| 0 | 5.48 | 30.10 | 11.73 | 1637 | -| 1 | 5.09 | 28.31 | 9.39 | 2016 | -| 2 | 4.88 | 25.38 | 9.99 | 2186 | -| 3 | 4.74 | 22.47 | 10.66 | 2528 | -| 4 | 4.73 | 22.23 | 10.43 | 2734 | - - - -## Citations - -``` bibtex - -@article{chen2022NST, - title={Improving Noisy Student Training on Non-target Domain Data for Automatic Speech Recognition}, - author={Chen, Yu and Wen, Ding and Lai, Junjie}, - journal={arXiv preprint arXiv:2203.15455}, - year={2022} -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/conf/train_conformer.yaml deleted file mode 100644 index 8499de2e97b8ae13e15d7cfb8357ae59bb6b6115..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/conf/train_conformer.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 1200 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 240 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/NST_plot.png b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/NST_plot.png deleted file mode 100644 index c652c62caed741bd52d4d1a1a9cf290477be9223..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/NST_plot.png and /dev/null differ diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/generate_data_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/generate_data_list.py deleted file mode 100644 index 684e7cb683a9697e2f0807859ab71c2ac3820d42..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/generate_data_list.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import random - -def get_args(): - parser = argparse.ArgumentParser(description='generate data.list file ') - parser.add_argument('--tar_dir', help='path for tar file') - parser.add_argument('--supervised_data_list', - help='path for supervised data list') - parser.add_argument('--pseudo_data_ratio', - type=float, - help='ratio of pseudo data, ' - '0 means none pseudo data, ' - '1 means all using pseudo data.') - parser.add_argument('--out_data_list', help='output path for data list') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - target_dir = args.tar_dir - pseudo_data_list = os.listdir(target_dir) - output_file = args.out_data_list - pseudo_data_ratio = args.pseudo_data_ratio - supervised_path = args.supervised_data_list - with open(supervised_path, "r") as reader: - supervised_data_list = reader.readlines() - pseudo_len = len(pseudo_data_list) - supervised_len = len(supervised_data_list) - random.shuffle(pseudo_data_list) - random.shuffle(supervised_data_list) - - cur_ratio = pseudo_len / (pseudo_len + supervised_len) - if cur_ratio < pseudo_data_ratio: - pseudo_to_super_datio = pseudo_data_ratio / (1 - pseudo_data_ratio) - supervised_len = int(pseudo_len / pseudo_to_super_datio) - elif cur_ratio > pseudo_data_ratio: - super_to_pseudo_datio = (1 - pseudo_data_ratio) / pseudo_data_ratio - pseudo_len = int(supervised_len / super_to_pseudo_datio) - - for i in range(len(pseudo_data_list)): - pseudo_data_list[i] = target_dir + "/" + pseudo_data_list[i] + "\n" - - fused_list = pseudo_data_list[:pseudo_len] + supervised_data_list[:supervised_len] - - with open(output_file, "w") as writer: - for line in fused_list: - writer.write(line) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/generate_filtered_pseudo_label.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/generate_filtered_pseudo_label.py deleted file mode 100644 index 2a8ee83c32f1c69fcffc796c83cc58e4a6f1eec2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/generate_filtered_pseudo_label.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
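`local/generate_data_list.py` above keeps the pseudo/supervised mix close to `--pseudo_data_ratio` by truncating whichever side is over-represented before concatenating the two lists. The same arithmetic, restated on made-up shard counts:

```python
def balance_counts(pseudo_len: int, supervised_len: int, pseudo_data_ratio: float):
    """Return (pseudo_keep, supervised_keep) so that pseudo_keep / total is close to the target ratio."""
    cur_ratio = pseudo_len / (pseudo_len + supervised_len)
    if cur_ratio < pseudo_data_ratio:
        # too little pseudo data: keep fewer supervised shards
        supervised_len = int(pseudo_len * (1 - pseudo_data_ratio) / pseudo_data_ratio)
    elif cur_ratio > pseudo_data_ratio:
        # too much pseudo data: keep fewer pseudo shards
        pseudo_len = int(supervised_len * pseudo_data_ratio / (1 - pseudo_data_ratio))
    return pseudo_len, supervised_len

# made-up counts: 1000 pseudo shards, 1200 supervised shards, target ratio 0.75
print(balance_counts(1000, 1200, 0.75))  # -> (1000, 333)
```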
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import tarfile -import time -import json - - -def get_args(): - parser = argparse.ArgumentParser(description='generate filter pseudo label') - parser.add_argument('--dir_num', required=True, help='split directory number') - parser.add_argument('--cer_hypo_dir', required=True, - help='prefix for cer_hypo_dir') - parser.add_argument('--utter_time_file', required=True, - help='the json file that contains audio time infos ') - parser.add_argument('--cer_hypo_threshold', required=True, type=float, - help='the cer-hypo threshold used to filter') - parser.add_argument('--speak_rate_threshold', type=float, - help='the cer threshold we use to filter') - parser.add_argument('--dir', required=True, help='dir for the experiment ') - # output untar and tar - parser.add_argument('--untar_dir', required=True, - help='the output path, ' - 'eg: data/train/wenet_untar_cer_hypo_nst1/') - parser.add_argument('--tar_dir', required=True, - help='the tar file path, ' - 'eg: data/train/wenet_tar_cer_hypo_leq_10_nst1/') - parser.add_argument('--wav_dir', required=True, - help='dir to store wav files, ' - 'eg "data/train/wenet_1k_untar/"') - parser.add_argument('--start_tar_id', default=0 , type=int, - help='the initial tar id (for debugging)') - args = parser.parse_args() - return args - - -def make_tarfile(output_filename, source_dir): - with tarfile.open(output_filename, "w") as tar: - tar.add(source_dir, arcname=os.path.basename(source_dir)) - - -def main(): - args = get_args() - dir_num = args.dir_num - dir_name = args.dir - output_dir = args.untar_dir - cer_hypo_threshold = args.cer_hypo_threshold - speak_rate_threshold = args.speak_rate_threshold - utter_time_file = args.utter_time_file - tar_dir = args.tar_dir - wav_dir = args.wav_dir - start_tar_id = args.start_tar_id - os.makedirs(tar_dir, exist_ok=True) - os.makedirs(output_dir, exist_ok=True) - cer_hypo_name = args.cer_hypo_dir - print("start tar id is", start_tar_id) - print("make dirs") - - utter_time_enable = True - dataset = "wenet" - - utter_time = {} - if utter_time_enable: - - if dataset == "wenet": - print("wenet") - with open(utter_time_file, encoding='utf-8') as fh: - utter_time = json.load(fh) - - if dataset == "aishell2": - aishell2_jason = utter_time_file - print("aishell2") - with open(aishell2_jason, "r", encoding="utf-8") as f: - for line in f: - data = json.loads(line) - data_audio = data["audio_filepath"] - t_id = data_audio.split("/")[-1].split(".")[0] - data_duration = data["duration"] - utter_time[t_id] = data_duration - - print(time.time(), "start time ") - cer_dict = {} - print("dir_num = ", dir_num) - cer_hypo_path = dir_name + "/Hypo_LM_diff10/" + cer_hypo_name - cer_hypo_path = cer_hypo_path + "_" + dir_num + "/wer" - with open(cer_hypo_path, 'r', encoding="utf-8") as reader: - data = reader.readlines() - - for i in range(len(data)): - line = data[i] - if line[:3] == 'utt': - wer_list = data[i + 1].split() - wer_pred_lm = float(wer_list[1]) - n_hypo = int(wer_list[3].split("=")[1]) - - utt_list = line.split() - lab_list = data[i + 2].split() - rec_list = data[i + 3].split() - - utt_id = 
utt_list[1] - pred_no_lm = "".join(lab_list[1:]) - pred_lm = "".join(rec_list[1:]) - prediction = "".join(lab_list[1:]) - - if utter_time_enable: - - utt_time = utter_time[utt_id] - - cer_dict[utt_id] = [pred_no_lm, pred_lm, wer_pred_lm, - utt_time, n_hypo, prediction] - else: - cer_dict[utt_id] = [pred_no_lm, pred_lm, - wer_pred_lm, -1, -1, prediction] - - c = 0 - cer_preds = [] - uttr_len = [] - speak_rates = [] - num_lines = 0 - data_filtered = [] - - for key, item in cer_dict.items(): - - cer_pred = item[2] - speak_rate = item[4] / item[3] # char per second - - if cer_pred <= cer_hypo_threshold and speak_rate > speak_rate_threshold: - - num_lines += 1 - c += 1 - cer_preds.append(cer_pred) - uttr_len.append(item[4]) - speak_rates.append(speak_rate) - pred = item[1] - utt_id = key - filtered_line = [utt_id, pred] - data_filtered.append(filtered_line) - - num_uttr = 1000 - len_data = len(data_filtered) - print("total sentences after filter ") - cur_id = start_tar_id * 1000 - end_id = cur_id + num_uttr - if cur_id < len_data < end_id: - end_id = len_data - tar_id = start_tar_id - - not_exist = [] - while end_id <= len_data: - - tar_s = str(tar_id) - diff = 6 - len(tar_s) - for _ in range(diff): - tar_s = "0" + tar_s - - out_put_dir = output_dir + "dir" + str(dir_num) - out_put_dir = out_put_dir + "_" + "tar" + tar_s + "/" - os.makedirs(out_put_dir, exist_ok=True) - - for i in range(cur_id, end_id): - print("dir:", dir_num, ", " "tar: ", tar_id, - ", ", "progress:", i / len_data) - - t_id, utter = data_filtered[i] - - output_path = out_put_dir + t_id + ".txt" - wav_path = wav_dir + t_id + ".wav" - print(wav_path) - wav_exist = os.path.exists(wav_path) - if wav_exist: - # update .txt - with open(output_path, "w", encoding="utf-8") as writer: - writer.write(utter) - # update .wav - os.system("cp" + " " + wav_path + " " - + out_put_dir + t_id + ".wav") - else: - print(" wav does not exists ! ", wav_path) - not_exist.append(wav_path) - - tar_file_name = tar_dir + "dir" + str(dir_num) + "_" + tar_s + ".tar" - # tar the dir - - make_tarfile(tar_file_name, out_put_dir) - # update index - tar_id += 1 - cur_id += num_uttr - end_id += num_uttr - - if cur_id < len_data < end_id: - end_id = len_data - - print("end, now removing untar files for saving storge space.") - print("rm -rf" + " " + out_put_dir[:-1]) - os.system("rm -rf" + " " + out_put_dir[:-1]) - print("remove done") - - print("There are ", len(not_exist), "wav files not exist") - print(not_exist) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/get_wav_labels.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/get_wav_labels.py deleted file mode 100644 index fb0c5c2b0f66f274680b95b0e872f04886f2c7ca..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/get_wav_labels.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
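The filtering loop above keeps an utterance only when the CER between its with-LM and without-LM hypotheses is below `--cer_hypo_threshold` and its speaking rate (characters per second) is above `--speak_rate_threshold`. A condensed restatement of that predicate; the thresholds shown are the defaults from `run_nst.sh`, and the sample utterance numbers are made up:

```python
def keep_pseudo_label(cer_hypo: float, num_chars: int, duration_s: float,
                      cer_hypo_threshold: float = 10.0,
                      speak_rate_threshold: float = 0.0) -> bool:
    """Accept an utterance when the hypo-vs-hypo CER is small and chars/second is above the floor."""
    speak_rate = num_chars / duration_s
    return cer_hypo <= cer_hypo_threshold and speak_rate > speak_rate_threshold

# made-up utterance: 9.2% CER between LM and no-LM hypotheses, 42 chars spoken in 6.1 s
print(keep_pseudo_label(9.2, 42, 6.1))  # True
```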
-# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse - - -def get_args(): - parser = argparse.ArgumentParser(description='sum up prediction wer') - parser.add_argument('--job_num', type=int, default=8, - help='number of total split dir') - parser.add_argument('--dir_split', required=True, - help='the path to the data_list dir ' - 'eg data/train/wenet1k_good_split_60/') - parser.add_argument('--label', type=int, default=0, - help='if ture, label file will also be considered.') - parser.add_argument('--hypo_name', type=str, required=True, - help='the hypothesis path. eg. /hypothesis_0.txt ') - parser.add_argument('--wav_dir', type=str, required=True, - help='the wav dir path. eg. data/train/wenet_1k_untar/ ') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - data_list_dir = args.dir_split - num_lists = args.job_num - hypo = args.hypo_name - # wav_dir is the directory where your pair of ID.scp - # (the audio file ) and ID.txt (the optional label file ) file stored. - # We assumed that you have generated this dir in data processing steps. - wav_dir = args.wav_dir - label = args.label - - print("data_list_path is", data_list_dir) - print("num_lists is", num_lists) - print("hypo is", hypo) - print("wav_dir is", wav_dir) - - i = num_lists - c = 0 - hypo_path = data_list_dir + "data_sublist" + str(i) + hypo - output_wav = data_list_dir + "data_sublist" + str(i) + "/wav.scp" - output_label = data_list_dir + "data_sublist" + str(i) + "/label.txt" - # bad lines are just for debugging - output_bad_lines = data_list_dir + "data_sublist" + str(i) + "/bad_line.txt" - - with open(hypo_path, 'r', encoding="utf-8") as reader: - hypo_lines = reader.readlines() - - wavs = [] - labels = [] - bad_files = [] - for x in hypo_lines: - c += 1 - file_id = x.split()[0] - - label_path = wav_dir + file_id + ".txt" - wav_path = wav_dir + file_id + ".wav\n" - wav_line = file_id + " " + wav_path - wavs.append(wav_line) - if label: - try: - with open(label_path, 'r', encoding="utf-8") as reader1: - label_line = reader1.readline() - except OSError as e: - bad_files.append(label_path) - - label_line = file_id + " " + label_line + "\n" - labels.append(label_line) - - with open(output_wav, 'w', encoding="utf-8") as writer2: - for wav in wavs: - writer2.write(wav) - with open(output_bad_lines, 'w', encoding="utf-8") as writer4: - for line in bad_files: - writer4.write(line) - if label: - with open(output_label, 'w', encoding="utf-8") as writer3: - for label in labels: - writer3.write(label) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/split_data_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/split_data_list.py deleted file mode 100644 index 17d507cb79ed1c4e25cdbd3d59a0eeb93000f0d8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/local/split_data_list.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
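`local/get_wav_labels.py` above pairs each utterance id found in a hypothesis file with its `<id>.wav` under `wav_dir` to produce a Kaldi-style `wav.scp`. A stripped-down sketch of that pairing; the paths in the usage comment are hypothetical and only mirror the sublist layout used by the recipe:

```python
import os

def write_wav_scp(hypo_path: str, wav_dir: str, out_scp: str):
    """For every utterance id in a hypothesis file, emit a '<id> <path>.wav' line."""
    with open(hypo_path, encoding="utf-8") as reader, \
         open(out_scp, "w", encoding="utf-8") as writer:
        for line in reader:
            if not line.strip():
                continue
            utt_id = line.split()[0]  # hypothesis lines start with the utterance id
            writer.write(f"{os.path.join(wav_dir, utt_id)}.wav\n".join([utt_id + " ", ""]))

# hypothetical sublist layout mirroring the script above
# write_wav_scp("data/train/wenet_split_60_test/data_sublist0/hypothesis_nst0.txt",
#               "data/train/wenet_1k_untar/",
#               "data/train/wenet_split_60_test/data_sublist0/wav.scp")
```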
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import argparse - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument('--job_nums', type=int, default=8, - help='number of total split jobs') - parser.add_argument('--data_list_path', required=True, - help='the path to the data.list file') - parser.add_argument('--output_dir', required=True, - help='path to output dir, ' - 'eg --output_dir=data/train/aishell_split_60') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - data_list_path = args.data_list_path - num_lists = args.job_nums - output_dir = args.output_dir - - print("data_list_path is", data_list_path) - print("num_lists is", num_lists) - print("output_dir is", output_dir) - os.makedirs(output_dir, exist_ok=True) - - with open(data_list_path, 'r', encoding="utf-8") as reader: - data_list_we = reader.readlines() - - # divide data.list equally - len_d = int(len(data_list_we) / num_lists) - rest_lines = data_list_we[num_lists * len_d:] - rest_len = len(rest_lines) - print("total num of lines", len(data_list_we) , "rest len is", rest_len) - - # generate N sublist - for i in range(num_lists): - print("current dir num", i) - out_put_sub_dir = output_dir + "/" + "data_sublist" + str(i) + "/" - os.makedirs(out_put_sub_dir, exist_ok=True) - output_list = out_put_sub_dir + "data_list" - - with open(output_list, 'w', encoding="utf-8") as writer: - - new_list = data_list_we[i * len_d: (i + 1) * len_d] - if i < rest_len: - new_list.append(rest_lines[i]) - for x in new_list: - # output list - writer.write(x) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/path.sh deleted file mode 100644 index 5ddca76cc23a90f320dd95fd262c345dc700aa04..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/server/x86/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/run.sh deleted file mode 100644 index 258f5061f314de6347d9418ccc85035cbf51074d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/run.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
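`local/split_data_list.py` above simply cuts `data.list` into `--job_nums` nearly equal sublists so that each inference job can work on its own chunk in parallel. A compact sketch of the same bookkeeping:

```python
def split_evenly(lines, num_jobs):
    """Divide data.list lines into num_jobs sublists whose sizes differ by at most one."""
    base = len(lines) // num_jobs
    rest = lines[num_jobs * base:]
    sublists = [lines[i * base:(i + 1) * base] for i in range(num_jobs)]
    for i, extra in enumerate(rest):  # hand the leftover lines out one per sublist
        sublists[i].append(extra)
    return sublists

parts = split_evenly([f"utt{i}" for i in range(10)], 3)
print([len(p) for p in parts])  # [4, 3, 3] -- 10 lines spread over 3 jobs
```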
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -iter_num=2 -stage=1 -stop_stage=1 -pseudo_data_ratio=0.75 -dir=exp/conformer_test_fully_supervised -data_list=data_aishell.list -supervised_data_list=data_aishell.list -unsupervised_data_list=wenet_1khr.list -dir_split=wenet_split_60_test/ -out_data_list=data/train/wenet_1khr_nst0.list -num_split=1 -. tools/parse_options.sh || exit 1; - -# Stage 1 trains the initial teacher and generates initial pseudo-labels. -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - echo "******** stage 1 training the intial teacher ********" - bash run_nst.sh --dir $dir \ - --data_list $data_list \ - --supervised_data_list $supervised_data_list \ - --unsupervised_data_list $unsupervised_data_list \ - --dir_split $dir_split\ - --out_data_list $out_data_list \ - --enable_nst 0 \ - --pseudo_data_ratio pseudo_data_ratio \ - --num_split $num_split - -fi - -# Stage 2 trains the nst iterations. -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - - for ((i = 0; i < $iter_num; ++i)); do - { - echo "******** stage 2 training nst iteration number $i ********" - bash run_nst.sh --dir exp/conformer_nst${i+1} \ - --supervised_data_list data_aishell.list \ - --data_list wenet_1khr_nst${i}.list \ - --enable_nst 1 \ - --job_num 0 \ - --num_split $num_split \ - --hypo_name hypothesis_nst${i+1}.txt \ - --untar_dir wenet_1khr_untar_nst${i+1}/ \ - --tar_dir wenet_1khr_tar_nst${i+1}/ \ - --out_data_list wenet_1khr_nst${i+1}.list \ - --pseudo_data_ratio $pseudo_data_ratio - - } - done - -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/run_nst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/run_nst.sh deleted file mode 100644 index 877d55dddc3c8d2f7f43e1acf24d1d8a1785b3c6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/NST/run_nst.sh +++ /dev/null @@ -1,409 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# This is an augmented version of aishell-1 "run.sh" to make the code compatible with noisy student training - -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl -# communication. 
More details can be found in -# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html -# export NCCL_SOCKET_IFNAME=ens4f1 -export NCCL_DEBUG=INFO -stage=1 # start from 0 if you need to start from data preparation -stop_stage=8 - -# here are extra parameters used in NST -cer_out_dir="" -dir="" -supervised_data_list="" -checkpoint= -unsupervised_data_list="" -data_list="" - -hypo_name="" -out_data_list="" -#parameters with default values: -label=0 -average_num=30 -nj=16 -num_split=1 -cer_hypo_threshold=10 -speak_rate_threshold=0 -label_file="label.txt" -utter_time_file="utter_time.json" -enable_nst=1 -job_num=0 -dir_split="wenet_split_60_test/" -hypo_name="hypothesis_nst${job_num}.txt" -wav_dir="data/train/wenet_1k_untar/" -tar_dir="data/train/wenet_1khr_tar/" -untar_dir="data/train/wenet_1khr_untar/" -cer_hypo_dir="wenet_cer_hypo" -cer_label_dir="wenet_cer_label" -pseudo_data_ratio=0.75 - -# The num of machines(nodes) for multi-machine training, 1 is for one machine. -# NFS is required if num_nodes > 1. - -num_nodes=1 - -# The rank of each node or machine, which ranges from 0 to `num_nodes - 1`. -# You should set the node_ranHk=0 on the first machine, set the node_rank=1 -# on the second machine, and so on. -node_rank=0 -dict=data/dict/lang_char.txt - -# data_type can be `raw` or `shard`. Typically, raw is used for small dataset, -# `shard` is used for large dataset which is over 1k hours, and `shard` is -# faster on reading data and training. -data_type=shard -num_utts_per_shard=1000 -train_set=train -train_config=conf/train_conformer.yaml -cmvn=true -average_checkpoint=true -target_pt=80 -decode_checkpoint=$dir/$target_pt.pt - -# here we only use attention_rescoring for NST -decode_modes="attention_rescoring" - -. tools/parse_options.sh || exit 1; - -# print the settings -echo "setting for this run:" -echo "dir is ${dir}" -echo "data list is ${data_list}" -echo "job_num is ${job_num}" -echo "cer_out_dir is ${cer_out_dir}" -echo "average_num is ${average_num}" -echo "checkpoint is ${checkpoint} " -echo "enable_nst is ${enable_nst} " - -# we assumed that you have finished the data pre-process steps from -1 to 3 in aishell1/s0/run.sh . -# You can modify the "--train_data_supervised" to match your supervised data list. -# Here i used wenetspeech as the unsupervised data, you can run the data pre-process steps from -1 to 3 in -# wenetspeech/s0/run.sh ; you can modify "--train_data_supervised" to match your unsupervised data list. -# you can follow this process to generate your own dataset. -# I have also included my code for extracting data in local/... - -# stage 1 is for training -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - echo "********step 1 start time : $now ********" - mkdir -p $dir - # You have to rm `INIT_FILE` manually when you resume or restart a - # multi-machine training. - rm $dir/ddp_init - INIT_FILE=$dir/ddp_init - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - - # the global_cmvn file need to be calculated by combining both supervised/unsupervised datasets, - # and it should be positioned at data/${train_set}/global_cmvn . 
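The comment above notes that `global_cmvn` must be computed over the supervised and unsupervised sets together. If per-set statistics already exist, they can be combined by summing the accumulators. The sketch below assumes the JSON layout that `tools/compute_cmvn_stats.py` typically writes (summed `mean_stat` and `var_stat` plus a `frame_num` count); check the keys against your actual stats file, and note that the paths are hypothetical.

```python
import json

def merge_cmvn_stats(path_a: str, path_b: str, out_path: str):
    """Combine two global CMVN statistics files by summing their accumulators."""
    with open(path_a, encoding="utf-8") as f:
        a = json.load(f)
    with open(path_b, encoding="utf-8") as f:
        b = json.load(f)
    merged = {
        "mean_stat": [x + y for x, y in zip(a["mean_stat"], b["mean_stat"])],
        "var_stat": [x + y for x, y in zip(a["var_stat"], b["var_stat"])],
        "frame_num": a["frame_num"] + b["frame_num"],
    }
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(merged, f)

# hypothetical paths: stats computed separately on the supervised and unsupervised sets
# merge_cmvn_stats("data/train_supervised/global_cmvn",
#                  "data/train_unsupervised/global_cmvn",
#                  "data/train/global_cmvn")
```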
- cmvn_opts= - $cmvn && cp data/${train_set}/global_cmvn $dir/global_cmvn - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - - # train.py rewrite $train_config to $dir/train.yaml with model input - # and output dimension, and $dir/train.yaml will be used for inference - # and export. - echo "checkpoint is " ${checkpoint} - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - echo "gpu number $i " - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. - - rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --train_data data/$train_set/$data_list \ - --cv_data data/dev/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 1 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -# In stage 2, we get the averaged final checkpoint and calculate the test and dev accuracy -# please make sure your test and valid data.list are in the proper location. -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - # Test model, please specify the model you want to test by --checkpoint - # stage 5 we test with aishell dataset, - echo "******** step 2 start time : $now ********" - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - - # export model - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip \ - --output_quant_file $dir/final_quant.zip - # Please specify decoding_chunk_size for unified streaming and - # non-streaming model. The default value is -1, which is full chunk - # for non-streaming inference. 
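Stage 2 above exports the averaged checkpoint with `wenet/bin/export_jit.py`, so `final.zip` is a TorchScript archive that can be reloaded outside the training code. A minimal check, with a hypothetical experiment directory:

```python
import torch

# hypothetical experiment dir; stage 2 writes $dir/final.zip and $dir/final_quant.zip
model = torch.jit.load("exp/conformer_test_fully_supervised/final.zip", map_location="cpu")
model.eval()
print(type(model))  # a torch.jit.ScriptModule wrapping the trained ASR model
```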
- decoding_chunk_size= - ctc_weight=0.5 - reverse_weight=0.0 - - # test_wer - for mode in ${decode_modes}; do - { - #test_dir=$dir/test_${mode}_${target_pt}pt # for target pt - test_dir=$dir/test_${mode}${average_num}pt # for average pt - mkdir -p $test_dir - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data data/test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --result_file $test_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - echo "before compute-wer" - python tools/compute-wer.py --char=1 --v=1 \ - data/test/text $test_dir/text > $test_dir/wer - } & - done - -# dev_wer - for mode in ${decode_modes}; do - { - #test_dir=$dir/test_${mode}_${target_pt}pt # for target pt - dev_dir=$dir/dev_${mode}${average_num}pt # for average pt - mkdir -p $dev_dir - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data data/dev/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --result_file $dev_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - echo "before compute-wer" - python tools/compute-wer.py --char=1 --v=1 \ - data/dev/text $dev_dir/text > $dev_dir/wer - } & - done - wait -fi - - -# split the (unsupervised) datalist into N sublists, where N depends on the number of available cpu in your cluster. -# when making inference, we compute N sublist in parallel. -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ] && [ ${enable_nst} -eq 0 ]; then - echo "********step 3 start time : $now ********" - python local/split_data_list.py \ - --job_nums $num_split \ - --data_list_path data/train/$unsupervised_data_list \ - --output_dir data/train/$dir_split - -fi - - -# stage 4 will perform inference without language model on the given sublist(job num) -# here is example usages: -# bash run_nst.sh --stage 4 --stop-stage 4 --job_num $i --dir_split data/train/wenet_4khr_split_60/ -# --hypo_name hypothesis_0.txt --dir exp/conformer_aishell2_wenet4k_nst4 -# You need to specify the "job_num" n (n <= N), "dir_split" which is the dir path for split data -# "hypo_name" is the path for output hypothesis and "dir" is the path where we train and store the model. -# For each gpu, you can run with different job_num to perform data-wise parallel computing. -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - echo "********step 4 start time : $now ********" - # we assume you have run stage 2 so that avg_${average_num}.pt exists - decode_checkpoint=$dir/avg_${average_num}.pt - # Please specify decoding_chunk_size for unified streaming and - # non-streaming model. The default value is -1, which is full chunk - # for non-streaming inference. 
- decoding_chunk_size= - ctc_weight=0.5 - reverse_weight=0.0 - mode="attention_rescoring" - gpu_id=0 - echo "job number ${job_num} " - echo "data_list dir is ${dir_split}" - echo "hypo name is " $hypo_name - echo "dir is ${dir}" - - python wenet/bin/recognize.py --gpu $gpu_id \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data data/train/${dir_split}data_sublist${job_num}/data_list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --result_file data/train/${dir_split}data_sublist${job_num}/${hypo_name} \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - echo "end time : $now" - -fi - - -# Generate wav.scp file and label.txt file(optional) for each sublist we generated in step 3. -# the wav_dir should be prepared in data processing step as we mentioned. -#You need to specify the "job_num" n (n <= N), "dir_split" which is the dir path for split data, -# "hypo_name" is the path for output hypothesis and "dir" is the path where we train and store the model. -# wav_dir is the directory that stores raw wav file and possible labels. -# if you have label for unsupervised dataset, set label = 1 other wise keep it 0 -# For each gpu or cpu, you can run with different job_num to perform data-wise parallel computing. -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ] && [ ${enable_nst} -eq 0 ]; then - echo "********step 5 start time : $now ********" - python local/get_wav_labels.py \ - --dir_split data/train/${dir_split} \ - --hypo_name /$hypo_name \ - --wav_dir $wav_dir\ - --job_num $job_num \ - --label $label -fi - -# Calculate cer-hypo between hypothesis with and without language model. -# We assumed that you have finished language model -# training using the wenet aishell-1 pipline. (You should have data/lang/words.txt , data/lang/TLG.fst files ready.) -# Here is an exmaple usage: -# bash run_nst.sh --stage 5 --stop-stage 5 --job_num n --dir_split data/train/wenet1k_redo_split_60/ -# --cer_hypo_dir wenet1k_cer_hypo --hypo_name hypothesis_nst.txt --dir exp/conformer_no_filter_redo_nst6 -# You need to specify the "job_num" n (n <= N), "dir_split" which is the dir path for split data -# "hypo_name" is the path for output hypothesis and "dir" is the path where we train and store the model. -# For each gpu, you can run with different job_num to perform data-wise parallel computing. -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - echo "********step 6 start time : $now ********" - chunk_size=-1 - mode="attention_rescoring" - test_dir=$dir/test_${mode}_${job_num} - now=$(date +"%T") - echo "start time : $now" - echo "GPU dir is " $job_num "dir_split is " data/train/${dir_split} - echo "nj is" $nj "hypo_file is" $hypo_name "cer out is" $cer_hypo_dir "lm is 4gram" - echo "dir is " $dir - if [ ! 
-f data/train/${dir_split}data_sublist${job_num}/${hypo_name} ]; then - echo "text file does not exists" - exit 1; - fi - - ./tools/decode.sh --nj 16 \ - --beam 15.0 --lattice_beam 7.5 --max_active 7000 \ - --blank_skip_thresh 0.98 --ctc_weight 0.5 --rescoring_weight 1.0 \ - --chunk_size $chunk_size \ - --fst_path data/lang_test/TLG.fst \ - data/train/${dir_split}data_sublist${job_num}/wav.scp \ - data/train/${dir_split}data_sublist${job_num}/${hypo_name} $dir/final.zip \ - data/lang_test/words.txt $dir/Hypo_LM_diff10/${cer_hypo_dir}_${job_num} - now=$(date +"%T") - echo "end time : $now" -fi - -# (optional, only run this stage if you have true label for unsupervised data.) -# Calculate cer-label between true label and hypothesis with language model. -# You can use the output cer to evaluate NST's performance. -if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ] && [ ${label} -eq 1 ]; then - echo "********step 7 start time : $now ********" - chunk_size=-1 - mode="attention_rescoring" - test_dir=$dir/test_${mode}_${job_num} - now=$(date +"%T") - echo "start time : $now" - echo "GPU dir is " $job_num "dir_split is " data/train/${dir_split} - echo "nj is" $nj "label_file is" $label_file "cer out is" $cer_label_dir "lm is 4gram" - echo "dir is " $dir - echo "label_file " data/train/${dir_split}data_sublist${job_num}/${label_file} - if [ ! -f data/train/${dir_split}data_sublist${job_num}/${label_file} ]; then - echo "text file does not exists" - exit 1; - fi - - ./tools/decode.sh --nj 16 \ - --beam 15.0 --lattice_beam 7.5 --max_active 7000 \ - --blank_skip_thresh 0.98 --ctc_weight 0.5 --rescoring_weight 1.0 \ - --chunk_size $chunk_size \ - --fst_path data/lang_test/TLG.fst \ - data/train/${dir_split}data_sublist${job_num}/wav.scp \ - data/train/${dir_split}data_sublist${job_num}/${label_file} $dir/final.zip \ - data/lang_test/words.txt $dir/Hypo_LM_diff10/${cer_label_dir}_${job_num} - now=$(date +"%T") - echo "end time : $now" -fi - - -if [ ${stage} -le 8 ] && [ ${stop_stage} -ge 8 ]; then - echo "********step 8 start time : $now ********" - python local/generate_filtered_pseudo_label.py \ - --cer_hypo_dir $cer_hypo_dir \ - --untar_dir data/train/$untar_dir \ - --wav_dir $wav_dir \ - --dir_num $job_num \ - --cer_hypo_threshold $cer_hypo_threshold \ - --speak_rate_threshold $speak_rate_threshold \ - --dir $dir \ - --tar_dir data/train/$tar_dir \ - --utter_time_file $utter_time_file - - python local/generate_data_list.py \ - --tar_dir data/train/$tar_dir \ - --out_data_list data/train/$out_data_list \ - --supervised_data_list data/train/$supervised_data_list \ - --pseudo_data_ratio $pseudo_data_ratio - -fi - - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/README.md deleted file mode 100644 index ff2ec88c7da99adac6f7bbaafb5d445d644cf70e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/README.md +++ /dev/null @@ -1,85 +0,0 @@ -# Performance Record - -## Conformer Result - -* Feature info: using fbank feature, dither, cmvn, online speed perturb -* Training info: lr 0.001, batch size 8, 8 gpu, acc_grad 1, 100 epochs, dither 0.1 -* Training weight info: transducer_weight 0.75, ctc_weight 0.1, attention_weight 0.15, average_num 10 -* Predictor type: lstm - -| decoding mode | CER | -|---------------------------|-------| -| rnnt greedy search | 5.24 | - -* after 165 epochs and avg 30 - -| decoding mode | 
-| decoding mode             | CER   |
-|---------------------------|-------|
-| rnnt greedy search        | 5.02  |
-| ctc prefix beam search    | 5.17  |
-| ctc prefix beam + rescore | 4.48  |
-
-## Conformer Result
-
-* Feature info: using fbank feature, dither, cmvn, online speed perturb
-* Training info: lr 0.001, batch size 20, 8 gpu, acc_grad 1, 140 epochs, dither 0.1
-* Training weight info: transducer_weight 0.4, ctc_weight 0.2, attention_weight 0.4, average_num 10
-* Predictor type: lstm
-* Model link: https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell/20220728_conformer_rnnt_exp.tar.gz
-
-| decoding mode                         | CER   |
-|---------------------------------------|-------|
-| rnnt greedy search                    | 4.88  |
-| rnnt beam search                      | 4.67  |
-| ctc prefix beam search                | 5.02  |
-| ctc prefix beam + rescore             | 4.51  |
-| ctc prefix beam + rnnt&attn rescore   | 4.45  |
-| rnnt prefix beam + rnnt&attn rescore  | 4.49  |
-
-
-## U2++ Conformer Result
-
-* Feature info: using fbank feature, dither, cmvn, online speed perturb
-* Training info: lr 0.001, batch size 4, 32 gpu, acc_grad 1, 360 epochs
-* Training weight info: transducer_weight 0.75, ctc_weight 0.1, reverse_weight 0.15, average_num 30
-* Predictor type: lstm
-
-| decoding mode/chunk size  | full  | 16    |
-|---------------------------|-------|-------|
-| rnnt greedy search        | 5.68  | 6.26  |
-
-## Pretrain
-* Pretrain model: https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell/20210601_u2%2B%2B_conformer_exp.tar.gz
-* Feature info: using fbank feature, dither, cmvn, online speed perturb
-* Training info: lr 0.001, batch size 8, 8 gpu, acc_grad 1, 140 epochs
-* Training weight info: transducer_weight 0.4, ctc_weight 0.2, attention_weight 0.4, reverse_weight 0.3, average_num 30
-* Predictor type: lstm
-
-| decoding mode/chunk size    | full  | 16     |
-|-----------------------------|-------|--------|
-| rnnt greedy search          | 5.21  | 5.73   |
-| rnnt prefix beam            | 5.14  | 5.63   |
-| rnnt prefix beam + rescore  | 4.73  | 5.095  |
-
-
-## Training loss ablation study
-
-note:
-
-- if rnnt is checked, greedy means rnnt greedy search; likewise for beam
-
-- if rnnt is checked, rescoring means rnnt beam & attention rescoring
-
-- if only 'ctc & att' is checked, greedy means ctc greedy search; likewise for beam
-
-- if only 'ctc & att' (AED) is checked, rescoring means ctc beam & attention rescoring
-
-- what if the rnnt model does wenet-style search: coming soon
-
-| rnnt | ctc | att | greedy | beam | rescoring | fusion |
-|------|-----|-----|--------|------|-----------|--------|
-| ✔    | ✔   | ✔   | 4.88   | 4.67 | 4.45      | 4.49   |
-| ✔    | ✔   |     | 5.56   | 5.46 | /         | 5.40   |
-| ✔    |     | ✔   | 5.03   | 4.94 | 4.87      | /      |
-| ✔    |     |     | 5.64   | 5.59 | /         | /      |
-|      | ✔   | ✔   | 4.94   | 4.94 | 4.61      | /      |
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/conf/conformer_rnnt.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/conf/conformer_rnnt.yaml
deleted file mode 100644
index aeab0b180bc4904d32de6d01997e96c3f6ed9efd..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/conf/conformer_rnnt.yaml
+++ /dev/null
@@ -1,100 +0,0 @@
-# network architecture
-# encoder related
-encoder: conformer
-encoder_conf:
-    output_size: 256 # dimension of attention
-    attention_heads: 4
-    linear_units: 2048 # the number of units of position-wise feed forward
-    num_blocks: 12 # the number of encoder blocks
-    dropout_rate: 0.1
-    positional_dropout_rate: 0.1
-    attention_dropout_rate: 0.1
-    input_layer: conv2d # encoder input type,
you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: true - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - - -joint_conf: - join_dim: 512 - prejoin_linear: True - postjoin_linear: false - joint_mode: 'add' - activation: 'tanh' - -predictor: rnn -predictor_conf: - embed_size: 256 - output_size: 256 - embed_dropout: 0.1 - hidden_size: 256 - num_layers: 2 - bias: true - rnn_type: 'lstm' - dropout: 0.1 - -decoder: bitransformer -decoder_conf: - attention_heads: 4 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 3 - positional_dropout_rate: 0.1 - r_num_blocks: 3 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid transducer+ctc+attention -model_conf: - transducer_weight: 0.75 - ctc_weight: 0.1 - attention_weight: 0.15 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 10 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 8 - -grad_clip: 4 -accum_grad: 1 -max_epoch: 140 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/conf/conformer_u2pp_rnnt.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/conf/conformer_u2pp_rnnt.yaml deleted file mode 100644 index 28a80d5f7f9f59be932ccc2cc8900d7ab397cf49..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/conf/conformer_u2pp_rnnt.yaml +++ /dev/null @@ -1,103 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 8 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - causal: true - use_dynamic_chunk: true - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - use_dynamic_left_chunk: false - - -joint_conf: - join_dim: 512 - prejoin_linear: True - postjoin_linear: false - joint_mode: 'add' - activation: 'tanh' - -predictor: rnn -predictor_conf: - embed_size: 256 - output_size: 256 - embed_dropout: 0.1 - hidden_size: 256 - num_layers: 2 - bias: true - rnn_type: 'lstm' - dropout: 0.1 - -decoder: bitransformer -decoder_conf: - attention_heads: 4 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 3 - positional_dropout_rate: 0.1 - r_num_blocks: 3 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid transducer+ctc+attention -model_conf: - 
transducer_weight: 0.75 - ctc_weight: 0.1 - attention_weight: 0.15 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 10 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 4 - -grad_clip: 4 -accum_grad: 1 -max_epoch: 130 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/conf/example_embedding_predictor.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/conf/example_embedding_predictor.yaml deleted file mode 100644 index 6d15b2fc03c43c250f5588bb9c50d01b128a4ecd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/conf/example_embedding_predictor.yaml +++ /dev/null @@ -1,95 +0,0 @@ -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: true - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - - -joint_conf: - join_dim: 320 - prejoin_linear: true - postjoin_linear: false - joint_mode: 'add' - activation: 'tanh' - -predictor: embedding -predictor_conf: - embed_size: 320 - embed_dropout: 0.1 - n_head: 4 - history_size: 5 - bias: false - -decoder: bitransformer -decoder_conf: - attention_heads: 4 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 3 - positional_dropout_rate: 0.1 - r_num_blocks: 3 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid transducer+ctc+attention -model_conf: - transducer_weight: 0.4 - ctc_weight: 0.2 - attention_weight: 0.4 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 10 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 30 - - -grad_clip: 4 -accum_grad: 1 -max_epoch: 500 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/local/aishell_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/local/aishell_data_prep.sh
deleted file mode 100644
index fb4d5fb0adefb9e3e3ebeaa5ccb1a92562eb77c1..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/local/aishell_data_prep.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/bash
-
-# Copyright 2017 Xingyu Na
-# Apache 2.0
-
-. ./path.sh || exit 1;
-
-if [ $# != 2 ]; then
-  echo "Usage: $0 <audio-path> <text-path>"
-  echo " $0 /export/a05/xna/data/data_aishell/wav /export/a05/xna/data/data_aishell/transcript"
-  exit 1;
-fi
-
-aishell_audio_dir=$1
-aishell_text=$2/aishell_transcript_v0.8.txt
-
-train_dir=data/local/train
-dev_dir=data/local/dev
-test_dir=data/local/test
-tmp_dir=data/local/tmp
-
-mkdir -p $train_dir
-mkdir -p $dev_dir
-mkdir -p $test_dir
-mkdir -p $tmp_dir
-
-# data directory check
-if [ ! -d $aishell_audio_dir ] || [ ! -f $aishell_text ]; then
-  echo "Error: $0 requires two directory arguments"
-  exit 1;
-fi
-
-# find wav audio files for train, dev and test resp.
-find $aishell_audio_dir -iname "*.wav" > $tmp_dir/wav.flist
-n=`cat $tmp_dir/wav.flist | wc -l`
-[ $n -ne 141925 ] && \
-  echo Warning: expected 141925 data files, found $n
-
-grep -i "wav/train" $tmp_dir/wav.flist > $train_dir/wav.flist || exit 1;
-grep -i "wav/dev" $tmp_dir/wav.flist > $dev_dir/wav.flist || exit 1;
-grep -i "wav/test" $tmp_dir/wav.flist > $test_dir/wav.flist || exit 1;
-
-rm -r $tmp_dir
-
-# Transcriptions preparation
-for dir in $train_dir $dev_dir $test_dir; do
-  echo Preparing $dir transcriptions
-  sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF}' > $dir/utt.list
-  paste -d' ' $dir/utt.list $dir/wav.flist > $dir/wav.scp_all
-  tools/filter_scp.pl -f 1 $dir/utt.list $aishell_text > $dir/transcripts.txt
-  awk '{print $1}' $dir/transcripts.txt > $dir/utt.list
-  tools/filter_scp.pl -f 1 $dir/utt.list $dir/wav.scp_all | sort -u > $dir/wav.scp
-  sort -u $dir/transcripts.txt > $dir/text
-done
-
-mkdir -p data/train data/dev data/test
-
-for f in wav.scp text; do
-  cp $train_dir/$f data/train/$f || exit 1;
-  cp $dev_dir/$f data/dev/$f || exit 1;
-  cp $test_dir/$f data/test/$f || exit 1;
-done
-
-echo "$0: AISHELL data preparation succeeded"
-exit 0;
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/local/aishell_train_lms.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/local/aishell_train_lms.sh
deleted file mode 100644
index 30ffb7973b3ddec4ef4c0f09c8184837cad768d6..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/local/aishell_train_lms.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/bash
-
-
-# To be run from one directory above this script.
-. ./path.sh
-
-text=data/local/lm/text
-lexicon=data/local/dict/lexicon.txt
-
-for f in "$text" "$lexicon"; do
-  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
-done
-
-# Check SRILM tools
-if ! which ngram-count > /dev/null; then
-  echo "srilm tools are not found, please download and install them from: "
-  echo "http://www.speech.sri.com/projects/srilm/download.html"
-  echo "Then add the tools to your PATH"
-  exit 1
-fi
-
-# This script takes no arguments. It assumes you have already run
-# aishell_data_prep.sh.
-# It takes as input the files -# data/local/lm/text -# data/local/dict/lexicon.txt -dir=data/local/lm -mkdir -p $dir - - -cleantext=$dir/text.no_oov - -cat $text | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } } - {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \ - > $cleantext || exit 1; - -cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \ - sort -nr > $dir/word.counts || exit 1; - -# Get counts from acoustic training transcripts, and add one-count -# for each word in the lexicon (but not silence, we don't want it -# in the LM-- we'll add it optionally later). -cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \ - cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \ - sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1; - -cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo ""; echo "" ) > $dir/wordlist - -heldout_sent=10000 # Don't change this if you want result to be comparable with - # kaldi_lm results -mkdir -p $dir -cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n $dir/heldout -cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n $dir/train - -ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \ - -map-unk "" -kndiscount -interpolate -lm $dir/lm.arpa -ngram -lm $dir/lm.arpa -ppl $dir/heldout diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/local/download_and_untar.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/local/download_and_untar.sh deleted file mode 100644 index 58a278241d75caeba25ba4b17d186912d0d724ec..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/local/download_and_untar.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Johns Hopkins University (author: Daniel Povey) -# 2017 Xingyu Na -# Apache 2.0 - -remove_archive=false - -if [ "$1" == --remove-archive ]; then - remove_archive=true - shift -fi - -if [ $# -ne 3 ]; then - echo "Usage: $0 [--remove-archive] " - echo "e.g.: $0 /export/a05/xna/data www.openslr.org/resources/33 data_aishell" - echo "With --remove-archive it will remove the archive after successfully un-tarring it." - echo " can be one of: data_aishell, resource_aishell." -fi - -data=$1 -url=$2 -part=$3 - -if [ ! -d "$data" ]; then - echo "$0: no such directory $data" - exit 1; -fi - -part_ok=false -list="data_aishell resource_aishell" -for x in $list; do - if [ "$part" == $x ]; then part_ok=true; fi -done -if ! $part_ok; then - echo "$0: expected to be one of $list, but got '$part'" - exit 1; -fi - -if [ -z "$url" ]; then - echo "$0: empty URL base." - exit 1; -fi - -if [ -f $data/$part/.complete ]; then - echo "$0: data part $part was already successfully extracted, nothing to do." - exit 0; -fi - -# sizes of the archive files in bytes. -sizes="15582913665 1246920" - -if [ -f $data/$part.tgz ]; then - size=$(/bin/ls -l $data/$part.tgz | awk '{print $5}') - size_ok=false - for s in $sizes; do if [ $s == $size ]; then size_ok=true; fi; done - if ! $size_ok; then - echo "$0: removing existing file $data/$part.tgz because its size in bytes $size" - echo "does not equal the size of one of the archives." - rm $data/$part.tgz - else - echo "$data/$part.tgz exists and appears to be complete." - fi -fi - -if [ ! -f $data/$part.tgz ]; then - if ! which wget >/dev/null; then - echo "$0: wget is not installed." 
- exit 1; - fi - full_url=$url/$part.tgz - echo "$0: downloading data from $full_url. This may take some time, please be patient." - - cd $data - if ! wget --no-check-certificate $full_url; then - echo "$0: error executing wget $full_url" - exit 1; - fi -fi - -cd $data - -if ! tar -xvzf $part.tgz; then - echo "$0: error un-tarring archive $data/$part.tgz" - exit 1; -fi - -touch $data/$part/.complete - -if [ $part == "data_aishell" ]; then - cd $data/$part/wav - for wav in ./*.tar.gz; do - echo "Extracting wav from $wav" - tar -zxf $wav && rm $wav - done -fi - -echo "$0: Successfully downloaded and un-tarred $data/$part.tgz" - -if $remove_archive; then - echo "$0: removing $data/$part.tgz file since --remove-archive option was supplied." - rm $data/$part.tgz -fi - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/run.sh deleted file mode 100644 index e9a0640240d58c38f914ebb165caa01ca11f857d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/run.sh +++ /dev/null @@ -1,201 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -# 2022 Binbin Zhang(binbizha@qq.com) - -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=5 - -# The num of machines(nodes) for multi-machine training, 1 is for one machine. -# NFS is required if num_nodes > 1. -num_nodes=1 - -# The rank of each node or machine, which ranges from 0 to `num_nodes - 1`. -# You should set the node_rank=0 on the first machine, set the node_rank=1 -# on the second machine, and so on. -node_rank=0 -# The aishell dataset location, please change this to your own path -# make sure of using absolute path. DO-NOT-USE relatvie path! -data=/export/data/asr-data/OpenSLR/33/ -data_url=www.openslr.org/resources/33 - -nj=16 -dict=data/dict/lang_char.txt - -# data_type can be `raw` or `shard`. Typically, raw is used for small dataset, -# `shard` is used for large dataset which is over 1k hours, and `shard` is -# faster on reading data and training. -data_type=raw -num_utts_per_shard=1000 - -train_set=train -train_config=conf/conformer_u2pp_rnnt.yaml -cmvn=true -dir=exp/conformer_rnnt -checkpoint= - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=30 -decode_modes="rnnt_beam_search" - -. 
tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - echo "stage -1: Data Download" - local/download_and_untar.sh ${data} ${data_url} data_aishell - local/download_and_untar.sh ${data} ${data_url} resource_aishell -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Data preparation - local/aishell_data_prep.sh ${data}/data_aishell/wav \ - ${data}/data_aishell/transcript -fi - - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - # remove the space between the text labels for Mandarin dataset - for x in train dev test; do - cp data/${x}/text data/${x}/text.org - paste -d " " <(cut -f 1 -d" " data/${x}/text.org) \ - <(cut -f 2- -d" " data/${x}/text.org | tr -d " ") \ - > data/${x}/text - rm data/${x}/text.org - done - - tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \ - --in_scp data/${train_set}/wav.scp \ - --out_cmvn data/$train_set/global_cmvn -fi - -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - echo "Make a dictionary" - mkdir -p $(dirname $dict) - echo " 0" > ${dict} # 0 is for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - tools/text2token.py -s 1 -n 1 data/train/text | cut -f 2- -d" " \ - | tr " " "\n" | sort | uniq | grep -a -v -e '^\s*$' | \ - awk '{print $0 " " NR+1}' >> ${dict} - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "Prepare data, prepare required format" - for x in dev test ${train_set}; do - if [ $data_type == "shard" ]; then - tools/make_shard_list.py --num_utts_per_shard $num_utts_per_shard \ - --num_threads 16 data/$x/wav.scp data/$x/text \ - $(realpath data/$x/shards) data/$x/data.list - else - tools/make_raw_list.py data/$x/wav.scp data/$x/text \ - data/$x/data.list - fi - done -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - mkdir -p $dir - # You have to rm `INIT_FILE` manually when you resume or restart a - # multi-machine training. - INIT_FILE=$dir/ddp_init - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp data/${train_set}/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - - # train.py rewrite $train_config to $dir/train.yaml with model input - # and output dimension, and $dir/train.yaml will be used for inference - # and export. - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. 
- rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --train_data data/$train_set/data.list \ - --cv_data data/dev/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 1 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Please specify decoding_chunk_size for unified streaming and - # non-streaming model. The default value is -1, which is full chunk - # for non-streaming inference. - decoding_chunk_size= - # only used in rescore mode for weighting different scores - rescore_ctc_weight=0.5 - rescore_transducer_weight=0.5 - rescore_attn_weight=0.5 - # only used in beam search, either pure beam search mode OR beam search inside rescoring - search_ctc_weight=0.3 - search_transducer_weight=0.7 - - reverse_weight=0.0 - for mode in ${decode_modes}; do - { - test_dir=$dir/test_${mode} - mkdir -p $test_dir - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data data/test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $rescore_ctc_weight \ - --transducer_weight $rescore_transducer_weight \ - --attn_weight $rescore_attn_weight \ - --search_ctc_weight $search_ctc_weight \ - --search_transducer_weight $search_transducer_weight \ - --reverse_weight $reverse_weight \ - --result_file $test_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - python tools/compute-wer.py --char=1 --v=1 \ - data/test/text $test_dir/text > $test_dir/wer - } & - done - wait -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. 
tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. ./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. 
- elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - 
pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = 
{'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. 
-has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! $skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = 
result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. - format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - 
parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University 
(author: Daniel Povey)
-# Apache 2.0
-
-# This script operates on a directory, such as in data/train/,
-# that contains some subset of the following files:
-# feats.scp
-# wav.scp
-# vad.scp
-# spk2utt
-# utt2spk
-# text
-#
-# It copies to another directory, possibly adding a specified prefix or a suffix
-# to the utterance and/or speaker names. Note, the recording-ids stay the same.
-#
-
-
-# begin configuration section
-spk_prefix=
-utt_prefix=
-spk_suffix=
-utt_suffix=
-validate_opts= # should rarely be needed.
-# end configuration section
-
-. utils/parse_options.sh
-
-if [ $# != 2 ]; then
-  echo "Usage: "
-  echo " $0 [options] <srcdir> <destdir>"
-  echo "e.g.:"
-  echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1"
-  echo "Options"
-  echo " --spk-prefix=<prefix> # Prefix for speaker ids, default empty"
-  echo " --utt-prefix=<prefix> # Prefix for utterance ids, default empty"
-  echo " --spk-suffix=<suffix> # Suffix for speaker ids, default empty"
-  echo " --utt-suffix=<suffix> # Suffix for utterance ids, default empty"
-  exit 1;
-fi
-
-
-export LC_ALL=C
-
-srcdir=$1
-destdir=$2
-
-if [ ! -f $srcdir/utt2spk ]; then
-  echo "copy_data_dir.sh: no such file $srcdir/utt2spk"
-  exit 1;
-fi
-
-if [ "$destdir" == "$srcdir" ]; then
-  echo "$0: this script requires <srcdir> and <destdir> to be different."
-  exit 1
-fi
-
-set -e;
-
-mkdir -p $destdir
-
-cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map
-cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map
-
-if [ ! -f $srcdir/utt2uniq ]; then
-  if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then
-    cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq
-  fi
-else
-  cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq
-fi
-
-cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
-  utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk
-
-utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt
-
-if [ -f $srcdir/feats.scp ]; then
-  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp
-fi
-
-if [ -f $srcdir/vad.scp ]; then
-  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp
-fi
-
-if [ -f $srcdir/segments ]; then
-  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments
-  cp $srcdir/wav.scp $destdir
-else # no segments->wav indexed by utt.
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
Will be used in lexicon and language model FST compiling.
-cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk '
-  BEGIN {
-    print "<eps> 0";
-  }
-  {
-    printf("%s %d\n", $1, NR);
-  }
-  END {
-    printf("#0 %d\n", NR+1);
-    printf("<s> %d\n", NR+2);
-    printf("</s> %d\n", NR+3);
-  }' > $dir/words.txt || exit 1;
-
-# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time.
-token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'`
-word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'`
-
-tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \
-  fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \
-  --keep_isymbols=false --keep_osymbols=false | \
-  fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \
-  fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;
-
-echo "Lexicon and token FSTs compiling succeeded"
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/ctc_token_fst.py
deleted file mode 100644
index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/ctc_token_fst.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 1 <eps> <eps>')
-print('1 1 <blank> <eps>')
-print('2 2 <blank> <eps>')
-print('2 0 <eps> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    node = 3
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(1, node, phone, phone))
-            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
-            print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>'))
-        node += 1
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/ctc_token_fst_compact.py
deleted file mode 100644
index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/ctc_token_fst_compact.py
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 0 <blank> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    node = 1
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(0, node, phone, phone))
-            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
-            print('{} {} {} {}'.format(node, 0, '<eps>', '<eps>'))
-        node += 1
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/ctc_token_fst_corrected.py
deleted file mode 100644
index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/ctc_token_fst_corrected.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-
-def il(n):
return n + 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
-                if word.encode('utf8').isalpha() and '▁' in unit_table:
-                    word = '▁' + word
-                chars = ' '.join(word)  # word is a char list
-            fout.write('{} {}\n'.format(word, chars))
-            lexicon_table.add(word)
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/remove_oovs.pl
deleted file mode 100644
index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/remove_oovs.pl
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# This script removes lines that contain these OOVs on either the
-# third or fourth fields of the line. It is intended to remove arcs
-# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in).
-
-if ( @ARGV < 1 && @ARGV > 2) {
-  die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n";
-}
-
-$unklist = shift @ARGV;
-open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n";
-while(<S>){
-  @A = split(" ", $_);
-  @A == 1 || die "Bad line in unknown-symbol list: $_";
-  $unk{$A[0]} = 1;
-}
-
-$num_removed = 0;
-while(<>){
-  @A = split(" ", $_);
-  if(defined $unk{$A[2]} || defined $unk{$A[3]}) {
-    $num_removed++;
-  } else {
-    print;
-  }
-}
-print STDERR "remove_oovs.pl: removed $num_removed lines.\n";
-
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/rnnt_token_fst.py
deleted file mode 100644
index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/rnnt_token_fst.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 0 <blank> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(0, 0, phone, phone))
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/s2eps.pl
deleted file mode 100644
index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/fst/s2eps.pl
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this
file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. 
' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/onnx2horizonbin.py deleted file mode 100644 index a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." 
+ str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." + str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, 
ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], 
enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + "/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - 
conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - "/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except 
in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. 
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. - -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! 
-f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. - if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! 
-f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - 
parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - 
name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, 
"w", encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! -d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! 
awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." - exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! 
cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! 
cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! 
cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. 
The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - 
non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = 
feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
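# NOTE: a typical invocation of this script (paths are placeholders, the 16/4 values
# match the streaming mode discussed in export_encoder below) might look like:
#
#   python3 wenet/bin/export_onnx_cpu.py \
#       --config exp/train.yaml \
#       --checkpoint exp/final.pt \
#       --output_dir onnx_cpu \
#       --chunk_size 16 \
#       --num_decoding_left_chunks 4 \
#       --reverse_weight 0.5
#
# encoder.onnx / ctc.onnx / decoder.onnx plus their dynamically quantized
# *.quant.onnx counterparts are written into --output_dir.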
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
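# NOTE: a rough illustration of the pruning step below, assuming e.g. a 16/-1 export
# where 'required_cache_size' and 'att_mask' were folded away: only the names that
# actually appear in onnx_encoder.graph.input are kept, which is equivalent to
#   ort_inputs = {k: v for k, v in ort_inputs.items() if k in input_names}
# Passing a key that the exported graph does not declare would make onnxruntime
# reject the feed.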
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
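# NOTE: shapes of the dummy decoder inputs built below (the absolute sizes are
# arbitrary stand-ins, only the dynamic axes registered later matter):
#   encoder_out : (1, 200, output_size)  float  - fake encoder output, T=200
#   hyps        : (10, 20)               int64  - 10 n-best hypotheses, 20 tokens each,
#                                                 ids drawn from [0, vocab_size)
#   hyps_lens   : (10,)                  int64  - per-hypothesis lengths in [15, 21)
# The first token of every hypothesis is overwritten with vocab_size - 1, the id WeNet
# reserves for <sos/eos>, so each traced hypothesis starts with a valid sos symbol.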
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
-
-   Example of writing single matrix:
-   kaldi_io.write_mat(filename, mat)
-
-   Example of writing arkfile:
-   with open(ark_file,'w') as f:
-     for key,mat in dict.iteritems():
-       kaldi_io.write_mat(f, mat, key=key)
-  """
-  mat_offset = 0
-  fd = open_or_fd(file_or_fd, mode='wb')
-  if sys.version_info[0] == 3: assert(fd.mode == 'wb')
-  try:
-    if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id),
-    mat_offset = fd.tell()
-    fd.write('\0B'.encode()) # we write binary!
-    # Data-type,
-    if m.dtype == 'float32': fd.write('FM '.encode())
-    elif m.dtype == 'float64': fd.write('DM '.encode())
-    else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype)
-    # Dims,
-    fd.write('\04'.encode())
-    fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows
-    fd.write('\04'.encode())
-    fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols
-    # Data,
-    fd.write(m.tobytes())
-  finally:
-    if fd is not file_or_fd : fd.close()
-  return mat_offset
-
-#################################################
-# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...)
-# Corresponds to: vector<vector<tuple<int,float> > >
-# - outer vector: time axis
-# - inner vector: records at the time
-# - tuple: int = index, float = value
-#
-
-def read_cnet_ark(file_or_fd):
-  """ Alias of function 'read_post_ark()', 'cnet' = confusion network """
-  return read_post_ark(file_or_fd)
-
-def read_post_ark(file_or_fd):
-  """ generator(key,vec<vec<int,float>>) = read_post_ark(file)
-   Returns generator of (key,posterior) tuples, read from ark file.
-   file_or_fd : ark, gzipped ark, pipe or opened file descriptor.
-
-   Iterate the ark:
-   for key,post in kaldi_io.read_post_ark(file):
-     ...
-
-   Read ark to a 'dictionary':
-   d = { key:post for key,post in kaldi_io.read_post_ark(file) }
-  """
-  fd = open_or_fd(file_or_fd)
-  try:
-    key = read_key(fd)
-    while key:
-      post = read_post(fd)
-      yield key, post
-      key = read_key(fd)
-  finally:
-    if fd is not file_or_fd: fd.close()
-
-def read_post(file_or_fd):
-  """ [post] = read_post(file_or_fd)
-   Reads single kaldi 'Posterior' in binary format.
-
-   The 'Posterior' is C++ type 'vector<vector<tuple<int,float> > >',
-   the outer-vector is usually time axis, inner-vector are the records
-   at given time, and the tuple is composed of an 'index' (integer)
-   and a 'float-value'. The 'float-value' can represent a probability
-   or any other numeric value.
-
-   Returns vector of vectors of tuples.
-  """
-  fd = open_or_fd(file_or_fd)
-  ans=[]
-  binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag
-  assert(fd.read(1).decode() == '\4'); # int-size
-  outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins)
-
-  # Loop over 'outer-vector',
-  for i in range(outer_vec_size):
-    assert(fd.read(1).decode() == '\4'); # int-size
-    inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin)
-    data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size)
-    assert(data[0]['size_idx'] == 4)
-    assert(data[0]['size_post'] == 4)
-    ans.append(data[['idx','post']].tolist())
-
-  if fd is not file_or_fd: fd.close()
-  return ans
-
-
-#################################################
-# Kaldi Confusion Network bin begin/end times,
-# (kaldi stores CNs time info separately from the Posterior).
-#
-
-def read_cntime_ark(file_or_fd):
-  """ generator(key,vec<tuple<float,float>>) = read_cntime_ark(file_or_fd)
-   Returns generator of (key,cntime) tuples, read from ark file.
-   file_or_fd : file, gzipped file, pipe or opened file descriptor.
-
-   Iterate the ark:
-   for key,time in kaldi_io.read_cntime_ark(file):
-     ...
-
-   Read ark to a 'dictionary':
-   d = { key:time for key,time in kaldi_io.read_post_ark(file) }
-  """
-  fd = open_or_fd(file_or_fd)
-  try:
-    key = read_key(fd)
-    while key:
-      cntime = read_cntime(fd)
-      yield key, cntime
-      key = read_key(fd)
-  finally:
-    if fd is not file_or_fd : fd.close()
-
-def read_cntime(file_or_fd):
-  """ [cntime] = read_cntime(file_or_fd)
-   Reads single kaldi 'Confusion Network time info', in binary format:
-   C++ type: vector<tuple<float,float> >.
-   (begin/end times of bins at the confusion network).
-
-   Binary layout is ' ...'
-
-   file_or_fd : file, gzipped file, pipe or opened file descriptor.
-
-   Returns vector of tuples.
-  """
-  fd = open_or_fd(file_or_fd)
-  binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary
-
-  assert(fd.read(1).decode() == '\4'); # int-size
-  vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins)
-
-  data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size)
-  assert(data[0]['size_beg'] == 4)
-  assert(data[0]['size_end'] == 4)
-  ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end),
-
-  if fd is not file_or_fd : fd.close()
-  return ans
-
-
-#################################################
-# Segments related,
-#
-
-# Segments as 'Bool vectors' can be handy,
-# - for 'superposing' the segmentations,
-# - for frame-selection in Speaker-ID experiments,
-def read_segments_as_bool_vec(segments_file):
-  """ [ bool_vec ] = read_segments_as_bool_vec(segments_file)
-   using kaldi 'segments' file for 1 wav, format : ' '
-   - t-beg, t-end is in seconds,
-   - assumed 100 frames/second,
-  """
-  segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1)
-  # Sanity checks,
-  assert(len(segs) > 0) # empty segmentation is an error,
-  assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file,
-  # Convert time to frame-indexes,
-  start = np.rint([100 * rec[2] for rec in segs]).astype(int)
-  end = np.rint([100 * rec[3] for rec in segs]).astype(int)
-  # Taken from 'read_lab_to_bool_vec', htk.py,
-  frms = np.repeat(np.r_[np.tile([False,True], len(end)), False],
-                   np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0])
-  assert np.sum(end-start) == np.sum(frms)
-  return frms
-
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/processor.py
deleted file mode 100644
index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/processor.py
+++ /dev/null
@@ -1,660 +0,0 @@
-# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
-        else:
-            for p in sp.encode_as_pieces(ch_or_w):
-                tokens.append(p)
-
-    return tokens
-
-
-def tokenize(data,
-             symbol_table,
-             bpe_model=None,
-             non_lang_syms=None,
-             split_with_space=False):
-    """ Decode text to chars or BPE
-        Inplace operation
-
-        Args:
-            data: Iterable[{key, wav, txt, sample_rate}]
-
-        Returns:
-            Iterable[{key, wav, txt, tokens, label, sample_rate}]
-    """
-    if non_lang_syms is not None:
-        non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})")
-    else:
-        non_lang_syms = {}
-        non_lang_syms_pattern = None
-
-    if bpe_model is not None:
-        import sentencepiece as spm
-        sp = spm.SentencePieceProcessor()
-        sp.load(bpe_model)
-    else:
-        sp = None
-
-    for sample in data:
-        assert 'txt' in sample
-        txt = sample['txt'].strip()
-        if non_lang_syms_pattern is not None:
-            parts = non_lang_syms_pattern.split(txt.upper())
-            parts = [w for w in parts if len(w.strip()) > 0]
-        else:
-            parts = [txt]
-
-        label = []
-        tokens = []
-        for part in parts:
-            if part in non_lang_syms:
-                tokens.append(part)
-            else:
-                if bpe_model is not None:
-                    tokens.extend(__tokenize_by_bpe_model(sp, part))
-                else:
-                    if split_with_space:
-                        part = part.split(" ")
-                    for ch in part:
-                        if ch == ' ':
-                            ch = "▁"
-                        tokens.append(ch)
-
-        for ch in tokens:
-            if ch in symbol_table:
-                label.append(symbol_table[ch])
-            elif '<unk>' in symbol_table:
-                label.append(symbol_table['<unk>'])
-
-        sample['tokens'] = tokens
-        sample['label'] = label
-        yield sample
-
-
-def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80):
-    """ Do spec augmentation
-        Inplace operation
-
-        Args:
-            data: Iterable[{key, feat, label}]
-            num_t_mask: number of time mask to apply
-            num_f_mask: number of freq mask to apply
-            max_t: max width of time mask
-            max_f: max width of freq mask
-            max_w: max width of time warp
-
-        Returns
-            Iterable[{key, feat, label}]
-    """
-    for sample in data:
-        assert 'feat' in sample
-        x = sample['feat']
-        assert isinstance(x, torch.Tensor)
-        y = x.clone().detach()
-        max_frames = y.size(0)
-        max_freq = y.size(1)
-        # time mask
-        for i in range(num_t_mask):
-            start = random.randint(0, max_frames - 1)
-            length = random.randint(1, max_t)
-            end = min(max_frames, start + length)
-            y[start:end, :] = 0
-        # freq mask
-        for i in range(num_f_mask):
-            start = random.randint(0, max_freq - 1)
-            length = random.randint(1, max_f)
-            end = min(max_freq, start + length)
-            y[:, start:end] = 0
-        sample['feat'] = y
-        yield sample
-
-
-def spec_sub(data, max_t=20, num_t_sub=3):
-    """ Do spec substitute
-        Inplace operation
-
-        Args:
-            data: Iterable[{key, feat, label}]
-            max_t: max width of time substitute
-            num_t_sub: number of time substitute to apply
-
-        Returns
-            Iterable[{key, feat, label}]
-    """
-    for sample in data:
-        assert 'feat' in sample
-        x = sample['feat']
-        assert isinstance(x, torch.Tensor)
-        y = x.clone().detach()
-        max_frames = y.size(0)
-        for i in range(num_t_sub):
-            start = random.randint(0, max_frames - 1)
-            length = random.randint(1, max_t)
-            end = min(max_frames, start + length)
-            # only substitute the earlier time chosen randomly for current time
-            pos = random.randint(0, start)
-            y[start:end, :] = x[start - pos:end - pos, :]
-        sample['feat'] = y
-        yield sample
-
-
-def spec_trim(data, max_t=20):
-    """ Trim tailing frames. Inplace operation.
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. 
- Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, 
- static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Positionwise feed forward layer definition."""
-
-import torch
-
-
-class PositionwiseFeedForward(torch.nn.Module):
-    """Positionwise feed forward layer.
-
-    Feed-forward layers are applied on each position of the sequence.
-    The output dim is the same as the input dim.
-
-    Args:
-        idim (int): Input dimension.
-        hidden_units (int): The number of hidden units.
-        dropout_rate (float): Dropout rate.
-        activation (torch.nn.Module): Activation function.
-    """
-
-    def __init__(self,
-                 idim: int,
-                 hidden_units: int,
-                 dropout_rate: float,
-                 activation: torch.nn.Module = torch.nn.ReLU(),
-                 adaptive_scale: bool = False,
-                 init_weights: bool = False
-                 ):
-        """Construct a PositionwiseFeedForward object."""
-        super(PositionwiseFeedForward, self).__init__()
-        self.idim = idim
-        self.hidden_units = hidden_units
-        self.w_1 = torch.nn.Linear(idim, hidden_units)
-        self.activation = activation
-        self.dropout = torch.nn.Dropout(dropout_rate)
-        self.w_2 = torch.nn.Linear(hidden_units, idim)
-        self.ada_scale = None
-        self.ada_bias = None
-        self.adaptive_scale = adaptive_scale
-        self.ada_scale = torch.nn.Parameter(
-            torch.ones([1, 1, idim]), requires_grad=adaptive_scale)
-        self.ada_bias = torch.nn.Parameter(
-            torch.zeros([1, 1, idim]), requires_grad=adaptive_scale)
-        if init_weights:
-            self.init_weights()
-
-    def init_weights(self):
-        ffn1_max = self.idim ** -0.5
-        ffn2_max = self.hidden_units ** -0.5
-        torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max)
-        torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max)
-        torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max)
-        torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max)
-
-    def forward(self, xs: torch.Tensor) -> torch.Tensor:
-        """Forward function.
-
-        Args:
-            xs: input tensor (B, L, D)
-        Returns:
-            output tensor, (B, L, D)
-        """
-        if self.adaptive_scale:
-            xs = self.ada_scale * xs + self.ada_bias
-        return self.w_2(self.dropout(self.activation(self.w_1(xs))))
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/subsampling.py
deleted file mode 100644
index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/squeezeformer/subsampling.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - 
    return padding * pad_value + input * (1 - padding)
-
-
-class PredictorBase(torch.nn.Module):
-
-    # NOTE(Mddct): We can use ABC abstract here, but
-    # keep this class simple enough for now
-    def __init__(self) -> None:
-        super().__init__()
-
-    def init_state(self,
-                   batch_size: int,
-                   device: torch.device,
-                   method: str = "zero") -> List[torch.Tensor]:
-        _, _, _ = batch_size, method, device
-        raise NotImplementedError("this is a base predictor")
-
-    def batch_to_cache(self,
-                       cache: List[torch.Tensor]) -> List[List[torch.Tensor]]:
-        _ = cache
-        raise NotImplementedError("this is a base predictor")
-
-    def cache_to_batch(self,
-                       cache: List[List[torch.Tensor]]) -> List[torch.Tensor]:
-        _ = cache
-        raise NotImplementedError("this is a base predictor")
-
-    def forward(
-        self,
-        input: torch.Tensor,
-        cache: Optional[List[torch.Tensor]] = None,
-    ):
-        _, _, = input, cache
-        raise NotImplementedError("this is a base predictor")
-
-    def forward_step(
-        self, input: torch.Tensor, padding: torch.Tensor,
-        cache: List[torch.Tensor]
-    ) -> Tuple[torch.Tensor, List[torch.Tensor]]:
-        _, _, _, = input, padding, cache
-        raise NotImplementedError("this is a base predictor")
-
-
-class RNNPredictor(PredictorBase):
-
-    def __init__(self,
-                 voca_size: int,
-                 embed_size: int,
-                 output_size: int,
-                 embed_dropout: float,
-                 hidden_size: int,
-                 num_layers: int,
-                 bias: bool = True,
-                 rnn_type: str = "lstm",
-                 dropout: float = 0.1) -> None:
-        assert check_argument_types()
-        super().__init__()
-        self.n_layers = num_layers
-        self.hidden_size = hidden_size
-        # disable rnn base out projection
-        self.embed = nn.Embedding(voca_size, embed_size)
-        self.dropout = nn.Dropout(embed_dropout)
-        # NOTE(Mddct): the RNN base from torch does not support layer norm;
-        # will add layer norm and prune value in cell and layer
-        # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py
-        self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size,
-                                              hidden_size=hidden_size,
-                                              num_layers=num_layers,
-                                              bias=bias,
-                                              batch_first=True,
-                                              dropout=dropout)
-        self.projection = nn.Linear(hidden_size, output_size)
-
-    def forward(
-        self,
-        input: torch.Tensor,
-        cache: Optional[List[torch.Tensor]] = None,
-    ) -> torch.Tensor:
-        """
-        Args:
-            input (torch.Tensor): [batch, max_time].
-            padding (torch.Tensor): [batch, max_time]
-            cache : rnn predictor cache[0] == state_m
-                    cache[1] == state_c
-        Returns:
-            output: [batch, max_time, output_size]
-        """
-
-        # NOTE(Mddct): we don't use pack input format
-        embed = self.embed(input)  # [batch, max_time, emb_size]
-        embed = self.dropout(embed)
-        states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None
-        if cache is None:
-            state = self.init_state(batch_size=input.size(0),
-                                    device=input.device)
-            states = (state[0], state[1])
-        else:
-            assert len(cache) == 2
-            states = (cache[0], cache[1])
-        out, (m, c) = self.rnn(embed, states)
-        out = self.projection(out)
-
-        # NOTE(Mddct): Although we don't use state in transducer
-        # training forward, we need to make it right for the padding value,
-        # so we create forward_step for inference and forward for training
-        _, _ = m, c
-        return out
-
-    def batch_to_cache(self,
-                       cache: List[torch.Tensor]) -> List[List[torch.Tensor]]:
-        """
-        Args:
-            cache: [state_m, state_c]
-                state_ms: [1*n_layers, bs, ...]
-                state_cs: [1*n_layers, bs, ...]
-        Returns:
-            new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...]
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 
+0,0 @@
-from typing import List
-
-import torch
-
-
-def basic_greedy_search(
-    model: torch.nn.Module,
-    encoder_out: torch.Tensor,
-    encoder_out_lens: torch.Tensor,
-    n_steps: int = 64,
-) -> List[List[int]]:
-    # fake padding
-    padding = torch.zeros(1, 1).to(encoder_out.device)
-    # sos
-    pred_input_step = torch.tensor([model.blank]).reshape(1, 1)
-    cache = model.predictor.init_state(1,
-                                       method="zero",
-                                       device=encoder_out.device)
-    new_cache: List[torch.Tensor] = []
-    t = 0
-    hyps = []
-    prev_out_nblk = True
-    pred_out_step = None
-    per_frame_max_noblk = n_steps
-    per_frame_noblk = 0
-    while t < encoder_out_lens:
-        encoder_out_step = encoder_out[:, t:t + 1, :]  # [1, 1, E]
-        if prev_out_nblk:
-            step_outs = model.predictor.forward_step(pred_input_step, padding,
-                                                     cache)  # [1, 1, P]
-            pred_out_step, new_cache = step_outs[0], step_outs[1]
-
-        joint_out_step = model.joint(encoder_out_step,
-                                     pred_out_step)  # [1,1,v]
-        joint_out_probs = joint_out_step.log_softmax(dim=-1)
-
-        joint_out_max = joint_out_probs.argmax(dim=-1).squeeze()  # []
-        if joint_out_max != model.blank:
-            hyps.append(joint_out_max.item())
-            prev_out_nblk = True
-            per_frame_noblk = per_frame_noblk + 1
-            pred_input_step = joint_out_max.reshape(1, 1)
-            # state_m, state_c = clstate_out_m, state_out_c
-            cache = new_cache
-
-        if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk:
-            if joint_out_max == model.blank:
-                prev_out_nblk = False
-            # TODO(Mddct): make t in chunk for streaming
-            # or t shouldn't be too long to predict non-blank
-            t = t + 1
-            per_frame_noblk = 0
-
-    return [hyps]
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/search/prefix_beam_search.py
deleted file mode 100644
index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/search/prefix_beam_search.py
+++ /dev/null
@@ -1,148 +0,0 @@
-from typing import List, Tuple
-
-import torch
-from wenet.utils.common import log_add
-
-
-class Sequence():
-
-    __slots__ = {'hyp', 'score', 'cache'}
-
-    def __init__(
-        self,
-        hyp: List[torch.Tensor],
-        score,
-        cache: List[torch.Tensor],
-    ):
-        self.hyp = hyp
-        self.score = score
-        self.cache = cache
-
-
-class PrefixBeamSearch():
-
-    def __init__(self, encoder, predictor, joint, ctc, blank):
-        self.encoder = encoder
-        self.predictor = predictor
-        self.joint = joint
-        self.ctc = ctc
-        self.blank = blank
-
-    def forward_decoder_one_step(
-        self, encoder_x: torch.Tensor, pre_t: torch.Tensor,
-        cache: List[torch.Tensor]
-    ) -> Tuple[torch.Tensor, List[torch.Tensor]]:
-        padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device)
-        pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1),
-                                                       padding, cache)
-        x = self.joint(encoder_x, pre_t)  # [beam, 1, 1, vocab]
-        x = x.log_softmax(dim=-1)
-        return x, new_cache
-
-    def prefix_beam_search(self,
-                           speech: torch.Tensor,
-                           speech_lengths: torch.Tensor,
-                           decoding_chunk_size: int = -1,
-                           beam_size: int = 5,
-                           num_decoding_left_chunks: int = -1,
-                           simulate_streaming: bool = False,
-                           ctc_weight: float = 0.3,
-                           transducer_weight: float = 0.7):
-        """prefix beam search
-           also see wenet.transducer.transducer.beam_search
-        """
-        assert speech.shape[0] == speech_lengths.shape[0]
-        assert decoding_chunk_size != 0
-        device = speech.device
-        batch_size =
speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
-        dropout_rate (float): Dropout rate.
-        normalize_before (bool):
-            True: use layer_norm before each sub-block.
-            False: use layer_norm after each sub-block.
-        concat_after (bool): Whether to concat attention layer's input
-            and output.
-            True: x -> x + linear(concat(x, att(x)))
-            False: x -> x + att(x)
-    """
-    def __init__(
-        self,
-        size: int,
-        self_attn: nn.Module,
-        src_attn: nn.Module,
-        feed_forward: nn.Module,
-        dropout_rate: float,
-        normalize_before: bool = True,
-        concat_after: bool = False,
-    ):
-        """Construct a DecoderLayer object."""
-        super().__init__()
-        self.size = size
-        self.self_attn = self_attn
-        self.src_attn = src_attn
-        self.feed_forward = feed_forward
-        self.norm1 = nn.LayerNorm(size, eps=1e-5)
-        self.norm2 = nn.LayerNorm(size, eps=1e-5)
-        self.norm3 = nn.LayerNorm(size, eps=1e-5)
-        self.dropout = nn.Dropout(dropout_rate)
-        self.normalize_before = normalize_before
-        self.concat_after = concat_after
-        if self.concat_after:
-            self.concat_linear1 = nn.Linear(size + size, size)
-            self.concat_linear2 = nn.Linear(size + size, size)
-        else:
-            self.concat_linear1 = nn.Identity()
-            self.concat_linear2 = nn.Identity()
-
-    def forward(
-        self,
-        tgt: torch.Tensor,
-        tgt_mask: torch.Tensor,
-        memory: torch.Tensor,
-        memory_mask: torch.Tensor,
-        cache: Optional[torch.Tensor] = None
-    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        """Compute decoded features.
-
-        Args:
-            tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size).
-            tgt_mask (torch.Tensor): Mask for input tensor
-                (#batch, maxlen_out).
-            memory (torch.Tensor): Encoded memory
-                (#batch, maxlen_in, size).
-            memory_mask (torch.Tensor): Encoded memory mask
-                (#batch, maxlen_in).
-            cache (torch.Tensor): cached tensors.
-                (#batch, maxlen_out - 1, size).
-
-        Returns:
-            torch.Tensor: Output tensor (#batch, maxlen_out, size).
-            torch.Tensor: Mask for output tensor (#batch, maxlen_out).
-            torch.Tensor: Encoded memory (#batch, maxlen_in, size).
-            torch.Tensor: Encoded memory mask (#batch, maxlen_in).
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/README.md deleted file mode 100644 index e7d5ca2567fd21ea84f8a32b5ac31f1451b5baf0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/README.md +++ /dev/null @@ -1,180 +0,0 @@ -# Performance Record - -## Conformer Result - -* Feature info: using fbank feature, dither, cmvn, online speed perturb -* Training info: lr 0.002, batch size 18, 4 gpu, acc_grad 4, 240 epochs, dither 0.1 -* Decoding info: ctc_weight 0.5, average_num 20 -* Git hash: 919f07c4887ac500168ba84b39b535fd8e58918a - -| decoding mode | CER | -|---------------------------|-------| -| attention decoder | 5.18 | -| ctc greedy search | 4.94 | -| ctc prefix beam search | 4.94 | -| attention rescoring | 4.61 | -| LM + attention rescoring | 4.36 | - -## U2++ Conformer Result - -* Feature info: using fbank feature, dither=1.0, cmvn, oneline speed perturb -* Training info: lr 0.001, batch size 16, 8 gpu, acc_grad 1, 360 epochs -* Decoding info: ctc_weight 0.3, reverse_weight 0.5 average_num 30, lm_scale 0.7, decoder_scale 0.1, r_decoder_scale 0.7 -* Git hash: 5a1342312668e7a5abb83aed1e53256819cebf95 - -| decoding mode/chunk size | full | 16 | -|---------------------------|-------|-------| -| ctc greedy search | 5.19 | 5.81 | -| ctc prefix beam search | 5.17 | 5.81 | -| attention rescoring | 4.63 | 5.05 | -| LM + attention rescoring | 4.40 | 4.75 | -| HLG(k2 LM) | 4.81 | 5.27 | -| HLG(k2 LM) + attention rescoring | 4.32 | 4.70 | - -## Unified Conformer Result - -* Feature info: using fbank feature, dither=0, cmvn, oneline speed perturb -* Training info: lr 0.001, batch size 16, 8 gpu, acc_grad 1, 180 epochs, dither 0.0 -* Decoding info: ctc_weight 0.5, average_num 20 -* Git hash: 919f07c4887ac500168ba84b39b535fd8e58918a - -| decoding mode/chunk size | full | 16 | 8 | 4 | -|---------------------------|-------|-------|-------|-------| -| attention decoder | 5.40 | 5.60 | 5.74 | 5.86 | -| ctc greedy search | 5.56 | 6.29 | 6.68 | 7.10 
| -| ctc prefix beam search | 5.57 | 6.30 | 6.67 | 7.10 | -| attention rescoring | 5.05 | 5.45 | 5.69 | 5.91 | -| LM + attention rescoring | 4.73 | 5.08 | 5.22 | 5.38 | - -## U2++ Transformer Result - -* Feature info: using fbank feature, dither, cmvn, online speed perturb. -* Training info: lr 0.001, batch size 26, 8 gpu, acc_grad 1, 360 epochs, dither 0.1 -* Decoding info: ctc_weight 0.2, reverse_weight 0.5, average_num 30 -* Git hash: 65270043fc8c2476d1ab95e7c39f730017a670e0 - -| decoding mode/chunk size | full | 16 | -|---------------------------|-------|-------| -| ctc greedy search | 6.05 | 6.92 | -| ctc prefix beam search | 6.05 | 6.90 | -| attention rescoring | 5.11 | 5.63 | -| LM + attention rescoring | 4.82 | 5.24 | - -## Transformer Result - -* Feature info: using fbank feature, dither, with cmvn, online speed perturb. -* Training info: lr 0.002, batch size 26, 4 gpu, acc_grad 4, 240 epochs, dither 0.1 -* Decoding info: ctc_weight 0.5, average_num 20 -* Git hash: 919f07c4887ac500168ba84b39b535fd8e58918a - -| decoding mode | CER | -|---------------------------|-------| -| attention decoder | 5.69 | -| ctc greedy search | 5.92 | -| ctc prefix beam search | 5.91 | -| attention rescoring | 5.30 | -| LM + attention rescoring | 5.04 | - -## Unified Transformer Result - -* Feature info: using fbank feature, dither=0, with cmvn, online speed perturb. -* Training info: lr 0.002, batch size 16, 4 gpu, acc_grad 1, 240 epochs, dither 0.1 -* Decoding info: ctc_weight 0.5, average_num 20 -* Git hash: 919f07c4887ac500168ba84b39b535fd8e58918a - -| decoding mode/chunk size | full | 16 | 8 | 4 | -|---------------------------|-------|-------|-------|-------| -| attention decoder | 6.04 | 6.35 | 6.45 | 6.70 | -| ctc greedy search | 6.28 | 6.99 | 7.39 | 7.89 | -| ctc prefix beam search | 6.28 | 6.98 | 7.40 | 7.89 | -| attention rescoring | 5.52 | 6.05 | 6.28 | 6.62 | -| LM + attention rescoring | 5.11 | 5.59 | 5.86 | 6.17 | - -## AMP Training Transformer Result - -* Feature info: using fbank feature, dither, cmvn, online speed perturb -* Training info: lr 0.002, batch size, 4 gpus, acc_grad 4, 240 epochs, dither 0.1, warm up steps 25000 -* Decoding info: ctc_weight 0.5, average_num 20 -* Git hash: 1bb4e5a269c535340fae5b0739482fa47733d2c1 - -| decoding mode | CER | -|------------------------|------| -| attention decoder | 5.73 | -| ctc greedy search | 5.92 | -| ctc prefix beam search | 5.92 | -| attention rescoring | 5.31 | - - -## Muilti-machines Training Conformer Result - -* Feature info: using fbank feature, dither, cmvn, online speed perturb -* Training info: lr 0.004, batch size 16, 2 machines, 8\*2=16 gpus, acc_grad 4, 240 epochs, dither 0.1, warm up steps 10000 -* Decoding info: ctc_weight 0.5, average_num 20 -* Git hash: f6b1409023440da1998d31abbcc3826dd40aaf35 - -| decoding mode | CER | -|------------------------|------| -| attention decoder | 4.90 | -| ctc greedy search | 5.07 | -| ctc prefix beam search | 5.06 | -| attention rescoring | 4.65 | - - -## Conformer with/without Position Encoding Result - -* Feature info: using fbank feature, dither, cmvn, online speed perturb -* Training info: lr 0.002, batch size 16, 8 gpu, acc_grad 4, 240 epochs, dither 0.1 -* Decoding info: ctc_weight 0.5, average_num 20 - -| decoding mode | with PE | without PE | -|------------------------|---------|------------| -| attention decoder | 5.18 | 5.73 | -| ctc greedy search | 4.94 | 4.97 | -| ctc prefix beam search | 4.94 | 4.97 | -| attention rescoring | 4.61 | 4.69 | - - -## Efficient Conformer v1 Result - 
-* Feature info: - * using fbank feature, cmvn, speed perturb, dither -* Training info: - * train_u2++_efficonformer_v1.yaml - * 8 gpu, batch size 16, acc_grad 1, 200 epochs - * lr 0.001, warmup_steps 25000 -* Model info: - * Model Params: 48,488,347 - * Downsample rate: 1/4 (conv2d) * 1/2 (efficonformer block) - * encoder_dim 256, output_size 256, head 8, linear_units 2048 - * num_blocks 12, cnn_module_kernel 15, group_size 3 -* Decoding info: - * ctc_weight 0.5, reverse_weight 0.3, average_num 20 - -| decoding mode | full | 18 | 16 | -|------------------------|------|------|------| -| attention decoder | 4.99 | 5.13 | 5.16 | -| ctc prefix beam search | 4.98 | 5.23 | 5.23 | -| attention rescoring | 4.64 | 4.86 | 4.85 | - - -## Efficient Conformer v2 Result - -* Feature info: - * using fbank feature, cmvn, speed perturb, dither -* Training info: - * train_u2++_efficonformer_v2.yaml - * 8 gpu, batch size 16, acc_grad 1, 200 epochs - * lr 0.001, warmup_steps 25000 -* Model info: - * Model Params: 49,354,651 - * Downsample rate: 1/2 (conv2d2) * 1/4 (efficonformer block) - * encoder_dim 256, output_size 256, head 8, linear_units 2048 - * num_blocks 12, cnn_module_kernel 15, group_size 3 -* Decoding info: - * ctc_weight 0.5, reverse_weight 0.3, average_num 20 - -| decoding mode | full | 18 | 16 | -|------------------------|------|------|------| -| attention decoder | 4.87 | 5.03 | 5.07 | -| ctc prefix beam search | 4.97 | 5.18 | 5.20 | -| attention rescoring | 4.56 | 4.75 | 4.77 | diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/UIO_RESULT.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/UIO_RESULT.md deleted file mode 100644 index b18775c5cd73437bcb3d4840eaf6778103b4dbb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/UIO_RESULT.md +++ /dev/null @@ -1,9 +0,0 @@ -# Benchmark on Conformer - -| IO | CER | -|--------------|-------| -| Old | 4.61 | -| UIO(Raw) | 4.63 | -| UIO(Shards) | 4.67 | - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_conformer.yaml deleted file mode 100644 index b8ce511cdaad0f03be4a82708d70290ec9e37c3d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 40960 - 
min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 240 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_conformer_no_pos.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_conformer_no_pos.yaml deleted file mode 100644 index a2d5d03f570119d4c54a26917552f92939c83ac1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_conformer_no_pos.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'no_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 240 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_transformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_transformer.yaml deleted file mode 100644 index b7d7eee83ace095b4c7a09e61fd63776cb50b2d6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_transformer.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# network architecture -# encoder related -encoder: transformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # 
the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder architecture type - normalize_before: true - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 26 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 240 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_conformer.yaml deleted file mode 100644 index b4587bce33be458b15490dccbf2f98aaa798959c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_conformer.yaml +++ /dev/null @@ -1,90 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 8 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - causal: true - use_dynamic_chunk: true - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - use_dynamic_left_chunk: false - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - spec_sub: true - spec_sub_conf: - num_t_sub: 3 - max_t: 30 - spec_trim: false - spec_trim_conf: - max_t: 50 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - 
sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 360 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_efficonformer_v1.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_efficonformer_v1.yaml deleted file mode 100644 index 3d0de82dbf23e2c3abaa26eda5178c8ba1452861..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_efficonformer_v1.yaml +++ /dev/null @@ -1,96 +0,0 @@ -# network architecture -# encoder related -encoder: efficientConformer -encoder_conf: - activation_type: 'swish' - attention_heads: 8 - causal: false - cnn_module_kernel: 15 - cnn_module_norm: 'layer_norm' - dropout_rate: 0.1 - input_layer: conv2d - linear_units: 2048 - normalize_before: true - num_blocks: 12 - output_size: 256 - pos_enc_layer_type: 'rel_pos' - attention_dropout_rate: 0.1 - positional_dropout_rate: 0.1 - use_cnn_module: true - use_dynamic_chunk: true - use_dynamic_left_chunk: false - efficient_conf: - stride_layer_idx: [3] # layer id with StrideConv - stride: [2] # stride size of each StrideConv - group_layer_idx: [0, 1, 2, 3] # layer id with GroupedAttention - group_size: 3 # group size of every GroupedAttention layer - stride_kernel: true # true: recompute cnn kernels with stride - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 8 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -# dataset related -dataset_conf: - batch_conf: - batch_size: 16 - batch_type: 'static' - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - spec_sub: true - spec_sub_conf: - num_t_sub: 3 - max_t: 30 - spec_trim: false - spec_trim_conf: - max_t: 50 - speed_perturb: true - -grad_clip: 5 -accum_grad: 1 -max_epoch: 200 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_efficonformer_v1_stream.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_efficonformer_v1_stream.yaml deleted file mode 100644 index 3b5a99a86276971592e6a35a26557b10fb561cdc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_efficonformer_v1_stream.yaml +++ /dev/null @@ -1,96 +0,0 @@ -# network architecture -# encoder related -encoder: efficientConformer -encoder_conf: - activation_type: 'swish' - attention_heads: 8 - causal: true - 
cnn_module_kernel: 15 - cnn_module_norm: 'layer_norm' - dropout_rate: 0.1 - input_layer: conv2d - linear_units: 2048 - normalize_before: true - num_blocks: 12 - output_size: 256 - pos_enc_layer_type: 'rel_pos' - attention_dropout_rate: 0.1 - positional_dropout_rate: 0.1 - use_cnn_module: true - use_dynamic_chunk: true - use_dynamic_left_chunk: false - efficient_conf: - stride_layer_idx: [3] # layer id with StrideConv - stride: [2] # stride size of each StrideConv - group_layer_idx: [0, 1, 2, 3] # layer id with GroupedAttention - group_size: 3 # group size of every GroupedAttention layer - stride_kernel: true # true: recompute cnn kernels with stride - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 8 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -# dataset related -dataset_conf: - batch_conf: - batch_size: 16 - batch_type: 'static' - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - spec_sub: true - spec_sub_conf: - num_t_sub: 3 - max_t: 30 - spec_trim: false - spec_trim_conf: - max_t: 50 - speed_perturb: true - -grad_clip: 5 -accum_grad: 1 -max_epoch: 200 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_efficonformer_v2.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_efficonformer_v2.yaml deleted file mode 100644 index c23e1b64da5304e19d8339bc94d10b3cf80b36a8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_efficonformer_v2.yaml +++ /dev/null @@ -1,98 +0,0 @@ -# network architecture -# encoder related -encoder: efficientConformer -encoder_conf: - activation_type: 'swish' - attention_heads: 8 - causal: false - cnn_module_kernel: 15 - cnn_module_norm: 'layer_norm' - dropout_rate: 0.1 - input_layer: conv2d2 - linear_units: 2048 - normalize_before: true - num_blocks: 12 - output_size: 256 - pos_enc_layer_type: 'rel_pos' - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - use_cnn_module: true - use_dynamic_chunk: true - use_dynamic_left_chunk: false - efficient_conf: - stride_layer_idx: [3, 7] # layer id with StrideConv - stride: [2, 2] # stride size of each StrideConv - group_layer_idx: [3, 7] # layer id with GroupedAttention - group_size: 3 # group size of every GroupedAttention layer - stride_kernel: false # true: recompute cnn kernels with stride - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 8 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 3 - positional_dropout_rate: 0.1 - r_num_blocks: 3 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing 
option - length_normalized_loss: false - reverse_weight: 0.3 - -# dataset related -dataset_conf: - batch_conf: - batch_size: 16 - batch_type: 'static' - fbank_conf: - dither: 1.0 - frame_length: 25 - frame_shift: 10 - num_mel_bins: 80 - filter_conf: - max_length: 40960 - min_length: 0 - max_output_input_ratio: 0.1 - min_output_input_ratio: 0.005 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - spec_sub: true - spec_sub_conf: - num_t_sub: 3 - max_t: 30 - spec_trim: false - spec_trim_conf: - max_t: 50 - speed_perturb: true - -grad_clip: 5 -accum_grad: 1 -max_epoch: 200 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_transformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_transformer.yaml deleted file mode 100644 index 44b4d4be7f70b7921e2ce67b3e4b8a80f99e9048..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_u2++_transformer.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# network architecture -# encoder related -encoder: transformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder architecture type - normalize_before: true - use_dynamic_chunk: true - use_dynamic_left_chunk: false - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - spec_trim: false - spec_trim_conf: - max_t: 70 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 26 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 360 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_unified_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_unified_conformer.yaml deleted file mode 100644 index 978d3d91c4d9eef417c60a068647bb5d7db88fe0..0000000000000000000000000000000000000000 --- 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_unified_conformer.yaml +++ /dev/null @@ -1,81 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - causal: true - use_dynamic_chunk: true - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - use_dynamic_left_chunk: false - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 180 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_unified_transformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_unified_transformer.yaml deleted file mode 100644 index 9d7a386872ddfb0859ff9e09c81e185e1a60d7a4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/conf/train_unified_transformer.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# network architecture -# encoder related -encoder: transformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder architecture type - normalize_before: true - use_dynamic_chunk: true - use_dynamic_left_chunk: false - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - 
fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - - -grad_clip: 5 -accum_grad: 1 -max_epoch: 180 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/local/aishell_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/local/aishell_data_prep.sh deleted file mode 100644 index fb4d5fb0adefb9e3e3ebeaa5ccb1a92562eb77c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/local/aishell_data_prep.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Xingyu Na -# Apache 2.0 - -. ./path.sh || exit 1; - -if [ $# != 2 ]; then - echo "Usage: $0 " - echo " $0 /export/a05/xna/data/data_aishell/wav /export/a05/xna/data/data_aishell/transcript" - exit 1; -fi - -aishell_audio_dir=$1 -aishell_text=$2/aishell_transcript_v0.8.txt - -train_dir=data/local/train -dev_dir=data/local/dev -test_dir=data/local/test -tmp_dir=data/local/tmp - -mkdir -p $train_dir -mkdir -p $dev_dir -mkdir -p $test_dir -mkdir -p $tmp_dir - -# data directory check -if [ ! -d $aishell_audio_dir ] || [ ! -f $aishell_text ]; then - echo "Error: $0 requires two directory arguments" - exit 1; -fi - -# find wav audio file for train, dev and test resp. -find $aishell_audio_dir -iname "*.wav" > $tmp_dir/wav.flist -n=`cat $tmp_dir/wav.flist | wc -l` -[ $n -ne 141925 ] && \ - echo Warning: expected 141925 data data files, found $n - -grep -i "wav/train" $tmp_dir/wav.flist > $train_dir/wav.flist || exit 1; -grep -i "wav/dev" $tmp_dir/wav.flist > $dev_dir/wav.flist || exit 1; -grep -i "wav/test" $tmp_dir/wav.flist > $test_dir/wav.flist || exit 1; - -rm -r $tmp_dir - -# Transcriptions preparation -for dir in $train_dir $dev_dir $test_dir; do - echo Preparing $dir transcriptions - sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF}' > $dir/utt.list - paste -d' ' $dir/utt.list $dir/wav.flist > $dir/wav.scp_all - tools/filter_scp.pl -f 1 $dir/utt.list $aishell_text > $dir/transcripts.txt - awk '{print $1}' $dir/transcripts.txt > $dir/utt.list - tools/filter_scp.pl -f 1 $dir/utt.list $dir/wav.scp_all | sort -u > $dir/wav.scp - sort -u $dir/transcripts.txt > $dir/text -done - -mkdir -p data/train data/dev data/test - -for f in wav.scp text; do - cp $train_dir/$f data/train/$f || exit 1; - cp $dev_dir/$f data/dev/$f || exit 1; - cp $test_dir/$f data/test/$f || exit 1; -done - -echo "$0: AISHELL data preparation succeeded" -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/local/aishell_train_lms.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/local/aishell_train_lms.sh deleted file mode 100644 index 30ffb7973b3ddec4ef4c0f09c8184837cad768d6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/local/aishell_train_lms.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - - -# To be run from one directory above this script. -. 
./path.sh
-
-text=data/local/lm/text
-lexicon=data/local/dict/lexicon.txt
-
-for f in "$text" "$lexicon"; do
-  [ ! -f $x ] && echo "$0: No such file $f" && exit 1;
-done
-
-# Check SRILM tools
-if ! which ngram-count > /dev/null; then
-  echo "srilm tools are not found, please download it and install it from: "
-  echo "http://www.speech.sri.com/projects/srilm/download.html"
-  echo "Then add the tools to your PATH"
-  exit 1
-fi
-
-# This script takes no arguments. It assumes you have already run
-# aishell_data_prep.sh.
-# It takes as input the files
-# data/local/lm/text
-# data/local/dict/lexicon.txt
-dir=data/local/lm
-mkdir -p $dir
-
-
-cleantext=$dir/text.no_oov
-
-cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
-  {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<SPOKEN_NOISE> ");} } printf("\n");}' \
-  > $cleantext || exit 1;
-
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
-  sort -nr > $dir/word.counts || exit 1;
-
-# Get counts from acoustic training transcripts, and add one-count
-# for each word in the lexicon (but not silence, we don't want it
-# in the LM-- we'll add it optionally later).
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
-  cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
-  sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
-
-cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist
-
-heldout_sent=10000 # Don't change this if you want result to be comparable with
-  # kaldi_lm results
-mkdir -p $dir
-cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | head -$heldout_sent > $dir/heldout
-cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | tail -n +$heldout_sent > $dir/train
-
-ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
-  -map-unk "<UNK>" -kndiscount -interpolate -lm $dir/lm.arpa
-ngram -lm $dir/lm.arpa -ppl $dir/heldout
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/local/download_and_untar.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/local/download_and_untar.sh
deleted file mode 100644
index 58a278241d75caeba25ba4b17d186912d0d724ec..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/local/download_and_untar.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-
-# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
-# 2017 Xingyu Na
-# Apache 2.0
-
-remove_archive=false
-
-if [ "$1" == --remove-archive ]; then
-  remove_archive=true
-  shift
-fi
-
-if [ $# -ne 3 ]; then
-  echo "Usage: $0 [--remove-archive] <data-base> <url-base> <corpus-part>"
-  echo "e.g.: $0 /export/a05/xna/data www.openslr.org/resources/33 data_aishell"
-  echo "With --remove-archive it will remove the archive after successfully un-tarring it."
-  echo "<corpus-part> can be one of: data_aishell, resource_aishell."
-fi
-
-data=$1
-url=$2
-part=$3
-
-if [ ! -d "$data" ]; then
-  echo "$0: no such directory $data"
-  exit 1;
-fi
-
-part_ok=false
-list="data_aishell resource_aishell"
-for x in $list; do
-  if [ "$part" == $x ]; then part_ok=true; fi
-done
-if ! $part_ok; then
-  echo "$0: expected <corpus-part> to be one of $list, but got '$part'"
-  exit 1;
-fi
-
-if [ -z "$url" ]; then
-  echo "$0: empty URL base."
-  exit 1;
-fi
-
-if [ -f $data/$part/.complete ]; then
-  echo "$0: data part $part was already successfully extracted, nothing to do."
-  exit 0;
-fi
-
-# sizes of the archive files in bytes.
-sizes="15582913665 1246920" - -if [ -f $data/$part.tgz ]; then - size=$(/bin/ls -l $data/$part.tgz | awk '{print $5}') - size_ok=false - for s in $sizes; do if [ $s == $size ]; then size_ok=true; fi; done - if ! $size_ok; then - echo "$0: removing existing file $data/$part.tgz because its size in bytes $size" - echo "does not equal the size of one of the archives." - rm $data/$part.tgz - else - echo "$data/$part.tgz exists and appears to be complete." - fi -fi - -if [ ! -f $data/$part.tgz ]; then - if ! which wget >/dev/null; then - echo "$0: wget is not installed." - exit 1; - fi - full_url=$url/$part.tgz - echo "$0: downloading data from $full_url. This may take some time, please be patient." - - cd $data - if ! wget --no-check-certificate $full_url; then - echo "$0: error executing wget $full_url" - exit 1; - fi -fi - -cd $data - -if ! tar -xvzf $part.tgz; then - echo "$0: error un-tarring archive $data/$part.tgz" - exit 1; -fi - -touch $data/$part/.complete - -if [ $part == "data_aishell" ]; then - cd $data/$part/wav - for wav in ./*.tar.gz; do - echo "Extracting wav from $wav" - tar -zxf $wav && rm $wav - done -fi - -echo "$0: Successfully downloaded and un-tarred $data/$part.tgz" - -if $remove_archive; then - echo "$0: removing $data/$part.tgz file since --remove-archive option was supplied." - rm $data/$part.tgz -fi - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/path.sh deleted file mode 100644 index ac1ca08baf5d4540b92ed239b8aa7cd613064a8c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/run.sh deleted file mode 100644 index 7a4c4e1d0e6326371774ae38d90948f9cfdd5927..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/run.sh +++ /dev/null @@ -1,287 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl -# communication. More details can be found in -# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html -# export NCCL_SOCKET_IFNAME=ens4f1 -export NCCL_DEBUG=INFO -stage=0 # start from 0 if you need to start from data preparation -stop_stage=5 - -# The num of machines(nodes) for multi-machine training, 1 is for one machine. -# NFS is required if num_nodes > 1. -num_nodes=1 - -# The rank of each node or machine, which ranges from 0 to `num_nodes - 1`. -# You should set the node_rank=0 on the first machine, set the node_rank=1 -# on the second machine, and so on. 
-node_rank=0
-# The aishell dataset location, please change this to your own path
-# make sure of using absolute path. DO-NOT-USE relatvie path!
-data=/export/data/asr-data/OpenSLR/33/
-data_url=www.openslr.org/resources/33
-
-nj=16
-dict=data/dict/lang_char.txt
-
-# data_type can be `raw` or `shard`. Typically, raw is used for small dataset,
-# `shard` is used for large dataset which is over 1k hours, and `shard` is
-# faster on reading data and training.
-data_type=raw
-num_utts_per_shard=1000
-
-train_set=train
-# Optional train_config
-# 1. conf/train_transformer.yaml: Standard transformer
-# 2. conf/train_conformer.yaml: Standard conformer
-# 3. conf/train_unified_conformer.yaml: Unified dynamic chunk causal conformer
-# 4. conf/train_unified_transformer.yaml: Unified dynamic chunk transformer
-# 5. conf/train_u2++_conformer.yaml: U2++ conformer
-# 6. conf/train_u2++_transformer.yaml: U2++ transformer
-train_config=conf/train_conformer.yaml
-cmvn=true
-dir=exp/conformer
-checkpoint=
-
-# use average_checkpoint will get better result
-average_checkpoint=true
-decode_checkpoint=$dir/final.pt
-average_num=30
-decode_modes="ctc_greedy_search ctc_prefix_beam_search attention attention_rescoring"
-
-. tools/parse_options.sh || exit 1;
-
-if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
-  echo "stage -1: Data Download"
-  local/download_and_untar.sh ${data} ${data_url} data_aishell
-  local/download_and_untar.sh ${data} ${data_url} resource_aishell
-fi
-
-if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
-  # Data preparation
-  local/aishell_data_prep.sh ${data}/data_aishell/wav ${data}/data_aishell/transcript
-fi
-
-
-if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-  # remove the space between the text labels for Mandarin dataset
-  for x in train dev test; do
-    cp data/${x}/text data/${x}/text.org
-    paste -d " " <(cut -f 1 -d" " data/${x}/text.org) \
-      <(cut -f 2- -d" " data/${x}/text.org | tr -d " ") \
-      > data/${x}/text
-    rm data/${x}/text.org
-  done
-
-  tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \
-    --in_scp data/${train_set}/wav.scp \
-    --out_cmvn data/$train_set/global_cmvn
-fi
-
-if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-  echo "Make a dictionary"
-  mkdir -p $(dirname $dict)
-  echo "<blank> 0" > ${dict} # 0 is for "blank" in CTC
-  echo "<unk> 1" >> ${dict} # <unk> must be 1
-  tools/text2token.py -s 1 -n 1 data/train/text | cut -f 2- -d" " \
-    | tr " " "\n" | sort | uniq | grep -a -v -e '^\s*$' | \
-    awk '{print $0 " " NR+1}' >> ${dict}
-  num_token=$(cat $dict | wc -l)
-  echo "<sos/eos> $num_token" >> $dict
-fi
-
-if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-  echo "Prepare data, prepare required format"
-  for x in dev test ${train_set}; do
-    if [ $data_type == "shard" ]; then
-      tools/make_shard_list.py --num_utts_per_shard $num_utts_per_shard \
-        --num_threads 16 data/$x/wav.scp data/$x/text \
-        $(realpath data/$x/shards) data/$x/data.list
-    else
-      tools/make_raw_list.py data/$x/wav.scp data/$x/text \
-        data/$x/data.list
-    fi
-  done
-fi
-
-if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
-  mkdir -p $dir
-  # You have to rm `INIT_FILE` manually when you resume or restart a
-  # multi-machine training.
- INIT_FILE=$dir/ddp_init - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp data/${train_set}/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - - # train.py rewrite $train_config to $dir/train.yaml with model input - # and output dimension, and $dir/train.yaml will be used for inference - # and export. - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. - rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --train_data data/$train_set/data.list \ - --cv_data data/dev/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 1 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Please specify decoding_chunk_size for unified streaming and - # non-streaming model. The default value is -1, which is full chunk - # for non-streaming inference. - decoding_chunk_size= - ctc_weight=0.5 - reverse_weight=0.0 - for mode in ${decode_modes}; do - { - test_dir=$dir/test_${mode} - mkdir -p $test_dir - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data data/test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --result_file $test_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - python tools/compute-wer.py --char=1 --v=1 \ - data/test/text $test_dir/text > $test_dir/wer - } & - done - wait -fi - - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip \ - --output_quant_file $dir/final_quant.zip -fi - -# Optionally, you can add LM and test it with runtime. 
-if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then - # 7.1 Prepare dict - unit_file=$dict - mkdir -p data/local/dict - cp $unit_file data/local/dict/units.txt - tools/fst/prepare_dict.py $unit_file ${data}/resource_aishell/lexicon.txt \ - data/local/dict/lexicon.txt - # 7.2 Train lm - lm=data/local/lm - mkdir -p $lm - tools/filter_scp.pl data/train/text \ - $data/data_aishell/transcript/aishell_transcript_v0.8.txt > $lm/text - local/aishell_train_lms.sh - # 7.3 Build decoding TLG - tools/fst/compile_lexicon_token_fst.sh \ - data/local/dict data/local/tmp data/local/lang - tools/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1; - # 7.4 Decoding with runtime - chunk_size=-1 - ./tools/decode.sh --nj 16 \ - --beam 15.0 --lattice_beam 7.5 --max_active 7000 \ - --blank_skip_thresh 0.98 --ctc_weight 0.5 --rescoring_weight 1.0 \ - --chunk_size $chunk_size \ - --fst_path data/lang_test/TLG.fst \ - --dict_path data/lang_test/words.txt \ - data/test/wav.scp data/test/text $dir/final.zip \ - data/lang_test/units.txt $dir/lm_with_runtime - # Please see $dir/lm_with_runtime for wer -fi - -# Optionally, you can decode with k2 hlg -if [ ${stage} -le 8 ] && [ ${stop_stage} -ge 8 ]; then - if [ ! -f data/local/lm/lm.arpa ]; then - echo "Please run prepare dict and train lm in Stage 7" || exit 1; - fi - - # 8.1 Build decoding HLG - required="data/local/hlg/HLG.pt data/local/hlg/words.txt" - for f in $required; do - if [ ! -f $f ]; then - tools/k2/make_hlg.sh data/local/dict/ data/local/lm/ data/local/hlg - break - fi - done - - # 8.2 Decode using HLG - decoding_chunk_size= - lm_scale=0.7 - decoder_scale=0.1 - r_decoder_scale=0.7 - for mode in hlg_onebest hlg_rescore; do - { - test_dir=$dir/test_${mode} - mkdir -p $test_dir - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data data/test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 16 \ - --penalty 0.0 \ - --dict $dict \ - --word data/local/hlg/words.txt \ - --hlg data/local/hlg/HLG.pt \ - --lm_scale $lm_scale \ - --decoder_scale $decoder_scale \ - --r_decoder_scale $r_decoder_scale \ - --result_file $test_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - python tools/compute-wer.py --char=1 --v=1 \ - data/test/text $test_dir/text > $test_dir/wer - } - done -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. 
tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. ./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. 
- elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - 
pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = 
{'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. 
-has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! $skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = 
result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. - format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - 
default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
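The deleted compute_cmvn_stats.py above writes only raw accumulators (mean_stat, var_stat, frame_num) to the global_cmvn JSON. A hedged sketch of how such accumulators are conventionally turned into per-dimension mean / inverse-std normalizers follows; the variance floor and the function name are assumptions, not part of the deleted script.

```python
import json
import math

def load_global_cmvn(path, floor=1e-20):
    """Turn accumulated {mean_stat, var_stat, frame_num} into per-dim (mean, istd)."""
    with open(path, 'r') as f:
        stats = json.load(f)
    n = stats['frame_num']
    mean = [m / n for m in stats['mean_stat']]
    # variance = E[x^2] - E[x]^2, floored so the inverse std stays finite
    var = [max(v / n - mu * mu, floor) for v, mu in zip(stats['var_stat'], mean)]
    istd = [1.0 / math.sqrt(v) for v in var]
    return mean, istd

# a feature frame x is then normalized dimension-wise as (x - mean) * istd
```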
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: 
Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
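fix_data_dir.sh above repeatedly pipes its tables through tools/filter_scp.pl to keep only the surviving utterance ids. A hedged Python restatement of that filter, with the 1-based field index and the --exclude behaviour described in the deleted Perl, is sketched below; names are illustrative.

```python
import sys

def filter_scp(id_list_path, scp_lines, field=1, exclude=False):
    """Print scp lines whose `field`-th token (1-based) is / is not in the id list."""
    keep = set()
    with open(id_list_path, encoding='utf8') as f:
        for line in f:
            parts = line.split()
            if parts:
                keep.add(parts[0])        # only the first field of the id list matters
    for line in scp_lines:
        parts = line.split()
        hit = len(parts) >= field and parts[field - 1] in keep
        if hit != exclude:                # matches normally, non-matches with exclude=True
            sys.stdout.write(line)

# filter_scp('utts.txt', open('wav.scp', encoding='utf8')) keeps only surviving utterances
```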
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
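The rule implemented by the deleted add_lex_disambig.pl is: a pronunciation keeps its phone sequence unchanged only if that sequence is unique and is not a proper prefix of another entry; otherwise it is suffixed with #1, #2, ... per repeated sequence. A hedged Python sketch of that core rule follows; the pron-prob/sil-prob fields, the empty-pronunciation case, and --first-allowed-disambig are ignored, and names are illustrative.

```python
from collections import Counter

def add_disambig(lexicon):
    """lexicon: list of (word, [phones]); returns entries with '#N' appended when needed."""
    counts = Counter(tuple(phones) for _, phones in lexicon)
    prefixes = set()
    for _, phones in lexicon:
        for k in range(1, len(phones)):            # every proper left prefix
            prefixes.add(tuple(phones[:k]))
    last_used = {}                                  # phone sequence -> last '#N' issued
    out = []
    for word, phones in lexicon:
        seq = tuple(phones)
        if counts[seq] == 1 and seq not in prefixes:
            out.append((word, list(phones)))        # unique and not a prefix: unchanged
        else:
            n = last_used.get(seq, 0) + 1           # first clash gets '#1', the next '#2', ...
            last_used[seq] = n
            out.append((word, list(phones) + [f'#{n}']))
    return out

# two homophones sharing one phone sequence come back tagged '#1' and '#2',
# which is what keeps the composed lexicon FST determinizable
```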
Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n + 1 - - 
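The deleted ctc_token_fst_compact.py above emits the CTC token FST (T.fst) in OpenFst text form; the <eps>/<blank> symbol names used below are assumed from the usual WeNet/Kaldi convention. A hedged sketch of the compact construction for plain units (disambiguation symbols omitted):

```python
def compact_ctc_token_arcs(units):
    """Yield OpenFst text arcs for the compact CTC token FST over plain units."""
    yield '0 0 <blank> <eps>'                  # blank self-loop on the single loop state
    for node, phone in enumerate(units, start=1):
        yield f'0 {node} {phone} {phone}'      # first frame of the unit emits the label
        yield f'{node} {node} {phone} <eps>'   # repeated frames collapse to epsilon
        yield f'{node} 0 <eps> <eps>'          # free return to the loop state
    yield '0'                                  # state 0 is final

# print('\n'.join(compact_ctc_token_arcs(['a', 'b']))) mirrors what the deleted
# ctc_token_fst_compact.py prints for a two-unit token table (disambig symbols aside)
```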
-def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in 
compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
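# Invoked from tools/k2/make_hlg.sh (step "8.1 Prepare char based lang") as:
#   python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir
# i.e. sys.argv[1] is the e2e unit table, sys.argv[2] is the word list, and
# sys.argv[3] is the output lang directory.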
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
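    Example (illustrative): as implemented below, the return value is a list of
    (word, token-list) pairs, e.g.

        generate_lexicon({'你': 2, '好': 3}, ['你好'])  # -> [('你好', ['你', '好'])]

    with a single OOV entry appended at the end.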
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 else 
sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
-
-# Copyright 2013 Johns Hopkins University (author: Daniel Povey)
-# 2014 Tom Ko
-# 2018 Emotech LTD (author: Pawel Swietojanski)
-# Apache 2.0
-
-# This script operates on a directory, such as in data/train/,
-# that contains some subset of the following files:
-# wav.scp
-# spk2utt
-# utt2spk
-# text
-#
-# It generates the files which are used for perturbing the speed of the original data.
-
-export LC_ALL=C
-set -euo pipefail
-
-if [[ $# != 3 ]]; then
- echo "Usage: perturb_data_dir_speed.sh <warping-factor> <srcdir> <destdir>"
- echo "e.g.:"
- echo " $0 0.9 data/train_si284 data/train_si284p"
- exit 1
-fi
-
-factor=$1
-srcdir=$2
-destdir=$3
-label="sp"
-spk_prefix="${label}${factor}-"
-utt_prefix="${label}${factor}-"
-
-#check is sox on the path
-
-! command -v sox &>/dev/null && echo "sox: command not found" && exit 1;
-
-if [[ ! -f ${srcdir}/utt2spk ]]; then
- echo "$0: no such file ${srcdir}/utt2spk"
- exit 1;
-fi
-
-if [[ ${destdir} == "${srcdir}" ]]; then
- echo "$0: this script requires <srcdir> and <destdir> to be different."
- exit 1
-fi
-
-mkdir -p "${destdir}"
-
-<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map"
-<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map"
-<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map"
-if [[ ! -f ${srcdir}/utt2uniq ]]; then
- <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq"
-else
- <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq"
-fi
-
-
-<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \
- utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk
-
-utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt
-
-if [[ -f ${srcdir}/segments ]]; then
-
- utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \
- utils/apply_map.pl -f 2 "${destdir}"/reco_map | \
- awk -v factor="${factor}" \
- '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \
- >"${destdir}"/segments
-
- utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \
- # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename"
- awk -v factor="${factor}" \
- '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"}
- else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" }
- else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \
- > "${destdir}"/wav.scp
- if [[ -f ${srcdir}/reco2file_and_channel ]]; then
- utils/apply_map.pl -f 1 "${destdir}"/reco_map \
- <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel
- fi
-
-else # no segments->wav indexed by utterance.
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, "w", 
encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
-
-
-shortest=false
-perspk=false
-speakers=false
-first_opt=
-spk_list=
-utt_list=
-
-expect_args=3
-case $1 in
- --first|--last) first_opt=$1; shift ;;
- --per-spk) perspk=true; shift ;;
- --shortest) shortest=true; shift ;;
- --speakers) speakers=true; shift ;;
- --spk-list) shift; spk_list=$1; shift; expect_args=2 ;;
- --utt-list) shift; utt_list=$1; shift; expect_args=2 ;;
- --*) echo "$0: invalid option '$1'"; exit 1
-esac
-
-if [ $# != $expect_args ]; then
- echo "Usage:"
- echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] <num-utt> <srcdir> <destdir>"
- echo " subset_data_dir.sh [--spk-list <speaker-list-file>] <srcdir> <destdir>"
- echo " subset_data_dir.sh [--utt-list <utt-list-file>] <srcdir> <destdir>"
- echo "By default, randomly selects <num-utt> utterances from the data directory."
- echo "With --speakers, randomly selects enough speakers that we have <num-utt> utterances"
- echo "With --per-spk, selects <num-utt> utterances per speaker, if available."
- echo "With --first, selects the first <num-utt> utterances"
- echo "With --last, selects the last <num-utt> utterances"
- echo "With --shortest, selects the shortest utterances."
- echo "With --spk-list, reads the speakers to keep from <speaker-list-file>"
- echo "With --utt-list, reads the utterances to keep from <utt-list-file>"
- exit 1;
-fi
-
-srcdir=$1
-if [[ $spk_list || $utt_list ]]; then
- numutt=
- destdir=$2
-else
- numutt=$2
- destdir=$3
-fi
-
-export LC_ALL=C
-
-if [ ! -f $srcdir/utt2spk ]; then
- echo "$0: no such file $srcdir/utt2spk"
- exit 1
-fi
-
-if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then
- echo "$0: cannot subset to more utterances than you originally had."
- exit 1
-fi
-
-if $shortest && [ ! -f $srcdir/feats.scp ]; then
- echo "$0: you selected --shortest but no feats.scp exist."
- exit 1
-fi
-
-mkdir -p $destdir || exit 1
-
-if [[ $spk_list ]]; then
- tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1;
- tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1;
-elif [[ $utt_list ]]; then
- tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1;
- tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1;
-elif $speakers; then
- tools/shuffle_list.pl < $srcdir/spk2utt |
- awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' |
- sort > $destdir/spk2utt
- tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk
-elif $perspk; then
- awk '{ n='$numutt'; printf("%s ",$1);
- skip=1; while(n*(skip+1) <= NF-1) { skip++; }
- for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); }
- printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt
- tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk
-else
- if $shortest; then
- # Select $numutt shortest utterances.
- . ./path.sh
- feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1;
- sort -n -k2 $destdir/tmp.len |
- awk '{print $1}' |
- head -$numutt >$destdir/tmp.uttlist
- tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk
- rm $destdir/tmp.uttlist $destdir/tmp.len
- else
- # Select $numutt random utterances.
- tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1;
- fi
- tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt
-fi
-
-# Perform filtering. utt2spk and spk2utt files already exist by this point.
-# Filter by utterance.
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n";
-      ;
-    }
-    push @raw_lines, $raw_text;
-    $lineno += 1;
-  }
-
-  if (!$is_utf_compatible) {
-    return (0, @raw_lines);
-  } else {
-    return (1, @unicode_lines);
-  }
-}
-
-# check if the given unicode string contain unicode whitespaces
-# other than the usual four: TAB, LF, CR and SPACE
-sub validate_utf8_whitespaces {
-  my $unicode_lines = shift;
-  use feature 'unicode_strings';
-  for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) {
-    my $current_line = $unicode_lines->[$i];
-    if ((substr $current_line, -1) ne "\n"){
-      print STDERR "$0: The current line (nr. $i) has invalid newline\n";
-      return 1;
-    }
-    my @A = split(" ", $current_line);
-    my $utt_id = $A[0];
-    # we replace TAB, LF, CR, and SPACE
-    # this is to simplify the test
-    if ($current_line =~ /\x{000d}/) {
-      print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n";
-      return 1;
-    }
-    $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g;
-    if ($current_line =~/\s/) {
-      print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n";
-      return 1;
-    }
-  }
-  return 0;
-}
-
-# checks if the text in the file (supplied as the argument) is utf-8 compatible
-# if yes, checks if it contains only allowed whitespaces. If no, then does not
-# do anything. The function seeks to the original position in the file after
-# reading the text.
-sub check_allowed_whitespace {
-  my $file = shift;
-  my $pos = tell($file);
-  (my $is_utf, my @lines) = get_utf8_or_bytestream($file);
-  seek($file, $pos, SEEK_SET);
-  if ($is_utf) {
-    my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines);
-    print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n";
-    if ($has_invalid_whitespaces) {
-      print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n";
-      return 0;
-    } else {
-      print "--> text contains only allowed whitespaces\n";
-    }
-  } else {
-    print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n";
-  }
-  return 1;
-}
-
-
-if(@ARGV != 1) {
-  die "Usage: validate_dict_dir.pl <dict-dir>\n" .
-      "e.g.: validate_dict_dir.pl data/local/dict\n";
-}
-
-$dict = shift @ARGV;
-$dict =~ s:/$::;
-
-$exit = 0;
-$success = 1; # this is re-set each time we read a file.
-
-sub set_to_fail { $exit = 1; $success = 0; }
-
-# Checking silence_phones.txt -------------------------------
-print "Checking $dict/silence_phones.txt ...\n";
-if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;}
-if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;}
-$idx = 1;
-%silence = ();
-$crlf = 1;
-
-print "--> reading $dict/silence_phones.txt\n";
-check_allowed_whitespace(\*S) || set_to_fail();
-while(<S>) {
-  if (! s/\n$//) {
-    print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n";
-    set_to_fail();
-  }
-  if ($crlf == 1 && m/\r/) {
-    print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n";
-    set_to_fail();
-    $crlf = 0;
-  }
-  my @col = split(" ", $_);
-  if (@col == 0) {
-    set_to_fail();
-    print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n";
-  }
-  foreach(0 .. 
@col-1) {
-    my $p = $col[$_];
-    if($silence{$p}) {
-      set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n";
-    } else {
-      $silence{$p} = 1;
-    }
-    # disambiguation symbols; phones ending in _B, _E, _S or _I will cause
-    # problems with word-position-dependent systems, and is obviously
-    # confusable with epsilon.
-    if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq "<eps>"){
-      set_to_fail();
-      print "--> ERROR: phone \"$p\" has disallowed written form\n";
-    }
-  }
-  $idx ++;
-}
-close(S);
-$success == 0 || print "--> $dict/silence_phones.txt is OK\n";
-print "\n";
-
-# Checking optional_silence.txt -------------------------------
-print "Checking $dict/optional_silence.txt ...\n";
-if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;}
-if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;}
-$idx = 1;
-$success = 1;
-$crlf = 1;
-print "--> reading $dict/optional_silence.txt\n";
-check_allowed_whitespace(\*OS) or exit 1;
-while(<OS>) {
-  chomp;
-  my @col = split(" ", $_);
-  if ($idx > 1 or @col > 1) {
-    set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n";
-  } elsif (!$silence{$col[0]}) {
-    set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n";
-  }
-  if ($crlf == 1 && m/\r/) {
-    print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n";
-    set_to_fail();
-    $crlf = 0;
-  }
-  $idx ++;
-}
-close(OS);
-$success == 0 || print "--> $dict/optional_silence.txt is OK\n";
-print "\n";
-
-# Checking nonsilence_phones.txt -------------------------------
-print "Checking $dict/nonsilence_phones.txt ...\n";
-if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;}
-if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;}
-$idx = 1;
-%nonsilence = ();
-$success = 1;
-$crlf = 1;
-print "--> reading $dict/nonsilence_phones.txt\n";
-check_allowed_whitespace(\*NS) or set_to_fail();
-while(<NS>) {
-  if ($crlf == 1 && m/\r/) {
-    print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n";
-    set_to_fail();
-    $crlf = 0;
-  }
-  if (! s/\n$//) {
-    print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n";
-    set_to_fail();
-  }
-  my @col = split(" ", $_);
-  if (@col == 0) {
-    set_to_fail();
-    print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n";
-  }
-  foreach(0 .. @col-1) {
-    my $p = $col[$_];
-    if($nonsilence{$p}) {
-      set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n";
-    } else {
-      $nonsilence{$p} = 1;
-    }
-    # phones that start with the pound sign/hash may be mistaken for
-    # disambiguation symbols; phones ending in _B, _E, _S or _I will cause
-    # problems with word-position-dependent systems, and is obviously
-    # confusable with epsilon.
-    if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq "<eps>"){
-      set_to_fail();
-      print "--> ERROR: phone \"$p\" has disallowed written form\n";
-    }
-  }
-  $idx ++;
-}
-close(NS);
-$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n";
-print "\n";
-
-# Checking disjoint -------------------------------
-sub intersect {
-  my ($a, $b) = @_;
-  @itset = ();
-  %itset = ();
-  foreach(keys %$a) {
-    if(exists $b->{$_} and !$itset{$_}) {
-      push(@itset, $_);
-      $itset{$_} = 1;
-    }
-  }
-  return @itset;
-}
-
-print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n";
-@itset = intersect(\%silence, \%nonsilence);
-if(@itset == 0) {print "--> disjoint property is OK.\n";}
-else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";}
-print "\n";
-
-
-sub check_lexicon {
-  my ($lex, $num_prob_cols, $num_skipped_cols) = @_;
-  print "Checking $lex\n";
-  !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail();
-  my %seen_line = {};
-  $idx = 1; $success = 1; $crlf = 1;
-  print "--> reading $lex\n";
-  check_allowed_whitespace(\*L) or set_to_fail();
-  while (<L>) {
-    if ($crlf == 1 && m/\r/) {
-      print "--> ERROR: $lex contains Carriage Return (^M) characters.\n";
-      set_to_fail();
-      $crlf = 0;
-    }
-    if (defined $seen_line{$_}) {
-      print "--> ERROR: line '$_' of $lex is repeated\n";
-      set_to_fail();
-    }
-    $seen_line{$_} = 1;
-    if (! s/\n$//) {
-      print "--> ERROR: last line '$_' of $lex does not end in newline.\n";
-      set_to_fail();
-    }
-    my @col = split(" ", $_);
-    $word = shift @col;
-    if (!defined $word) {
-      print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail();
-    }
-    if ($word eq "<s>" || $word eq "</s>" || $word eq "<eps>" || $word eq "#0") {
-      print "--> ERROR: lexicon.txt contains forbidden word $word\n";
-      set_to_fail();
-    }
-    for ($n = 0; $n < $num_prob_cols; $n++) {
-      $prob = shift @col;
-      if (!($prob > 0.0 && $prob <= 1.0)) {
-        print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n";
-        set_to_fail();
-      }
-    }
-    for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; }
-    if (@col == 0) {
-      print "--> ERROR: lexicon.txt contains word $word with empty ";
-      print "pronunciation.\n";
-      set_to_fail();
-    }
-    foreach (0 .. @col-1) {
-      if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) {
-        print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt ";
-        print "(line $idx)\n";
-        set_to_fail();
-      }
-    }
-    $idx ++;
-  }
-  close(L);
-  $success == 0 || print "--> $lex is OK\n";
-  print "\n";
-}
-
-if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); }
-if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); }
-if (-f "$dict/lexiconp_silprob.txt") {
-  # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also
-  # exist. 
-  check_lexicon("$dict/lexiconp_silprob.txt", 2, 2);
-  if (-f "$dict/silprob.txt") {
-    !open(SP, "<$dict/silprob.txt") &&
-      print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail();
-    $crlf = 1;
-    while (<SP>) {
-      if ($crlf == 1 && m/\r/) {
-        print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n";
-        set_to_fail();
-        $crlf = 0;
-      }
-      chomp; my @col = split;
-      @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail();
-      if ($col[0] eq "<s>" || $col[0] eq "overall") {
-        if (!($col[1] > 0.0 && $col[1] <= 1.0)) {
-          set_to_fail();
-          print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n";
-        }
-      } elsif ($col[0] eq "</s>_s" || $col[0] eq "</s>_n") {
-        if ($col[1] <= 0.0) {
-          set_to_fail();
-          print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n";
-        }
-      } else {
-        print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n";
-        set_to_fail();
-      }
-    }
-    close(SP);
-  } else {
-    set_to_fail();
-    print "--> ERROR: expecting $dict/silprob.txt to exist\n";
-  }
-}
-
-if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) {
-  print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n";
-  set_to_fail();
-}
-
-sub check_lexicon_pair {
-  my ($lex1, $num_prob_cols1, $num_skipped_cols1,
-      $lex2, $num_prob_cols2, $num_skipped_cols2) = @_;
-  # We have checked individual lexicons already.
-  open(L1, "<$lex1"); open(L2, "<$lex2");
-  print "Checking lexicon pair $lex1 and $lex2\n";
-  my $line_num = 0;
-  while(<L1>) {
-    $line_num++;
-    @A = split;
-    $line_B = <L2>;
-    if (!defined $line_B) {
-      print "--> ERROR: $lex1 and $lex2 have different number of lines.\n";
-      set_to_fail(); last;
-    }
-    @B = split(" ", $line_B);
-    # Check if the word matches.
-    if ($A[0] ne $B[0]) {
-      print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n";
-      set_to_fail(); last;
-    }
-    shift @A; shift @B;
-    for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; }
-    for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; }
-    # Check if the pronunciation matches
-    if (join(" ", @A) ne join(" ", @B)) {
-      print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n";
-      set_to_fail(); last;
-    }
-  }
-  $line_B = <L2>;
-  if (defined $line_B && $exit == 0) {
-    print "--> ERROR: $lex1 and $lex2 have different number of lines.\n";
-    set_to_fail();
-  }
-  $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n";
-}
-
-# If more than one lexicon exist, we have to check if they correspond to each
-# other. It could be that the user overwrote one and we need to regenerate the
-# other, but we do not know which is which.
-if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") {
-  check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0);
-}
-if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") {
-  check_lexicon_pair("$dict/lexiconp.txt", 1, 0,
-                     "$dict/lexiconp_silprob.txt", 2, 2);
-}
-
-# Checking extra_questions.txt -------------------------------
-%distinguished = (); # Keep track of all phone-pairs including nonsilence that
-                     # are distinguished (split apart) by extra_questions.txt,
-                     # as $distinguished{$p1,$p2} = 1. This will be used to
-                     # make sure that we don't have pairs of phones on the same
-                     # line in nonsilence_phones.txt that can never be
-                     # distinguished from each other by questions. 
(If any two
-                     # phones appear on the same line in nonsilence_phones.txt,
-                     # they share a tree root, and since the automatic
-                     # question-building treats all phones that appear on the
-                     # same line of nonsilence_phones.txt as being in the same
-                     # group, we can never distinguish them without resorting to
-                     # questions in extra_questions.txt.
-print "Checking $dict/extra_questions.txt ...\n";
-if (-s "$dict/extra_questions.txt") {
-  if (!open(EX, "<$dict/extra_questions.txt")) {
-    set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n";
-  }
-  $idx = 1;
-  $success = 1;
-  $crlf = 1;
-  print "--> reading $dict/extra_questions.txt\n";
-  check_allowed_whitespace(\*EX) or set_to_fail();
-  while(<EX>) {
-    if ($crlf == 1 && m/\r/) {
-      print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n";
-      set_to_fail();
-      $crlf = 0;
-    }
-    if (! s/\n$//) {
-      print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n";
-      set_to_fail();
-    }
-    my @col = split(" ", $_);
-    if (@col == 0) {
-      set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n";
-    }
-    foreach (0 .. @col-1) {
-      if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) {
-        set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n";
-      }
-      $idx ++;
-    }
-    %col_hash = ();
-    foreach $p (@col) { $col_hash{$p} = 1; }
-    foreach $p1 (@col) {
-      # Update %distinguished hash.
-      foreach $p2 (keys %nonsilence) {
-        if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not
-                                       # in this question (and in nonsilence
-                                       # phones)... mark p1,p2 as being split apart
-          $distinguished{$p1,$p2} = 1;
-          $distinguished{$p2,$p1} = 1;
-        }
-      }
-    }
-  }
-  close(EX);
-  $success == 0 || print "--> $dict/extra_questions.txt is OK\n";
-} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";}
-
-if (-f "$dict/nonterminals.txt") {
-  open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt";
-  my %nonterminals = ();
-  my $line_number = 1;
-  while (<NT>) {
-    chop;
-    my @line = split(" ", $_);
-    if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) {
-      print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1;
-    }
-    $nonterminals{$line[0]} = 1;
-    $line_number++;
-  }
-  print "--> $dict/nonterminals.txt is OK\n";
-}
-
-
-# check nonsilence_phones.txt again for phone-pairs that are never
-# distnguishable. (note: this situation is normal and expected for silence
-# phones, so we don't check it.)
-if(!open(NS, "<$dict/nonsilence_phones.txt")) {
-  print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1;
-}
-
-$num_warn_nosplit = 0;
-$num_warn_nosplit_limit = 10;
-while(<NS>) {
-  my @col = split(" ", $_);
-  foreach $p1 (@col) {
-    foreach $p2 (@col) {
-      if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) {
-        set_to_fail();
-        if ($num_warn_nosplit <= $num_warn_nosplit_limit) {
-          print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n";
-        }
-        if ($num_warn_nosplit == $num_warn_nosplit_limit) {
-          print "... Not warning any more times about this issue.\n";
-        }
-        if ($num_warn_nosplit == 0) {
-          print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python 
../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - 
char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
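# Both `denom` (in BPUMultiHeadedAttention) and `ff_scale` above are stored with
# register_buffer as (1, C, 1, 1) tensors instead of Python floats, presumably so every
# constant stays 4-D like the activations; a buffer also travels with state_dict and .to().
# A minimal standalone version of that pattern (class name is illustrative):
import torch

class ScaledBranch(torch.nn.Module):
    def __init__(self, size: int, scale: float):
        super().__init__()
        # a buffer is saved and exported with the module, but is not a trainable Parameter
        self.register_buffer("scale4d", torch.full((1, size, 1, 1), scale))

    def forward(self, x: torch.Tensor, branch: torch.Tensor) -> torch.Tensor:
        # x, branch: (batch, size, 1, chunk_size)
        return x + self.scale4d * branch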
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath))
-
-    logger.info("Stage-2.3: check onnx_ctc and torch_ctc")
-    torch_output = ctc(hidden)
-    ort_session = onnxruntime.InferenceSession(ctc_outpath)
-    onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)})
-
-    np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0],
-                               rtol=1e-03, atol=1e-04)
-    meta = ort_session.get_modelmeta()
-    logger.info("custom_metadata_map={}".format(meta.custom_metadata_map))
-    logger.info("Check onnx_ctc, pass!")
-    return ctc, ort_session
-
-
-def export_decoder(asr_model, args):
-    logger.info("Currently, Decoder is not supported.")
-
-
-if __name__ == '__main__':
-    torch.manual_seed(777)
-    args = get_args()
-    args.ln_run_on_bpu = False
-    # NOTE(xcsong): XJ3 BPU only support static shapes
-    assert args.chunk_size > 0
-    assert args.num_decoding_left_chunks > 0
-    os.system("mkdir -p " + args.output_dir)
-    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
-
-    with open(args.config, 'r') as fin:
-        configs = yaml.load(fin, Loader=yaml.FullLoader)
-
-    model = init_model(configs)
-    load_checkpoint(model, args.checkpoint)
-    model.eval()
-    print(model)
-
-    args.feature_size = configs['input_dim']
-    args.output_size = model.encoder.output_size()
-    args.decoding_window = (args.chunk_size - 1) * \
-        model.encoder.embed.subsampling_rate + \
-        model.encoder.embed.right_context + 1
-
-    export_encoder(model, args)
-    export_ctc(model, args)
-    export_decoder(model, args)
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_onnx_cpu.py
deleted file mode 100644
index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_onnx_cpu.py
+++ /dev/null
@@ -1,411 +0,0 @@
-# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
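# Both the BPU export script above and the deleted export_onnx_cpu.py below stamp the
# exported graphs with metadata_props and read them back through onnxruntime. A minimal
# standalone version of that round trip (the path and helper name are placeholders):
import onnx
import onnxruntime

def stamp_and_read(onnx_path: str, info: dict) -> dict:
    model = onnx.load(onnx_path)
    for key, value in info.items():
        meta = model.metadata_props.add()
        meta.key, meta.value = str(key), str(value)
    onnx.save(model, onnx_path)                        # re-save with the metadata attached
    sess = onnxruntime.InferenceSession(onnx_path)
    return sess.get_modelmeta().custom_metadata_map    # e.g. {'chunk_size': '16', ...}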
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
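# Standalone version of the input-pruning idiom used right below: when ONNX folds
# `required_cache_size` / `att_mask` away (16/-1, -1/-1 and 16/0 modes), the feed dict
# has to be trimmed to the inputs the exported graph actually kept. The helper name is
# illustrative; the script itself reads the names from onnx_encoder.graph.input instead.
import onnxruntime

def prune_feed(session: onnxruntime.InferenceSession, feed: dict) -> dict:
    graph_inputs = {node.name for node in session.get_inputs()}
    return {name: value for name, value in feed.items() if name in graph_inputs}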
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
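    # (Since time, nbest and len are declared as dynamic axes ('T', 'NBEST', 'L') in
    # Stage-3.2 below, the concrete 200/10/20 values only fix the shapes of the tracing
    # inputs; the exported decoder accepts other lengths at run time.)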
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
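# Worked example of the decoding_window formula above, assuming wenet's Conv2dSubsampling4
# front end (subsampling_rate=4, right_context=6), which is also what makes the
# "hardcode to 67" fallback in the NOTE above consistent:
chunk_size, subsampling_rate, right_context = 16, 4, 6
decoding_window = (chunk_size - 1) * subsampling_rate + right_context + 1
assert decoding_window == 67   # raw feature frames consumed per 16-frame decoded chunk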
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
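                # (i.e. when xs plus the factor-subsampled cache would run past the
                # available positional embedding, the oldest `att_cache_trunc` frames of
                # the cache are dropped via the `[:, :, att_cache_trunc:, :]` slice below
                # so that the attended length fits pos_emb again.)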
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
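# ---------------------------------------------------------------------------
# [Editor's illustration - not part of the patch above] The export script's
# main block ends by optionally converting both ONNX graphs to FP16 with
# onnxmltools and dumping config.yaml. A minimal parity check of the converted
# encoder could look like the sketch below; the file names, the 80-dim fbank
# feature size and the batch/sequence shapes are assumptions for illustration,
# not values taken from this repository.
import numpy as np
import onnxruntime as ort

fp32_sess = ort.InferenceSession("onnx_model/encoder.onnx",
                                 providers=["CPUExecutionProvider"])
fp16_sess = ort.InferenceSession("onnx_model/encoder_fp16.onnx",
                                 providers=["CPUExecutionProvider"])

speech = np.random.randn(24, 512, 80).astype(np.float32)    # (batch, frames, fbank dim)
lengths = np.full((24,), 512, dtype=np.int32)                # valid frames per utterance

out_fp32 = fp32_sess.run(None, {"speech": speech, "speech_lengths": lengths})
out_fp16 = fp16_sess.run(None, {"speech": speech.astype(np.float16),
                                "speech_lengths": lengths})

# encoder_out is the first declared output; FP16 should stay within a loose tolerance.
np.testing.assert_allclose(out_fp32[0], out_fp16[0].astype(np.float32),
                           rtol=1e-2, atol=1e-2)
# ---------------------------------------------------------------------------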
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
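# ---------------------------------------------------------------------------
# [Editor's illustration - not part of the patch above] In the attention-
# rescoring branch of recognize_onnx_gpu.py deleted above, each n-best
# candidate is packed as [sos] + tokens + [eos] and right-padded with
# IGNORE_ID to max_len + 2 before being fed to the decoder graph. A small
# worked example of that layout (the token ids and the sos/eos value are
# made-up placeholders; IGNORE_ID mirrors wenet's -1 padding id):
import numpy as np

IGNORE_ID = -1                 # assumed to match wenet.utils.common.IGNORE_ID
sos = eos = 4232               # assumed to be vocabulary size - 1
cand = [23, 7, 102]            # one CTC beam-search candidate (token ids)
max_len = 5                    # longest candidate in the batch

row = np.full(max_len + 2, IGNORE_ID, dtype=np.int64)
row[:len(cand) + 2] = [sos] + cand + [eos]
# row == [4232, 23, 7, 102, 4232, -1, -1]
hyps_len_sos = len(cand) + 1   # counts sos + tokens, i.e. 4 here
# ---------------------------------------------------------------------------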
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
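# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch only; not part of the wenet sources being
# removed above. It shows the "pad the time axis to a multiple of group_size"
# arithmetic that pad4group() relies on: the ONNX-friendly expression
# (group_size - overflow) * int(overflow // (overflow + 1e-17)) evaluates to 0
# when overflow == 0 and to (group_size - overflow) otherwise, so no
# data-dependent if-branch is needed at export time.
# ---------------------------------------------------------------------------
import torch
import torch.nn.functional as F

def pad_time_to_multiple(x: torch.Tensor, group_size: int = 3) -> torch.Tensor:
    """Pad dim 2 (time) of a (batch, head, time, d_k) tensor up to a multiple of group_size."""
    overflow = x.size(2) % group_size
    padding = (group_size - overflow) * int(overflow // (overflow + 1e-17))
    return F.pad(x, (0, 0, 0, padding), value=0.0)

q = torch.randn(1, 4, 10, 64)                  # time1 = 10 is not divisible by 3
print(pad_time_to_multiple(q, 3).shape)        # torch.Size([1, 4, 12, 64])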
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
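# ---------------------------------------------------------------------------
# [Editor's note] Standalone sketch, not part of the deleted sources: it mimics
# the causal-convolution cache handling in ConvolutionModule.forward() above.
# With lorder = kernel_size - 1, the first chunk is left-padded with zeros and
# the last lorder frames of the (padded) input become the cache for the next
# chunk, so each chunk's output length equals its input length and no future
# frames are consumed.
# ---------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F

channels, kernel_size = 8, 15
lorder = kernel_size - 1
depthwise = nn.Conv1d(channels, channels, kernel_size, groups=channels, padding=0)

chunk1 = torch.randn(1, channels, 20)          # (batch, channels, time)
padded = F.pad(chunk1, (lorder, 0))            # zero left context for the first chunk
cache = padded[:, :, -lorder:]                 # keep the last lorder frames for the next chunk
out1 = depthwise(padded)                       # (1, 8, 20)

chunk2 = torch.randn(1, channels, 20)
out2 = depthwise(torch.cat((cache, chunk2), dim=2))
print(out1.shape, out2.shape)                  # both torch.Size([1, 8, 20])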
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. 
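# ---------------------------------------------------------------------------
# [Editor's note] Illustrative numbers only, not taken from the deleted sources:
# the chunk scheduling used by forward_chunk_by_chunk() above. Assuming a
# Conv2dSubsampling4 front-end (subsampling rate 4, right context 6) and a
# decoding chunk size of 16, each step reads `decoding_window` feature frames
# and advances by `stride` frames.
# ---------------------------------------------------------------------------
subsampling = 4                # assumed subsampling rate of the front-end
right_context = 6              # assumed right context of the front-end
decoding_chunk_size = 16

context = right_context + 1                                           # add current frame -> 7
stride = subsampling * decoding_chunk_size                            # 64
decoding_window = (decoding_chunk_size - 1) * subsampling + context   # 67

num_frames = 200
starts = list(range(0, num_frames - context + 1, stride))
print(stride, decoding_window, starts)                                # 64 67 [0, 64, 128, 192]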
- self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
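# ---------------------------------------------------------------------------
# [Editor's note] Shape-only sketch with made-up sizes, not part of the deleted
# sources: the Transformer-XL style score used by the relative-position
# attention modules above and below (https://arxiv.org/abs/1901.02860, Sec. 3.3),
#   scores = ((q + u) k^T + (q + v) p^T) / sqrt(d_k),
# where u and v are the learnable pos_bias_u / pos_bias_v and p is the
# positional embedding after linear_pos.
# ---------------------------------------------------------------------------
import math
import torch

batch, head, time1, time2, d_k = 2, 4, 8, 8, 16
q = torch.randn(batch, time1, head, d_k)   # query, transposed to (B, T1, H, d_k)
k = torch.randn(batch, head, time2, d_k)   # key
p = torch.randn(batch, head, time2, d_k)   # projected positional embedding
u = torch.randn(head, d_k)                 # pos_bias_u
v = torch.randn(head, d_k)                 # pos_bias_v

matrix_ac = torch.matmul((q + u).transpose(1, 2), k.transpose(-2, -1))  # (B, H, T1, T2)
matrix_bd = torch.matmul((q + v).transpose(1, 2), p.transpose(-2, -1))  # (B, H, T1, T2)
scores = (matrix_ac + matrix_bd) / math.sqrt(d_k)
print(scores.shape)                        # torch.Size([2, 4, 8, 8])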
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - 
static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return 
padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 @@ 
-from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - 
assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
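As a hedged aside on the transducer module removed above: its `@torch.jit.export` hooks (`forward_predictor_init_state`, `forward_predictor_step`, `forward_joint_step`) are exactly the pieces needed for one step of greedy RNN-T decoding outside the training code. A minimal usage sketch, assuming `scripted_model` is the scripted `Transducer`, `enc_frame` is a single encoder frame of shape `(1, 1, encoder_dim)`, and `blank_id` is the model's blank token — all three names are placeholders, not part of the removed file:

```python
import torch

# One greedy RNN-T step built from the exported hooks above (a sketch, not the
# project's runtime; `scripted_model`, `enc_frame`, `blank_id` are assumed).
cache = scripted_model.forward_predictor_init_state()
pred_in = torch.tensor([[blank_id]])                      # decoding starts from the blank symbol
pred_out, cache = scripted_model.forward_predictor_step(pred_in, cache)
joint_out = scripted_model.forward_joint_step(enc_frame, pred_out)
token = joint_out.log_softmax(dim=-1).argmax(dim=-1).squeeze()
# Emit `token` and feed it back as the next pred_in if it is not blank;
# otherwise advance to the next encoder frame (compare basic_greedy_search above).
```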
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
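The `_ctc_prefix_beam_search` above tracks, for every prefix, two log-probabilities: the prefix ending in blank (`pb`) and ending in a non-blank symbol (`pnb`). The following self-contained toy re-implements that recursion without the top-k symbol pruning, purely to make the three update cases easier to follow; the function name and the example input are made up for illustration:

```python
import math
from collections import defaultdict

def log_add(xs):
    """Numerically stable log(sum(exp(x) for x in xs))."""
    m = max(xs)
    if m == -float('inf'):
        return -float('inf')
    return m + math.log(sum(math.exp(x - m) for x in xs))

def toy_ctc_prefix_beam_search(log_probs, beam_size=3, blank=0):
    """log_probs: nested lists of shape (T, vocab) holding per-frame log-probabilities."""
    # prefix -> (log P(prefix, ends in blank), log P(prefix, ends in non-blank))
    cur_hyps = [(tuple(), (0.0, -float('inf')))]
    for frame in log_probs:
        next_hyps = defaultdict(lambda: (-float('inf'), -float('inf')))
        for s, ps in enumerate(frame):      # the real code first prunes to the top-k symbols
            for prefix, (pb, pnb) in cur_hyps:
                last = prefix[-1] if prefix else None
                if s == blank:
                    n_pb, n_pnb = next_hyps[prefix]
                    next_hyps[prefix] = (log_add([n_pb, pb + ps, pnb + ps]), n_pnb)
                elif s == last:
                    # repeated symbol collapses onto the same prefix...
                    n_pb, n_pnb = next_hyps[prefix]
                    next_hyps[prefix] = (n_pb, log_add([n_pnb, pnb + ps]))
                    # ...unless the previous copy ended in blank, which starts a new symbol
                    n_prefix = prefix + (s,)
                    n_pb, n_pnb = next_hyps[n_prefix]
                    next_hyps[n_prefix] = (n_pb, log_add([n_pnb, pb + ps]))
                else:
                    n_prefix = prefix + (s,)
                    n_pb, n_pnb = next_hyps[n_prefix]
                    next_hyps[n_prefix] = (n_pb, log_add([n_pnb, pb + ps, pnb + ps]))
        cur_hyps = sorted(next_hyps.items(),
                          key=lambda kv: log_add(list(kv[1])),
                          reverse=True)[:beam_size]
    return [(list(p), log_add(list(v))) for p, v in cur_hyps]

# e.g. two frames over a 3-symbol vocabulary (index 0 is blank)
frames = [[math.log(0.6), math.log(0.3), math.log(0.1)],
          [math.log(0.5), math.log(0.4), math.log(0.1)]]
print(toy_ctc_prefix_beam_search(frames, beam_size=2))
```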
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
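Both `attention_rescoring` and `hlg_rescore` above score a candidate transcript by summing the decoder's log-probability of each reference token and then adding the `<eos>` term at position `len(hyp)`. A tiny runnable toy of that idiom, with made-up shapes and token ids:

```python
import torch

vocab_size, eos = 5, 4                                            # toy values
decoder_out = torch.randn(2, 4, vocab_size).log_softmax(dim=-1)   # (n_hyps, max_len + 1, vocab)
hyps = [[1, 2, 3], [2, 2]]

scores = []
for i, hyp in enumerate(hyps):
    s = sum(decoder_out[i, j, w].item() for j, w in enumerate(hyp))
    s += decoder_out[i, len(hyp), eos].item()                     # close the hypothesis with <eos>
    scores.append(s)
print(scores)                                                     # higher (less negative) is better
```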
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
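The `forward_attention_decoder` shown above replaces `reverse_pad_list` (not exportable to ONNX because of `pad_sequence`) with mask and index arithmetic plus `torch.gather`. The doctest-style comments can be reproduced directly; here is a standalone check of that reversal, where `eos = 2` is just a placeholder id:

```python
import torch

r_hyps = torch.tensor([[1, 2, 3], [9, 8, 4], [2, -1, -1]])
r_hyps_lens = torch.tensor([3, 3, 1])
eos = 2                                            # placeholder id for illustration

max_len = int(r_hyps_lens.max())
index_range = torch.arange(0, max_len)
seq_len_expand = r_hyps_lens.unsqueeze(1)
seq_mask = seq_len_expand > index_range            # marks real tokens vs padding
index = (seq_len_expand - 1) - index_range         # reversed positions (negative on padding)
index = index * seq_mask                           # clamp padding positions to index 0
r_hyps_rev = torch.gather(r_hyps, 1, index)
r_hyps_rev = torch.where(seq_mask, r_hyps_rev, torch.full_like(r_hyps_rev, eos))
print(r_hyps_rev)                                  # tensor([[3, 2, 1], [4, 8, 9], [2, 2, 2]])
```

Padding positions are first clamped to index 0 so `gather` stays in range, then overwritten with `eos` by the final `where`, which matches the commented `>>>` traces in the removed code.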
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
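The cache comment above relies on concatenating and splitting zero-sized tensors being no-ops, which is what lets the export path always run the same concat/split code even for the first chunk. A quick standalone check of the two claims in that comment:

```python
import torch

a = torch.ones(1, 2, 0, 4)          # an "empty" KV cache (cache_t == 0)
b = torch.ones(1, 2, 3, 4)          # the freshly computed keys/values
c = torch.cat((a, b), dim=2)        # prepending the empty cache changes nothing
assert torch.equal(b, c)

d = torch.split(a, 2, dim=-1)       # splitting an empty cache yields two equal empty halves
assert torch.equal(d[0], d[1])
```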
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
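For reference, the smoothed target distribution that LabelSmoothingLoss above builds can be reproduced in a few lines (illustrative only, not part of this patch; padding handling omitted):

import torch

size, smoothing = 3, 0.1
target = torch.tensor([0, 1, 2])
true_dist = torch.full((3, size), smoothing / (size - 1))
true_dist.scatter_(1, target.unsqueeze(1), 1.0 - smoothing)
print(true_dist)
# tensor([[0.9000, 0.0500, 0.0500],
#         [0.0500, 0.9000, 0.0500],
#         [0.0500, 0.0500, 0.9000]])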
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
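Swish(x) = x * sigmoid(x), as defined in swish.py above, coincides with PyTorch's built-in SiLU; a quick check (illustrative only, not part of this patch):

import torch

x = torch.linspace(-3.0, 3.0, steps=7)
swish = x * torch.sigmoid(x)                        # Swish as defined above
assert torch.allclose(swish, torch.nn.functional.silu(x))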
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
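For intuition about remove_duplicates_and_blank above (greedy CTC decoding): consecutive repeats are collapsed and blanks (id 0) dropped. A standalone collapse with the same effect, illustrative only and not part of this patch:

from typing import List

def ctc_collapse(path: List[int], blank: int = 0) -> List[int]:
    out, prev = [], None
    for tok in path:
        if tok != blank and tok != prev:   # keep first token of each non-blank run
            out.append(tok)
        prev = tok
    return out

print(ctc_collapse([0, 3, 3, 0, 0, 5, 5, 5, 0, 2]))  # [3, 5, 2]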
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
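insert_blank above interleaves the blank id with the label sequence, which is exactly the lattice layout that forced_align walks over. A quick check of the expected output (illustrative only, not part of this patch):

import numpy as np

def insert_blank(label, blank_id=0):
    # same interleaving as ctc_util.insert_blank above
    label = np.expand_dims(label, 1)
    blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id
    label = np.concatenate([blanks, label], axis=1).reshape(-1)
    return np.append(label, label[0])    # trailing blank (label[0] is blank_id here)

print(insert_blank(np.array([1, 2, 3])))  # [0 1 0 2 0 3 0]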
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
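The pattern used by read_non_lang_symbols above accepts symbols wrapped in {}, [] or <> and rejects anything else. A quick check (illustrative only, not part of this patch):

import re

non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})")
for sym in ["{NOISE}", "[LAUGH]", "<unk>", "NOISE"]:
    print(sym, bool(non_lang_syms_pattern.fullmatch(sym)))
# {NOISE} True / [LAUGH] True / <unk> True / NOISE False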
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
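The arange/comparison form of subsequent_mask above produces exactly the lower-triangular mask of the commented-out torch.tril variant; the rewrite presumably just keeps the graph ONNX-export friendly. A quick equivalence check, illustrative only and not part of this patch:

import torch

size = 4
arange = torch.arange(size)
mask_cmp = arange.expand(size, size) <= arange.unsqueeze(-1)      # comparison-based
mask_tril = torch.tril(torch.ones(size, size, dtype=torch.bool))  # tril-based
assert torch.equal(mask_cmp, mask_tril)
print(mask_cmp.int())   # lower-triangular 0/1 mask, diagonal included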
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/README.md deleted file mode 100644 index c74fbeadc6d1d6edd04147be7490257a2b3e5e9c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# Performance Record - -## U2++ Conformer Result - -* Feature info: using fbank feature, dither, cmvn, oneline speed perturb -* Training info: lr 0.001, dynamic batch with max_frames_in_batch 15000, 4 gpu, acc_grad 1, 130 epochs -* Training weight info: transducer_weight 0.75, ctc_weight 0.1, reverse_weight 0.30, average_num 30 -* Predictor type: lstm - -| decoding mode/chunk size | full | 16 | -|---------------------------|-------|-------| -| rnnt greedy search | 6.44 | 7.09 | - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/conf/conformer_rnnt.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/conf/conformer_rnnt.yaml deleted file mode 100644 index aeab0b180bc4904d32de6d01997e96c3f6ed9efd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/conf/conformer_rnnt.yaml +++ /dev/null @@ -1,100 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: true - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - - -joint_conf: - join_dim: 512 - prejoin_linear: True - postjoin_linear: false - joint_mode: 'add' - activation: 'tanh' - -predictor: rnn -predictor_conf: - embed_size: 
256 - output_size: 256 - embed_dropout: 0.1 - hidden_size: 256 - num_layers: 2 - bias: true - rnn_type: 'lstm' - dropout: 0.1 - -decoder: bitransformer -decoder_conf: - attention_heads: 4 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 3 - positional_dropout_rate: 0.1 - r_num_blocks: 3 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid transducer+ctc+attention -model_conf: - transducer_weight: 0.75 - ctc_weight: 0.1 - attention_weight: 0.15 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 10 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 8 - -grad_clip: 4 -accum_grad: 1 -max_epoch: 140 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/conf/conformer_u2pp_rnnt.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/conf/conformer_u2pp_rnnt.yaml deleted file mode 100644 index cfb4b18b6ed9ff1a19cecde760f5fa0d53b5a2c2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/conf/conformer_u2pp_rnnt.yaml +++ /dev/null @@ -1,103 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 8 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - causal: true - use_dynamic_chunk: true - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - use_dynamic_left_chunk: false - - -joint_conf: - join_dim: 512 - prejoin_linear: True - postjoin_linear: false - joint_mode: 'add' - activation: 'tanh' - -predictor: rnn -predictor_conf: - embed_size: 256 - output_size: 256 - embed_dropout: 0.1 - hidden_size: 256 - num_layers: 2 - bias: true - rnn_type: 'lstm' - dropout: 0.1 - -decoder: bitransformer -decoder_conf: - attention_heads: 4 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 3 - positional_dropout_rate: 0.1 - r_num_blocks: 3 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid transducer+ctc+attention -model_conf: - transducer_weight: 0.75 - ctc_weight: 0.1 - attention_weight: 0.15 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 10 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - 
frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'dynamic' # static or dynamic - max_frames_in_batch: 15000 - -grad_clip: 4 -accum_grad: 1 -max_epoch: 130 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/local/prepare_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/local/prepare_data.sh deleted file mode 100644 index c1586b87856804bce4a23609f696417deb7d4e79..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/local/prepare_data.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG) -# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU) -# Apache 2.0 - -# transform raw AISHELL-2 data to kaldi format - -if [ $# != 3 ]; then - echo "prepare_data.sh " - echo " e.g prepare_data.sh /data/AISHELL-2/iOS/train data/local/train data/train" - exit 1; -fi - -corpus=$1 -tmp=$2 -dir=$3 - -echo "prepare_data.sh: Preparing data in $corpus" - -mkdir -p $tmp -mkdir -p $dir - -# corpus check -if [ ! -d $corpus ] || [ ! -f $corpus/wav.scp ] || [ ! -f $corpus/trans.txt ]; then - echo "Error: $0 requires wav.scp and trans.txt under $corpus directory." - exit 1; -fi - -# validate utt-key list -awk '{print $1}' $corpus/wav.scp > $tmp/wav_utt.list -awk '{print $1}' $corpus/trans.txt > $tmp/trans_utt.list -tools/filter_scp.pl -f 1 $tmp/wav_utt.list $tmp/trans_utt.list > $tmp/utt.list - -# wav.scp -awk -F'\t' -v path_prefix=$corpus '{printf("%s\t%s/%s\n",$1,path_prefix,$2)}' $corpus/wav.scp > $tmp/tmp_wav.scp -tools/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_wav.scp | sort -k 1 | uniq > $tmp/wav.scp - -# text -tools/filter_scp.pl -f 1 $tmp/utt.list $corpus/trans.txt | sort -k 1 | uniq > $tmp/trans.txt -dos2unix < $tmp/trans.txt | \ - tools/filter_scp.pl -f 1 $tmp/utt.list - | \ - sort -k 1 | uniq | tr '[a-z]' '[A-Z]' | \ - sed 's/A/A/g' | sed 's/T/T/g' | sed 's/M/M/g' | sed 's/𫚉//g' | sed 's/𫖯/頫/g' | \ - sed 's/[()]//g' | sed "s/\([^A-Z]\)'/\1/g" > $tmp/text - -# copy prepared resources from tmp_dir to target dir -mkdir -p $dir -for f in wav.scp text; do - cp $tmp/$f $dir/$f || exit 1; -done - -echo "local/prepare_data.sh succeeded" -exit 0; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/local/train_lms.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/local/train_lms.sh deleted file mode 100644 index 2e2d0dbeb79c54d707add6d20269fe9b89e69d8e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/local/train_lms.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash - - -# To be run from one directory above this script. -. ./path.sh - -text=data/local/lm/text -lexicon=data/local/dict/lexicon.txt - -. tools/parse_options.sh - -for f in "$text" "$lexicon"; do - [ ! -f $x ] && echo "$0: No such file $f" && exit 1; -done - -# Check SRILM tools -if ! 
which ngram-count > /dev/null; then
-   echo "srilm tools are not found, please download it and install it from: "
-   echo "http://www.speech.sri.com/projects/srilm/download.html"
-   echo "Then add the tools to your PATH"
-   exit 1
-fi
-
-dir=data/local/lm
-mkdir -p $dir
-
-cleantext=$dir/text.no_oov
-
-cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
-  {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \
-  > $cleantext || exit 1;
-
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
-  sort -nr > $dir/word.counts || exit 1;
-
-# Get counts from acoustic training transcripts, and add one-count
-# for each word in the lexicon (but not silence, we don't want it
-# in the LM-- we'll add it optionally later).
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
-  cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
-  sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
-
-cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist
-
-heldout_sent=10000 # Don't change this if you want result to be comparable with
-    # kaldi_lm results
-mkdir -p $dir
-cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
-  head -$heldout_sent > $dir/heldout
-cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
-  tail -n +$heldout_sent > $dir/train
-
-ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
-  -map-unk "" -kndiscount -interpolate -lm $dir/lm.arpa
-ngram -lm $dir/lm.arpa -ppl $dir/heldout
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/local/word_segmentation.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/local/word_segmentation.py
deleted file mode 100644
index 117686dd3e826ebc63abc22779c913a2cb9f78d2..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/local/word_segmentation.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python
-# encoding=utf-8
-# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
-#           2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
-# Apache 2.0
-
-from __future__ import print_function
-import sys
-import jieba
-
-if len(sys.argv) < 3:
-    sys.stderr.write(
-        "word_segmentation.py <vocab> <trans>\n")
-    exit(1)
-
-vocab_file = sys.argv[1]
-trans_file = sys.argv[2]
-
-jieba.set_dictionary(vocab_file)
-for line in open(trans_file, 'r', encoding='utf8'):
-    key, trans = line.strip().split(' ', 1)
-    words = jieba.cut(trans,
-                      HMM=False)  # turn off new word discovery (HMM-based)
-    new_line = key + '\t' + " ".join(words)
-    print(new_line)
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/path.sh
deleted file mode 100644
index 8d4c9092e217d2fbd4e4cada2edbe0da768358eb..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/path.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-export WENET_DIR=$PWD/../../..
-export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/run.sh deleted file mode 100644 index 8102c1a888ab42f0b391228314696c210f85b186..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/run.sh +++ /dev/null @@ -1,191 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -# 2022 burkliu(boji123@aliyun.com) - -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3" - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=5 -# The num of nodes or machines used for multi-machine training -# Default 1 for single machine/node -# NFS will be needed if you want run multi-machine training -num_nodes=1 -# The rank of each node or machine, range from 0 to num_nodes -1 -# The first node/machine sets node_rank 0, the second one sets node_rank 1 -# the third one set node_rank 2, and so on. Default 0 -node_rank=0 - -# modify this to your AISHELL-2 data path -# Note: the evaluation data (dev & test) is available at AISHELL. -# Please download it from http://aishell-eval.oss-cn-beijing.aliyuncs.com/TEST%26DEV%20DATA.zip -train_set=/cfs/share/corpus/aishell-2/AISHELL-2/iOS/data -dev_set=/cfs/share/corpus/aishell-2/AISHELL-DEV-TEST-SET/iOS/dev -test_set=/cfs/share/corpus/aishell-2/AISHELL-DEV-TEST-SET/iOS/test - -nj=16 -dict=data/dict/lang_char.txt - -train_set=train -train_config=conf/conformer_u2pp_rnnt.yaml -cmvn=true -dir=exp/`basename ${train_config%.*}` -checkpoint= - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=30 -decode_modes="rnnt_beam_search" - -# Specify decoding_chunk_size if it's a unified dynamic chunk trained model -# -1 for full chunk -decoding_chunk_size=-1 -# only used in rescore mode for weighting different scores -rescore_ctc_weight=0.5 -rescore_transducer_weight=0.5 -rescore_attn_weight=0.5 -# only used in beam search, either pure beam search mode OR beam search inside rescoring -search_ctc_weight=0.3 -search_transducer_weight=0.7 - -. 
tools/parse_options.sh || exit 1;
-
-if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
-  # Data preparation
-  local/prepare_data.sh ${train_set} data/local/${train_set} data/${train_set} || exit 1;
-  local/prepare_data.sh ${dev_set} data/local/dev data/dev || exit 1;
-  local/prepare_data.sh ${test_set} data/local/test data/test || exit 1;
-fi
-
-if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-  # remove the space between the text labels for Mandarin dataset
-  for x in ${train_set} dev test; do
-    cp data/${x}/text data/${x}/text.org
-    paste -d " " <(cut -f 1 data/${x}/text.org) <(cut -f 2- data/${x}/text.org \
-      | tr 'a-z' 'A-Z' | sed 's/\([A-Z]\) \([A-Z]\)/\1▁\2/g' | tr -d " ") \
-      > data/${x}/text
-    rm data/${x}/text.org
-  done
-
-  tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \
-    --in_scp data/${train_set}/wav.scp \
-    --out_cmvn data/$train_set/global_cmvn
-
-fi
-
-if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-  # Make train dict
-  echo "Make a dictionary"
-  mkdir -p $(dirname $dict)
-  echo "<blank> 0" > ${dict} # 0 will be used for "blank" in CTC
-  echo "<unk> 1" >> ${dict} # <unk> must be 1
-  tools/text2token.py -s 1 -n 1 data/${train_set}/text | cut -f 2- -d" " | tr " " "\n" \
-    | sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0 " " NR+1}' >> ${dict}
-  num_token=$(cat $dict | wc -l)
-  echo "<sos/eos> $num_token" >> $dict # <eos>
-fi
-
-if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-  # Prepare wenet required data
-  echo "Prepare data, prepare required format"
-  for x in dev test ${train_set}; do
-    tools/make_raw_list.py data/$x/wav.scp data/$x/text data/$x/data.list
-  done
-fi
-
-if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
-  # Training
-  mkdir -p $dir
-  INIT_FILE=$dir/ddp_init
-  # You had better rm it manually before you start run.sh on first node.
-  # rm -f $INIT_FILE # delete old one before starting
-  init_method=file://$(readlink -f $INIT_FILE)
-  echo "$0: init method is $init_method"
-  # The number of gpus runing on each node/machine
-  num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-  # Use "nccl" if it works, otherwise use "gloo"
-  dist_backend="gloo"
-  #dist_backend="nccl"
-  # The total number of processes/gpus, so that the master knows
-  # how many workers to wait for.
-  # More details about ddp can be found in
-  # https://pytorch.org/tutorials/intermediate/dist_tuto.html
-  world_size=`expr $num_gpus \* $num_nodes`
-  echo "total gpus is: $world_size"
-  cmvn_opts=
-  $cmvn && cp data/${train_set}/global_cmvn $dir
-  $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn"
-  # train.py will write $train_config to $dir/train.yaml with model input
-  # and output dimension, train.yaml will be used for inference or model
-  # export later
-  for ((i = 0; i < $num_gpus; ++i)); do
-  {
-    gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1])
-    # Rank of each gpu/process used for knowing whether it is
-    # the master of a worker.
- rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type raw \ - --symbol_table $dict \ - --train_data data/$train_set/data.list \ - --cv_data data/dev/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 4 \ - $cmvn_opts \ - 2>&1 | tee -a $dir/train.log || exit 1; - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best \ - 2>&1 | tee -a $dir/average.log || exit 1; - fi - - for mode in ${decode_modes}; do - { - test_dir=$dir/test_${mode}_chunk_${decoding_chunk_size} - mkdir -p $test_dir - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type raw \ - --test_data data/test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $rescore_ctc_weight \ - --transducer_weight $rescore_transducer_weight \ - --attn_weight $rescore_attn_weight \ - --search_ctc_weight $search_ctc_weight \ - --search_transducer_weight $search_transducer_weight \ - --result_file $test_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - python tools/compute-wer.py --char=1 --v=1 \ - data/test/text $test_dir/text > $test_dir/wer - } & - done - wait -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. 
tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. ./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. 
- elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - 
pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = 
{'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. 
-has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! $skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = 
result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. - format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - 
parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University 
(author: Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
Will be used in lexicon and language model FST compiling.
-cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk '
-  BEGIN {
-    print "<eps> 0";
-  }
-  {
-    printf("%s %d\n", $1, NR);
-  }
-  END {
-    printf("#0 %d\n", NR+1);
-    printf("<s> %d\n", NR+2);
-    printf("</s> %d\n", NR+3);
-  }' > $dir/words.txt || exit 1;
-
-# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time.
-token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'`
-word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'`
-
-tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \
-  fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \
-  --keep_isymbols=false --keep_osymbols=false | \
-  fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \
-  fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;
-
-echo "Lexicon and token FSTs compiling succeeded"
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/ctc_token_fst.py
deleted file mode 100644
index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/ctc_token_fst.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 1 <eps> <eps>')
-print('1 1 <blank> <eps>')
-print('2 2 <blank> <eps>')
-print('2 0 <eps> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    node = 3
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(1, node, phone, phone))
-            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
-            print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>'))
-        node += 1
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/ctc_token_fst_compact.py
deleted file mode 100644
index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/ctc_token_fst_compact.py
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 0 <blank> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    node = 1
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(0, node, phone, phone))
-            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
-            print('{} {} {} {}'.format(node, 0, '<eps>', '<eps>'))
-        node += 1
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/ctc_token_fst_corrected.py
deleted file mode 100644
index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/ctc_token_fst_corrected.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-
-def 
il(n): - return n + 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not 
use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if 
PY2 else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - 
stack.enter_context(open(output, "w", encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
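The filtering block that follows invokes tools/filter_scp.pl repeatedly; in essence it keeps only the lines of an scp-style file whose first field (the utterance or recording id) appears in a given id list. A hedged Python sketch of that behaviour — the file names in the usage comment are illustrative:

```python
# Sketch of the filter_scp.pl behaviour used in the filtering block below:
# keep only scp lines whose first field appears in an id-list file.
import sys

def filter_scp(id_list_path, scp_path):
    with open(id_list_path, encoding="utf-8") as f:
        keep = {line.split(maxsplit=1)[0] for line in f if line.strip()}
    with open(scp_path, encoding="utf-8") as f:
        for line in f:
            fields = line.split(maxsplit=1)
            if fields and fields[0] in keep:
                sys.stdout.write(line)

if __name__ == "__main__":
    # e.g. python filter_scp.py dest/utt2spk src/feats.scp > dest/feats.scp
    filter_scp(sys.argv[1], sys.argv[2])
```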
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
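The comment above, and the Perl sub that follows, describe a decode-or-fall-back strategy: treat the file as UTF-8 if every line decodes cleanly, otherwise fall back to a 1-byte encoding so that string lengths (and thus output formatting) stay sensible. A small Python sketch of the same idea; the function name mirrors the Perl sub and the path argument is a placeholder:

```python
# Same idea as the Perl get_utf8_or_bytestream below: try UTF-8 first,
# otherwise fall back to a 1-byte-per-character interpretation of the bytes.
def get_utf8_or_bytestream(path):
    with open(path, "rb") as f:
        raw = f.read()
    try:
        return True, raw.decode("utf-8").splitlines(keepends=True)
    except UnicodeDecodeError:
        return False, raw.decode("latin-1").splitlines(keepends=True)

if __name__ == "__main__":
    is_utf8, lines = get_utf8_or_bytestream("data/train/text")  # placeholder path
    print("utf-8:", is_utf8, "lines:", len(lines))
```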
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - 
non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = 
feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
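# ---------------------------------------------------------------------------
# Editorial sketch (not part of the original wenet sources): the decoder
# export below traces with concrete sizes (time->200, nbest->10, len->20) but
# declares those dimensions dynamic via `dynamic_axes`, so the exported graph
# accepts any size along the named axes. A toy module exported the same way,
# with hypothetical file and tensor names:
import torch

class _Toy(torch.nn.Module):
    def forward(self, x):            # x: (nbest, length, feat)
        return x.sum(dim=-1)         # y: (nbest, length)

torch.onnx.export(
    _Toy(), (torch.randn(10, 20, 4),), "toy_dynamic_axes.onnx",
    opset_version=13, input_names=["x"], output_names=["y"],
    dynamic_axes={"x": {0: "NBEST", 1: "L"}, "y": {0: "NBEST", 1: "L"}})
# ---------------------------------------------------------------------------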
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
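# ---------------------------------------------------------------------------
# Editorial sketch (not part of the original wenet sources): the docstrings
# above describe the streaming cache layout. For the first chunk of a request
# the caches start empty (all zeros) and the chunk itself is sized from the
# subsampling context. The hyper-parameters below are illustrative values
# only (4x conv subsampling with right_context 6, 80-dim fbank features),
# not values read from a specific config:
import torch

b, elayers, head, hidden_dim = 4, 12, 4, 256
chunk_size, left_chunks = 16, 4
subsampling_rate, right_context = 4, 6
cnn_kernel, feat_dim = 15, 80

required_cache_size = chunk_size * left_chunks                 # cache_t1
d_k = hidden_dim // head
decoding_window = (chunk_size - 1) * subsampling_rate + right_context + 1  # = 67

chunk_xs = torch.randn(b, decoding_window, feat_dim)           # (b, time, mel-dim)
att_cache = torch.zeros(b, elayers, head, required_cache_size, d_k * 2)
cnn_cache = torch.zeros(b, elayers, hidden_dim, cnn_kernel - 1)  # cache_t2
cache_mask = torch.zeros(b, 1, required_cache_size)            # nothing cached yet
offset = torch.zeros(b, 1, dtype=torch.int64)                  # per-request offset
# ---------------------------------------------------------------------------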
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
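# ---------------------------------------------------------------------------
# Editorial sketch (not part of the original wenet sources): the Decoder
# wrapper defined earlier ranks the beam hypotheses by combining left-to-right
# and right-to-left attention scores with the CTC prefix score. Stripped of
# the padding and gather bookkeeping, the final selection reduces to the
# weighted sum below (function name and weights are illustrative):
import torch

def pick_best(attn_score, r_attn_score, ctc_score,
              reverse_weight=0.3, ctc_weight=0.5):
    """attn_score, r_attn_score, ctc_score: (batch, beam) summed log-probs."""
    score = (1.0 - reverse_weight) * attn_score + reverse_weight * r_attn_score
    score = score + ctc_weight * ctc_score
    return torch.argmax(score, dim=1)        # best hypothesis index per utterance
# ---------------------------------------------------------------------------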
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' 
% (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. - """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... 
- - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! - # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. 
- - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. 
- - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! - if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. 
* (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. - - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. 
- - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). -# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. 
- """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. - - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. 
- - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. - - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, 
poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). 
- mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. 
time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
-            new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device)
-
-        # GLU mechanism
-        x = self.pointwise_conv1(x) # (batch, 2*channel, dim)
-        x = nn.functional.glu(x, dim=1) # (batch, channel, dim)
-
-        # 1D Depthwise Conv
-        x = self.depthwise_conv(x)
-        if self.use_layer_norm:
-            x = x.transpose(1, 2)
-        x = self.activation(self.norm(x))
-        if self.use_layer_norm:
-            x = x.transpose(1, 2)
-        x = self.pointwise_conv2(x)
-        # mask batch padding
-        if mask_pad.size(2) > 0: # time > 0
-            if mask_pad.size(2) != x.size(2):
-                mask_pad = mask_pad[:, :, ::self.stride]
-            x.masked_fill_(~mask_pad, 0.0)
-
-        return x.transpose(1, 2), new_cache
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/encoder.py
deleted file mode 100644
index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/encoder.py
+++ /dev/null
@@ -1,574 +0,0 @@
-# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu)
-# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn)
-# 2022 58.com(Wuba) Inc AI Lab.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. 
- Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
-
-    """
-    def __init__(self, idim: int, odim: int, dropout_rate: float,
-                 pos_enc_class: torch.nn.Module):
-        """Construct an Conv2dSubsampling4 object."""
-        super().__init__()
-        self.conv = torch.nn.Sequential(
-            torch.nn.Conv2d(1, odim, 3, 2),
-            torch.nn.ReLU()
-        )
-        self.out = torch.nn.Sequential(
-            torch.nn.Linear(odim * ((idim - 1) // 2), odim))
-        self.pos_enc = pos_enc_class
-        # The right context for every conv layer is computed by:
-        # (kernel_size - 1) * frame_rate_of_this_layer
-        self.subsampling_rate = 2
-        # 2 = (3 - 1) * 1
-        self.right_context = 2
-
-    def forward(
-            self,
-            x: torch.Tensor,
-            x_mask: torch.Tensor,
-            offset: Union[int, torch.Tensor] = 0
-    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        """Subsample x.
-
-        Args:
-            x (torch.Tensor): Input tensor (#batch, time, idim).
-            x_mask (torch.Tensor): Input mask (#batch, 1, time).
-
-        Returns:
-            torch.Tensor: Subsampled tensor (#batch, time', odim),
-                where time' = time // 2.
-            torch.Tensor: Subsampled mask (#batch, 1, time'),
-                where time' = time // 2.
-            torch.Tensor: positional encoding
-
-        """
-        x = x.unsqueeze(1) # (b, c=1, t, f)
-        x = self.conv(x)
-        b, c, t, f = x.size()
-        x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f))
-        x, pos_emb = self.pos_enc(x, offset)
-        return x, pos_emb, x_mask[:, :, :-2:2]
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/attention.py
deleted file mode 100644
index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/attention.py
+++ /dev/null
@@ -1,222 +0,0 @@
-# Copyright (c) 2019 Shigeki Karita
-# 2020 Mobvoi Inc (Binbin Zhang)
-# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn)
-# 2022 Ximalaya Inc. (Yuguang Yang)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Multi-Head Attention layer definition."""
-
-import math
-import torch
-import torch.nn as nn
-from wenet.transformer.attention import MultiHeadedAttention
-from typing import Tuple
-
-
-class RelPositionMultiHeadedAttention(MultiHeadedAttention):
-    """Multi-Head Attention layer with relative position encoding.
-    Paper: https://arxiv.org/abs/1901.02860
-    Args:
-        n_head (int): The number of heads.
-        n_feat (int): The number of features.
-        dropout_rate (float): Dropout rate.
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = 
False, - static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - 
""" - return padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/search/greedy_search.py +++ /dev/null @@ 
-1,54 +0,0 @@ -from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - 
batch_size = speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
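# Editor's note: standalone sketch (random logits) of the shallow fusion step
# above: transducer and CTC log-probabilities are mixed in the probability
# domain and the result is taken back to the log domain.
import torch

transducer_weight, ctc_weight = 0.7, 0.3
logp_transducer = torch.log_softmax(torch.randn(1, 5), dim=-1)   # (N, vocab_size)
logp_ctc = torch.log_softmax(torch.randn(1, 5), dim=-1)          # (1, vocab_size)
fused = torch.log(transducer_weight * torch.exp(logp_transducer)
                  + ctc_weight * torch.exp(logp_ctc))
# because the weights sum to 1, the fused scores are still valid log-probabilities
assert torch.allclose(fused.exp().sum(dim=-1), torch.tensor(1.0), atol=1e-5)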
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
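# Editor's note: minimal sketch of the torchaudio.functional.rnnt_loss call used
# above, with random data and assumed sizes; it requires a torchaudio build that
# ships rnnt_loss. The joint output has shape (B, T, U + 1, V): T encoder frames,
# U target tokens plus one extra blank step, V vocabulary entries.
import torch
import torchaudio

B, T, U, V, blank = 2, 6, 3, 10, 0
joint_out = torch.randn(B, T, U + 1, V)                       # joint(encoder_out, predictor_out)
targets = torch.randint(1, V, (B, U), dtype=torch.int32)      # blank never appears in targets
logit_lengths = torch.full((B,), T, dtype=torch.int32)
target_lengths = torch.full((B,), U, dtype=torch.int32)
loss = torchaudio.functional.rnnt_loss(joint_out, targets, logit_lengths,
                                        target_lengths, blank=blank,
                                        reduction="mean")
print(loss.item())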
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
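# Editor's note: toy numbers (entirely made up) showing how the rescoring
# weights above combine the attention, CTC/beam and transducer scores of a
# single hypothesis before the best one is picked.
attn_weight, ctc_weight, transducer_weight = 0.5, 0.3, 0.2
attn_score, beam_score, td_score = -12.4, -15.1, -9.8   # hypothetical log-scores
rescore = (attn_score * attn_weight
           + beam_score * ctc_weight
           + td_score * transducer_weight)
print(rescore)  # ≈ -12.69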
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
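# Editor's note: tiny sketch (made-up loss values) of the two interpolations in
# the deleted ASRModel code above: the CTC/attention hybrid loss in forward()
# and the left-to-right / right-to-left mix in _calc_att_loss().
import torch

ctc_weight, reverse_weight = 0.3, 0.3
loss_ctc, loss_att = torch.tensor(42.0), torch.tensor(30.0)
loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att                 # ≈ 33.6
l2r_att, r2l_att = torch.tensor(30.0), torch.tensor(31.5)
loss_att_bi = l2r_att * (1 - reverse_weight) + r2l_att * reverse_weight    # ≈ 30.45
print(loss.item(), loss_att_bi.item())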
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
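# Editor's note: standalone sketch (random scores, small sizes) of the two-stage
# beam pruning in recognize() above: a per-beam top-k over the vocabulary, then
# a global top-k over the flattened beam_size * beam_size candidates, followed
# by index arithmetic to recover the chosen token and its parent beam.
import torch

batch_size, beam_size, vocab = 1, 3, 7
logp = torch.log_softmax(torch.randn(batch_size * beam_size, vocab), dim=-1)
scores = torch.zeros(batch_size * beam_size, 1)              # running beam scores

top_k_logp, top_k_index = logp.topk(beam_size)               # (B*N, N): first prune
cand = (scores + top_k_logp).view(batch_size, beam_size * beam_size)
cand_scores, offset_k_index = cand.topk(k=beam_size)         # (B, N): second prune

base_k_index = torch.arange(batch_size).view(-1, 1) * beam_size * beam_size
best_k_index = (base_k_index + offset_k_index).view(-1)      # (B*N,)
best_k_pred = top_k_index.view(-1)[best_k_index]             # surviving token ids
parent_beam = best_k_index // beam_size                      # which beam they extend
print(best_k_pred.tolist(), parent_beam.tolist())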
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
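# Editor's note: minimal sketch (not the wenet helper itself) of what
# remove_duplicates_and_blank does after the frame-wise argmax in
# ctc_greedy_search above: merge repeated symbols, then drop blanks (id 0 here).
def collapse_ctc(path, blank=0):
    out, prev = [], None
    for s in path:
        if s != prev and s != blank:
            out.append(s)
        prev = s
    return out

print(collapse_ctc([0, 5, 5, 0, 0, 7, 7, 7, 0, 5]))  # [5, 7, 5]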
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
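# Editor's note: sketch of the log-domain addition used throughout the CTC
# prefix beam search above; wenet's log_add behaves like a numerically stable
# log(sum(exp(...))) over a list of scores.
import math

def log_add(args):
    m = max(args)
    if m == -float('inf'):
        return -float('inf')
    return m + math.log(sum(math.exp(a - m) for a in args))

pb, pnb, ps = -1.2, -float('inf'), -0.3   # blank/non-blank prefix scores, symbol log-prob
new_pb = log_add([pb + ps, pnb + ps])     # a blank can extend either ending
print(new_pb)                             # -1.5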
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
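# Editor's note: runnable sketch of the ONNX-friendly reversal used in
# forward_attention_decoder above (arange + gather + where instead of
# reverse_pad_list/pad_sequence), reproducing the tensors from the inline
# comments; eos=10 is just a placeholder value for this example.
import torch

eos = 10
r_hyps = torch.tensor([[1, 2, 3], [9, 8, 4], [2, -1, -1]])
r_hyps_lens = torch.tensor([3, 3, 1])

max_len = torch.max(r_hyps_lens)
index_range = torch.arange(0, max_len, 1)
seq_len_expand = r_hyps_lens.unsqueeze(1)
seq_mask = seq_len_expand > index_range            # valid (non-padding) positions
index = (seq_len_expand - 1) - index_range         # reversed position of each slot
index = index * seq_mask                           # clamp padding slots to index 0
reversed_hyps = torch.gather(r_hyps, 1, index)
reversed_hyps = torch.where(seq_mask, reversed_hyps, torch.full_like(reversed_hyps, eos))
print(reversed_hyps)  # tensor([[ 3,  2,  1], [ 4,  8,  9], [ 2, 10, 10]])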
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
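# Editor's note: runnable illustration of the export trick described in the
# comments above -- concatenating (and splitting) a zero-length cache tensor is
# a no-op, so the ONNX path can always take the "split and concat" branch even
# for the first chunk.
import torch

a = torch.ones((1, 2, 0, 4))         # empty KV cache fed for the first chunk
b = torch.ones((1, 2, 3, 4))         # freshly computed keys (or values)
c = torch.cat((a, b), dim=2)
assert torch.equal(b, c)             # the empty cache changed nothing

key_cache, value_cache = torch.split(torch.zeros((1, 2, 0, 8)), 4, dim=-1)
assert key_cache.shape == value_cache.shape == (1, 2, 0, 4)   # splitting is safe too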
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
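        # [Editor's note: annotation, not part of the original wenet source or of this diff]
        # Cache layout assumed by the split above and the concat that follows: the last
        # dimension packs the key and value halves side by side (d_k * 2), so
        # torch.split(cache, cache.size(-1) // 2, dim=-1) recovers (key_cache, value_cache),
        # and new_cache re-packs the updated k / v the same way for the next chunk.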
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
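# [Editor's note] Minimal illustrative sketch, not part of the deleted files and not
# applied by this diff: what the GlobalCMVN module shown above computes. The feature
# dimension and the mean / istd tensors below are made-up placeholders; in wenet they
# come from the global_cmvn stats file.
import torch

feat_dim = 80
mean = torch.zeros(feat_dim)           # per-dimension mean (placeholder values)
istd = torch.ones(feat_dim)            # per-dimension 1.0 / std (placeholder values)

fbank = torch.randn(4, 100, feat_dim)  # (batch, frames, feat_dim)
normalized = (fbank - mean) * istd     # mirrors GlobalCMVN.forward with norm_var=True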
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
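# [Editor's note] Standalone sketch, not part of the deleted files and not applied by
# this diff: the sinusoidal table built by the PositionalEncoding module shown above,
# i.e. PE(pos, 2i) = sin(pos / 10000^(2i/d_model)) and PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model)).
# d_model and max_len are chosen here only for illustration.
import math
import torch

d_model, max_len = 256, 5000
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float32)
                     * -(math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0)  # (1, max_len, d_model); sliced as pe[:, offset:offset + size] at runtime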
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/README.md deleted file mode 100644 index ef98864f6b07e6fb0c3b0e7bef57f06d16f2ef79..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/README.md +++ /dev/null @@ -1,59 +0,0 @@ -# Performance Record - -## U2++ Conformer Result - -* Feature info: using fbank feature, with cmvn, no speed perturb, dither -* Training info: lr 0.001, batch size 32, 8 gpus, acc_grad 1, 240 epochs, dither 1.0 -* Decoding info: ctc_weight 0.1, reverse_weight 0.4, average_num 30 -* Git hash: 5a1342312668e7a5abb83aed1e53256819cebf95 - -| decoding mode/chunk size | full | 16 | -|---------------------------|-------|-------| -| ctc greedy search | 6.18 | 6.79 | -| ctc prefix beam search | 6.20 | 6.80 | -| attention rescoring | 5.39 | 5.78 | -| LM + attention rescoring | 5.35 | 5.73 | - -## U2++ Transformer Result - -* Feature info: using fbank feature, with cmvn, no speed perturb -* Training info: lr 0.002, batch size 22, 8 gpus, acc_grad 1, 240 epochs, dither 0.0 -* Decoding info: ctc_weight 0.1, reverse_weight 0.5, average_num 30 -* Git hash: 5a1342312668e7a5abb83aed1e53256819cebf95 - -| decoding mode/chunk size | full | 16 | -|---------------------------|-------|-------| -| ctc greedy search | 7.35 | 8.23 | -| ctc prefix beam search | 7.36 | 8.23 | -| attention rescoring | 6.09 | 6.70 | -| LM + attention rescoring | 6.07 | 6.55 | - -## Unified Conformer Result - -* Feature info: using fbank feature, with cmvn, no speed perturb. 
-* Training info: lr 0.002, batch size 16, 8 gpus, acc_grad 1, 120 epochs, dither 1.0 -* Decoding info: ctc_weight 0.5, average_num 20 -* Git hash: 14d38085a8d966cf9e9577ffafc51d578dce954f - -| decoding mode/chunk size | full | 16 | 8 | 4 | -|---------------------------|-------|-------|-------|-------| -| attention decoder | 6.23 | 6.42 | 6.58 | 7.20 | -| ctc greedy search | 6.98 | 7.75 | 8.21 | 9.91 | -| ctc prefix beam search | 7.02 | 7.76 | 8.21 | 9.93 | -| attention rescoring | 6.08 | 6.46 | 6.72 | 7.79 | -| LM + attention rescoring | 5.87 | 6.37 | 6.47 | 6.61 | - -## Unified Transformer Result - -* Feature info: using fbank feature, with cmvn, no speed perturb. -* Training info: lr 0.002, batch size 22, 8 gpus, acc_grad 1, 180 epochs, dither 0.0 -* Decoding info: ctc_weight 0.5, average_num 30 -* Git hash: 14d38085a8d966cf9e9577ffafc51d578dce954f - -| decoding mode/chunk size | full | 16 | 8 | 4 | -|---------------------------|-------|-------|-------|-------| -| attention decoder | 6.71 | 7.08 | 7.17 | 7.40 | -| ctc greedy search | 7.84 | 8.68 | 8.98 | 9.46 | -| ctc prefix beam search | 7.86 | 8.68 | 8.98 | 9.45 | -| attention rescoring | 6.71 | 7.31 | 7.51 | 7.85 | -| LM + attention rescoring | 6.35 | 7.02 | 7.24 | 7.52 | diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_u2++_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_u2++_conformer.yaml deleted file mode 100644 index 2680893cf5b8707241908469697a35ce3f5acb3e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_u2++_conformer.yaml +++ /dev/null @@ -1,84 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 8 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - causal: true - use_dynamic_chunk: true - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - use_dynamic_left_chunk: false - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -# dataset related -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 240 -log_interval: 100 - 
-optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_u2++_transformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_u2++_transformer.yaml deleted file mode 100644 index 391c9a65af89fed3d038ff15e6e9bc08b5493142..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_u2++_transformer.yaml +++ /dev/null @@ -1,90 +0,0 @@ -# network architecture -# encoder related -encoder: transformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder architecture type - normalize_before: true - use_dynamic_chunk: true - use_dynamic_left_chunk: false - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -# feature extraction -collate_conf: - # waveform level config - wav_distortion_conf: - wav_dither: 1.0 - wav_distortion_rate: 0.0 - distortion_methods: [] - speed_perturb: false - feature_extraction_conf: - feature_type: 'fbank' - mel_bins: 80 - frame_shift: 10 - -# dataset related -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 240 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_unified_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_unified_conformer.yaml deleted file mode 100644 index 9c907d97a981662f7b1d87e09fbef14c8d1f5bb5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_unified_conformer.yaml +++ /dev/null @@ -1,82 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - 
activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - causal: true - use_dynamic_chunk: true - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - use_dynamic_left_chunk: false - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -# dataset related -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 120 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_unified_transformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_unified_transformer.yaml deleted file mode 100644 index ecd9f8cfff3677c5e0c56f996923a849fa38346b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/conf/train_unified_transformer.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# network architecture -# encoder related -encoder: transformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder architecture type - normalize_before: true - use_dynamic_chunk: true - use_dynamic_left_chunk: false - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -# feature extraction -collate_conf: - # waveform level config - wav_distortion_conf: - wav_dither: 0.0 - wav_distortion_rate: 0.0 - distortion_methods: [] - speed_perturb: false - feature_extraction_conf: - feature_type: 'fbank' - mel_bins: 80 - frame_shift: 10 - -# dataset related -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' 
# static or dynamic
- batch_size: 16
-
-grad_clip: 5
-accum_grad: 1
-max_epoch: 130
-log_interval: 100
-
-optim: adam
-optim_conf:
- lr: 0.002
-scheduler: warmuplr # pytorch v1.1.0+ required
-scheduler_conf:
- warmup_steps: 25000
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/local/prepare_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/local/prepare_data.sh
deleted file mode 100644
index c1586b87856804bce4a23609f696417deb7d4e79..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/local/prepare_data.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
-# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
-# Apache 2.0
-
-# transform raw AISHELL-2 data to kaldi format
-
-if [ $# != 3 ]; then
- echo "prepare_data.sh <corpus-data-dir> <tmp-dir> <output-dir>"
- echo " e.g prepare_data.sh /data/AISHELL-2/iOS/train data/local/train data/train"
- exit 1;
-fi
-
-corpus=$1
-tmp=$2
-dir=$3
-
-echo "prepare_data.sh: Preparing data in $corpus"
-
-mkdir -p $tmp
-mkdir -p $dir
-
-# corpus check
-if [ ! -d $corpus ] || [ ! -f $corpus/wav.scp ] || [ ! -f $corpus/trans.txt ]; then
- echo "Error: $0 requires wav.scp and trans.txt under $corpus directory."
- exit 1;
-fi
-
-# validate utt-key list
-awk '{print $1}' $corpus/wav.scp > $tmp/wav_utt.list
-awk '{print $1}' $corpus/trans.txt > $tmp/trans_utt.list
-tools/filter_scp.pl -f 1 $tmp/wav_utt.list $tmp/trans_utt.list > $tmp/utt.list
-
-# wav.scp
-awk -F'\t' -v path_prefix=$corpus '{printf("%s\t%s/%s\n",$1,path_prefix,$2)}' $corpus/wav.scp > $tmp/tmp_wav.scp
-tools/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_wav.scp | sort -k 1 | uniq > $tmp/wav.scp
-
-# text
-tools/filter_scp.pl -f 1 $tmp/utt.list $corpus/trans.txt | sort -k 1 | uniq > $tmp/trans.txt
-dos2unix < $tmp/trans.txt | \
- tools/filter_scp.pl -f 1 $tmp/utt.list - | \
- sort -k 1 | uniq | tr '[a-z]' '[A-Z]' | \
- sed 's/Ａ/A/g' | sed 's/Ｔ/T/g' | sed 's/Ｍ/M/g' | sed 's/𫚉//g' | sed 's/𫖯/頫/g' | \
- sed 's/[()]//g' | sed "s/\([^A-Z]\)'/\1/g" > $tmp/text
-
-# copy prepared resources from tmp_dir to target dir
-mkdir -p $dir
-for f in wav.scp text; do
- cp $tmp/$f $dir/$f || exit 1;
-done
-
-echo "local/prepare_data.sh succeeded"
-exit 0;
-
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/local/train_lms.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/local/train_lms.sh
deleted file mode 100644
index 2e2d0dbeb79c54d707add6d20269fe9b89e69d8e..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/local/train_lms.sh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/bash
-
-
-# To be run from one directory above this script.
-. ./path.sh
-
-text=data/local/lm/text
-lexicon=data/local/dict/lexicon.txt
-
-. tools/parse_options.sh
-
-for f in "$text" "$lexicon"; do
- [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
-done
-
-# Check SRILM tools
-if ! 
which ngram-count > /dev/null; then
- echo "srilm tools are not found, please download it and install it from: "
- echo "http://www.speech.sri.com/projects/srilm/download.html"
- echo "Then add the tools to your PATH"
- exit 1
-fi
-
-dir=data/local/lm
-mkdir -p $dir
-
-cleantext=$dir/text.no_oov
-
-cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
- {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<SPOKEN_NOISE> ");} } printf("\n");}' \
- > $cleantext || exit 1;
-
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
- sort -nr > $dir/word.counts || exit 1;
-
-# Get counts from acoustic training transcripts, and add one-count
-# for each word in the lexicon (but not silence, we don't want it
-# in the LM-- we'll add it optionally later).
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
- cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
- sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
-
-cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist
-
-heldout_sent=10000 # Don't change this if you want result to be comparable with
- # kaldi_lm results
-mkdir -p $dir
-cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
- head -$heldout_sent > $dir/heldout
-cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
- tail -n +$heldout_sent > $dir/train
-
-ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
- -map-unk "<unk>" -kndiscount -interpolate -lm $dir/lm.arpa
-ngram -lm $dir/lm.arpa -ppl $dir/heldout
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/local/word_segmentation.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/local/word_segmentation.py
deleted file mode 100644
index 117686dd3e826ebc63abc22779c913a2cb9f78d2..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/local/word_segmentation.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python
-# encoding=utf-8
-# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
-# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
-# Apache 2.0
-
-from __future__ import print_function
-import sys
-import jieba
-
-if len(sys.argv) < 3:
- sys.stderr.write(
- "word_segmentation.py <vocab_file> <trans_file>\n")
- exit(1)
-
-vocab_file = sys.argv[1]
-trans_file = sys.argv[2]
-
-jieba.set_dictionary(vocab_file)
-for line in open(trans_file, 'r', encoding='utf8'):
- key, trans = line.strip().split(' ', 1)
- words = jieba.cut(trans,
- HMM=False) # turn off new word discovery (HMM-based)
- new_line = key + '\t' + " ".join(words)
- print(new_line)
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/path.sh
deleted file mode 100644
index ac1ca08baf5d4540b92ed239b8aa7cd613064a8c..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/path.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-export WENET_DIR=$PWD/../../..
-export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/run.sh deleted file mode 100644 index 7a40f4223c377fb2f9d66b8a2ad96bcf86cd8506..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/run.sh +++ /dev/null @@ -1,232 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl -# communication. More details can be found in -# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html -# export NCCL_SOCKET_IFNAME=ens4f1 -export NCCL_DEBUG=INFO -stage=0 # start from 0 if you need to start from data preparation -stop_stage=6 -# The num of nodes or machines used for multi-machine training -# Default 1 for single machine/node -# NFS will be needed if you want run multi-machine training -num_nodes=1 -# The rank of each node or machine, range from 0 to num_nodes -1 -# The first node/machine sets node_rank 0, the second one sets node_rank 1 -# the third one set node_rank 2, and so on. Default 0 -node_rank=0 - -# modify this to your AISHELL-2 data path -# Note: the evaluation data (dev & test) is available at AISHELL. -# Please download it from http://aishell-eval.oss-cn-beijing.aliyuncs.com/TEST%26DEV%20DATA.zip -trn_set=/mnt/nfs/ptm1/open-data/AISHELL-2/iOS/data -dev_set=/mnt/nfs/ptm1/open-data/AISHELL-DEV-TEST-SET/iOS/dev -tst_set=/mnt/nfs/ptm1/open-data/AISHELL-DEV-TEST-SET/iOS/test - -nj=16 -dict=data/dict/lang_char.txt - -train_set=train -# Optional train_config -# 1. conf/train_transformer.yaml: Standard transformer -# 2. conf/train_conformer.yaml: Standard conformer -# 3. conf/train_unified_conformer.yaml: Unified dynamic chunk causal conformer -# 4. conf/train_unified_transformer.yaml: Unified dynamic chunk transformer -train_config=conf/train_unified_transformer.yaml -cmvn=true -dir=exp/transformer -checkpoint= - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=30 -decode_modes="ctc_greedy_search ctc_prefix_beam_search attention attention_rescoring" - -. 
tools/parse_options.sh || exit 1;
-
-if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
- # Data preparation
- local/prepare_data.sh ${trn_set} data/local/${train_set} data/${train_set} || exit 1;
- local/prepare_data.sh ${dev_set} data/local/dev data/dev || exit 1;
- local/prepare_data.sh ${tst_set} data/local/test data/test || exit 1;
-fi
-
-if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
- # remove the space between the text labels for Mandarin dataset
- for x in ${train_set} dev test; do
- cp data/${x}/text data/${x}/text.org
- paste -d " " <(cut -f 1 data/${x}/text.org) <(cut -f 2- data/${x}/text.org \
- | tr 'a-z' 'A-Z' | sed 's/\([A-Z]\) \([A-Z]\)/\1▁\2/g' | tr -d " ") \
- > data/${x}/text
- rm data/${x}/text.org
- done
-
- tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \
- --in_scp data/${train_set}/wav.scp \
- --out_cmvn data/$train_set/global_cmvn
-
-fi
-
-if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
- # Make train dict
- echo "Make a dictionary"
- mkdir -p $(dirname $dict)
- echo "<blank> 0" > ${dict} # 0 will be used for "blank" in CTC
- echo "<unk> 1" >> ${dict} # <unk> must be 1
- tools/text2token.py -s 1 -n 1 data/${train_set}/text | cut -f 2- -d" " | tr " " "\n" \
- | sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0 " " NR+1}' >> ${dict}
- num_token=$(cat $dict | wc -l)
- echo "<sos/eos> $num_token" >> $dict # <eos>
-fi
-
-if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
- # Prepare wenet required data
- echo "Prepare data, prepare required format"
- for x in dev test ${train_set}; do
- tools/make_raw_list.py data/$x/wav.scp data/$x/text data/$x/data.list
- done
-fi
-
-if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
- # Training
- mkdir -p $dir
- INIT_FILE=$dir/ddp_init
- # You had better rm it manually before you start run.sh on first node.
- # rm -f $INIT_FILE # delete old one before starting
- init_method=file://$(readlink -f $INIT_FILE)
- echo "$0: init method is $init_method"
- # The number of gpus running on each node/machine
- num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
- # Use "nccl" if it works, otherwise use "gloo"
- dist_backend="gloo"
- # The total number of processes/gpus, so that the master knows
- # how many workers to wait for.
- # More details about ddp can be found in
- # https://pytorch.org/tutorials/intermediate/dist_tuto.html
- world_size=`expr $num_gpus \* $num_nodes`
- echo "total gpus is: $world_size"
- cmvn_opts=
- $cmvn && cp data/${train_set}/global_cmvn $dir
- $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn"
- # train.py will write $train_config to $dir/train.yaml with model input
- # and output dimension, train.yaml will be used for inference or model
- # export later
- for ((i = 0; i < $num_gpus; ++i)); do
- {
- gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1])
- # Rank of each gpu/process used for knowing whether it is
- # the master of a worker. 
- rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type raw \ - --symbol_table $dict \ - --train_data data/$train_set/data.list \ - --cv_data data/dev/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 2 \ - $cmvn_opts - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size= - ctc_weight=0.5 - for mode in ${decode_modes}; do - { - test_dir=$dir/test_${mode} - mkdir -p $test_dir - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type raw \ - --test_data data/test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $ctc_weight \ - --result_file $test_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - python tools/compute-wer.py --char=1 --v=1 \ - data/test/text $test_dir/text > $test_dir/wer - } & - done - wait - -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip \ - --output_quant_file $dir/final_quant.zip -fi - -# Optionally, you can add LM and test it with runtime. -if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then - # 7.1 Prepare dict - unit_file=$dict - download_dir=data/local/DaCiDian - git clone https://github.com/aishell-foundation/DaCiDian.git $download_dir - mkdir -p data/local/dict - cp $unit_file data/local/dict/units.txt - tools/fst/prepare_dict.py $unit_file $download_dir/word_to_pinyin.txt \ - data/local/dict/lexicon.txt - # 7.2 Segment text - pip3 install jieba - lm=data/local/lm - mkdir -p $lm - awk '{print $1}' data/local/dict/lexicon.txt | \ - awk '{print $1,99}' > $lm/word_seg_vocab.txt - python local/word_segmentation.py $lm/word_seg_vocab.txt \ - data/train/text > $lm/text - # 7.3 Train lm - local/train_lms.sh - # 7.4 Build decoding TLG - tools/fst/compile_lexicon_token_fst.sh \ - data/local/dict data/local/tmp data/local/lang - tools/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1; - # 7.5 Decoding with runtime - # reverse_weight only works for u2++ model and only left to right decoder is used when it is set to 0.0. 
- reverse_weight=0.0 - chunk_size=-1 - ./tools/decode.sh --nj 16 --chunk_size $chunk_size\ - --beam 15.0 --lattice_beam 7.5 --max_active 7000 --blank_skip_thresh 0.98 \ - --ctc_weight 0.3 --rescoring_weight 1.0 --reverse_weight $reverse_weight\ - --fst_path data/lang_test/TLG.fst \ - --dict_path data/lang_test/words.txt \ - data/test/wav.scp data/test/text $dir/final.zip data/lang_test/units.txt \ - $dir/lm_with_runtime - # See $dir/lm_with_runtime for wer - tail $dir/lm_with_runtime/wer -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. 
./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. - elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in 
range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, 
ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! 
cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. -has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! 
$skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = 
dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. 
- format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - 
default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: 
Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n 
+ 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
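The no-silence branch of `make_lexicon_fst.pl` above turns every lexicon entry into a chain of arcs that starts and ends at a single loop state: the word label rides on the first arc, the remaining arcs output epsilon, and an optional pronunciation probability becomes a `-log` cost attached to that first arc, with the loop state doubling as the final state. A rough Python transliteration of just that branch (the function name and the `<eps>` label are illustrative; empty pronunciations and the silence-probability branch that continues below are left out):

```python
import math

def lexicon_to_text_fst(lines, pron_probs=False, eps="<eps>"):
    """Emit an AT&T text-format lexicon FST with a single loop/final state 0."""
    loop_state, next_state = 0, 1
    arcs = []
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        word, phones = fields[0], fields[1:]
        cost = -math.log(float(phones.pop(0))) if pron_probs else 0.0
        src, out_label = loop_state, word
        for i, phone in enumerate(phones):
            if i == len(phones) - 1:
                dst = loop_state          # last phone closes the loop
            else:
                dst = next_state
                next_state += 1
            arc = f"{src}\t{dst}\t{phone}\t{out_label}"
            if cost != 0.0:
                arc += f"\t{cost}"        # pron-prob cost only on the first arc
            arcs.append(arc)
            src, out_label, cost = dst, eps, 0.0
    arcs.append(f"{loop_state}\t0")       # loop state is final with weight 0
    return "\n".join(arcs)
```

Feeding `["hello h eh l ow", "world w er l d"]` through this sketch should print essentially the same arc layout the Perl script emits before it is piped into `fstcompile`.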
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file 
except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
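> Editor's note: the removed latency_metrics.py above (and the WER check in onnx2horizonbin.py further down) decodes the CTC head greedily: per-frame arg-max, padded frames masked out, then repeats collapsed and blanks dropped (`remove_duplicates_and_blank`). The latency script keeps the frame-aligned variant (`replace_duplicates_with_blank`) instead, so CTC spike positions are preserved for timing analysis. A minimal PyTorch sketch of the collapse-and-drop variant, assuming blank id 0:

```python
import torch


def ctc_greedy_decode(ctc_log_probs: torch.Tensor, lengths: torch.Tensor, blank: int = 0):
    """Greedy CTC decoding.

    ctc_log_probs: (B, T, V) log-probabilities from the CTC head.
    lengths:       (B,) valid frame counts per utterance.
    Returns a list of token-id lists with repeats collapsed and blanks removed.
    """
    best = ctc_log_probs.argmax(dim=-1)  # (B, T) frame-wise best token
    results = []
    for hyp, n in zip(best.tolist(), lengths.tolist()):
        hyp = hyp[:n]  # drop padded frames
        collapsed, prev = [], None
        for tok in hyp:
            if tok != blank and tok != prev:  # collapse repeats, skip blanks
                collapsed.append(tok)
            prev = tok
        results.append(collapsed)
    return results


if __name__ == "__main__":
    probs = torch.randn(2, 6, 10).log_softmax(dim=-1)  # fake CTC output for the demo
    print(ctc_greedy_decode(probs, torch.tensor([6, 4])))
```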
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 
else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
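> Editor's note: parse_options.sh, removed above, implements the Kaldi convention its header comments describe: each leading `--option-name value` pair sets an already-declared shell variable `option_name`, unknown option names abort, and options whose default is boolean must receive `true`/`false`. For readers who do not want to trace the shell, a rough Python equivalent of the same contract (hypothetical helper, not part of the repository):

```python
def parse_kaldi_options(argv, defaults):
    """Consume leading '--name value' pairs in the style of tools/parse_options.sh.

    defaults: dict of known options and their default values (booleans stay booleans).
    Returns (options, remaining_positional_args).
    """
    opts = dict(defaults)
    i = 0
    while i < len(argv) and argv[i].startswith("--"):
        name = argv[i][2:].replace("-", "_")
        if name not in opts:
            raise SystemExit(f"invalid option {argv[i]}")
        if i + 1 >= len(argv):
            raise SystemExit(f"missing value for {argv[i]}")
        value = argv[i + 1]
        if isinstance(opts[name], bool):  # boolean options must be 'true' or 'false'
            if value not in ("true", "false"):
                raise SystemExit(f'expected "true" or "false": {argv[i]} {value}')
            value = value == "true"
        opts[name] = value
        i += 2
    return opts, argv[i:]


# Example: parse_kaldi_options(["--nj", "4", "--stage", "2", "data/train"],
#                              {"nj": "1", "stage": "0"})
```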
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
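> Editor's note: perturb_data_dir_speed.sh, whose deletion hunk continues below, builds a speed-perturbed copy of a Kaldi-style data directory: utterance and speaker ids get an `sp<factor>-` prefix, `wav.scp` entries are piped through `sox ... speed <factor>`, and `segments` start/end times are divided by the factor. A small illustrative sketch of those three transformations for the simple plain-wav-path case only (hypothetical function, not the removed script, which also handles piped and offset rxfilenames):

```python
def perturb_entries(factor, utt_id, wav_path, seg_start=None, seg_end=None):
    """Return the speed-perturbed wav.scp entry and, optionally, scaled segment times."""
    new_id = f"sp{factor}-{utt_id}"  # e.g. sp0.9-UTT001
    # Pipe the original recording through sox to change speed (and hence duration).
    wav_entry = f"{new_id} sox {wav_path} -t wav - speed {factor} |"
    if seg_start is None:
        return wav_entry, None
    # Segment timestamps scale by 1/factor, as in the deleted script's awk step.
    segment = (new_id, round(seg_start / factor, 2), round(seg_end / factor, 2))
    return wav_entry, segment


if __name__ == "__main__":
    print(perturb_entries(0.9, "UTT001", "/data/wav/UTT001.wav", 1.20, 3.60))
```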
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
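> Editor's note: the reduce_data_dir.sh script being removed here trims a data directory to a given utterance list by repeatedly calling `utils/filter_scp.pl`: keep only the lines of `wav.scp`, `text`, `feats.scp`, `segments`, and so on whose first whitespace-separated field is in the kept set. The same filtering step in a few lines of Python (hypothetical helper and paths, shown only to make the data-dir convention concrete):

```python
def filter_scp(keep_keys_file: str, src_scp: str, dst_scp: str) -> None:
    """Keep only lines of src_scp whose first field is listed in keep_keys_file."""
    with open(keep_keys_file, encoding="utf-8") as f:
        keep = {line.split()[0] for line in f if line.strip()}
    with open(src_scp, encoding="utf-8") as fin, open(dst_scp, "w", encoding="utf-8") as fout:
        for line in fin:
            if line.strip() and line.split()[0] in keep:
                fout.write(line)


# e.g. filter_scp("reduced/utt2spk", "data/train/wav.scp", "reduced/wav.scp")
```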
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, "w", 
encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl <text-file>\n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 
0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - 
target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
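The export entry point above computes `decoding_window = (chunk_size - 1) * subsampling_rate + right_context + 1`, i.e. the number of input feature frames needed to produce one chunk of encoder frames. A minimal sketch of that arithmetic, assuming the `subsampling_rate = 4` and `right_context = 6` values typical of WeNet's Conv2d subsampling (the real scripts read both from `model.encoder.embed`):

```python
def decoding_window(chunk_size: int, subsampling_rate: int = 4, right_context: int = 6) -> int:
    # Input feature frames required to produce `chunk_size` subsampled encoder frames.
    return (chunk_size - 1) * subsampling_rate + right_context + 1

print(decoding_window(16))  # 67 -- the same value hardcoded later when chunk_size == -1
```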
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
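As the note above says, inputs that a given export mode never uses (`required_cache_size` and `att_mask` outside the 16/4 mode) are folded away, so the feed dictionary must be restricted to whatever the saved graph still declares. The loop below does this via the graph's input list; an equivalent standalone sketch using the `onnxruntime` session API (the helper name `filter_feeds` is hypothetical):

```python
import onnxruntime

def filter_feeds(session: onnxruntime.InferenceSession, feeds: dict) -> dict:
    # Keep only the entries the exported graph still declares as inputs,
    # so session.run() does not raise on inputs removed during export.
    graph_inputs = {inp.name for inp in session.get_inputs()}
    return {name: value for name, value in feeds.items() if name in graph_inputs}
```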
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
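Each of the three exports above first rebinds `forward` to the method it actually wants traced (`forward_chunk`, `log_softmax`, `forward_attention_decoder`); `torch.onnx.export` then traces that method through the module's normal call path. A minimal sketch of the pattern on a toy module (module, method, and file names are illustrative only):

```python
import torch
import torch.nn.functional as F

class Toy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(4, 8)

    def forward(self, x, lengths):        # training-time signature
        return self.proj(x), lengths

    def export_forward(self, x):          # export-friendly signature
        return F.log_softmax(self.proj(x), dim=-1)

model = Toy().eval()
model.forward = model.export_forward      # instance attribute shadows the class method
torch.onnx.export(model, (torch.randn(1, 4),), "toy.onnx",
                  opset_version=13, input_names=["x"], output_names=["y"])
```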
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
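The cache slicing in this loop depends on `calculate_downsampling_factor`, which compounds the stride of every stride layer that sits below the current one. A worked example with assumed settings (two stride layers, stride 2 each; real values come from the encoder config):

```python
def downsampling_factor(layer: int, stride_layer_idx=(3, 7), stride=(2, 2)) -> int:
    # Product of the strides of all stride layers strictly below `layer`.
    factor = 1
    for idx, stride_idx in enumerate(stride_layer_idx):
        if layer > stride_idx:
            factor *= stride[idx]
    return factor

print([downsampling_factor(i) for i in range(12)])
# [1, 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4]
```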
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
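# --- Editorial sketch, not part of the original sources in this patch ---
# The export_onnx_gpu.py script above ends by dumping its export settings to
# <output_onnx_dir>/config.yaml next to the exported encoder/decoder; for the
# streaming branch the dumped onnx_config holds subsampling_rate, context,
# decoding_chunk_size, num_decoding_left_chunks, beam_size, fp16, feat_size,
# decoding_window and cnn_module_kernel_cache. A minimal sketch of reading the
# file back at inference time; the helper name is hypothetical, the default
# directory matches the script's --output_onnx_dir default.
import os
import yaml

def load_onnx_export_config(output_onnx_dir="onnx_model"):
    """Load the config.yaml written by export_onnx_gpu.py (illustrative)."""
    with open(os.path.join(output_onnx_dir, "config.yaml"), "r") as fin:
        return yaml.load(fin, Loader=yaml.FullLoader)

# Usage: cfg = load_onnx_export_config(); print(cfg["beam_size"], cfg["fp16"])
# --- End of editorial sketch ---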
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
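# --- Editorial sketch, not part of the original sources in this patch ---
# The DataLoader above is created with batch_size=None because batching and
# padding already happen inside the wenet Dataset pipeline (see the
# processor.batch / processor.padding stages of dataset.py later in this
# patch); the loader simply iterates pre-collated batches. A hypothetical
# helper to shape-check one batch; the five-element batch layout matches the
# unpacking used in the decoding loop below.
def peek_one_batch(loader):
    """Return (num_keys, feats shape, true lengths) of the first batch."""
    for keys, feats, _, feats_lengths, _ in loader:
        # feats:          (B, T, feat_dim) fbank features, padded along T
        # feats_lengths:  (B,) real frame counts before padding
        return len(keys), tuple(feats.shape), feats_lengths.tolist()
# --- End of editorial sketch ---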
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
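# --- Editorial sketch, not part of the original sources in this patch ---
# In the attention_rescoring branch of recognize_onnx_gpu.py above, every
# n-best candidate is packed as [sos] + tokens + [eos] (reversed tokens for
# r_hyps_pad_sos_eos), right-padded with IGNORE_ID to max_len + 2, while
# hyps_lens_sos keeps len(tokens) + 1. A small self-contained illustration
# with toy token ids; the SOS/EOS/IGNORE_ID values here are placeholders,
# not the ids taken from the real dict.
import numpy as np

SOS = EOS = 4232   # placeholder; the script uses len(char_dict) - 1
IGNORE_ID = -1     # placeholder; the script imports IGNORE_ID from wenet.utils.common

def pad_hyps(all_hyps, batch_size, beam_size):
    """Pack flat n-best token lists into the rescoring decoder's padded layout."""
    max_len = max(len(h) for h in all_hyps)
    hyps_pad_sos_eos = np.full((batch_size, beam_size, max_len + 2), IGNORE_ID, dtype=np.int64)
    r_hyps_pad_sos_eos = np.full_like(hyps_pad_sos_eos, IGNORE_ID)
    hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32)
    k = 0
    for i in range(batch_size):
        for j in range(beam_size):
            cand = all_hyps[k]
            hyps_pad_sos_eos[i, j, :len(cand) + 2] = [SOS] + cand + [EOS]
            r_hyps_pad_sos_eos[i, j, :len(cand) + 2] = [SOS] + cand[::-1] + [EOS]
            hyps_lens_sos[i, j] = len(cand) + 1
            k += 1
    return hyps_pad_sos_eos, r_hyps_pad_sos_eos, hyps_lens_sos

# Two utterances with a beam of 2:
#   hyps, r_hyps, lens = pad_hyps([[10, 11], [10], [7, 8, 9], [7]], 2, 2)
#   hyps[0, 0] -> [SOS, 10, 11, EOS, IGNORE_ID];  lens -> [[3, 2], [4, 2]]
# --- End of editorial sketch ---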
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. 
- self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - 
static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return 
padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 
@@ -from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = 
speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
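The sin/cos table built above follows PE(pos, 2i) = sin(pos / 10000^(2i/d_model)) and PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model)). A tiny standalone instantiation (d_model=4, first few positions), not taken from the repository:

```python
import math
import torch

d_model, max_len = 4, 3
position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float32)
                     * -(math.log(10000.0) / d_model))   # [1.0, 0.01]
pe = torch.zeros(max_len, d_model)
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
print(pe[1])  # position 1: [sin(1), cos(1), sin(0.01), cos(0.01)]
```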
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
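A worked instance of the windowing arithmetic in forward_chunk_by_chunk above, assuming the Conv2dSubsampling4 front end (subsampling_rate=4, right_context=6), decoding_chunk_size=16 and 200 input frames; the numbers are illustrative only:

```python
subsampling, right_context = 4, 6
decoding_chunk_size, num_frames = 16, 200
context = right_context + 1                                          # 7, current frame included
stride = subsampling * decoding_chunk_size                           # 64 frames advanced per chunk
decoding_window = (decoding_chunk_size - 1) * subsampling + context  # 67 frames fed per chunk
windows = [(cur, min(cur + decoding_window, num_frames))
           for cur in range(0, num_frames - context + 1, stride)]
print(stride, decoding_window, windows)
# 64 67 [(0, 67), (64, 131), (128, 195), (192, 200)]
```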
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
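The smoothed target distribution that LabelSmoothingLoss.forward builds above can be reproduced in isolation; a quick check with size=3 and smoothing=0.1, matching the example in the docstring:

```python
import torch

size, smoothing = 3, 0.1
confidence = 1.0 - smoothing
target = torch.tensor([0, 1, 2])
true_dist = torch.full((target.size(0), size), smoothing / (size - 1))
true_dist.scatter_(1, target.unsqueeze(1), confidence)
print(true_dist)
# tensor([[0.9000, 0.0500, 0.0500],
#         [0.0500, 0.9000, 0.0500],
#         [0.0500, 0.0500, 0.9000]])
```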
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
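A shape check for the Conv2dSubsampling4 front end defined above, with illustrative idim/odim/T values: two stride-2 convolutions reduce 100 input frames to 24, and the flattened channel-by-feature size matches the odim * (((idim - 1) // 2 - 1) // 2) term used for the linear projection:

```python
import torch

idim, odim, T = 80, 256, 100
conv = torch.nn.Sequential(
    torch.nn.Conv2d(1, odim, 3, 2), torch.nn.ReLU(),
    torch.nn.Conv2d(odim, odim, 3, 2), torch.nn.ReLU(),
)
x = torch.randn(1, 1, T, idim)                    # (b, c=1, t, f)
y = conv(x)
print(y.shape)                                    # torch.Size([1, 256, 24, 19])
print(odim * (((idim - 1) // 2 - 1) // 2))        # 4864 = 256 * 19, Linear input size
```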
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/README.md deleted file mode 100644 index 3d3fde55eb11dd503ac4e545ab535bf4670fe294..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Performance Record - -## Conformer Result - -* Feature info: using fbank feature, cmvn, without speed perturb (not supported segments yet) -* Training info: lr 0.001, max_frames_in_batch 15000, 8 gpu, acc_grad 4, 100 epochs -* Decoding info: ctc_weight 0.5, average_num 30 - - -| decoding mode | Test WER | -|---------------------|----------| -| attention rescoring | 32.58% | diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/conf/train_conformer.yaml deleted file mode 100644 index dcd115b6308ba2a8073c9ad44213dfb7e5bde2fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 
0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 4096 - min_length: 10 - token_max_length: 200 - token_min_length: 1 - #resample_conf: - # resample_rate: 16000 - speed_perturb: false - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'dynamic' # static or dynamic - max_frames_in_batch: 15000 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 100 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 1000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/aishell4_process_textgrid.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/aishell4_process_textgrid.py deleted file mode 100644 index c4fdc54347d27d27440494b4e8b62dcce122b0e3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/aishell4_process_textgrid.py +++ /dev/null @@ -1,109 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Process the textgrid files -""" -import argparse -import codecs -from pathlib import Path -import textgrid - - -class Segment(object): - def __init__(self, uttid, spkr, stime, etime, text): - self.uttid = uttid - self.spkr = spkr - self.stime = round(stime, 2) - self.etime = round(etime, 2) - self.text = text - - -def get_args(): - parser = argparse.ArgumentParser(description="process the textgrid files") - parser.add_argument("--path", type=str, required=True, help="Data path") - args = parser.parse_args() - return args - - -def main(args): - wav_scp = codecs.open(Path(args.path) / "wav.scp", "r", "utf-8") - textgrid_flist = codecs.open( - Path(args.path) / "textgrid.flist", "r", "utf-8") - # get the path of textgrid file for each utterance - utt2textgrid = {} - for line in textgrid_flist: - path = Path(line.strip()) - # the name of textgrid file is different between training and test set - if "train" in path.parts: - uttid = "%s_%s" % (path.parts[-2], path.stem) - else: - uttid = path.stem - utt2textgrid[uttid] = path - # parse the textgrid file for each utterance - all_segments = [] - for line in wav_scp: - uttid = line.strip().split(" ")[0] - if uttid not in utt2textgrid: - print("%s doesn't have transcription" % uttid) - continue - segments = [] - tg = textgrid.TextGrid.fromFile(utt2textgrid[uttid]) - for i in range(tg.__len__()): - for j in range(tg[i].__len__()): - if tg[i][j].mark.strip(): - segments.append( - Segment( - uttid, - tg[i].name, - tg[i][j].minTime, - tg[i][j].maxTime, - tg[i][j].mark.strip(), - )) - - segments = sorted(segments, key=lambda x: x.stime) - all_segments += segments - - wav_scp.close() - textgrid_flist.close() - - segments_file = codecs.open(Path(args.path) / "segments_all", "w", "utf-8") - utt2spk_file = codecs.open(Path(args.path) / "utt2spk_all", "w", "utf-8") - text_file = codecs.open(Path(args.path) / "text_all", "w", "utf-8") - utt2dur_file = codecs.open(Path(args.path) / "utt2dur_all", "w", "utf-8") - - for i in range(len(all_segments)): - utt_name = "%s-%s-%07d-%07d" % ( - all_segments[i].uttid, - all_segments[i].spkr, - all_segments[i].stime * 100, - all_segments[i].etime * 100, - ) - - 
segments_file.write("%s %s %.2f %.2f\n" % ( - utt_name, - all_segments[i].uttid, - all_segments[i].stime, - all_segments[i].etime, - )) - utt2spk_file.write( - "%s %s-%s\n" % - (utt_name, all_segments[i].uttid, all_segments[i].spkr)) - text_file.write("%s %s\n" % (utt_name, all_segments[i].text)) - utt2dur_file.write( - "%s %.2f\n" % - (utt_name, all_segments[i].etime - all_segments[i].stime)) - if len(all_segments[i].text) / (all_segments[i].etime - - all_segments[i].stime) > 100: - print(utt_name) - print( - len(all_segments[i].text) / - (all_segments[i].etime - all_segments[i].stime)) - - segments_file.close() - utt2spk_file.close() - text_file.close() - utt2dur_file.close() - - -if __name__ == "__main__": - args = get_args() - main(args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/apply_map.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/apply_map.pl deleted file mode 100644 index 725d3463a0098f58210a4b71d5c49f3be38fe23b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/apply_map.pl +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0. - -# This program is a bit like ./sym2int.pl in that it applies a map -# to things in a file, but it's a bit more general in that it doesn't -# assume the things being mapped to are single tokens, they could -# be sequences of tokens. See the usage message. - - -$permissive = 0; - -for ($x = 0; $x <= 2; $x++) { - - if (@ARGV > 0 && $ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } - - if (@ARGV > 0 && $ARGV[0] eq '--permissive') { - shift @ARGV; - # Mapping is optional (missing key is printed to output) - $permissive = 1; - } -} - -if(@ARGV != 1) { - print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n"; - print STDERR <<'EOF'; -Usage: apply_map.pl [options] map <input >output - options: [-f <field-range>] [--permissive] - This applies a map to some specified fields of some input text: - For each line in the map file: the first field is the thing we - map from, and the remaining fields are the sequence we map it to. - The -f (field-range) option says which fields of the input file the map - map should apply to. - If the --permissive option is supplied, fields which are not present - in the map will be left as they were. - Applies the map 'map' to all input text, where each line of the map - is interpreted as a map from the first field to the list of the other fields - Note: <field-range> can look like 4-5, or 4-, or 5-, or 1, it means the field - range in the input to apply the map to. 
- e.g.: echo A B | apply_map.pl a.txt - where a.txt is: - A a1 a2 - B b - will produce: - a1 a2 b -EOF - exit(1); -} - -($map_file) = @ARGV; -open(M, "<$map_file") || die "Error opening map file $map_file: $!"; - -while (<M>) { - @A = split(" ", $_); - @A >= 1 || die "apply_map.pl: empty line."; - $i = shift @A; - $o = join(" ", @A); - $map{$i} = $o; -} - -while(<STDIN>) { - @A = split(" ", $_); - for ($x = 0; $x < @A; $x++) { - if ( (!defined $field_begin || $x >= $field_begin) - && (!defined $field_end || $x <= $field_end)) { - $a = $A[$x]; - if (!defined $map{$a}) { - if (!$permissive) { - die "apply_map.pl: undefined key $a in $map_file\n"; - } else { - print STDERR "apply_map.pl: warning! missing key $a in $map_file\n"; - } - } else { - $A[$x] = $map{$a}; - } - } - } - print join(" ", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/copy_data_dir.sh deleted file mode 100644 index 6feb77fdf29beabc086f572f0c5d68ffde9581fd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/copy_data_dir.sh +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. local/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] <srcdir> <destdir>" - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix=<prefix> # Prefix for speaker ids, default empty" - echo " --utt-prefix=<prefix> # Prefix for utterance ids, default empty" - echo " --spk-suffix=<suffix> # Suffix for speaker ids, default empty" - echo " --utt-suffix=<suffix> # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires <srcdir> and <destdir> to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! 
-z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | local/apply_map.pl -f 1 $destdir/utt_map | \ - local/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -local/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - local/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - local/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - local/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. - if [ -f $srcdir/wav.scp ]; then - local/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - local/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/text.tc ]; then - local/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text.tc >$destdir/text.tc -fi -if [ -f $srcdir/text.lc ]; then - local/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text.lc >$destdir/text.lc -fi -if [ -f $srcdir/text.lc.rm ]; then - local/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text.lc.rm >$destdir/text.lc.rm -fi -if [ -f $srcdir/utt2dur ]; then - local/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - local/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - local/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - local/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! 
-f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -local/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/download_and_untar.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/download_and_untar.sh deleted file mode 100644 index bd3ceae5679fd97693d4053d1fc01fef4ad64cda..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/download_and_untar.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash - -if [ $# -ne 3 ]; then - echo "Usage: $0 " - echo "e.g.: $0 /home/data/aishell4 https://www.openslr.org/resources/111 train_L" - echo " can be one of: train_L, train_M, train_S, test." -fi - -data=$1 -url=$2 -part=$3 - -if [ ! -d "$data" ]; then - echo "$0: no such directory $data" - exit 1; -fi - -part_ok=false -list="train_L train_M train_S test" -for x in $list; do - if [ "$part" == $x ]; then part_ok=true; fi -done -if ! $part_ok; then - echo "$0: expected to be one of $list, but got '$part'" - exit 1; -fi - -if [ -z "$url" ]; then - echo "$0: empty URL base." - exit 1; -fi - -if [ -f $data/$part/.complete ]; then - echo "$0: data part $part was already successfully extracted, nothing to do." - exit 0; -fi - -if [ -f $data/$part.tar.gz ]; then - echo "$0: removing existing file $data/$part.tar.gz" - rm $data/$part.tar.gz -fi - -if [ ! -f $data/$part.tar.gz ]; then - if ! which wget >/dev/null; then - echo "$0: wget is not installed." - exit 1; - fi - full_url=$url/$part.tar.gz - echo "$0: downloading data from $full_url. This may take some time, please be patient." - - cd $data - if ! wget --no-check-certificate $full_url; then - echo "$0: error executing wget $full_url" - exit 1; - fi -fi - -cd $data - -if ! tar -xvzf $part.tar.gz; then - echo "$0: error un-tarring archive $data/$part.tgz" - exit 1; -fi - -touch $data/$part/.complete - -echo "$0: Successfully downloaded and un-tarred $data/$part.tgz" - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. 
The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/prepare_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/prepare_data.sh deleted file mode 100644 index bfa73d46fea1a0b71d3f8734178a23249480b7fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/prepare_data.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash - -. ./path.sh || exit 1; - -if [ $# != 1 ]; then - echo "Usage: $0 " - echo " $0 /home/data/aishell4" - exit 1; -fi - -aishell4_source_dir=$1 -train_dir=data/local/aishell4_train -test_dir=data/local/aishell4_test - -mkdir -p $train_dir -mkdir -p $test_dir - -# data directory check -if [ ! -d $aishell_audio_dir ] || [ ! 
-f $aishell_text ]; then - echo "Error: $0 requires two directory arguments" - exit 1; -fi - -for room_name in "train_L" "train_M" "train_S" "test"; do - if [ -f ${aishell4_source_dir}/$room_name/wav_list.txt ];then - rm ${aishell4_source_dir}/$room_name/wav_list.txt - fi - FILES="$PWD/${aishell4_source_dir}/$room_name/wav/*" - for f in $FILES; do - echo "$f" >> ${aishell4_source_dir}/$room_name/wav_list.txt - done - if [ -f ${aishell4_source_dir}/$room_name/TextGrid_list.txt ];then - rm ${aishell4_source_dir}/$room_name/TextGrid_list.txt - fi - FILES="$PWD/${aishell4_source_dir}/$room_name/TextGrid/*.TextGrid" - for f in $FILES; do - echo "$f" >> ${aishell4_source_dir}/$room_name/TextGrid_list.txt - done -done - -mkdir -p ${aishell4_source_dir}/full_train -for r in train_L train_M train_S ; do - cat ${aishell4_source_dir}/$r/TextGrid_list.txt >> ${aishell4_source_dir}/full_train/textgrid.flist - cat ${aishell4_source_dir}/$r/wav_list.txt >> ${aishell4_source_dir}/full_train/wav.flist -done - -wav_list_aishell4=${aishell4_source_dir}/full_train/wav.flist -text_grid_aishell4=${aishell4_source_dir}/full_train/textgrid.flist - -# process train set -sed -e 's/\.wav//' $train_dir/wav.flist | awk -F '/' '{print $NF}' > $train_dir/utt.list -paste -d' ' $train_dir/utt.list $train_dir/wav.flist | sort -u > $train_dir/wav.scp -python local/aishell4_process_textgrid.py --path $train_dir -cat $train_dir/text_all | local/text_normalize.pl | local/text_format.pl | sort -u > $train_dir/text -local/filter_scp.pl -f 1 $train_dir/text $train_dir/utt2spk_all | sort -u > $train_dir/utt2spk -local/utt2spk_to_spk2utt.pl $train_dir/utt2spk > $train_dir/spk2utt -local/filter_scp.pl -f 1 $train_dir/text $train_dir/segments_all | sort -u > $train_dir/segments - -# process test set -sed -e 's/\.wav//' $test_dir/wav.flist | awk -F '/' '{print $NF}' > $test_dir/utt.list -paste -d' ' $test_dir/utt.list $test_dir/wav.flist |sort -u > $test_dir/wav.scp -python local/aishell4_process_textgrid.py --path $test_dir -cat $test_dir/text_all | local/text_normalize.pl | local/text_format.pl | sort -u > $test_dir/text -local/filter_scp.pl -f 1 $test_dir/text $test_dir/utt2spk_all | sort -u > $test_dir/utt2spk -local/utt2spk_to_spk2utt.pl $test_dir/utt2spk > $test_dir/spk2utt -local/filter_scp.pl -f 1 $test_dir/text $test_dir/segments_all | sort -u > $test_dir/segments - -local/copy_data_dir.sh --utt-prefix Aishell4- --spk-prefix Aishell4- \ - $train_dir data/aishell4_train -local/copy_data_dir.sh --utt-prefix Aishell4- --spk-prefix Aishell4- \ - $test_dir data/aishell4_test - -echo "$0: AISHELL4 data preparation succeeded" -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/text_format.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/text_format.pl deleted file mode 100644 index c1ff896d017fe5d99c69ff8161a7e7070c3442a6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/text_format.pl +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright Chao Weng - -# normalizations for hkust trascript -# see the docs/trans-guidelines.pdf for details - -while () { - @A = split(" ", $_); - if (@A == 1) { - next; - } - print $_ -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/text_normalize.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/text_normalize.pl deleted file mode 100644 index 046903d02dd9f2ccd51215250c5b7797e207b61a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/text_normalize.pl +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright Chao Weng - -# normalizations for hkust trascript -# see the docs/trans-guidelines.pdf for details - -while () { - @A = split(" ", $_); - print "$A[0] "; - for ($n = 1; $n < @A; $n++) { - $tmp = $A[$n]; - if ($tmp =~ //) {$tmp =~ s:::g;} - if ($tmp =~ /<%>/) {$tmp =~ s:<%>::g;} - if ($tmp =~ /<->/) {$tmp =~ s:<->::g;} - if ($tmp =~ /<\$>/) {$tmp =~ s:<\$>::g;} - if ($tmp =~ /<#>/) {$tmp =~ s:<#>::g;} - if ($tmp =~ /<_>/) {$tmp =~ s:<_>::g;} - if ($tmp =~ //) {$tmp =~ s:::g;} - if ($tmp =~ /`/) {$tmp =~ s:`::g;} - if ($tmp =~ /&/) {$tmp =~ s:&::g;} - if ($tmp =~ /,/) {$tmp =~ s:,::g;} - if ($tmp =~ /[a-zA-Z]/) {$tmp=uc($tmp);} - if ($tmp =~ /A/) {$tmp =~ s:A:A:g;} - if ($tmp =~ /a/) {$tmp =~ s:a:A:g;} - if ($tmp =~ /b/) {$tmp =~ s:b:B:g;} - if ($tmp =~ /c/) {$tmp =~ s:c:C:g;} - if ($tmp =~ /k/) {$tmp =~ s:k:K:g;} - if ($tmp =~ /t/) {$tmp =~ s:t:T:g;} - if ($tmp =~ /,/) {$tmp =~ s:,::g;} - if ($tmp =~ /丶/) {$tmp =~ s:丶::g;} - if ($tmp =~ /。/) {$tmp =~ s:。::g;} - if ($tmp =~ /、/) {$tmp =~ s:、::g;} - if ($tmp =~ /?/) {$tmp =~ s:?::g;} - if ($tmp =~ /·/) {$tmp =~ s:·::g;} - if ($tmp =~ /\*/) {$tmp =~ s:\*::g;} - if ($tmp =~ /!/) {$tmp =~ s:!::g;} - if ($tmp =~ /\$/) {$tmp =~ s:\$::g;} - if ($tmp =~ /\+/) {$tmp =~ s:\+::g;} - if ($tmp =~ /-/) {$tmp =~ s:-::g;} - if ($tmp =~ /\\/) {$tmp =~ s:\\::g;} - if ($tmp =~ /\?/) {$tmp =~ s:\?::g;} - if ($tmp =~ /¥/) {$tmp =~ s:¥::g;} - if ($tmp =~ /%/) {$tmp =~ s:%::g;} - if ($tmp =~ /\./) {$tmp =~ s:\.::g;} - if ($tmp =~ / 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) 
= @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/validate_data_dir.sh deleted file mode 100644 index 22a01fcdab1088bf42fdcbf9c3de2029a5a66d4f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/usr/bin/env bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! -d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(local/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! 
-f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - local/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." - exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! 
cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! 
cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! 
cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/local/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . - "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. 
-export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/run.sh deleted file mode 100644 index dc9aef9ab77ef32c9b13d6bfd13818c795d2def5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/run.sh +++ /dev/null @@ -1,199 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl -# communication. More details can be found in -# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html -# export NCCL_SOCKET_IFNAME=ens4f1 -export NCCL_DEBUG=INFO -stage=0 # start from 0 if you need to start from data preparation -stop_stage=6 -# The num of nodes or machines used for multi-machine training -# Default 1 for single machine/node -# NFS will be needed if you want run multi-machine training -num_nodes=1 -# The rank of each node or machine, range from 0 to num_nodes -1 -# The first node/machine sets node_rank 0, the second one sets node_rank 1 -# the third one set node_rank 2, and so on. Default 0 -node_rank=0 -num_utts_per_shard=1000 -data_url=https://www.openslr.org/resources/111 -data_source=/home/work_nfs5_ssd/yhliang/data/aishell4 -# modify this to your AISHELL-4 data path - -nj=16 -dict=data/dict/lang_char.txt - -train_set=aishell4_train -dev_set=aishell4_test -test_sets=aishell4_test - -train_config=conf/train_conformer.yaml -cmvn=true -dir=exp/conformer -checkpoint= - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=30 -decode_modes="attention_rescoring" - -. 
tools/parse_options.sh || exit 1; -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - echo "stage -1: Data Download" - local/download_and_untar.sh ${data_source} ${data_url} train_L - local/download_and_untar.sh ${data_source} ${data_url} train_M - local/download_and_untar.sh ${data_source} ${data_url} train_S - local/download_and_untar.sh ${data_source} ${data_url} test -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Data preparation - local/prepare_data.sh ${data_source} || exit 1; -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - # remove the space between the text labels for Mandarin dataset - for x in ${train_set} ${test_sets}; do - cp data/${x}/text data/${x}/text.org - paste -d " " <(cut -d " " -f 1 data/${x}/text.org) <(cut -d " " -f 2 data/${x}/text.org \ - | tr 'a-z' 'A-Z' | sed 's/\([A-Z]\) \([A-Z]\)/\1▁\2/g' | tr -d " ") > data/${x}/text - rm data/${x}/text.org - done - - tools/compute_cmvn_stats.py --num_workers 32 --train_config $train_config \ - --in_scp data/${train_set}/wav.scp \ - --out_cmvn data/$train_set/global_cmvn - -fi - -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - # Make train dict - echo "Make a dictionary" - mkdir -p $(dirname $dict) - echo "<blank> 0" > ${dict} # 0 will be used for "blank" in CTC - echo "<unk> 1" >> ${dict} # <unk> must be 1 - tools/text2token.py -s 1 -n 1 data/${train_set}/text | cut -f 2- -d" " | tr " " "\n" \ - | sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0 " " NR+1}' >> ${dict} - num_token=$(cat $dict | wc -l) - echo "<sos/eos> $num_token" >> $dict # -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - # Prepare wenet required data - echo "Prepare data, prepare required format" - for x in $train_set ${test_sets}; do - tools/make_shard_list.py --num_utts_per_shard $num_utts_per_shard \ - --num_threads 32 --segments data/$x/segments \ - data/$x/wav.scp data/$x/text $(realpath data/$x/shards) data/$x/data.list - done -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - # Training - mkdir -p $dir - INIT_FILE=$dir/ddp_init - # You had better rm it manually before you start run.sh on first node. - # rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - # The number of gpus runing on each node/machine - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - # The total number of processes/gpus, so that the master knows - # how many workers to wait for. - # More details about ddp can be found in - # https://pytorch.org/tutorials/intermediate/dist_tuto.html - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp data/${train_set}/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. 
- rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type shard \ - --symbol_table $dict \ - --train_data data/$train_set/data.list \ - --cv_data data/${dev_set}/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 1 \ - $cmvn_opts - } - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size= - ctc_weight=0.5 - for mode in ${decode_modes}; do - { - for test_set in ${test_sets}; do - { - test_dir=$dir/test_${mode} - mkdir -p $test_dir - python wenet/bin/recognize.py --gpu $(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f1) \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type shard \ - --test_data data/${test_set}/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $ctc_weight \ - --result_file $test_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - python tools/compute-wer.py --char=1 --v=1 \ - data/${test_set}/text $test_dir/text > $test_dir/wer - } & - done - } - done - wait - -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip \ - --output_quant_file $dir/final_quant.zip -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. 
tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. ./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. 
- elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - 
pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = 
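Stage-3 above sorts the collected records by each field and reports max/P99/P75/P50/P25/min together with mean and standard deviation. A condensed sketch of that summary for a single field, with made-up durations:

```python
# Percentile/mean/std summary in the style of analyze_result_brief,
# shown for one field ("dur") with hypothetical values in seconds.
import math

durs = sorted([1.9, 2.4, 3.1, 3.3, 4.0, 4.8, 5.5, 6.2, 7.0, 9.6])
n = len(durs)
mean = sum(durs) / n
std = math.sqrt(sum((d - mean) ** 2 for d in durs) / n)
for name, idx in [("max", n - 1), ("P99", int(n * 0.99)), ("P75", int(n * 0.75)),
                  ("P50", int(n * 0.50)), ("P25", int(n * 0.25)), ("min", 0)]:
    print(f"{name} dur: {durs[idx]:.3f} s")
print(f"avg dur: {mean:.3f} s  std: {std:.3f}")
```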
{'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. 
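The converter above emits accumulated statistics (per-dimension sums, sums of squares, and a frame count). One common way such stats are consumed downstream is to turn them into a mean/inverse-std normalization; a sketch with made-up two-dimensional stats, not a claim about the exact wenet code path:

```python
# Turning accumulated CMVN stats into per-dimension mean and inverse std,
# then normalizing a (frames, dims) feature matrix. Values are illustrative.
import numpy as np

cmvn = {"mean_stat": [100.0, 200.0], "var_stat": [1100.0, 4400.0], "frame_num": 10.0}

mean = np.array(cmvn["mean_stat"]) / cmvn["frame_num"]           # [10, 20]
var = np.array(cmvn["var_stat"]) / cmvn["frame_num"] - mean**2   # [10, 40]
istd = 1.0 / np.sqrt(np.maximum(var, 1e-20))                     # variance floor

feats = np.random.randn(5, 2) * np.sqrt(var) + mean              # fake fbank frames
normalized = (feats - mean) * istd
print(normalized.shape)
```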
-has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! $skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
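combine_data.sh above regenerates `spk2utt` from the merged `utt2spk` via `tools/utt2spk_to_spk2utt.pl`. A minimal Python sketch of that inversion (utterance and speaker ids are hypothetical):

```python
# utt2spk -> spk2utt: group utterance ids under their speaker id.
from collections import defaultdict

def utt2spk_to_spk2utt(utt2spk_lines):
    spk2utt = defaultdict(list)
    for line in utt2spk_lines:
        parts = line.split()
        if len(parts) == 2:
            utt, spk = parts
            spk2utt[spk].append(utt)
    return dict(spk2utt)

print(utt2spk_to_spk2utt(["utt001 spkA", "utt002 spkA", "utt003 spkB"]))
# {'spkA': ['utt001', 'utt002'], 'spkB': ['utt003']}
```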
- sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = 
result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. - format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
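The `Calculator` above aligns reference and hypothesis tokens with a dynamic-programming edit distance and tallies correct/substitution/deletion/insertion counts; the CER reported for this model is (S + D + I) / N over reference characters. A bare-bones sketch that computes only the distance, without the alignment backtrace or per-token statistics:

```python
# Character error rate via Levenshtein distance:
# CER = (substitutions + deletions + insertions) / reference length.
def cer(ref: str, hyp: str) -> float:
    r, h = list(ref), list(hyp)
    d = [[0] * (len(h) + 1) for _ in range(len(r) + 1)]
    for i in range(len(r) + 1):
        d[i][0] = i
    for j in range(len(h) + 1):
        d[0][j] = j
    for i in range(1, len(r) + 1):
        for j in range(1, len(h) + 1):
            cost = 0 if r[i - 1] == h[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1,         # deletion
                          d[i][j - 1] + 1,         # insertion
                          d[i - 1][j - 1] + cost)  # correct / substitution
    return d[len(r)][len(h)] / max(len(r), 1)

print(cer("今天天气很好", "今天天很好"))  # 1 deletion over 6 chars -> ~0.167
```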
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - 
default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
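Both tools above build 80-dim fbank features with `torchaudio.compliance.kaldi.fbank`, and the CMVN tool first scales float waveforms by `1 << 15` to the 16-bit sample range. A self-contained sketch on a synthetic waveform (assumes torchaudio is installed; no wav.scp needed):

```python
# Kaldi-compatible fbank features from a synthetic 1-second tone.
import math
import torch
import torchaudio.compliance.kaldi as kaldi

sr = 16000
t = torch.arange(sr, dtype=torch.float32) / sr
waveform = (0.1 * torch.sin(2 * math.pi * 440.0 * t)).unsqueeze(0)  # (channels, samples)

mat = kaldi.fbank(waveform * (1 << 15),   # scale to 16-bit range, as above
                  num_mel_bins=80,
                  frame_length=25,
                  frame_shift=10,
                  dither=0.0,
                  energy_floor=0.0,
                  sample_frequency=sr)
print(mat.shape)  # (num_frames, 80)
```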
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: 
Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
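The core of copy_data_dir.sh above is an id map (`utt_map` / `spk_map`) built from a prefix or suffix and applied to the first field of each per-utterance file. A small sketch of that renaming (the `1-` prefix and file contents are hypothetical):

```python
# Build old-id -> new-id map and rewrite field 1, as utt_map/apply_map.pl do.
def build_id_map(ids, prefix="", suffix=""):
    return {i: f"{prefix}{i}{suffix}" for i in ids}

def apply_map_field1(lines, id_map):
    out = []
    for line in lines:
        key, _, rest = line.partition(" ")
        out.append(f"{id_map.get(key, key)} {rest}".rstrip())
    return out

utt_map = build_id_map(["utt001", "utt002"], prefix="1-")
print(apply_map_field1(["utt001 /wavs/a.wav", "utt002 /wavs/b.wav"], utt_map))
# ['1-utt001 /wavs/a.wav', '1-utt002 /wavs/b.wav']
```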
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print "<eps> 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf("<s> %d\n", NR+2); - printf("</s> %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 <eps> <eps>') -print('1 1 <blank> <eps>') -print('2 2 <blank> <eps>') -print('2 0 <eps> <eps>') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '<eps>' or phone == '<blank>': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '<eps>', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '<eps>')) - print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 <blank> <eps>') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '<eps>' or phone == '<blank>': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '<eps>', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '<eps>')) - print('{} {} {} {}'.format(node, 0, '<eps>', '<eps>')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n
+ 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(<S>){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 <blank> <eps>') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '<eps>' or phone == '<blank>': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '<eps>', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file
except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 
else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, "w", 
encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
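For readers who do not follow the Perl below, the helper described in the comment above boils down to this illustrative Python sketch (not part of the original script): decode every line as UTF-8 if possible, otherwise fall back to the raw bytes.

```python
def get_utf8_or_bytestream(path: str):
    """Illustrative analogue: (True, unicode_lines) if the whole file is
    valid UTF-8, else (False, raw_byte_lines)."""
    with open(path, "rb") as f:
        raw_lines = f.readlines()
    try:
        return True, [line.decode("utf-8") for line in raw_lines]
    except UnicodeDecodeError:
        return False, raw_lines
```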
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 
0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - 
target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
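To make the frame-to-time conversion in alignment.py above concrete: each output frame spans 10 ms times the subsampling factor, so a frame-level CTC alignment (0 = blank) collapses into (begin, end, token) segments roughly as in this illustrative sketch; the alignment values and the subsample factor are made up.

```python
def alignment_to_segments(alignment, subsample=4, frame_shift=0.01):
    # Each segment is a run of blanks followed by a run of one repeated token,
    # mirroring get_frames_timestamp/get_labformat in alignment.py above.
    segments, begin, start = [], 0.0, 0
    while start < len(alignment):
        end = start
        while end < len(alignment) and alignment[end] == 0:    # leading blanks
            end += 1
        if end < len(alignment):
            end += 1
            while end < len(alignment) and alignment[end] == alignment[end - 1]:
                end += 1                                        # token repeats
        duration = (end - start) * frame_shift * subsample
        token = next((t for t in alignment[start:end] if t != 0), 0)
        segments.append((round(begin, 2), round(begin + duration, 2), token))
        begin += duration
        start = end
    return segments

print(alignment_to_segments([0, 0, 5, 5, 0, 7, 7]))
# [(0.0, 0.16, 5), (0.16, 0.28, 7)]
```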
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
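The core of average_model.py above is a running element-wise sum over the selected checkpoints' state dicts followed by a division by their count; a minimal sketch of that step, assuming each path holds a plain state_dict saved with torch.save:

```python
import torch

def average_state_dicts(paths):
    # Illustrative condensation of the averaging loop in average_model.py above.
    avg = None
    for path in paths:
        states = torch.load(path, map_location="cpu")
        if avg is None:
            avg = {k: v.clone() for k, v in states.items()}
        else:
            for k in avg:
                avg[k] += states[k]
    return {k: torch.true_divide(v, len(paths)) for k, v in avg.items()}
```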
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
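The `BPUCTC` refactor shown above splits the CTC output projection into several 1x1 `Conv2d` pieces so that no single layer exceeds the 2048 in/out channel limit assumed for the XJ3 BPU, then copies the matching slices of the original weight and bias into each piece. A minimal, self-contained sketch of that splitting idea follows; the `split_projection` helper, the 2048 limit, and the 256/4233 dimensions are illustrative assumptions, not part of the original export scripts.

```python
import torch

def split_projection(linear: torch.nn.Linear, max_out: int = 2048) -> torch.nn.ModuleList:
    """Split a (num_class x idim) linear projection into 1x1 Conv2d pieces
    whose output channels each stay at or below max_out."""
    num_class, idim = linear.weight.shape
    convs = torch.nn.ModuleList()
    start = 0
    while start < num_class:
        out_ch = min(max_out, num_class - start)
        conv = torch.nn.Conv2d(idim, out_ch, kernel_size=1)
        # Copy the corresponding slice of the original weight and bias.
        with torch.no_grad():
            conv.weight.copy_(linear.weight[start:start + out_ch].unsqueeze(2).unsqueeze(3))
            conv.bias.copy_(linear.bias[start:start + out_ch])
        convs.append(conv)
        start += out_ch
    return convs

# On NCHW input (B, idim, 1, T), concatenating the conv outputs along the
# channel axis reproduces the original projection, mirroring the equivalence
# check performed by BPUCTC.check_equal.
linear = torch.nn.Linear(256, 4233)   # illustrative hidden dim / vocab size
convs = split_projection(linear)
x = torch.randn(1, 100, 256)          # (B, T, idim)
ref = linear(x)
x_nchw = x.transpose(1, 2).unsqueeze(2)             # (B, idim, 1, T)
out = torch.cat([c(x_nchw) for c in convs], dim=1)  # (B, num_class, 1, T)
assert torch.allclose(ref, out.squeeze(2).transpose(1, 2), atol=1e-4)
```

Because each slice carries the corresponding rows of the original weight and bias, the concatenated logits are numerically the same as the unsplit projection, so downstream CTC decoding is unaffected by the split.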
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
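# A minimal usage sketch for the binary matrix writer/reader defined above,
# assuming the module is importable as `kaldi_io` (the name its own docstrings
# use) and that NumPy is available; the file name 'feats.ark' is illustrative.
import numpy as np
import kaldi_io

feats = {'utt1': np.random.rand(100, 80).astype('float32'),
         'utt2': np.random.rand(120, 80).astype('float32')}
# write_mat() expects a binary file descriptor, so open the ark with 'wb'.
with open('feats.ark', 'wb') as f:
    for key, mat in feats.items():
        kaldi_io.write_mat(f, mat, key=key)
# Read the ark back into a dict of (utterance-id -> float32 matrix).
recovered = {key: mat for key, mat in kaldi_io.read_mat_ark('feats.ark')}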
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. 
- self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - 
static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return 
padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 
@@ -from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = 
speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell4/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/README.md deleted file mode 100644 index a65bfa961eb95eea182e28bc424724d290f26df3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Performance Record - -## Conformer Result - -* Feature info: dither + specaug + speed perturb -* Training info: lr 0.0005, batch size 8, 1 gpu, acc_grad 4, 80 epochs -* Decoding info: average_num 10 - -| decoding mode | dt05_real_1ch | dt05_simu_1ch | et05_real_1ch | et05_simu_1ch | -|:----------------------:|:-------------:|:-------------:|:-------------:|:-------------:| -| ctc_prefix_beam_search | 19.06% | 21.17% | 28.39% | 29.16% | -| attention_rescoring | 17.92% | 20.22% | 27.40% | 28.25% | diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/conf/train_conformer.yaml deleted file mode 100644 index 49aa0bcab0e37dc22aae19c276a75d6c4b157625..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,78 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 512 # dimension of attention - attention_heads: 8 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 8 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - 
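A minimal usage sketch for the `NoamHoldAnnealing` scheduler removed earlier in this patch, assuming PyTorch is available and the class is importable; the import path and every hyperparameter below are illustrative, not values taken from the recipe.

```python
import torch
# Hypothetical import path; the class lives in this repo's scheduler module.
from wenet.utils.scheduler import NoamHoldAnnealing

model = torch.nn.Linear(80, 256)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # lr is the peak LR

scheduler = NoamHoldAnnealing(
    optimizer,
    max_steps=10_000,
    warmup_steps=1_000,  # linear ramp up to the peak LR
    hold_steps=3_000,    # ~30% of training at the peak, as the docstring suggests
    decay_rate=0.5,      # 0.5 -> Noam-style decay, 1.0 -> Squeezeformer setting
    min_lr=1e-5,
)

for _ in range(10_000):
    optimizer.step()     # a real loop would compute a loss and backprop first
    scheduler.step()
```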
self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - split_with_space: true - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 40 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 8 - -grad_clip: 10 -accum_grad: 4 -max_epoch: 80 -log_interval: 200 - -optim: adam -optim_conf: - lr: 0.0005 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 20000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/chime4_format_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/chime4_format_dir.sh deleted file mode 100644 index 118b950e6e34a7cc262f5586ed153e94174df927..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/chime4_format_dir.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -# wujian@2020 - -set -eu - -echo "$0: Formating chime4 data dir..." - -track=isolated_1ch_track -data_dir=data/chime4 - -mkdir -p $data_dir/{train,dev} - -cat $data_dir/tr05_{simu,real}_noisy/wav.scp $data_dir/tr05_orig_clean/wav.scp \ - $data_dir/train_si200_wsj1_clean/wav.scp | sort -k1 > $data_dir/train/wav.scp -cat $data_dir/tr05_{simu,real}_noisy/text $data_dir/tr05_orig_clean/text \ - $data_dir/train_si200_wsj1_clean/text | sort -k1 > $data_dir/train/text - -cat $data_dir/dt05_{real,simu}_${track}/wav.scp | sort -k1 > $data_dir/dev/wav.scp -cat $data_dir/dt05_{real,simu}_${track}/text | sort -k1 > $data_dir/dev/text - -echo "$0: Format $data_dir done" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/chime4_gen_wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/chime4_gen_wav.sh deleted file mode 100644 index 7beca665efbba2ead04c88a2f5ac6a1c3b8b2a11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/chime4_gen_wav.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -# wujian@2020 - -set -eu - -[ $# -ne 2 ] && echo "Script format error: $0 " && exit 0 - -data_dir=$1 -dump_dir=$2 - -mkdir -p $dump_dir - -num_utts=$(cat $data_dir/wav.scp | wc -l) -echo "Orginal utterances (.wav + .wv1): $num_utts" - -# cat $data_dir/wav.scp | grep "sph2pipe" | \ -# awk -v dir=$dump_dir '{printf("%s -f wav %s %s/%s.wav\n", $2, $5, dir, $1)}' | bash - -cat $data_dir/wav.scp | grep -v "sph2pipe" > $data_dir/raw_wav.scp -find $dump_dir -name "*.wav" | awk -F '/' '{printf("%s %s\n", $NF, $0)}' | \ - sed 's:\.wav::' > $data_dir/sph_wav.scp - -cat $data_dir/{raw_wav,sph_wav}.scp | sort -k1 > $data_dir/wav.scp -num_utts=$(cat $data_dir/wav.scp | wc -l) -echo "Wave utterances (.wav): $num_utts" - -echo "$0: Generate wav => $dump_dir done" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/clean_wsj0_data_prep.sh 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/clean_wsj0_data_prep.sh deleted file mode 100644 index 45798dd244690742ead80e3278b738323014f850..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/clean_wsj0_data_prep.sh +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0. - -# Modified from Kaldi's chime4 recipe - -set -eu - -dataset=chime4 - -. ./tools/parse_options.sh || exit 1; - -if [ $# -ne 1 ]; then - printf "\nUSAGE: %s \n\n" `basename $0` - echo "The argument should be a the top-level WSJ corpus directory." - echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory" - echo "within the top-level corpus directory." - exit 1; -fi - -wsj0=$1 - -srcdir=$PWD/data/chime4/local -dstdir=$PWD/data/$dataset -local=$PWD/local -utils=$PWD/utils -sph2pipe=sph2pipe - -if [ ! `which sph2pipe` ]; then - echo "Could not find sph2pipe, install it first..." - mkdir -p exp && cd exp && wget https://www.openslr.org/resources/3/sph2pipe_v2.5.tar.gz - tar -zxf sph2pipe_v2.5.tar.gz && cd sph2pipe_v2.5 - gcc -o sph2pipe *.c -lm && cd .. && rm -rf sph2pipe_v2.5.tar.gz - sph2pipe=$PWD/sph2pipe_v2.5/sph2pipe - cd .. -fi - -mkdir -p $srcdir && cd $srcdir - -# This version for SI-84 -cat $wsj0/wsj0/doc/indices/train/tr_s_wv1.ndx \ - | $local/cstr_ndx2flist.pl $wsj0 | sort -u > tr05.flist - -# Now for the test sets. -# $wsj0/wsj1/doc/indices/readme.doc -# describes all the different test sets. -# Note: each test-set seems to come in multiple versions depending -# on different vocabulary sizes, verbalized vs. non-verbalized -# pronunciations, etc. We use the largest vocab and non-verbalized -# pronunciations. -# The most normal one seems to be the "baseline 60k test set", which -# is h1_p0. - -# Nov'92 (330 utts, 5k vocab) -cat $wsj0/wsj0/doc/indices/test/nvp/si_et_05.ndx | \ - $local/cstr_ndx2flist.pl $wsj0 | sort > et05.flist - -# Note: the ???'s below match WSJ and SI_DT, or wsj and si_dt. -# Sometimes this gets copied from the CD's with upcasing, don't know -# why (could be older versions of the disks). -find $wsj0/wsj0/si_dt_05 -print | grep -i ".wv1" | sort > dt05.flist - -# Finding the transcript files: -find -L $wsj0 -iname '*.dot' > dot_files.flist - -# Convert the transcripts into our format (no normalization yet) -# adding suffix to utt_id -# 0 for clean condition -for x in tr05 et05 dt05; do - $local/flist2scp.pl $x.flist | sort > ${x}_sph_tmp.scp - cat ${x}_sph_tmp.scp | awk '{print $1}' \ - | $local/find_transcripts.pl dot_files.flist > ${x}_tmp.trans1 - cat ${x}_sph_tmp.scp | awk '{printf("%s %s\n", $1, $2);}' > ${x}_sph.scp - cat ${x}_tmp.trans1 | awk '{printf("%s ", $1); for(i=2;i<=NF;i++) printf("%s ", $i); printf("\n");}' > ${x}.trans1 -done - -# Do some basic normalization steps. At this point we don't remove OOVs-- -# that will be done inside the training scripts, as we'd like to make the -# data-preparation stage independent of the specific lexicon used. -noiseword=""; -for x in tr05 et05 dt05; do - cat $x.trans1 | $local/normalize_transcript.pl $noiseword \ - | sort > $x.txt || exit 1; -done - -# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.) -for x in tr05 et05 dt05; do - awk -v cmd=$sph2pipe '{printf("%s %s -f wav %s |\n", $1, cmd, $2);}' ${x}_sph.scp > ${x}_wav.scp -done - -if [ ! 
-f wsj0-train-spkrinfo.txt ] || [ `cat wsj0-train-spkrinfo.txt | wc -l` -ne 134 ]; then - rm -f wsj0-train-spkrinfo.txt - wget http://www.ldc.upenn.edu/Catalog/docs/LDC93S6A/wsj0-train-spkrinfo.txt \ - || ( echo "Getting wsj0-train-spkrinfo.txt from backup location" && \ - wget --no-check-certificate https://sourceforge.net/projects/kaldi/files/wsj0-train-spkrinfo.txt ); -fi - -if [ ! -f wsj0-train-spkrinfo.txt ]; then - echo "Could not get the spkrinfo.txt file from LDC website (moved)?" - echo "This is possibly omitted from the training disks; couldn't find it." - echo "Everything else may have worked; we just may be missing gender info" - echo "which is only needed for VTLN-related diagnostics anyway." - exit 1 -fi -# Note: wsj0-train-spkrinfo.txt doesn't seem to be on the disks but the -# LDC put it on the web. Perhaps it was accidentally omitted from the -# disks. - -cat $wsj0/wsj0/doc/spkrinfo.txt \ - ./wsj0-train-spkrinfo.txt | \ - perl -ane 'tr/A-Z/a-z/; m/^;/ || print;' | \ - awk '{print $1, $2}' | grep -v -- -- | sort | uniq > spk2gender - -# return back -cd - - -for x in et05 dt05 tr05; do - mkdir -p $dstdir/${x}_orig_clean - cp $srcdir/$x.txt $dstdir/${x}_orig_clean/text || exit 1 - cp $srcdir/${x}_wav.scp $dstdir/${x}_orig_clean/wav.scp || exit 1 -done - -echo "Data preparation succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/clean_wsj1_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/clean_wsj1_data_prep.sh deleted file mode 100644 index 9043879da801bc08cd5d7294f8e1c5e8ed51aa93..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/clean_wsj1_data_prep.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0. - -set -eu - - -if [ $# -ne 1 ]; then - echo "Arguments should be WSJ1 directory" - exit 1; -fi - -wsj1=$1 -dir=$PWD/data/chime4/local -odir=$PWD/data/chime4 -mkdir -p $dir -local=$PWD/local -sph2pipe=sph2pipe - -if [ ! `which sph2pipe` ]; then - echo "Could not find sph2pipe, install it first..." - mkdir -p exp && cd exp && wget https://www.openslr.org/resources/3/sph2pipe_v2.5.tar.gz - tar -zxf sph2pipe_v2.5.tar.gz && cd sph2pipe_v2.5 - gcc -o sph2pipe *.c -lm && cd .. && rm -rf sph2pipe_v2.5.tar.gz - sph2pipe=$PWD/sph2pipe_v2.5/sph2pipe - cd .. -fi - -cd $dir -# This version for SI-200 -cat $wsj1/13-34.1/wsj1/doc/indices/si_tr_s.ndx | \ - $local/ndx2flist.pl $wsj1/??-{?,??}.? | sort > train_si200.flist - -nl=`cat train_si200.flist | wc -l` -[ "$nl" -eq 30278 ] || echo "Warning: expected 30278 lines in train_si200.flist, got $nl" - -# Dev-set for Nov'93 (503 utts) -cat $wsj1/13-34.1/wsj1/doc/indices/h1_p0.ndx | \ - $local/ndx2flist.pl $wsj1/??-{?,??}.? | sort > test_dev93.flist - -# Finding the transcript files: -for x in $wsj1/??-{?,??}.?; do find -L $x -iname '*.dot'; done > dot_files.flist - -# Convert the transcripts into our format (no normalization yet) -for x in train_si200 test_dev93; do - $local/flist2scp.pl $x.flist | sort > ${x}_sph.scp - cat ${x}_sph.scp | awk '{print $1}' | $local/find_transcripts.pl dot_files.flist > $x.trans1 -done - -# Do some basic normalization steps. At this point we don't remove OOVs-- -# that will be done inside the training scripts, as we'd like to make the -# data-preparation stage independent of the specific lexicon used. 
-noiseword=""; -for x in train_si200 test_dev93; do - cat $x.trans1 | $local/normalize_transcript.pl $noiseword | sort > $x.txt || exit 1; -done - -# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.) -for x in train_si200 test_dev93; do - awk -v cmd=$sph2pipe '{printf("%s %s -f wav %s |\n", $1, cmd, $2);}' ${x}_sph.scp > ${x}_wav.scp -done - -# return back -cd - - -for x in train_si200 test_dev93; do - mkdir -p $odir/${x}_wsj1_clean - cp $dir/$x.txt $odir/${x}_wsj1_clean/text || exit 1 - cp $dir/${x}_wav.scp $odir/${x}_wsj1_clean/wav.scp || exit 1 -done - -echo "Data preparation WSJ1 succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/cstr_ndx2flist.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/cstr_ndx2flist.pl deleted file mode 100644 index 79daa1a99db992c5893a9d762fa4ef757b16dc76..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/cstr_ndx2flist.pl +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env perl - -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This is modified from the script in standard Kaldi recipe to account -# for the way the WSJ data is structured on the Edinburgh systems. -# - Arnab Ghoshal, 12/1/12 - -# This program takes as its standard input an .ndx file from the WSJ corpus that looks -# like this: -#;; File: tr_s_wv1.ndx, updated 04/26/94 -#;; -#;; Index for WSJ0 SI-short Sennheiser training data -#;; Data is read WSJ sentences, Sennheiser mic. -#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts -#;; per speaker TI) = 7236 utts -#;; -#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 -#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1 -#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1 - -# and as command-line argument it takes the names of the WSJ disk locations, e.g.: -# /group/corpora/public/wsjcam0/data on DICE machines. -# It outputs a list of absolute pathnames. - -$wsj_dir = $ARGV[0]; - -while(){ - if(m/^;/){ next; } # Comment. Ignore it. - else { - m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_"; - $filename = $2; # as a subdirectory of the distributed disk. 
- if ($filename !~ m/\.wv1$/) { $filename .= ".wv1"; } - $filename = "$wsj_dir/$filename"; - if (-e $filename) { - print "$filename\n"; - } else { - print STDERR "File $filename found in the index but not on disk\n"; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/find_noisy_transcripts.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/find_noisy_transcripts.pl deleted file mode 100644 index d24ae32668840dafddef768622aa234dc3d396f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/find_noisy_transcripts.pl +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - - -# This program takes on its standard input a list of utterance -# id's, one for each line. (e.g. 4k0c030a is a an utterance id). -# It takes as -# Extracts from the dot files the transcripts for a given -# dataset (represented by a file list). -# - -@ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts"; -$dot_flist = shift @ARGV; - -open(L, "<$dot_flist") || die "Opening file list of dot files: $dot_flist\n"; -while(){ - chop; - m:\S+/(\w{6})00.dot: || die "Bad line in dot file list: $_"; - $spk = $1; - $spk2dot{$spk} = $_; -} - - - -while(){ - chop; - $uttid_orig = $_; - $uttid = substr $uttid_orig, 0, 8; - $uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_"; - $spk = $1; - if($spk ne $curspk) { - %utt2trans = { }; # Don't keep all the transcripts in memory... - $curspk = $spk; - $dotfile = $spk2dot{$spk}; - defined $dotfile || die "No dot file for speaker $spk\n"; - open(F, "<$dotfile") || die "Error opening dot file $dotfile\n"; - while() { - $_ =~ m:(.+)\((\w{8})\)\s*$: || die "Bad line $_ in dot file $dotfile (line $.)\n"; - $trans = $1; - $utt = $2; - $utt2trans{$utt} = $trans; - } - } - if(!defined $utt2trans{$uttid}) { - print STDERR "No transcript for utterance $uttid (current dot file is $dotfile)\n"; - } else { - print "$uttid_orig $utt2trans{$uttid}\n"; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/find_transcripts.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/find_transcripts.pl deleted file mode 100644 index 8884e4f811e2cb76b3d07511368f3ceb4ac17a43..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/find_transcripts.pl +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - - -# This program takes on its standard input a list of utterance -# id's, one for each line. (e.g. 4k0c030a is a an utterance id). -# It takes as -# Extracts from the dot files the transcripts for a given -# dataset (represented by a file list). -# - -@ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts"; -$dot_flist = shift @ARGV; - -open(L, "<$dot_flist") || die "Opening file list of dot files: $dot_flist\n"; -while(){ - chop; - m:\S+/(\w{6})00.dot: || die "Bad line in dot file list: $_"; - $spk = $1; - $spk2dot{$spk} = $_; -} - - - -while(){ - chop; - $uttid = $_; - $uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_"; - $spk = $1; - if($spk ne $curspk) { - %utt2trans = { }; # Don't keep all the transcripts in memory... - $curspk = $spk; - $dotfile = $spk2dot{$spk}; - defined $dotfile || die "No dot file for speaker $spk\n"; - open(F, "<$dotfile") || die "Error opening dot file $dotfile\n"; - while() { - $_ =~ m:(.+)\((\w{8})\)\s*$: || die "Bad line $_ in dot file $dotfile (line $.)\n"; - $trans = $1; - $utt = $2; - $utt2trans{$utt} = $trans; - } - } - if(!defined $utt2trans{$uttid}) { - print STDERR "No transcript for utterance $uttid (current dot file is $dotfile)\n"; - } else { - print "$uttid $utt2trans{$uttid}\n"; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/flist2scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/flist2scp.pl deleted file mode 100644 index 7edf1e3f1f44e4ac3b97b39361a46ba8c453c88d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/flist2scp.pl +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# takes in a file list with lines like -# /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1 -# and outputs an scp in kaldi format with lines like -# 4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1 -# (the first thing is the utterance-id, which is the same as the basename of the file. 
- - -while(<>){ - m:^\S+/(\w+)\.[wW][vV]1$: || die "Bad line $_"; - $id = $1; - $id =~ tr/A-Z/a-z/; # Necessary because of weirdness on disk 13-16.1 (uppercase filenames) - print "$id $_"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/ndx2flist.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/ndx2flist.pl deleted file mode 100644 index c5f676affcd11ba1c6411c013c76841d65d776bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/ndx2flist.pl +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This program takes as its standard input an .ndx file from the WSJ corpus that looks -# like this: -#;; File: tr_s_wv1.ndx, updated 04/26/94 -#;; -#;; Index for WSJ0 SI-short Sennheiser training data -#;; Data is read WSJ sentences, Sennheiser mic. -#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts -#;; per speaker TI) = 7236 utts -#;; -#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 -#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1 -#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1 - -#and as command-line arguments it takes the names of the WSJ disk locations, e.g.: -#/mnt/matylda2/data/WSJ0/11-1.1 /mnt/matylda2/data/WSJ0/11-10.1 ... etc. -# It outputs a list of absolute pathnames (it does this by replacing e.g. 11_1_1 with -# /mnt/matylda2/data/WSJ0/11-1.1. -# It also does a slight fix because one of the WSJ disks (WSJ1/13-16.1) was distributed with -# uppercase rather than lower case filenames. - -foreach $fn (@ARGV) { - $fn =~ m:.+/([0-9\.\-]+)/?$: || die "Bad command-line argument $fn\n"; - $disk_id=$1; - $disk_id =~ tr/-\./__/; # replace - and . with - so 11-10.1 becomes 11_10_1 - $fn =~ s:/$::; # Remove final slash, just in case it is present. - $disk2fn{$disk_id} = $fn; -} - -while(){ - if(m/^;/){ next; } # Comment. Ignore it. - else { - m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_"; - $disk=$1; - if(!defined $disk2fn{$disk}) { - die "Disk id $disk not found"; - } - $filename = $2; # as a subdirectory of the distributed disk. - if($disk eq "13_16_1" && `hostname` =~ m/fit.vutbr.cz/) { - # The disk 13-16.1 has been uppercased for some reason, on the - # BUT system. This is a fix specifically for that case. - $filename =~ tr/a-z/A-Z/; # This disk contains all uppercase filenames. Why? 
- } - print "$disk2fn{$disk}/$filename\n"; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/normalize_transcript.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/normalize_transcript.pl deleted file mode 100644 index 6b18d43d26ff42e550b4b05eb77c4b4301c249c0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/normalize_transcript.pl +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This takes data from the standard input that's unnormalized transcripts in the format -# 4k2c0308 Of course there isn\'t any guarantee the company will keep its hot hand [misc_noise] -# 4k2c030a [loud_breath] And new hardware such as the set of personal computers I\. B\. M\. introduced last week can lead to unexpected changes in the software business [door_slam] -# and outputs normalized transcripts. -# c.f. /mnt/matylda2/data/WSJ0/11-10.1/wsj0/transcrp/doc/dot_spec.doc - -@ARGV == 1 || die "usage: normalize_transcript.pl noise_word < transcript > transcript2"; -$noise_word = shift @ARGV; - -while() { - $_ =~ m:^(\S+) (.+): || die "bad line $_"; - $utt = $1; - $trans = $2; - print "$utt"; - foreach $w (split (" ",$trans)) { - $w =~ tr:a-z:A-Z:; # Upcase everything to match the CMU dictionary. . - $w =~ s:\\::g; # Remove backslashes. We don't need the quoting. - $w =~ s:^\%PERCENT$:PERCENT:; # Normalization for Nov'93 test transcripts. - $w =~ s:^\.POINT$:POINT:; # Normalization for Nov'93 test transcripts. - if($w =~ m:^\[\<\w+\]$: || # E.g. [\]$: || # E.g. [door_slam>], this means a door slammed in the next word. Delete. - $w =~ m:\[\w+/\]$: || # E.g. [phone_ring/], which indicates the start of this phenomenon. - $w =~ m:\[\/\w+]$: || # E.g. [/phone_ring], which indicates the end of this phenomenon. - $w eq "~" || # This is used to indicate truncation of an utterance. Not a word. - $w eq ".") { # "." is used to indicate a pause. Silence is optional anyway so not much - # point including this in the transcript. - next; # we won't print this word. - } elsif($w =~ m:\[\w+\]:) { # Other noises, e.g. [loud_breath]. - print " $noise_word"; - } elsif($w =~ m:^\<([\w\']+)\>$:) { - # e.g. replace with and. (the <> means verbal deletion of a word).. but it's pronounced. - print " $1"; - } elsif($w eq "--DASH") { - print " -DASH"; # This is a common issue; the CMU dictionary has it as -DASH. -# } elsif($w =~ m:(.+)\-DASH$:) { # E.g. INCORPORATED-DASH... 
seems the DASH gets combined with previous word -# print " $1 -DASH"; - } else { - print " $w"; - } - } - print "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/real_enhan_chime4_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/real_enhan_chime4_data_prep.sh deleted file mode 100644 index ea93343adf0bf6d68a6f54476841c3a8b628cdcc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/real_enhan_chime4_data_prep.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env bash -set -e - -# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0. - -# This is modified from the script in standard Kaldi recipe to account -# for the way the WSJ data is structured on the Edinburgh systems. -# - Arnab Ghoshal, 29/05/12 - -# Modified from the script for CHiME2 baseline -# Shinji Watanabe 02/13/2015 - -# Config: -eval_flag=true # make it true when the evaluation data are released - -. tools/parse_options.sh || exit 1; - -if [ $# -ne 2 ]; then - printf "\nUSAGE: %s \n\n" `basename $0` - echo "The argument should be a the directory that only contains enhanced speech data." - exit 1; -fi - -echo "$0 $@" # Print the command line for logging - -enhan=$1 -audio_dir=$2 - -dir=$PWD/data/chime4/local -mkdir -p $dir -local=$PWD/local -utils=$PWD/utils -odir=$PWD/data/chime4 - -if $eval_flag; then -list_set="tr05_real_$enhan dt05_real_$enhan et05_real_$enhan" -else -list_set="tr05_real_$enhan dt05_real_$enhan" -fi - -cd $dir - -find $audio_dir/ -name '*.wav' | grep 'tr05_bus_real\|tr05_caf_real\|tr05_ped_real\|tr05_str_real' | sort -u > tr05_real_$enhan.flist -find $audio_dir/ -name '*.wav' | grep 'dt05_bus_real\|dt05_caf_real\|dt05_ped_real\|dt05_str_real' | sort -u > dt05_real_$enhan.flist -if $eval_flag; then -find $audio_dir/ -name '*.wav' | grep 'et05_bus_real\|et05_caf_real\|et05_ped_real\|et05_str_real' | sort -u > et05_real_$enhan.flist -fi - -# make a scp file from file list -for x in $list_set; do - cat $x.flist | awk -F'[/]' '{print $NF}'| sed -e 's/\.wav/_REAL/' > ${x}_wav.ids - paste -d" " ${x}_wav.ids $x.flist | sort -k 1 > ${x}_wav.scp -done - -#make a transcription from dot -cat tr05_real.dot | sed -e 's/(\(.*\))/\1/' | awk '{print $NF "_REAL"}'> tr05_real_$enhan.ids -cat tr05_real.dot | sed -e 's/(.*)//' > tr05_real_$enhan.txt -paste -d" " tr05_real_$enhan.ids tr05_real_$enhan.txt | sort -k 1 > tr05_real_$enhan.trans1 -cat dt05_real.dot | sed -e 's/(\(.*\))/\1/' | awk '{print $NF "_REAL"}'> dt05_real_$enhan.ids -cat dt05_real.dot | sed -e 's/(.*)//' > dt05_real_$enhan.txt -paste -d" " dt05_real_$enhan.ids dt05_real_$enhan.txt | sort -k 1 > dt05_real_$enhan.trans1 -if $eval_flag; then -cat et05_real.dot | sed -e 's/(\(.*\))/\1/' | awk '{print $NF "_REAL"}'> et05_real_$enhan.ids -cat et05_real.dot | sed -e 's/(.*)//' > et05_real_$enhan.txt -paste -d" " et05_real_$enhan.ids et05_real_$enhan.txt | sort -k 1 > et05_real_$enhan.trans1 -fi - -# Do some basic normalization steps. At this point we don't remove OOVs-- -# that will be done inside the training scripts, as we'd like to make the -# data-preparation stage independent of the specific lexicon used. -noiseword=""; -for x in $list_set;do - cat $x.trans1 | $local/normalize_transcript.pl $noiseword \ - | sort > $x.txt || exit 1; -done - -# copying data to data/... 
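The awk/sed/paste pipeline earlier in this script pairs each enhanced wav file with an utterance id derived from its basename plus a condition suffix. A rough Python equivalent of that pairing, sketched for illustration only (the helper and the file name are made up):

```python
import os
from pathlib import Path


def make_wav_scp(wav_paths, suffix="_REAL"):
    """Build sorted 'utt_id /abs/path.wav' lines, Kaldi wav.scp style."""
    entries = []
    for p in wav_paths:
        utt_id = Path(p).name.replace(".wav", suffix)
        entries.append(f"{utt_id} {os.path.abspath(p)}")
    return sorted(entries)  # mirrors `sort -k 1` on the pasted ids


if __name__ == "__main__":
    print("\n".join(make_wav_scp(["dt05_bus_real/F01_22GC010X_BUS.wav"])))
```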
-for x in $list_set; do - mkdir -p $odir/$x - cp ${x}_wav.scp $odir/$x/wav.scp || exit 1; - cp ${x}.txt $odir/$x/text || exit 1; -done - -echo "Data preparation succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/real_noisy_chime4_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/real_noisy_chime4_data_prep.sh deleted file mode 100644 index aeb3b0314bbaa021577bdf1e0ba519468d8b666f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/real_noisy_chime4_data_prep.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env bash - -set -eu - -# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0. - -# This is modified from the script in standard Kaldi recipe to account -# for the way the WSJ data is structured on the Edinburgh systems. -# - Arnab Ghoshal, 29/05/12 - -# Modified from the script for CHiME2 baseline -# Shinji Watanabe 02/13/2015 -# Modified to use data of six channels -# Szu-Jui Chen 09/29/2017 - -# Config: -eval_flag=true # make it true when the evaluation data are released - -. tools/parse_options.sh || exit 1 - -if [ $# -ne 1 ]; then - printf "\nUSAGE: %s \n\n" `basename $0` - echo "The argument should be a the top-level Chime4 directory." - echo "It is assumed that there will be a 'data' subdirectory" - echo "within the top-level corpus directory." - exit 1; -fi - -echo "$0 $@" # Print the command line for logging - -audio_dir=$1/data/audio/16kHz/isolated/ -trans_dir=$1/data/transcriptions - -echo "extract all channels (CH[1-6].wav) for noisy data" - -dir=$PWD/data/chime4/local -mkdir -p $dir -local=$PWD/local - -if $eval_flag; then -list_set="tr05_real_noisy dt05_real_noisy et05_real_noisy" -else -list_set="tr05_real_noisy dt05_real_noisy" -fi - -cd $dir - -find $audio_dir -name '*CH[1-6].wav' | grep 'tr05_bus_real\|tr05_caf_real\|tr05_ped_real\|tr05_str_real' | sort -u > tr05_real_noisy.flist -find $audio_dir -name '*CH[1-6].wav' | grep 'dt05_bus_real\|dt05_caf_real\|dt05_ped_real\|dt05_str_real' | sort -u > dt05_real_noisy.flist -if $eval_flag; then -find $audio_dir -name '*CH[1-6].wav' | grep 'et05_bus_real\|et05_caf_real\|et05_ped_real\|et05_str_real' | sort -u > et05_real_noisy.flist -fi - -# make a dot format from json annotation files -cp $trans_dir/tr05_real.dot_all tr05_real.dot -cp $trans_dir/dt05_real.dot_all dt05_real.dot -if $eval_flag; then -cp $trans_dir/et05_real.dot_all et05_real.dot -fi - -# make a scp temporary file from file list -for x in $list_set; do - cat $x.flist | awk -F'[/]' '{print $NF}'| sed -e 's/\.wav/_REAL/' > ${x}_wav.id.temp - cat ${x}_wav.id.temp | awk -F'_' '{print $3}' | awk -F'.' 
'{print $2}' > $x.ch - cat ${x}_wav.id.temp | awk -F'_' '{print $1}' > $x.part1 - cat ${x}_wav.id.temp | sed -e 's/^..._//' > $x.part2 - paste -d"_" $x.part1 $x.ch $x.part2 > ${x}_wav.ids - paste -d" " ${x}_wav.ids $x.flist | sort -t_ -k1,1 -k3 > ${x}_wav.scp.temp -done - -#make a transcription from dot -cat tr05_real.dot | sed -e 's/(\(.*\))/\1/' | awk '{print $NF ".CH1_REAL"}'> tr05_real_noisy.ids -cat tr05_real.dot | sed -e 's/(.*)//' > tr05_real_noisy.txt -paste -d" " tr05_real_noisy.ids tr05_real_noisy.txt | \ -awk '{print}{sub(/CH1/, "CH2",$0);print}{sub(/CH2/, "CH3",$0);print}{sub(/CH3/, "CH4",$0);print}{sub(/CH4/, "CH5",$0);print}{sub(/CH5/, "CH6",$0);print}' | \ -sort -k 1 > tr05_real_noisy.trans1 -cat dt05_real.dot | sed -e 's/(\(.*\))/\1/' | awk '{print $NF ".CH1_REAL"}'> dt05_real_noisy.ids -cat dt05_real.dot | sed -e 's/(.*)//' > dt05_real_noisy.txt -paste -d" " dt05_real_noisy.ids dt05_real_noisy.txt | \ -awk '{print}{sub(/CH1/, "CH2",$0);print}{sub(/CH2/, "CH3",$0);print}{sub(/CH3/, "CH4",$0);print}{sub(/CH4/, "CH5",$0);print}{sub(/CH5/, "CH6",$0);print}' | \ -sort -k 1 > dt05_real_noisy.trans1 -if $eval_flag; then -cat et05_real.dot | sed -e 's/(\(.*\))/\1/' | awk '{print $NF ".CH1_REAL"}'> et05_real_noisy.ids -cat et05_real.dot | sed -e 's/(.*)//' > et05_real_noisy.txt -paste -d" " et05_real_noisy.ids et05_real_noisy.txt | \ -awk '{print}{sub(/CH1/, "CH2",$0);print}{sub(/CH2/, "CH3",$0);print}{sub(/CH3/, "CH4",$0);print}{sub(/CH4/, "CH5",$0);print}{sub(/CH5/, "CH6",$0);print}' | \ -sort -k 1 > et05_real_noisy.trans1 -fi - -# Do some basic normalization steps. At this point we don't remove OOVs-- -# that will be done inside the training scripts, as we'd like to make the -# data-preparation stage independent of the specific lexicon used. -noiseword=""; -for x in $list_set;do - cat ${x}_wav.scp.temp | awk '{print $1}' > $x.txt.part1 - cat $x.trans1 | awk '{$1=""; print $0}' | sed 's/^[ \t]*//g' > $x.txt.part2 - paste -d" " $x.txt.part1 $x.txt.part2 > $x.trans1 - cat $x.trans1 | $local/normalize_transcript.pl $noiseword \ - | sort > $x.txt || exit 1; -done - -# copying data to data/... -for x in $list_set; do - sort ${x}_wav.scp.temp > ${x}_wav.scp - mkdir -p ../../chime4/$x - cp ${x}_wav.scp ../../chime4/$x/wav.scp || exit 1; - cp ${x}.txt ../../chime4/$x/text || exit 1; -done - -# clean up temp files -rm *.temp -rm *.part{1,2} - -echo "Data preparation succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/simu_enhan_chime4_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/simu_enhan_chime4_data_prep.sh deleted file mode 100644 index f5d28366dd0c7dec74b8441237ae8fbe3789363f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/simu_enhan_chime4_data_prep.sh +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env bash -set -eu - -# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0. - -# This is modified from the script in standard Kaldi recipe to account -# for the way the WSJ data is structured on the Edinburgh systems. -# - Arnab Ghoshal, 29/05/12 - -# Modified from the script for CHiME2 baseline -# Shinji Watanabe 02/13/2015 - -# Config: -eval_flag=true # make it true when the evaluation data are released - -. 
tools/parse_options.sh || exit 1; - -if [ $# -ne 2 ]; then - printf "\nUSAGE: %s \n\n" `basename $0` - echo "The argument should be a the directory that only contains enhanced speech data." - exit 1; -fi - -echo "$0 $@" # Print the command line for logging - -enhan=$1 -audio_dir=$2 - -dir=$PWD/data/chime4/local -mkdir -p $dir -local=$PWD/local -utils=$PWD/utils -odir=$PWD/data/chime4 - -if $eval_flag; then -list_set="tr05_simu_$enhan dt05_simu_$enhan et05_simu_$enhan" -else -list_set="tr05_simu_$enhan dt05_simu_$enhan" -fi - -cd $dir - -find $audio_dir/ -name '*.wav' | grep 'tr05_bus_simu\|tr05_caf_simu\|tr05_ped_simu\|tr05_str_simu' | sort -u > tr05_simu_$enhan.flist -find $audio_dir/ -name '*.wav' | grep 'dt05_bus_simu\|dt05_caf_simu\|dt05_ped_simu\|dt05_str_simu' | sort -u > dt05_simu_$enhan.flist -if $eval_flag; then -find $audio_dir/ -name '*.wav' | grep 'et05_bus_simu\|et05_caf_simu\|et05_ped_simu\|et05_str_simu' | sort -u > et05_simu_$enhan.flist -fi - -# make a scp file from file list -for x in $list_set; do - cat $x.flist | awk -F'[/]' '{print $NF}'| sed -e 's/\.wav/_SIMU/' > ${x}_wav.ids - paste -d" " ${x}_wav.ids $x.flist | sort -k 1 > ${x}_wav.scp -done - -# make a transcription from dot -# simulation training data extract dot file from original WSJ0 data -# since it is generated from these data -if [ ! -e dot_files.flist ]; then - echo "Could not find $dir/dot_files.flist files, first run local/clean_wsj0_data_prep.sh"; - exit 1; -fi -cat tr05_simu_${enhan}_wav.scp | awk -F'[_]' '{print $2}' | tr '[A-Z]' '[a-z]' \ - | $local/find_noisy_transcripts.pl dot_files.flist | cut -f 2- -d" " > tr05_simu_$enhan.txt -cat tr05_simu_${enhan}_wav.scp | cut -f 1 -d" " > tr05_simu_$enhan.ids -paste -d" " tr05_simu_$enhan.ids tr05_simu_$enhan.txt | sort -k 1 > tr05_simu_$enhan.trans1 -# dt05 and et05 simulation data are generated from the CHiME4 booth recording -# and we use CHiME4 dot files -cat dt05_simu.dot | sed -e 's/(\(.*\))/\1/' | awk '{print $NF "_SIMU"}'> dt05_simu_$enhan.ids -cat dt05_simu.dot | sed -e 's/(.*)//' > dt05_simu_$enhan.txt -paste -d" " dt05_simu_$enhan.ids dt05_simu_$enhan.txt | sort -k 1 > dt05_simu_$enhan.trans1 -if $eval_flag; then -cat et05_simu.dot | sed -e 's/(\(.*\))/\1/' | awk '{print $NF "_SIMU"}'> et05_simu_$enhan.ids -cat et05_simu.dot | sed -e 's/(.*)//' > et05_simu_$enhan.txt -paste -d" " et05_simu_$enhan.ids et05_simu_$enhan.txt | sort -k 1 > et05_simu_$enhan.trans1 -fi - -# Do some basic normalization steps. At this point we don't remove OOVs-- -# that will be done inside the training scripts, as we'd like to make the -# data-preparation stage independent of the specific lexicon used. -noiseword=""; -for x in $list_set;do - cat $x.trans1 | $local/normalize_transcript.pl $noiseword \ - | sort > $x.txt || exit 1; -done - -# copying data to data/... 
-for x in $list_set; do - mkdir -p $odir/$x - cp ${x}_wav.scp $odir/$x/wav.scp || exit 1; - cp ${x}.txt $odir/$x/text || exit 1; -done - -echo "Data preparation succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/simu_noisy_chime4_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/simu_noisy_chime4_data_prep.sh deleted file mode 100644 index 52bd7c6c3aa13e596847b721194a703e42030c75..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/local/simu_noisy_chime4_data_prep.sh +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env bash -set -eu - -# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0. - -# This is modified from the script in standard Kaldi recipe to account -# for the way the WSJ data is structured on the Edinburgh systems. -# - Arnab Ghoshal, 29/05/12 - -# Modified from the script for CHiME2 baseline -# Shinji Watanabe 02/13/2015 -# Modified to use data of six channels -# Szu-Jui Chen 09/29/2017 - -# Config: -eval_flag=true # make it true when the evaluation data are released - -. tools/parse_options.sh || exit 1; - -if [ $# -ne 1 ]; then - printf "\nUSAGE: %s \n\n" `basename $0` - echo "The argument should be a the top-level Chime4 directory." - echo "It is assumed that there will be a 'data' subdirectory" - echo "within the top-level corpus directory." - exit 1; -fi - -echo "$0 $@" # Print the command line for logging - -audio_dir=$1/data/audio/16kHz/isolated/ -trans_dir=$1/data/transcriptions - -echo "extract all channels (CH[1-6].wav) for noisy data" - -dir=$PWD/data/chime4/local -mkdir -p $dir -local=$PWD/local -utils=$PWD/utils - -if $eval_flag; then -list_set="tr05_simu_noisy dt05_simu_noisy et05_simu_noisy" -else -list_set="tr05_simu_noisy dt05_simu_noisy" -fi - -cd $dir - -find $audio_dir -name '*CH[1-6].wav' | grep 'tr05_bus_simu\|tr05_caf_simu\|tr05_ped_simu\|tr05_str_simu' | sort -u > tr05_simu_noisy.flist -find $audio_dir -name '*CH[1-6].wav' | grep 'dt05_bus_simu\|dt05_caf_simu\|dt05_ped_simu\|dt05_str_simu' | sort -u > dt05_simu_noisy.flist -if $eval_flag; then -find $audio_dir -name '*CH[1-6].wav' | grep 'et05_bus_simu\|et05_caf_simu\|et05_ped_simu\|et05_str_simu' | sort -u > et05_simu_noisy.flist -fi - -# make a dot format from json annotation files -cp $trans_dir/dt05_simu.dot_all dt05_simu.dot -if $eval_flag; then -cp $trans_dir/et05_simu.dot_all et05_simu.dot -fi - -# make a scp file from file list -for x in $list_set; do - cat $x.flist | awk -F'[/]' '{print $NF}'| sed -e 's/\.wav/_SIMU/' > ${x}_wav.id.temp - cat ${x}_wav.id.temp | awk -F'_' '{print $3}' | awk -F'.' '{print $2}' > $x.ch - cat ${x}_wav.id.temp | awk -F'_' '{print $1}' > $x.part1 - cat ${x}_wav.id.temp | sed -e 's/^..._//' > $x.part2 - paste -d"_" $x.part1 $x.ch $x.part2 > ${x}_wav.ids - paste -d" " ${x}_wav.ids $x.flist | sort -t_ -k1,1 -k3 > ${x}_wav.scp.temp -done - -# make a transcription from dot -# simulation training data extract dot file from original WSJ0 data -# since it is generated from these data -if [ ! 
-e dot_files.flist ]; then - echo "Could not find $dir/dot_files.flist files, first run local/clean_wsj0_data_prep.sh"; - exit 1; -fi -cat tr05_simu_noisy_wav.scp.temp | awk -F'[_]' '{print $3}' | tr '[A-Z]' '[a-z]' \ - | $local/find_noisy_transcripts.pl dot_files.flist | cut -f 2- -d" " > tr05_simu_noisy.txt -cat tr05_simu_noisy_wav.scp.temp | cut -f 1 -d" " > tr05_simu_noisy.ids -paste -d" " tr05_simu_noisy.ids tr05_simu_noisy.txt | sort -t_ -k1,1 -k3 > tr05_simu_noisy.trans1 -# dt05 and et05 simulation data are generated from the CHiME4 booth recording -# and we use CHiME4 dot files -cat dt05_simu.dot | sed -e 's/(\(.*\))/\1/' | awk '{print $NF ".CH1_SIMU"}'> dt05_simu_noisy.ids -cat dt05_simu.dot | sed -e 's/(.*)//' > dt05_simu_noisy.txt -paste -d" " dt05_simu_noisy.ids dt05_simu_noisy.txt | \ -awk '{print}{sub(/CH1/, "CH2",$0);print}{sub(/CH2/, "CH3",$0);print}{sub(/CH3/, "CH4",$0);print}{sub(/CH4/, "CH5",$0);print}{sub(/CH5/, "CH6",$0);print}' | \ -sort -k 1 > dt05_simu_noisy.trans1 -if $eval_flag; then -cat et05_simu.dot | sed -e 's/(\(.*\))/\1/' | awk '{print $NF ".CH1_SIMU"}'> et05_simu_noisy.ids -cat et05_simu.dot | sed -e 's/(.*)//' > et05_simu_noisy.txt -paste -d" " et05_simu_noisy.ids et05_simu_noisy.txt | \ -awk '{print}{sub(/CH1/, "CH2",$0);print}{sub(/CH2/, "CH3",$0);print}{sub(/CH3/, "CH4",$0);print}{sub(/CH4/, "CH5",$0);print}{sub(/CH5/, "CH6",$0);print}' | \ -sort -k 1 > et05_simu_noisy.trans1 -fi - -# Do some basic normalization steps. At this point we don't remove OOVs-- -# that will be done inside the training scripts, as we'd like to make the -# data-preparation stage independent of the specific lexicon used. -noiseword=""; -for x in $list_set;do - cat ${x}_wav.scp.temp | awk '{print $1}' > $x.txt.part1 - cat $x.trans1 | awk '{$1=""; print $0}' | sed 's/^[ \t]*//g' > $x.txt.part2 - paste -d" " $x.txt.part1 $x.txt.part2 > $x.trans1 - cat $x.trans1 | $local/normalize_transcript.pl $noiseword \ - | sort > $x.txt || exit 1; -done - -# copying data to data/... -for x in $list_set; do - sort ${x}_wav.scp.temp > ${x}_wav.scp - mkdir -p ../../chime4/$x - cp ${x}_wav.scp ../../chime4/$x/wav.scp || exit 1; - cp ${x}.txt ../../chime4/$x/text || exit 1; -done - -# clean up temp files -rm *.temp -rm *.part{1,2} - -echo "Data preparation succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. 
-export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/run.sh deleted file mode 100644 index f010265fa3348af1b04753bcf3dc77f5e71bffb8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/run.sh +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2020 Jian Wu -# License: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -set -eu - -stage="1-4" -space="" -track="isolated_1ch_track" -wsj1_data_dir=//scratch/jwu/wsj1 -chime4_data_dir=/scratch/jwu/CHiME4 -dump_wav_dir=/scratch/jwu/chime4_wav - -data_dir=data/chime4 -dict=$data_dir/dict_char.txt -train_config=conf/train_conformer.yaml -exp_dir=exp/1a -decode_modes="ctc_prefix_beam_search attention_rescoring" -average_checkpoint=true -average_num=10 - -. ./path.sh -. ./tools/parse_options.sh || exit 1 - -beg=$(echo $stage | awk -F '-' '{print $1}') -end=$(echo $stage | awk -F '-' '{print $2}') -[ -z $end ] && end=$beg - -if [ $end -ge 1 ] && [ $beg -le 1 ]; then - echo "Stage 1: preparing data ..." - ./local/clean_wsj0_data_prep.sh $chime4_data_dir/CHiME3/data/WSJ0 - ./local/simu_noisy_chime4_data_prep.sh $chime4_data_dir - ./local/real_noisy_chime4_data_prep.sh $chime4_data_dir - ./local/simu_enhan_chime4_data_prep.sh $track $chime4_data_dir/data/audio/16kHz/$track - ./local/real_enhan_chime4_data_prep.sh $track $chime4_data_dir/data/audio/16kHz/$track - ./local/clean_wsj1_data_prep.sh $wsj1_data_dir - ./local/chime4_format_dir.sh -fi - - -if [ $end -ge 2 ] && [ $beg -le 2 ]; then - echo -e "\n<*IN*>\n<*MR.*>" > $data_dir/train/non_lang.txt - for name in dev train; do - python tools/text2token.py $data_dir/$name/text -n 1 -s 1 \ - -l $data_dir/train/non_lang.txt > $data_dir/$name/char - done - mkdir -p $(dirname $dict) && echo -e " 0\n 1" > ${dict} - cat $data_dir/train/char | cut -f 2- -d" " | tr " " "\n" | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict} - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict - echo "Make dictionary done" -fi - - -if [ $end -ge 3 ] && [ $beg -le 3 ]; then - ./local/chime4_gen_wav.sh $data_dir/train $dump_wav_dir - tools/compute_cmvn_stats.py --num_workers 16 \ - --train_config $train_config \ - --in_scp $data_dir/train/wav.scp \ - --out_cmvn $data_dir/train/global_cmvn - echo "Prepare data, prepare required format" - for x in train dev; do - tools/make_raw_list.py $data_dir/$x/wav.scp $data_dir/$x/char \ - $data_dir/$x/data.list - done -fi - -if [ $end -ge 4 ] && [ $beg -le 4 ]; then - mkdir -p $exp_dir && cp $data_dir/train/global_cmvn $exp_dir - python wenet/bin/train.py \ - --gpu 0 \ - --config $train_config \ - --train_data $data_dir/train/data.list \ - --cv_data $data_dir/dev/data.list \ - --model_dir $exp_dir \ - --num_workers 4 \ - --symbol_table $dict \ - --cmvn $exp_dir/global_cmvn \ - --pin_memory > $exp_dir/train.log 2>&1 -fi - -suffix="isolated_1ch_track" -if [ $end -ge 5 ] && [ $beg -le 5 ]; then - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$exp_dir/avg_${average_num}.pt - echo "do model 
average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $exp_dir \ - --num ${average_num} \ - --val_best - fi - nj=4 - ctc_weight=0.5 - for x in dt05_{simu,real} et05_{simu,real}; do - subdir=${x}_${suffix} - tools/make_raw_list.py $data_dir/$subdir/wav.scp $data_dir/$subdir/text \ - $data_dir/$subdir/data.list - done - for mode in ${decode_modes}; do - for x in dt05_{simu,real} et05_{simu,real}; do - subdir=${x}_${suffix} - dec_dir=$exp_dir/${subdir}_${mode} && mkdir -p $dec_dir - python wenet/bin/recognize.py \ - --gpu 0 \ - --mode $mode \ - --config $exp_dir/train.yaml \ - --test_data $data_dir/$subdir/data.list \ - --checkpoint $exp_dir/avg_${average_num}.pt \ - --beam_size 8 \ - --batch_size 1 \ - --dict $dict \ - --ctc_weight $ctc_weight \ - --result_file $dec_dir/text & - done - wait - done - for mode in ${decode_modes}; do - for x in dt05_{simu,real} et05_{simu,real}; do - subdir=${x}_${suffix} - dec_dir=$exp_dir/${subdir}_${mode} - sed 's:: :g' $dec_dir/text > $dec_dir/text.norm - python tools/compute-wer.py --char=1 --v=1 \ - $data_dir/$subdir/text $dec_dir/text.norm > $dec_dir/wer - done - done -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. 
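Stage 2 of run.sh above seeds a character dictionary before appending every character seen in the training text. In standard WeNet recipes the seeded entries are the special tokens `<blank>` (id 0) and `<unk>` (id 1), with `<sos/eos>` appended last; treating that as an assumption, a rough Python sketch of the same construction:

```python
def build_char_dict(char_lines):
    """WeNet-style symbol table: <blank>, <unk>, sorted corpus chars, <sos/eos>.

    Ids follow the shell pipeline above (`awk '{print $0 " " NR+1}'`), so the
    first corpus character gets id 2. The special tokens are assumed, not
    copied from the recipe text.
    """
    symbols = ["<blank>", "<unk>"]
    symbols += sorted({c for line in char_lines for c in line.split()[1:]})
    symbols.append("<sos/eos>")
    return {sym: idx for idx, sym in enumerate(symbols)}


if __name__ == "__main__":
    demo = ["utt1 你 好", "utt2 好 的"]  # illustrative lines from train/char
    for sym, idx in build_char_dict(demo).items():
        print(sym, idx)
```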
(authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. ./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. 
- elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - 
pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' 
: means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. 
-has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! $skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = 
result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. - format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - 
default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: Daniel 
Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
Will be used in lexicon and language model FST compiling.
-cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk '
-  BEGIN {
-    print "<eps> 0";
-  }
-  {
-    printf("%s %d\n", $1, NR);
-  }
-  END {
-    printf("#0 %d\n", NR+1);
-    printf("<s> %d\n", NR+2);
-    printf("</s> %d\n", NR+3);
-  }' > $dir/words.txt || exit 1;
-
-# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time.
-token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'`
-word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'`
-
-tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \
-  fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \
-  --keep_isymbols=false --keep_osymbols=false | \
-  fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \
-  fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;
-
-echo "Lexicon and token FSTs compiling succeeded"
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/ctc_token_fst.py
deleted file mode 100644
index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/ctc_token_fst.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 1 <eps> <eps>')
-print('1 1 <blank> <eps>')
-print('2 2 <blank> <eps>')
-print('2 0 <eps> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    node = 3
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(1, node, phone, phone))
-            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
-            print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>'))
-            node += 1
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/ctc_token_fst_compact.py
deleted file mode 100644
index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/ctc_token_fst_compact.py
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 0 <blank> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    node = 1
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(0, node, phone, phone))
-            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
-            print('{} {} {} {}'.format(node, 0, '<eps>', '<eps>'))
-            node += 1
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/ctc_token_fst_corrected.py
deleted file mode 100644
index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/ctc_token_fst_corrected.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-
-def il(n):
-    return n + 1
-
-
-def ol(n):
-    return n + 1
-
-
-def s(n):
-    return n
-
-
-if __name__ == "__main__":
-    with open(sys.argv[1]) as f:
-        lines = f.readlines()
-    phone_count = 0
-    disambig_count = 0
-    for line in lines:
-        sp = line.split()
-        phone = sp[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        if phone.startswith('#'):
-            disambig_count += 1
-        else:
-            phone_count += 1
-
-    # 1. add start state
-    print('0 0 {} 0'.format(il(0)))
-
-    # 2. 0 -> i, i -> i, i -> 0
-    for i in range(1, phone_count + 1):
-        print('0 {} {} {}'.format(s(i), il(i), ol(i)))
-        print('{} {} {} 0'.format(s(i), s(i), il(i)))
-        print('{} 0 {} 0'.format(s(i), il(0)))
-
-    # 3. i -> other phone
-    for i in range(1, phone_count + 1):
-        for j in range(1, phone_count + 1):
-            if i != j:
-                print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j)))
-
-    # 4. add disambiguous arcs on every final state
-    for i in range(0, phone_count + 1):
-        for j in range(phone_count + 2, phone_count + disambig_count + 2):
-            print('{} {} {} {}'.format(s(i), s(i), 0, j))
-
-    # 5. every i is final state
-    for i in range(0, phone_count + 1):
-        print(s(i))
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/eps2disambig.pl
deleted file mode 100644
index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/eps2disambig.pl
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2011 Microsoft Corporation
-#           2015 Guoguo Chen
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# This script replaces epsilon with #0 on the input side only, of the G.fst
-# acceptor.
-
-while(<>){
-  if (/\s+#0\s+/) {
-    print STDERR "$0: ERROR: LM has word #0, " .
-      "which is reserved as disambiguation symbol\n";
-    exit 1;
-  }
-  s:^(\d+\s+\d+\s+)\<eps\>(\s+):$1#0$2:;
-  print;
-}
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/make_lexicon_fst.pl
deleted file mode 100644
index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/make_lexicon_fst.pl
+++ /dev/null
@@ -1,155 +0,0 @@
-#!/usr/bin/env perl
-use warnings; #sed replacement for -w perl parameter
-# Copyright 2010-2011 Microsoft Corporation
-#                2013 Johns Hopkins University (author: Daniel Povey)
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
-                if word.encode('utf8').isalpha() and '▁' in unit_table:
-                    word = '▁' + word
-                chars = ' '.join(word)  # word is a char list
-            fout.write('{} {}\n'.format(word, chars))
-            lexicon_table.add(word)
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/remove_oovs.pl
deleted file mode 100644
index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/remove_oovs.pl
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# This script removes lines that contain these OOVs on either the
-# third or fourth fields of the line. It is intended to remove arcs
-# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in).
-
-if ( @ARGV < 1 && @ARGV > 2) {
-  die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n";
-}
-
-$unklist = shift @ARGV;
-open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n";
-while(<S>){
-  @A = split(" ", $_);
-  @A == 1 || die "Bad line in unknown-symbol list: $_";
-  $unk{$A[0]} = 1;
-}
-
-$num_removed = 0;
-while(<>){
-  @A = split(" ", $_);
-  if(defined $unk{$A[2]} || defined $unk{$A[3]}) {
-    $num_removed++;
-  } else {
-    print;
-  }
-}
-print STDERR "remove_oovs.pl: removed $num_removed lines.\n";
-
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/rnnt_token_fst.py
deleted file mode 100644
index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/rnnt_token_fst.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 0 <blank> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(0, 0, phone, phone))
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/s2eps.pl
deleted file mode 100644
index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/fst/s2eps.pl
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance
with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 else 
sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index]
-            scores = topk_prob.max(1)
-            hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps]
-            for i, key in enumerate(keys):
-                content = ''
-                for w in hyps[i]:
-                    if w == eos:
-                        break
-                    content += char_dict[w]
-                file_obj.write('{} {}\n'.format(key, content))
-            return key, content
-
-        if len(torch_out) > 0 and len(onnx_out) > 0:
-            key, content = post_process(torch_out, torch_file, keys)
-            logger.info('torch: {} {}'.format(key, content))
-            key, content = post_process(onnx_out, onnx_file, keys)
-            logger.info('onnx : {} {}'.format(key, content))
-    torch_file.close()
-    onnx_file.close()
-
-
-def generate_config(enc_session, ctc_session, args):
-    template = """
-# Model parameters group
-model_parameters:
-  # The original ONNX floating-point model file
-  onnx_model: '{}'
-  # Target AI chip architecture of the conversion
-  march: 'bernoulli2'
-  # Name prefix of the converted model files used for on-board execution
-  output_model_file_prefix: '{}'
-  # Directory for storing the model conversion outputs
-  working_dir: '{}'
-  # Whether the converted hybrid model keeps the ability to dump the intermediate results of each layer
-  layer_out_dump: False
-  # Log level during conversion
-  log_level: 'debug'
-# Input information parameters group
-input_parameters:
-  # Input node names of the original floating-point model
-  input_name: '{}'
-  # Input data types of the original floating-point model (count/order consistent with input_name)
-  input_type_train: '{}'
-  # Input data layouts of the original floating-point model (count/order consistent with input_name)
-  input_layout_train: '{}'
-  # Input data shapes of the original floating-point model
-  input_shape: '{}'
-  # batch_size fed to the network at actual execution time, default is 1
-  # input_batch: 1
-  # Input data preprocessing method added into the model
-  norm_type: '{}'
-  # Mean subtracted from the image by the preprocessing method; per-channel means must be separated by spaces
-  # mean_value: ''
-  # Image scale factor of the preprocessing method; per-channel scales must be separated by spaces
-  # scale_value: ''
-  # Input data types the converted hybrid model should adapt to (count/order consistent with input_name)
-  input_type_rt: '{}'
-  # Special standard of the input data format
-  input_space_and_range: ''
-  # Input data layouts the converted hybrid model should adapt to (count/order consistent with input_name)
-  input_layout_rt: '{}'
-# Calibration parameters group
-calibration_parameters:
-  # Directory holding the calibration samples used for model calibration
-  cal_data_dir: '{}'
-  # Enable automatic processing of image calibration samples (skimage read, resize to the input node size)
-  preprocess_on: False
-  # Algorithm type used for calibration
-  calibration_type: '{}'
-  # Parameter of the max calibration method
-  max_percentile: 1.0
-  # Force the specified OPs to run on the CPU
-  run_on_cpu: '{}'
-  # Force the specified OPs to run on the BPU
-  run_on_bpu: '{}'
-# Compiler parameters group
-compiler_parameters:
-  # Compilation strategy selection
-  compile_mode: 'latency'
-  # Whether to enable debug information for compilation
-  debug: False
-  # Number of cores used when running the model
-  core_num: 1
-  # Optimization level for model compilation
-  optimize_level: 'O3'
-"""
-    output_dir = os.path.realpath(args.output_dir)
-    cal_data_dir = os.path.join(output_dir, 'cal_data_dir')
-    os.makedirs(cal_data_dir, exist_ok=True)
-    enc_dic = enc_session.get_modelmeta().custom_metadata_map
-    enc_onnx_path = os.path.join(output_dir, 'encoder.onnx')
-    enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder')
-    enc_cal_data = ";".join(
-        [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')])
-    ctc_dic = ctc_session.get_modelmeta().custom_metadata_map
-    ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx')
-    ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc')
-    ctc_cal_data = ";".join(
-        [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')])
-    enc_config = template.format(
-        enc_onnx_path, "encoder", enc_log_path,
-        enc_dic['input_name'], enc_dic['input_type'],
-        enc_dic['input_layout_train'], enc_dic['input_shape'],
-        enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'],
-        enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "")
-    ctc_config = template.format(
-        ctc_onnx_path, "ctc", ctc_log_path,
-        ctc_dic['input_name'], ctc_dic['input_type'],
-        ctc_dic['input_layout_train'], ctc_dic['input_shape'],
-        ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'],
-        ctc_cal_data, "default", "", "")
-    with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml:
-        enc_yaml.write(enc_config)
-    with open(output_dir +
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, "w", 
encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] <srcdir> <num-utt> <destdir>" - echo " subset_data_dir.sh [--spk-list <speaker-list-file>] <srcdir> <destdir>" - echo " subset_data_dir.sh [--utt-list <utt-list-file>] <srcdir> <destdir>" - echo "By default, randomly selects <num-utt> utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have <num-utt> utterances" - echo "With --per-spk, selects <num-utt> utterances per speaker, if available." - echo "With --first, selects the first <num-utt> utterances" - echo "With --last, selects the last <num-utt> utterances" - echo "With --shortest, selects the <num-utt> shortest utterances." - echo "With --spk-list, reads the speakers to keep from <speaker-list-file>" - echo "With --utt-list, reads the utterances to keep from <utt-list-file>" - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance.
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
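# A minimal Python rendering of the UTF-8-or-bytestream detection described in
# the comment above, for illustration only (the deleted script below implements
# the same idea in Perl): decode every line strictly as UTF-8 and, on the first
# failure, fall back to treating the input as raw single-byte text.
def utf8_or_bytestream(lines_bytes):
    decoded = []
    for raw in lines_bytes:
        try:
            decoded.append(raw.decode('utf-8', errors='strict'))
        except UnicodeDecodeError:
            return False, lines_bytes   # not UTF-8: keep the raw byte lines
    return True, decoded                # UTF-8/ASCII: use the decoded lines

# e.g. is_utf, lines = utf8_or_bytestream(open('data/train/text', 'rb').readlines())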
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python 
../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - 
char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
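# A minimal sketch of the checkpoint averaging performed by the script below,
# assuming each checkpoint file is a plain state_dict of tensors; the paths in
# the usage line are illustrative.
import torch

def average_state_dicts(paths):
    avg = None
    for p in paths:
        states = torch.load(p, map_location='cpu')
        if avg is None:
            avg = states
        else:
            for k in avg:
                avg[k] += states[k]
    for k in avg:
        # use true_divide (as the script below does) so integer buffers such as
        # BatchNorm counters are divided without in-place integer-division errors
        avg[k] = torch.true_divide(avg[k], len(paths))
    return avg

# e.g. torch.save(average_state_dicts(['10.pt', '11.pt', '12.pt']), 'avg_3.pt')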
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
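# A minimal sketch of the TorchScript export performed by the script below,
# assuming a scriptable torch.nn.Module; the function and file names are
# illustrative placeholders.
import torch

def export_jit(model: torch.nn.Module, output_file: str, quant_file: str = None):
    # script (rather than trace) so data-dependent control flow in the model is kept
    script_model = torch.jit.script(model)
    script_model.save(output_file)
    if quant_file is not None:
        # dynamic int8 quantization of Linear layers, then script and save
        quantized = torch.quantization.quantize_dynamic(
            model, {torch.nn.Linear}, dtype=torch.qint8)
        torch.jit.script(quantized).save(quant_file)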
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
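For consumers of the exported file, the pruning and metadata behaviour described in the NOTE above can be checked outside this script with a short standalone sketch (the model path is hypothetical; `chunk_size` and `left_chunks` are among the keys this exporter writes into `metadata_props`):

```python
# Standalone sketch: list which inputs survived export and read back the
# export-time configuration. Assumes an "encoder.onnx" produced by the exporter.
import onnxruntime

session = onnxruntime.InferenceSession("encoder.onnx")

# Inputs such as `required_cache_size` or `att_mask` may have been folded away,
# so feeds should be restricted to the names reported here.
surviving_inputs = [node.name for node in session.get_inputs()]
print("runtime inputs:", surviving_inputs)

# The exporter stores its arguments as string metadata; recover them here.
meta = session.get_modelmeta().custom_metadata_map
print("chunk_size:", meta.get("chunk_size"), "left_chunks:", meta.get("left_chunks"))
```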
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
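The downsampling bookkeeping referred to above follows `calculate_downsampling_factor`: every stride layer that has already been passed multiplies the factor. A toy illustration with hypothetical stride layers at indices 3 and 7, stride 2 each:

```python
# Toy version of calculate_downsampling_factor (hypothetical configuration).
stride_layer_idx = [3, 7]   # layers after which time is downsampled
stride = [2, 2]             # downsampling factor applied at each of them

def downsampling_factor(i):
    factor = 1
    for idx, stride_idx in enumerate(stride_layer_idx):
        if i > stride_idx:
            factor *= stride[idx]
    return factor

print([downsampling_factor(i) for i in range(10)])
# -> [1, 1, 1, 1, 2, 2, 2, 2, 4, 4]
```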
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
-
-  Example of writing single matrix:
-   kaldi_io.write_mat(filename, mat)
-
-  Example of writing arkfile:
-   with open(ark_file,'w') as f:
-     for key,mat in dict.iteritems():
-       kaldi_io.write_mat(f, mat, key=key)
-  """
-  mat_offset = 0
-  fd = open_or_fd(file_or_fd, mode='wb')
-  if sys.version_info[0] == 3: assert(fd.mode == 'wb')
-  try:
-    if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id),
-    mat_offset = fd.tell()
-    fd.write('\0B'.encode()) # we write binary!
-    # Data-type,
-    if m.dtype == 'float32': fd.write('FM '.encode())
-    elif m.dtype == 'float64': fd.write('DM '.encode())
-    else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype)
-    # Dims,
-    fd.write('\04'.encode())
-    fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows
-    fd.write('\04'.encode())
-    fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols
-    # Data,
-    fd.write(m.tobytes())
-  finally:
-    if fd is not file_or_fd : fd.close()
-  return mat_offset
-
-#################################################
-# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...)
-# Corresponds to: vector<vector<tuple<int,float>>>
-# - outer vector: time axis
-# - inner vector: records at the time
-# - tuple: int = index, float = value
-#
-
-def read_cnet_ark(file_or_fd):
-  """ Alias of function 'read_post_ark()', 'cnet' = confusion network """
-  return read_post_ark(file_or_fd)
-
-def read_post_ark(file_or_fd):
-  """ generator(key,vec<vec<(int,float)>>) = read_post_ark(file)
-   Returns generator of (key,posterior) tuples, read from ark file.
-   file_or_fd : ark, gzipped ark, pipe or opened file descriptor.
-
-   Iterate the ark:
-   for key,post in kaldi_io.read_post_ark(file):
-     ...
-
-   Read ark to a 'dictionary':
-   d = { key:post for key,post in kaldi_io.read_post_ark(file) }
-  """
-  fd = open_or_fd(file_or_fd)
-  try:
-    key = read_key(fd)
-    while key:
-      post = read_post(fd)
-      yield key, post
-      key = read_key(fd)
-  finally:
-    if fd is not file_or_fd: fd.close()
-
-def read_post(file_or_fd):
-  """ [post] = read_post(file_or_fd)
-   Reads single kaldi 'Posterior' in binary format.
-
-   The 'Posterior' is C++ type 'vector<vector<tuple<int,float>>>',
-   the outer-vector is usually time axis, inner-vector are the records
-   at given time, and the tuple is composed of an 'index' (integer)
-   and a 'float-value'. The 'float-value' can represent a probability
-   or any other numeric value.
-
-   Returns vector of vectors of tuples.
-  """
-  fd = open_or_fd(file_or_fd)
-  ans=[]
-  binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag
-  assert(fd.read(1).decode() == '\4'); # int-size
-  outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins)
-
-  # Loop over 'outer-vector',
-  for i in range(outer_vec_size):
-    assert(fd.read(1).decode() == '\4'); # int-size
-    inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin)
-    data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size)
-    assert(data[0]['size_idx'] == 4)
-    assert(data[0]['size_post'] == 4)
-    ans.append(data[['idx','post']].tolist())
-
-  if fd is not file_or_fd: fd.close()
-  return ans
-
-
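As a quick orientation, here is a minimal usage sketch of the ark readers defined above; `feats.ark` and `post.ark` are hypothetical file names, and `kaldi_io` stands for this module imported under that name.

```python
# Minimal sketch, assuming hypothetical ark files and that this module is
# importable as `kaldi_io`; the paths below are placeholders.
import kaldi_io

# Float-matrix ark (e.g. acoustic features): key -> numpy matrix
feats = {key: mat for key, mat in kaldi_io.read_mat_ark('feats.ark')}

# Posterior ark: each value is a list over frames of (index, value) pairs
for key, post in kaldi_io.read_post_ark('post.ark'):
    print(key, len(post), post[0][:3])
```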
-#################################################
-# Kaldi Confusion Network bin begin/end times,
-# (kaldi stores CNs time info separately from the Posterior).
-#
-
-def read_cntime_ark(file_or_fd):
-  """ generator(key,vec<(float,float)>) = read_cntime_ark(file_or_fd)
-   Returns generator of (key,cntime) tuples, read from ark file.
-   file_or_fd : file, gzipped file, pipe or opened file descriptor.
-
-   Iterate the ark:
-   for key,time in kaldi_io.read_cntime_ark(file):
-     ...
-
-   Read ark to a 'dictionary':
-   d = { key:time for key,time in kaldi_io.read_cntime_ark(file) }
-  """
-  fd = open_or_fd(file_or_fd)
-  try:
-    key = read_key(fd)
-    while key:
-      cntime = read_cntime(fd)
-      yield key, cntime
-      key = read_key(fd)
-  finally:
-    if fd is not file_or_fd : fd.close()
-
-def read_cntime(file_or_fd):
-  """ [cntime] = read_cntime(file_or_fd)
-   Reads single kaldi 'Confusion Network time info', in binary format:
-   C++ type: vector<tuple<float,float> >.
-   (begin/end times of bins at the confusion network).
-
-   Binary layout is '<num-bins> <t_beg_1> <t_end_1> <t_beg_2> <t_end_2> ...'
-
-   file_or_fd : file, gzipped file, pipe or opened file descriptor.
-
-   Returns vector of tuples.
-  """
-  fd = open_or_fd(file_or_fd)
-  binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary
-
-  assert(fd.read(1).decode() == '\4'); # int-size
-  vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins)
-
-  data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size)
-  assert(data[0]['size_beg'] == 4)
-  assert(data[0]['size_end'] == 4)
-  ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end),
-
-  if fd is not file_or_fd : fd.close()
-  return ans
-
-
-#################################################
-# Segments related,
-#
-
-# Segments as 'Bool vectors' can be handy,
-# - for 'superposing' the segmentations,
-# - for frame-selection in Speaker-ID experiments,
-def read_segments_as_bool_vec(segments_file):
-  """ [ bool_vec ] = read_segments_as_bool_vec(segments_file)
-   using kaldi 'segments' file for 1 wav, format : '<segment-name> <recording-name> <t-beg> <t-end>'
-   - t-beg, t-end is in seconds,
-   - assumed 100 frames/second,
-  """
-  segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1)
-  # Sanity checks,
-  assert(len(segs) > 0) # empty segmentation is an error,
-  assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file,
-  # Convert time to frame-indexes,
-  start = np.rint([100 * rec[2] for rec in segs]).astype(int)
-  end = np.rint([100 * rec[3] for rec in segs]).astype(int)
-  # Taken from 'read_lab_to_bool_vec', htk.py,
-  frms = np.repeat(np.r_[np.tile([False,True], len(end)), False],
-                   np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0])
-  assert np.sum(end-start) == np.sum(frms)
-  return frms
-
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/processor.py
deleted file mode 100644
index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/processor.py
+++ /dev/null
@@ -1,660 +0,0 @@
-# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
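        # (for the example above, "你" and "好" stay single tokens, while
        #  " IT'S OKAY " is split into BPE pieces by sp.encode_as_pieces below)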
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
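        (drops a random 1..max_t trailing frames, but only when that is less
         than half of the utterance length)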
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
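                   (e.g. max_db = -30 clips every non-zero sample to
                    +/- db2amp(-30) ~= 0.0316; a falsy max_db falls back to 0.997)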
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
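                        (mask_number <= 0 falls back to the default mask; otherwise
                         mask_number random slots in [-100 dB, 0 dB] are drawn via
                         generate_amp_mask, separately for positive and negative samples)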
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
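        # The score below is (matrix_ac + matrix_bd) / sqrt(d_k * group_size):
        # the Transformer-XL terms (a)+(c) and (b)+(d), with d_k scaled by
        # group_size because each grouped head attends over concatenations of
        # group_size frames (see pad4group above).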
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
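        # (e.g. kernel_size=15: causal -> lorder=14 frames of left padding added
        #  in forward; non-causal -> lorder=0 and symmetric padding=(15-1)//2=7)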
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
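            # (a zero-sized (0, 0, 0) tensor keeps the signature Tensor-only for
            #  JIT export; callers treat cache.size(2) == 0 as "no cache")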
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim]
-            new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :]
-            # shape(new_cnn_cache) = [1, batch, outdim, cache_t2]
-            new_cnn_cache = new_cnn_cache.unsqueeze(0)
-
-            # use repeat_interleave to new_att_cache
-            new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2)
-            # padding new_cnn_cache to cnn.lorder for causal convolution
-            new_cnn_cache = F.pad(
-                new_cnn_cache,
-                (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0))
-
-            if i == 0:
-                # record length for the first block as max length
-                max_att_len = new_att_cache.size(2)
-                max_cnn_len = new_cnn_cache.size(3)
-
-            # update real shape of att_cache and cnn_cache
-            r_att_cache.append(new_att_cache[:, :, -max_att_len:, :])
-            r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:])
-
-        if self.normalize_before:
-            xs = self.after_norm(xs)
-
-        # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2),
-        # ? may be larger than cache_t1, it depends on required_cache_size
-        r_att_cache = torch.cat(r_att_cache, dim=0)
-        # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2)
-        r_cnn_cache = torch.cat(r_cnn_cache, dim=0)
-
-        if self.global_chunk_size > 0 and real_len:
-            chunk_real_len = real_len // self.embed.subsampling_rate // \
-                self.calculate_downsampling_factor(self.num_blocks + 1)
-            # Keeping 1 more timestep can mitigate information leakage
-            # from the encoder caused by the padding
-            xs = xs[:, :chunk_real_len + 1, :]
-
-        return xs, r_att_cache, r_cnn_cache
-
-    def forward_chunk_by_chunk(
-        self,
-        xs: torch.Tensor,
-        decoding_chunk_size: int,
-        num_decoding_left_chunks: int = -1,
-        use_onnx=False
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
-        """ Forward input chunk by chunk with chunk_size like a streaming
-            fashion
-
-        Here we should pay special attention to computation cache in the
-        streaming style forward chunk by chunk. Three things should be taken
-        into account for computation in the current network:
-            1. transformer/conformer encoder layers output cache
-            2. convolution in conformer
-            3. convolution in subsampling
-
-        However, we don't implement subsampling cache for:
-            1. We can control subsampling module to output the right result by
-               overlapping input instead of cache left context, even though it
-               wastes some computation, but subsampling only takes a very
-               small fraction of computation in the whole model.
-            2. Typically, there are several convolution layers with subsampling
-               in subsampling module, it is tricky and complicated to do cache
-               with different convolution layers with different subsampling
-               rate.
-            3. Currently, nn.Sequential is used to stack all the convolution
-               layers in subsampling, we need to rewrite it to make it work
-               with cache, which is not preferred.
-        Args:
-            xs (torch.Tensor): (1, max_len, dim)
-            decoding_chunk_size (int): decoding chunk size
-            num_decoding_left_chunks (int):
-            use_onnx (bool): True for simulating ONNX model inference.
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. 
- self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - 
static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
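# A minimal, self-contained sketch of the windowing arithmetic described in
# the note above: streaming decode feeds overlapping input windows to the
# subsampling module instead of caching its state.  The helper name
# `chunk_windows` and the example numbers are illustrative assumptions, not
# code from this repository; the stride / decoding_window formulas follow the
# forward_chunk_by_chunk implementation below.
from typing import List, Tuple


def chunk_windows(num_frames: int, subsampling: int, right_context: int,
                  decoding_chunk_size: int) -> List[Tuple[int, int]]:
    context = right_context + 1                    # current frame included
    stride = subsampling * decoding_chunk_size     # hop between windows
    decoding_window = (decoding_chunk_size - 1) * subsampling + context
    return [(cur, min(cur + decoding_window, num_frames))
            for cur in range(0, num_frames - context + 1, stride)]


# Example: 4x subsampling, right_context=6, chunk_size=16, 200 input frames.
# Consecutive windows overlap by decoding_window - stride = 3 frames, which is
# the extra input that replaces a subsampling cache.
print(chunk_windows(200, 4, 6, 16))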
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
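# A minimal sketch of the sub-block ordering implemented by
# SqueezeformerEncoderLayer.forward above, on its post-LayerNorm path
# (normalize_before=False): self-attention -> FFN -> conv -> FFN, each wrapped
# as x = LayerNorm(x + Dropout(sublayer(x))).  The nn.Identity stand-ins and
# the tensor sizes are illustrative assumptions only.
import torch
import torch.nn as nn

size = 8
x = torch.randn(2, 5, size)                        # (B, T, D)
norms = nn.ModuleList(nn.LayerNorm(size) for _ in range(4))
sublayers = [nn.Identity()] * 4                    # stand-ins for attn / ffn1 / conv / ffn2
dropout = nn.Dropout(0.1)

for norm, sublayer in zip(norms, sublayers):
    x = norm(x + dropout(sublayer(x)))             # residual first, then post-LN
print(x.shape)                                     # torch.Size([2, 5, 8])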
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return padding * 
pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 @@ 
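# A minimal, self-contained sketch of how the stateless predictors above
# (EmbeddingPredictor / ConvPredictor) roll their fixed-size token history
# during step-by-step decoding: init_state() gives a zero history of
# history_size embeddings, each forward_step appends the new token embedding
# and keeps only the most recent history_size entries as the new cache.
# The batch / embedding sizes and the fake embeddings are illustrative
# assumptions only.
import torch

batch, history_size, embed = 1, 2, 4
cache = torch.zeros(batch, history_size, embed)                   # init_state(): all zeros
for step in range(3):
    new_embed = torch.full((batch, 1, embed), float(step + 1))    # fake token embedding
    context = torch.cat((cache, new_embed), dim=1)                # [B, history_size + 1, E]
    cache = context[:, 1:, :]                                     # drop the oldest entry
    print(step, cache[0, :, 0].tolist())                          # [0.0, 1.0] -> [1.0, 2.0] -> [2.0, 3.0]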
-from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - 
assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
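For reference, the `@torch.jit.export` hooks removed just above (`forward_encoder_chunk`, `forward_predictor_step`, `forward_joint_step`) are the pieces a runtime chains together for simple blank-skip transducer greedy decoding. A minimal standalone sketch of that loop follows; the tiny `predictor_step`/`joint` stand-ins are hypothetical toys for illustration, not the WeNet modules.

```python
import torch

# Hypothetical toy stand-ins for the exported predictor/joint steps.
VOCAB, BLANK, HIDDEN = 8, 0, 16
torch.manual_seed(0)
pred_emb = torch.nn.Embedding(VOCAB, HIDDEN)
joint_proj = torch.nn.Linear(2 * HIDDEN, VOCAB)

def predictor_step(token: int) -> torch.Tensor:
    # Stand-in for predictor.forward_step(xs, padding, cache)
    return pred_emb(torch.tensor([token]))                 # (1, HIDDEN)

def joint(enc_t: torch.Tensor, pred: torch.Tensor) -> torch.Tensor:
    # Stand-in for joint(enc_out, pred_out)
    return joint_proj(torch.cat([enc_t, pred], dim=-1))    # (1, VOCAB)

def greedy_rnnt_decode(encoder_out: torch.Tensor, max_symbols_per_step: int = 3):
    """Blank-skip greedy search over a (T, HIDDEN) encoder output."""
    hyp, last_token = [], BLANK
    for t in range(encoder_out.size(0)):
        for _ in range(max_symbols_per_step):              # a few symbols per frame at most
            logits = joint(encoder_out[t:t + 1], predictor_step(last_token))
            token = int(logits.argmax(dim=-1))
            if token == BLANK:                             # blank: advance to the next frame
                break
            hyp.append(token)
            last_token = token                             # non-blank: stay on this frame
    return hyp

print(greedy_rnnt_decode(torch.randn(5, HIDDEN)))
```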
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
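For reference, the `_ctc_prefix_beam_search` removed above tracks two log-probabilities per prefix, blank-ending (`pb`) and non-blank-ending (`pnb`), and merges them with `log_add`. A compact standalone sketch of that update rule, on toy per-frame log-probabilities and without the per-frame top-k prune the real code applies, looks roughly like this:

```python
import math
from collections import defaultdict

def log_add(args):
    # Same idea as wenet.utils.common.log_add
    if all(a == -float('inf') for a in args):
        return -float('inf')
    a_max = max(args)
    return a_max + math.log(sum(math.exp(a - a_max) for a in args))

def ctc_prefix_beam_search(log_probs, beam_size=3, blank=0):
    """log_probs: T x V list of per-frame log-probabilities."""
    cur_hyps = [((), (0.0, -float('inf')))]                 # prefix -> (pb, pnb)
    for frame in log_probs:
        next_hyps = defaultdict(lambda: (-float('inf'), -float('inf')))
        for s, ps in enumerate(frame):
            for prefix, (pb, pnb) in cur_hyps:
                last = prefix[-1] if prefix else None
                if s == blank:                              # blank keeps the prefix
                    n_pb, n_pnb = next_hyps[prefix]
                    next_hyps[prefix] = (log_add([n_pb, pb + ps, pnb + ps]), n_pnb)
                elif s == last:                             # repeated symbol: *ss -> *s and *s-s -> *ss
                    n_pb, n_pnb = next_hyps[prefix]
                    next_hyps[prefix] = (n_pb, log_add([n_pnb, pnb + ps]))
                    n_prefix = prefix + (s,)
                    n_pb, n_pnb = next_hyps[n_prefix]
                    next_hyps[n_prefix] = (n_pb, log_add([n_pnb, pb + ps]))
                else:                                       # new symbol extends the prefix
                    n_prefix = prefix + (s,)
                    n_pb, n_pnb = next_hyps[n_prefix]
                    next_hyps[n_prefix] = (n_pb, log_add([n_pnb, pb + ps, pnb + ps]))
        cur_hyps = sorted(next_hyps.items(),
                          key=lambda x: log_add(list(x[1])), reverse=True)[:beam_size]
    return [(p, log_add(list(s))) for p, s in cur_hyps]

# Toy 2-frame, 3-symbol example
print(ctc_prefix_beam_search([[-0.1, -2.3, -2.3], [-2.3, -0.2, -2.0]]))
```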
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
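The cache handling in the removed attention code depends on `torch.cat`/`torch.split` behaving sensibly on zero-sized tensors, so the same exported graph serves both the first chunk (empty cache) and later chunks. A quick sanity check of that property with toy shapes:

```python
import torch

# First chunk: an empty KV cache with 0 frames along the time axis.
head, d_k = 4, 16
empty_cache = torch.zeros(1, head, 0, 2 * d_k)
k_new = torch.randn(1, head, 8, d_k)
v_new = torch.randn(1, head, 8, d_k)

# Split the cache into its key/value halves (both zero-length in time).
key_cache, value_cache = torch.split(empty_cache, empty_cache.size(-1) // 2, dim=-1)
k = torch.cat([key_cache, k_new], dim=2)   # concatenating a 0-frame cache is a no-op
v = torch.cat([value_cache, v_new], dim=2)
assert torch.equal(k, k_new) and torch.equal(v, v_new)

# The updated cache handed to the next chunk packs K and V back together.
new_cache = torch.cat((k, v), dim=-1)
print(new_cache.shape)  # torch.Size([1, 4, 8, 32])
```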
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
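The removed `ConvolutionModule` follows the layout pointwise conv → GLU → depthwise conv, and in the causal case pads `kernel_size - 1` frames on the left so the time dimension is preserved. A minimal sketch of that path with toy dimensions (omitting the norm, activation, and second pointwise conv):

```python
import torch
from torch import nn

channels, kernel_size, time = 8, 15, 20
x = torch.randn(2, time, channels).transpose(1, 2)           # (batch, channels, time)

pointwise1 = nn.Conv1d(channels, 2 * channels, kernel_size=1)
depthwise = nn.Conv1d(channels, channels, kernel_size,
                      padding=0, groups=channels)             # causal: no built-in padding
lorder = kernel_size - 1                                      # left context a causal conv needs

h = nn.functional.glu(pointwise1(x), dim=1)                   # (batch, channels, time)
h = nn.functional.pad(h, (lorder, 0))                         # pad only on the left
h = depthwise(h)
assert h.size(2) == time                                      # time dimension preserved
print(h.shape)
```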
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/chime4/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/README.md deleted file mode 100644 index 853415bf35645f6effe6bc2ee7b3fdd7854eeabe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Performance Record -# Should be installed ffmpeg , pandas !!! 
-## Conformer Result - -* Feature info: dither + specaug + speed perturb -* Training info: lr 0.0005, warmup_steps 20000 batch size 8, 3 gpu, 30 epochs -* Decoding info: average_num 20 - - - -| decoding mode | test (wer) | -| :--------------------: | :---------: | -| ctc_greedy_search | 16.12% | -| ctc_prefix_beam_search | 16.07% | -| attention | 13.56% | -| attention_rescoring | 14.01% | \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/conf/train_conformer.yaml deleted file mode 100644 index 1e20f58224e4b4307d4dc2c24ef96adf2c30c4a8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/conf/train_conformer.yaml +++ /dev/null @@ -1,78 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 512 # dimension of attention - attention_heads: 8 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 8 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - split_with_space: true - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 40 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'dynamic' # static or dynamic - batch_size: 8 - -grad_clip: 10 -accum_grad: 4 -max_epoch: 30 -log_interval: 200 - -optim: adam -optim_conf: - lr: 0.0005 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 20000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/local/create_scp_text.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/local/create_scp_text.py deleted file mode 100644 index b3d94276e4ca58e3e0f5bf676671cf81b51fec15..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/local/create_scp_text.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import sys -import os -import re -def process(src_str): - punc = '~`!#$%^&*()_+-=|\';":/.,?><~·!@#¥%……&*()——+-=“:’;、。,?》《{}' - return re.sub(r"[{0}]+".format(punc), "", src_str).upper() - -if __name__ == '__main__': - src_dir = sys.argv[1] - tsv_file = src_dir + "/" + sys.argv[2] + ".tsv" - output_dir = sys.argv[3] - for file_path in os.listdir(src_dir + "/clips"): - 
if(os.path.exists(src_dir + "/wavs/" + file_path.split('.')[0] + ".wav")): - continue - t_str = src_dir + "/clips/" + file_path - tt_str = src_dir + "/wavs/" + file_path.split('.')[0] + ".wav" - os.system("ffmpeg -i {0} -ac 1 -ar 16000 -f wav {1}".format(t_str, tt_str)) - import pandas - tsv_content = pandas.read_csv(tsv_file, sep="\t") - path_list = tsv_content["path"] - sentence = tsv_content["sentence"] - client_list = tsv_content["client_id"] - scp_file = open(output_dir + "/wav.scp", "w") - text_file = open(output_dir + "/text", "w") - utt2spk = open(output_dir + "/utt2spk", "w") - for i in range(len(path_list)): - temple_str = path_list[i].split(".")[0] - now_sentence = process(sentence[i]) - wav_file = src_dir + "/wavs/" + temple_str + ".wav" - scp_file.writelines(temple_str + " " + wav_file + "\n") - text_file.writelines(temple_str + " " + now_sentence + "\n") - utt2spk.writelines(temple_str + " " + client_list[i] + "\n") - scp_file.close() - text_file.close() - utt2spk.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/local/download_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/local/download_data.sh deleted file mode 100644 index 1dc1914a59311c426280a4308e5a4d5a476fb6ec..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/local/download_data.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash -if [ $# -le 1 ]; then - echo "Args_Error:Two parameters are required." - exit 1; -fi -download_path=$1 -data_France=$2 -wget -O ${download_path}/tmp.zip https://mozilla-common-voice-datasets.s3.dualstack.us-west-2.amazonaws.com/cv-corpus-8.0-2022-01-19/cv-corpus-8.0-2022-01-19-fr.tar.gz -tar -xvf ${download_path}/tmp.zip -C ${data_France} -rm -rf ${download_path}/tmp.zip \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/local/prepare_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/local/prepare_data.sh deleted file mode 100644 index 5e561a556b32b7482d87a3c387caa03a8b8e6878..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/local/prepare_data.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -if [ $# -le 0 ]; then - echo "Argument should be France src directory, see ../run.sh for example." - exit 1; -fi -dir=`pwd`/data -local=`pwd`/local -src_path=$1 -if [ ! -d ${dir} ]; then - mkdir ${dir} - else - rm -rf ${dir} - mkdir ${dir} -fi - -for x in train dev test; do - if [ ! ${dir}/${x} ]; then - mkdir ${dir}/${x} - else - rm -rf ${dir}/${x} - mkdir ${dir}/${x} - fi -done - -if [ ! -d ${src_path}/wavs ]; then - mkdir ${src_path}/wavs -fi -for x in train dev test; do - python3 ${local}/create_scp_text.py ${src_path} ${x} ${dir}/${x} -done diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. 
-export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/run.sh deleted file mode 100644 index 5ca76d43060d36693cbe156b6d0f80ff4298002b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/run.sh +++ /dev/null @@ -1,244 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2" -# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl -# communication. More details can be found in -# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html -# export NCCL_SOCKET_IFNAME=ens4f1 -export NCCL_DEBUG=INFO -stage=0 # start from 0 if you need to start from data download -stop_stage=2 -# The num of nodes or machines used for multi-machine training -# Default 1 for single machine/node -# NFS will be needed if you want run multi-machine training -num_nodes=1 -# The rank of each node or machine, range from 0 to num_nodes -1 -# The first node/machine sets node_rank 0, the second one sets node_rank 1 -# the third one set node_rank 2, and so on. Default 0 -node_rank=0 -# data -download_path=/root/autodl-tmp -french_data=/root/autodl-tmp/cv-corpus-8.0-2022-01-19 -# path to save preproecssed data -# export data=data -. ./path.sh -. ./tools/parse_options.sh || exit 1 - -nj=16 - -# data_type can be `raw` or `shard`. Typically, raw is used for small dataset, -# `shard` is used for large dataset which is over 1k hours, and `shard` is -# faster on reading data and training. -data_type=raw -num_utts_per_shard=1000 - -train_set=train -# Optional train_config -# 1. conf/train_transformer.yaml: Standard transformer -# 2. conf/train_conformer.yaml: Standard conformer -# 3. conf/train_unified_conformer.yaml: Unified dynamic chunk causal conformer -# 4. conf/train_unified_transformer.yaml: Unified dynamic chunk transformer -# 5. conf/train_conformer_no_pos.yaml: Conformer without relative positional encoding -# 6. conf/train_u2++_conformer.yaml: U2++ conformer -# 7. conf/train_u2++_transformer.yaml: U2++ transformer -train_config=conf/train_conformer.yaml -cmvn=true -dir=exp/conformer -checkpoint= -nbpe=5000 - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=20 -#decode_modes="ctc_greedy_search ctc_prefix_beam_search attention attention_rescoring" -decode_modes="attention attention_rescoring" - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - - echo "stage -1: Data download" - echo "download Dataset!" 
- local/download_data.sh ${download_path} ${french_data} - echo "Finish stage 0" -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - - echo "stage 0: Data preparation" - local/prepare_data.sh ${french_data}/fr - echo "Finish stage 0" -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - echo "stage 1: compute global cmvn" - # compute cmvn - python tools/compute_cmvn_stats.py --num_workers 1 --train_config $train_config \ - --in_scp data/${train_set}/wav.scp \ - --out_cmvn data/${train_set}/global_cmvn - echo "Finish stage 1" -fi - - -bpemode=unigram -dict=data/lang_char_/${train_set}_${bpemode}${nbpe}_units.txt -bpemodel=data/lang_char_/${train_set}_${bpemode}${nbpe} -echo "dictionary: ${dict}" -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - ### Task dependent. You have to check non-linguistic symbols used in the corpus. - echo "stage 2: Dictionary and Json Data Preparation" - mkdir -p data/lang_char_/ - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - # we borrowed these code and scripts which are related bpe from ESPnet. - cut -f 2- -d" " data/${train_set}/text > data/lang_char_/input.txt - tools/spm_train --input=data/lang_char_/input.txt --vocab_size=${nbpe} \ - --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000 - tools/spm_encode --model=${bpemodel}.model --output_format=piece \ - < data/lang_char_/input.txt | \ - tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict} - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # - wc -l ${dict} -fi - - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "stage 3: Prepare data, prepare required format" - for x in dev test ${train_set}; do - if [ $data_type == "shard" ]; then - python tools/make_shard_list.py --num_utts_per_shard $num_utts_per_shard \ - --num_threads 16 data/$x/wav.scp data/$x/text \ - $(realpath data/$x/shards) data/$x/data.list - else - python tools/make_raw_list.py data/$x/wav.scp data/$x/text \ - data/$x/data.list - fi - done - echo "Finish stage 3" -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - mkdir -p $dir - # You have to rm `INIT_FILE` manually when you resume or restart a - # multi-machine training. - INIT_FILE=$dir/ddp_init - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp data/${train_set}/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - - # train.py rewrite $train_config to $dir/train.yaml with model input - # and output dimension, and $dir/train.yaml will be used for inference - # and export. - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. 
- rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --bpe_model $bpemodel.model \ - --train_data data/$train_set/data.list \ - --cv_data data/dev/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 1 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - cmvn_opts= - $cmvn && cmvn_opts="--cmvn data/${train_set}/global_cmvn" - # TODO, Add model average here - mkdir -p $dir/test - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size= - ctc_weight=0.5 - # Polling GPU id begin with index 0 - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - idx=0 - for mode in ${decode_modes}; do - { - { - test_dir=$dir/test_${mode} - mkdir -p $test_dir - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$idx+1]) - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type "raw" \ - --bpe_model $bpemodel.model \ - --test_data data/test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 20 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --result_file $test_dir/text_bpe \ - --ctc_weight $ctc_weight \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - - cut -f2- -d " " $test_dir/text_bpe > $test_dir/text_bpe_value_tmp - cut -f1 -d " " $test_dir/text_bpe > $test_dir/text_bpe_key_tmp - - tools/spm_decode --model=${bpemodel}.model --input_format=piece \ - < $test_dir/text_bpe_value_tmp | sed -e "s/▁/ /g" > $test_dir/text_value - #sed -e "s/▁/ /g" $test_dir/text_bpe_value_tmp > $test_dir/text_value - paste -d " " $test_dir/text_bpe_key_tmp $test_dir/text_value > $test_dir/text - # a raw version wer without refining processs - python tools/compute-wer.py --char=1 --v=1 \ - data/test/text $test_dir/text > $test_dir/wer - } & - - ((idx+=1)) - if [ $idx -eq $num_gpus ]; then - idx=0 - fi - } - done - - wait -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. 
./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. 
./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. - elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in 
range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, 
ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! 
cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. -has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! 
$skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - 
min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. 
- format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - 
parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University 
(author: Daniel Povey)
-# Apache 2.0
-
-# This script operates on a directory, such as in data/train/,
-# that contains some subset of the following files:
-#  feats.scp
-#  wav.scp
-#  vad.scp
-#  spk2utt
-#  utt2spk
-#  text
-#
-# It copies to another directory, possibly adding a specified prefix or a suffix
-# to the utterance and/or speaker names. Note, the recording-ids stay the same.
-#
-
-
-# begin configuration section
-spk_prefix=
-utt_prefix=
-spk_suffix=
-utt_suffix=
-validate_opts=   # should rarely be needed.
-# end configuration section
-
-. utils/parse_options.sh
-
-if [ $# != 2 ]; then
-  echo "Usage: "
-  echo "  $0 [options] <srcdir> <destdir>"
-  echo "e.g.:"
-  echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1"
-  echo "Options"
-  echo "   --spk-prefix=<prefix>     # Prefix for speaker ids, default empty"
-  echo "   --utt-prefix=<prefix>     # Prefix for utterance ids, default empty"
-  echo "   --spk-suffix=<suffix>     # Suffix for speaker ids, default empty"
-  echo "   --utt-suffix=<suffix>     # Suffix for utterance ids, default empty"
-  exit 1;
-fi
-
-
-export LC_ALL=C
-
-srcdir=$1
-destdir=$2
-
-if [ ! -f $srcdir/utt2spk ]; then
-  echo "copy_data_dir.sh: no such file $srcdir/utt2spk"
-  exit 1;
-fi
-
-if [ "$destdir" == "$srcdir" ]; then
-  echo "$0: this script requires <srcdir> and <destdir> to be different."
-  exit 1
-fi
-
-set -e;
-
-mkdir -p $destdir
-
-cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map
-cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map
-
-if [ ! -f $srcdir/utt2uniq ]; then
-  if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then
-    cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq
-  fi
-else
-  cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq
-fi
-
-cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
-  utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk
-
-utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt
-
-if [ -f $srcdir/feats.scp ]; then
-  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp
-fi
-
-if [ -f $srcdir/vad.scp ]; then
-  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp
-fi
-
-if [ -f $srcdir/segments ]; then
-  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments
-  cp $srcdir/wav.scp $destdir
-else # no segments->wav indexed by utt.
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . 
- "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . - " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. 
- } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . $cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. 
-cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', '')) - node 
+= 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n + 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . 
- "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! 
defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. - if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
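For the character-based (non-BPE) branch described in the comments above, each kept word is mapped to its space-separated characters, and a purely alphabetic (English) word gets a leading ▁ when ▁ is part of the unit inventory. The sketch below illustrates only that branch; the unit inventory and word list are toy values, not taken from the recipe.

```python
# Sketch of the char-based branch: alphabetic (English) words get a leading ▁
# when the unit inventory contains it, then every word is split into characters.
# Toy unit inventory and words, for illustration only.
unit_table = {"▁", "你", "好", "o", "k"}

def to_units(word):
    if word.encode("utf8").isalpha() and "▁" in unit_table:
        word = "▁" + word
    return word, " ".join(word)

for w in ["你好", "ok"]:
    word, chars = to_units(w)
    print(word, chars)   # -> "你好 你 好" and "▁ok ▁ o k"
```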
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you 
may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin 
if PY2 else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - 
stack.enter_context(open(output, "w", encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - 
non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = 
feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-
-import os
-import argparse
-import glob
-
-import yaml
-import numpy as np
-import torch
-
-
-def get_args():
-    parser = argparse.ArgumentParser(description='average model')
-    parser.add_argument('--dst_model', required=True, help='averaged model')
-    parser.add_argument('--src_path',
-                        required=True,
-                        help='src model path for average')
-    parser.add_argument('--val_best',
-                        action="store_true",
-                        help='averaged model')
-    parser.add_argument('--num',
-                        default=5,
-                        type=int,
-                        help='nums for averaged model')
-    parser.add_argument('--min_epoch',
-                        default=0,
-                        type=int,
-                        help='min epoch used for averaging model')
-    parser.add_argument('--max_epoch',
-                        default=65536,
-                        type=int,
-                        help='max epoch used for averaging model')
-
-    args = parser.parse_args()
-    print(args)
-    return args
-
-
-def main():
-    args = get_args()
-    checkpoints = []
-    val_scores = []
-    if args.val_best:
-        yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path))
-        for y in yamls:
-            with open(y, 'r') as f:
-                dic_yaml = yaml.load(f, Loader=yaml.FullLoader)
-                loss = dic_yaml['cv_loss']
-                epoch = dic_yaml['epoch']
-                if epoch >= args.min_epoch and epoch <= args.max_epoch:
-                    val_scores += [[epoch, loss]]
-        val_scores = np.array(val_scores)
-        sort_idx = np.argsort(val_scores[:, -1])
-        sorted_val_scores = val_scores[sort_idx][::1]
-        print("best val scores = " + str(sorted_val_scores[:args.num, 1]))
-        print("selected epochs = " +
-              str(sorted_val_scores[:args.num, 0].astype(np.int64)))
-        path_list = [
-            args.src_path + '/{}.pt'.format(int(epoch))
-            for epoch in sorted_val_scores[:args.num, 0]
-        ]
-    else:
-        path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path))
-        path_list = sorted(path_list, key=os.path.getmtime)
-        path_list = path_list[-args.num:]
-    print(path_list)
-    avg = None
-    num = args.num
-    assert num == len(path_list)
-    for path in path_list:
-        print('Processing {}'.format(path))
-        states = torch.load(path, map_location=torch.device('cpu'))
-        if avg is None:
-            avg = states
-        else:
-            for k in avg.keys():
-                avg[k] += states[k]
-    # average
-    for k in avg.keys():
-        if avg[k] is not None:
-            # pytorch 1.6 use true_divide instead of /=
-            avg[k] = torch.true_divide(avg[k], num)
-    print('Saving to {}'.format(args.dst_model))
-    torch.save(avg, args.dst_model)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_jit.py
deleted file mode 100644
index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_jit.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-import argparse
-import os
-
-import torch
-import yaml
-
-from wenet.utils.checkpoint import load_checkpoint
-from wenet.utils.init_model import init_model
-
-
-def get_args():
-    parser = argparse.ArgumentParser(description='export your script model')
-    parser.add_argument('--config', required=True, help='config file')
-    parser.add_argument('--checkpoint', required=True, help='checkpoint model')
-    parser.add_argument('--output_file', default=None, help='output file')
-    parser.add_argument('--output_quant_file',
-                        default=None,
-                        help='output quantized model file')
-    args = parser.parse_args()
-    return args
-
-
-def main():
-    args = get_args()
-    # No need gpu for model export
-    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
-
-    with open(args.config, 'r') as fin:
-        configs = yaml.load(fin, Loader=yaml.FullLoader)
-    model = init_model(configs)
-    print(model)
-
-    load_checkpoint(model, args.checkpoint)
-    # Export jit torch script model
-
-    if args.output_file:
-        script_model = torch.jit.script(model)
-        script_model.save(args.output_file)
-        print('Export model successfully, see {}'.format(args.output_file))
-
-    # Export quantized jit torch script model
-    if args.output_quant_file:
-        quantized_model = torch.quantization.quantize_dynamic(
-            model, {torch.nn.Linear}, dtype=torch.qint8
-        )
-        print(quantized_model)
-        script_quant_model = torch.jit.script(quantized_model)
-        script_quant_model.save(args.output_quant_file)
-        print('Export quantized model successfully, '
-              'see {}'.format(args.output_quant_file))
-
-
-if __name__ == '__main__':
-    main()
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_onnx_bpu.py
deleted file mode 100644
index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_onnx_bpu.py
+++ /dev/null
@@ -1,1019 +0,0 @@
-# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""NOTE(xcsong): Currently, we only support
-1. specific conformer encoder architecture, see:
-    encoder: conformer
-    encoder_conf:
-        activation_type: **must be** relu
-        attention_heads: 2 or 4 or 8 or any number divisible by output_size
-        causal: **must be** true
-        cnn_module_kernel: 1 ~ 7
-        cnn_module_norm: **must be** batch_norm
-        input_layer: **must be** conv2d8
-        linear_units: 1 ~ 2048
-        normalize_before: **must be** true
-        num_blocks: 1 ~ 12
-        output_size: 1 ~ 512
-        pos_enc_layer_type: **must be** no_pos
-        selfattention_layer_type: **must be** selfattn
-        use_cnn_module: **must be** true
-        use_dynamic_chunk: **must be** true
-        use_dynamic_left_chunk: **must be** true
-
-2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
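For reference, the `--fp16` branch at the end of `export_onnx_gpu.py` above can be reproduced on its own with onnxmltools. A minimal sketch, using `encoder.onnx` and `encoder_fp16.onnx` as placeholder paths:

```python
# Minimal sketch of the FP16 conversion performed by the deleted export script;
# paths are placeholders.
import onnxmltools
from onnxmltools.utils.float16_converter import convert_float_to_float16

model = onnxmltools.utils.load_model("encoder.onnx")
model_fp16 = convert_float_to_float16(model)   # cast float32 initializers/ops to float16
onnxmltools.utils.save_model(model_fp16, "encoder_fp16.onnx")
```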
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. 
- Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = 
False, - static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, 
max_time_step] - """ - return padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/search/greedy_search.py +++ /dev/null @@ 
-1,54 +0,0 @@ -from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - 
batch_size = speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
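
The note above explains why `reverse_pad_list` (which relies on `pad_sequence`) cannot be exported to ONNX, so the code that follows rebuilds the reversed, eos-padded hypotheses using only `arange`, `gather` and `where`. A self-contained sketch of that same index-reversal trick; the token values and `eos=2` below are purely illustrative:

```python
import torch

def reverse_padded(r_hyps: torch.Tensor, r_hyps_lens: torch.Tensor, eos: int) -> torch.Tensor:
    """Reverse each padded row using only ONNX-exportable ops."""
    max_len = torch.max(r_hyps_lens)
    index_range = torch.arange(0, max_len, 1, device=r_hyps.device)
    seq_len_expand = r_hyps_lens.unsqueeze(1)
    seq_mask = seq_len_expand > index_range            # True on real tokens
    index = (seq_len_expand - 1) - index_range         # reversed positions per row
    index = index * seq_mask                           # clamp padded positions to column 0
    reversed_hyps = torch.gather(r_hyps, 1, index)
    return torch.where(seq_mask, reversed_hyps, eos)   # overwrite padded slots with eos

hyps = torch.tensor([[1, 2, 3], [9, 8, 4], [2, 0, 0]])
lens = torch.tensor([3, 3, 1])
print(reverse_padded(hyps, lens, eos=2))
# tensor([[3, 2, 1], [4, 8, 9], [2, 2, 2]])
```

Padded positions are first clamped to column 0 (so `gather` stays in range) and then overwritten with `eos`, which is exactly the behaviour the commented examples above describe.
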
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
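
The NOTE above leans on the fact that concatenating and splitting zero-sized tensors is well defined, which lets the ONNX export path always take the cache branch even for the first chunk. A quick standalone check of the two properties it quotes:

```python
import torch

a = torch.ones((1, 2, 0, 4))            # empty cache (cache_t == 0)
b = torch.ones((1, 2, 3, 4))            # current chunk's keys/values
c = torch.cat((a, b), dim=2)            # concatenating an empty cache is a no-op
assert torch.equal(b, c)

kv = torch.cat((b, b), dim=-1)          # packed cache: last dim holds K and V side by side
k_cache, v_cache = torch.split(kv, kv.size(-1) // 2, dim=-1)
assert torch.equal(k_cache, b) and torch.equal(v_cache, b)
```
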
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
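
For reference, the relative-position score removed just above follows the Transformer-XL decomposition: a content term `q_with_bias_u @ k^T` (terms a + c) plus a position term `q_with_bias_v @ p^T` (terms b + d), scaled by `1/sqrt(d_k)`. A shape-only sketch with random tensors; the dimensions below are illustrative, not the model's:

```python
import math
import torch

batch, head, time, d_k = 1, 2, 5, 8
q_with_bias_u = torch.randn(batch, head, time, d_k)   # q + pos_bias_u
q_with_bias_v = torch.randn(batch, head, time, d_k)   # q + pos_bias_v
k = torch.randn(batch, head, time, d_k)               # projected keys
p = torch.randn(batch, head, time, d_k)               # projected positional embeddings

matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1))  # content term (a + c)
matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1))  # position term (b + d)
scores = (matrix_ac + matrix_bd) / math.sqrt(d_k)             # (batch, head, time, time)
attn = torch.softmax(scores, dim=-1)
print(attn.shape)
```
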
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
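
The embedding module removed above builds a fixed sinusoidal table once and then slices it at `offset` for streaming decoding (`position_encoding`). A compact sketch of that table construction and offset slicing; the sizes below are illustrative:

```python
import math
import torch

d_model, max_len = 8, 16
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float32)
                     * -(math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)   # even dims: sine
pe[:, 1::2] = torch.cos(position * div_term)   # odd dims: cosine
pe = pe.unsqueeze(0)                           # (1, max_len, d_model)

offset, size = 4, 3                            # e.g. a later chunk of a stream
pos_emb = pe[:, offset:offset + size]          # (1, size, d_model)
print(pos_emb.shape)
```
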
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
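As a worked example of the window arithmetic in `forward_chunk_by_chunk` above, assuming the usual `Conv2dSubsampling4` values (`subsampling_rate = 4`, `right_context = 6`) and a `decoding_chunk_size` of 16 (these concrete numbers are illustrative, not taken from the diff):

```python
# Streaming window arithmetic (illustrative sketch).
subsampling = 4
right_context = 6
decoding_chunk_size = 16

context = right_context + 1                                          # include current frame -> 7
stride = subsampling * decoding_chunk_size                           # 64 input frames per step
decoding_window = (decoding_chunk_size - 1) * subsampling + context  # 67 input frames per chunk

num_frames = 200
for cur in range(0, num_frames - context + 1, stride):
    end = min(cur + decoding_window, num_frames)
    print(f"feed frames [{cur}, {end}) -> about {decoding_chunk_size} encoder frames")
```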
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
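A small standalone sketch (illustrative only, not part of the diff) of how `LabelSmoothingLoss.forward` above builds the smoothed target distribution with `scatter_`; the real loss masks the padded positions when summing the KL term rather than zeroing the distribution:

```python
import torch

size, smoothing, padding_idx = 3, 0.1, -1
confidence = 1.0 - smoothing
target = torch.tensor([0, 1, 2, padding_idx])          # last position is padding

# off-target mass is spread evenly over the remaining classes
true_dist = torch.full((target.size(0), size), smoothing / (size - 1))
ignore = target == padding_idx
safe_target = target.masked_fill(ignore, 0)             # avoid indexing with -1
true_dist.scatter_(1, safe_target.unsqueeze(1), confidence)
print(true_dist)
# tensor([[0.9000, 0.0500, 0.0500],
#         [0.0500, 0.9000, 0.0500],
#         [0.0500, 0.0500, 0.9000],
#         [0.9000, 0.0500, 0.0500]])  <- padded row; the loss masks it out afterwards
```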
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
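For reference, the output-length arithmetic behind the `Conv2dSubsampling{4,6,8}` classes above can be checked with a short sketch (illustrative only): each unpadded `Conv2d` maps a length `L` to `(L - kernel) // stride + 1`, which is where the `((idim - 1) // 2 - 1) // 2`-style factors in the `Linear` layers come from.

```python
# Illustrative sketch of the subsampled lengths (not part of the diff).
def conv_out(length: int, kernel: int, stride: int) -> int:
    return (length - kernel) // stride + 1

def subsampled_len_4(t: int) -> int:      # two (3, 2) convs -> ~1/4
    return conv_out(conv_out(t, 3, 2), 3, 2)

def subsampled_len_6(t: int) -> int:      # (3, 2) then (5, 3) -> ~1/6
    return conv_out(conv_out(t, 3, 2), 5, 3)

def subsampled_len_8(t: int) -> int:      # three (3, 2) convs -> ~1/8
    return conv_out(conv_out(conv_out(t, 3, 2), 3, 2), 3, 2)

for t in (100, 384):
    print(t, subsampled_len_4(t), subsampled_len_6(t), subsampled_len_8(t))
# 100 24 15 11
# 384 95 63 47
```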
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
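A minimal sketch (not part of the diff) of the statistics-to-CMVN conversion that `_load_json_cmvn` above performs, using toy numbers:

```python
import math

def cmvn_from_stats(mean_stat, var_stat, frame_num, floor=1.0e-20):
    """Turn accumulated sums into per-dimension mean and inverse std-dev."""
    means, istd = [], []
    for m, v in zip(mean_stat, var_stat):
        mean = m / frame_num
        var = max(v / frame_num - mean * mean, floor)   # floor tiny variances
        means.append(mean)
        istd.append(1.0 / math.sqrt(var))
    return means, istd

# Toy stats for a 2-dim feature accumulated over 4 frames.
means, istd = cmvn_from_stats([8.0, 4.0], [20.0, 6.0], 4)
print(means, istd)   # normalization is then (x - mean) * istd
```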
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
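Illustrative usage of two of the CTC helpers above, `remove_duplicates_and_blank` and `log_add` (the function bodies are copied from the file so the sketch is self-contained):

```python
import math

def remove_duplicates_and_blank(hyp):
    new_hyp, cur = [], 0
    while cur < len(hyp):
        if hyp[cur] != 0:                 # drop blanks (id 0)
            new_hyp.append(hyp[cur])
        prev = cur
        while cur < len(hyp) and hyp[cur] == hyp[prev]:
            cur += 1                      # collapse repeats
    return new_hyp

def log_add(args):
    if all(a == -float('inf') for a in args):
        return -float('inf')
    a_max = max(args)                     # subtract the max for stability
    return a_max + math.log(sum(math.exp(a - a_max) for a in args))

print(remove_duplicates_and_blank([0, 3, 3, 0, 0, 5, 5, 5, 0]))  # [3, 5]
print(log_add([math.log(0.1), math.log(0.3)]))                    # ~= log(0.4)
```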
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
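# Note on the sampling below: chunk_size is first drawn uniformly from
# [1, max_len); draws above max_len // 2 are promoted to full context, and the
# remaining draws are folded into [1, 25] via `% 25 + 1`, so dynamic-chunk
# training mixes full-attention batches with short streaming chunks.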
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/commonvoice/fr/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
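# Descriptive note: decay_ratio below runs from 0 to 1 over decay_steps, the
# cosine coefficient runs from 1 down to 0, and the returned value therefore
# anneals smoothly from max_lr at the end of warmup down to min_lr at
# warmup_steps + decay_steps.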
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
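# Descriptive note: the Megatron-style path below passes base_lrs[0] as the
# peak for every parameter group, so all groups share a single linear-warmup
# plus cosine-decay curve instead of scaling each group's own base LR.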
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/README.md deleted file mode 100644 index 82fe2662e934d9d9f498321b1406911217d23d13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Performance Record - -## Conformer Result Bidecoder (large) - - -## Conformer Result - -* Feature info: using fbank feature, cmvn, dither, online speed perturb -* Training info: train_conformer.yaml, kernel size 15, lr 0.004, batch size 12, 8 gpu, acc_grad 1, 50 epochs, dither 0.0 -* Decoding info: ctc_weight 0.5, average_num 10 - - -| decoding mode | test1 | test2 | test3 | -|----------------------------------|------------|------------|------------| -| ctc greedy search | 7.94 | 5.29 | 6.10 | -| ctc prefix beam search | 7.83+ | 5.28 | 6.08 | -| attention decoder | 7.83 | 5.63 | 6.37 | -| attention rescoring | 7.28+ | 4.81 | 5.44 | - -note that "+" means we removed two <0.1s wav files in test1 before decoding. 
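The table above reports error rates in percent on the three test sets; for this Japanese corpus the metric is normally the character error rate. As a reference for how such a figure is derived, here is a minimal, self-contained sketch of CER for one reference/hypothesis pair. It is illustrative only (the strings are invented) and is not the scoring tool used to produce the table.

```python
def cer(ref: str, hyp: str) -> float:
    """Character error rate = edit distance / reference length (illustrative)."""
    r, h = list(ref), list(hyp)
    # Levenshtein distance via dynamic programming.
    d = [[0] * (len(h) + 1) for _ in range(len(r) + 1)]
    for i in range(len(r) + 1):
        d[i][0] = i
    for j in range(len(h) + 1):
        d[0][j] = j
    for i in range(1, len(r) + 1):
        for j in range(1, len(h) + 1):
            cost = 0 if r[i - 1] == h[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1,         # deletion
                          d[i][j - 1] + 1,         # insertion
                          d[i - 1][j - 1] + cost)  # substitution
    return d[len(r)][len(h)] / max(len(r), 1)

# One substitution over a 7-character reference, about 14.3 in percent.
print(100 * cer("今日は良い天気", "今日は良い天机"))
```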
- - - - -## Conformer U2++ Result - - -## Conformer U2 Result - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/conf/train_conformer.yaml deleted file mode 100644 index 461673ed7eea0889243b63df54b5fafb43c1c6f9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,80 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -# dataset related -dataset_conf: - filter_conf: - max_length: 2000 - min_length: 50 - token_max_length: 400 - token_min_length: 1 - min_output_input_ratio: 0.05 - max_output_input_ratio: 10.0 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 12 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 50 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.004 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.0.parse.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.0.parse.py deleted file mode 100644 index d916a2cf030b9338eb8dc698d7f16572a638b2c8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.0.parse.py +++ /dev/null @@ -1,119 +0,0 @@ - -# parse xml files and output simplified version - -import xml.dom.minidom -import os -import sys -import multiprocessing - -def parsexml(afile, outpath): - outfile = os.path.join(outpath, afile.split('/')[-1] + '.simp') - - with open(outfile, 'w') as bw: - domtree = xml.dom.minidom.parse(afile) - collection = domtree.documentElement - ipus = collection.getElementsByTagName('IPU') - - for ipu in ipus: - starttime = 0 - endtime = 0 - if ipu.hasAttribute('IPUStartTime'): - starttime = ipu.getAttribute('IPUStartTime') - if ipu.hasAttribute('IPUEndTime'): - endtime = ipu.getAttribute('IPUEndTime') - - # print('{}\t{}'.format(starttime, endtime)) - # ## original format ### - wlist = list() - plainwlist = list() - pronlist = list() - - # ## pronunciation ### - lemmalist = 
list() # lemma list - dictlemmalist = list() # dict lemma list - for suw in ipu.getElementsByTagName('SUW'): # short unit word - txt = '' - plaintxt = '' - # PhoneticTranscription - prontxt = '' - - if suw.hasAttribute('OrthographicTranscription'): - txt = suw.getAttribute('OrthographicTranscription') - if suw.hasAttribute('PlainOrthographicTranscription'): - plaintxt = suw.getAttribute('PlainOrthographicTranscription') - if suw.hasAttribute('PhoneticTranscription'): - prontxt = suw.getAttribute('PhoneticTranscription') - wlist.append(txt) - plainwlist.append(plaintxt) - pronlist.append(prontxt) - - lemma = '' - dictlemma = '' - - if suw.hasAttribute('SUWLemma'): - lemma = suw.getAttribute('SUWLemma') - if suw.hasAttribute('SUWDictionaryForm'): - dictlemma = suw.getAttribute('SUWDictionaryForm') - lemmalist.append(lemma) - dictlemmalist.append(dictlemma) - txtsent = ' '.join(wlist) - plaintxtsent = ' '.join(plainwlist) - prontxtsent = ' '.join(pronlist) - - lemmasent = ' '.join(lemmalist) - dictlemmasent = ' '.join(dictlemmalist) - outrow = '{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format( - starttime, endtime, txtsent, plaintxtsent, - prontxtsent, lemmasent, dictlemmasent) - bw.write(outrow) - -def procfolder_orig(apath, outpath): - count = 0 - for afile in os.listdir(apath): - if not afile.endswith('.xml'): - continue - afile = os.path.join(apath, afile) - parsexml(afile, outpath) - count += 1 - print('done: {} [{}]'.format(afile, count)) - -def procfolder(apath, outpath): - # count = 0 - fnlist = list() - for afile in os.listdir(apath): - if not afile.endswith('.xml'): - continue - fnlist.append(afile) - # now parallel processing: - nthreads = 16 - for i in range(0, len(fnlist), nthreads): - # fnlist[i, i+16] - pool = multiprocessing.Pool(processes=nthreads) - for j in range(nthreads): - if i + j < len(fnlist): - afile = os.path.join(apath, fnlist[i + j]) - pool.apply_async(parsexml, (afile, outpath)) - pool.close() - pool.join() - print('parallel {} threads done for {} files in total.'.format( - nthreads, len(fnlist))) - -if __name__ == '__main__': - if len(sys.argv) < 3: - print("Usage: {} ".format(sys.argv[0])) - exit(1) - # e.g., csjpath='/workspace/asr/csj/' - csjpath = sys.argv[1] - outcsjpath = sys.argv[2] - - apath = os.path.join(csjpath, 'XML/BaseXML/core') - apath2 = os.path.join(csjpath, 'XML/BaseXML/noncore') - - outapath = os.path.join(outcsjpath, 'xml') - # create the "outapath" dir: - if not os.path.exists(outapath): - os.mkdir(outapath) - - # range over the following two folders: - procfolder(apath, outapath) - procfolder(apath2, outapath) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.1.split_wav.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.1.split_wav.py deleted file mode 100644 index ccdf04e9b5168337fd06509e2999afad57de2904..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.1.split_wav.py +++ /dev/null @@ -1,123 +0,0 @@ -# based on xml.simp -> start_time and end_time -> split using sox - -import os -import sys -import multiprocessing - -import librosa -import soundfile as sf - -# use .simp as the source for .wav file splitting -def wavfn(apath): - wavdict = dict() # key=id, value=full.path of .wav - for awavfn in os.listdir(apath): - fullwavpath = os.path.join(apath, awavfn) - aid = awavfn.replace('.wav', '') - wavdict[aid] = fullwavpath - return wavdict - -def xmlfn(apath): - xmldict = dict() # key=id, 
value=full.path of .xml.simp - for axmlfn in os.listdir(apath): - if not axmlfn.endswith('.xml.simp'): - continue - axmlfn2 = os.path.join(apath, axmlfn) - aid = axmlfn.replace('.xml.simp', '') - # print('obtain id: {}\t{}'.format(axmlfn, aid)) - xmldict[aid] = axmlfn2 - return xmldict - -def ch2to1(f1, outf1): - wav1, _ = librosa.load(f1, sr=16000, mono=False) - if wav1.ndim == 1: - return - wav1mono = librosa.to_mono(wav1) - sf.write(outf1, wav1mono, 16000) - # print('2ch to 1ch, {} -> {}'.format(f1, outf1)) - acmd = 'mv {} {}'.format(outf1, f1) - res = os.system(acmd) - # rename the .1ch file back to the .wav file and - # overwrite the old .wav file which is 2ch - # print(res, acmd) - -def proc1file(fullxmlfn, fullwavfn, outwavpath): - with open(fullxmlfn) as xmlbr: - for axmlline in xmlbr.readlines(): - # start.time end.time ortho plainortho phonetic - axmlline = axmlline.strip() - cols = axmlline.split('\t') - stime = cols[0] - etime = cols[1] - - if len(cols) == 2: - continue # skip - - basename = fullwavfn.split('/')[-1] - - name2 = '{}_{}_{}.wav'.format(basename, stime, etime) - partwavfn = os.path.join(outwavpath, name2) - - dur = float(etime) - float(stime) - acmd = 'sox {} {} trim {} {}'.format(fullwavfn, partwavfn, stime, dur) - res = os.system(acmd) - # print(res, acmd) - - # perform 2ch to 1ch if necessary! - partwavfn1ch = partwavfn + ".1ch.wav" # NOTE must ends with '.wav'! - # otherwise, soundfile.write will give us error report! - ch2to1(partwavfn, partwavfn1ch) - -def procpath(atag, csjpath, xmlsimppath, outwavpath, idset): - # atag = 'core' and 'noncore' - axmlpath = xmlsimppath - awavpath = os.path.join(csjpath, atag) - - xmldict = xmlfn(axmlpath) - wavdict = wavfn(awavpath) - - wavidlist = list(wavdict.keys()) - - # parallel processing - nthreads = 16 - for i in range(0, len(wavidlist), nthreads): - pool = multiprocessing.Pool(processes=nthreads) - for j in range(nthreads): - if i + j < len(wavidlist): - wavid = wavidlist[i + j] - if len(idset) > 0 and wavid not in idset: - # when idset is not empty, then only process the ids - # that are included in idset: - continue - - fullwavfn = wavdict[wavid] - if wavid in xmldict: - fullxmlfn = xmldict[wavid] - pool.apply_async(proc1file, (fullxmlfn, fullwavfn, outwavpath)) - pool.close() - pool.join() - - print('parallel {} threads done for {} files.'.format( - nthreads, - len(wavidlist))) - -if __name__ == '__main__': - if len(sys.argv) < 4: - print( - "Usage: {}".format(sys.argv[0]) + - " [id.list.fn]") - exit(1) - - csjpath = sys.argv[1] - xmlsimppath = sys.argv[2] - outwavpath = sys.argv[3] - idlistfn = sys.argv[4] if len(sys.argv) == 5 else "" - idset = set() - if len(idlistfn) > 0: - with open(idlistfn) as br: - for aline in br.readlines(): - aline = aline.strip() - idset.add(aline) - print(idset) - - for atag in ['core', 'noncore']: - procpath(atag, csjpath, xmlsimppath, outwavpath, idset) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.2.prep.text.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.2.prep.text.py deleted file mode 100644 index 2b132ad9d6155eb48804e918d2fa77996e129c42..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.2.prep.text.py +++ /dev/null @@ -1,154 +0,0 @@ -import os -import sys - -# train test1 test2 test3 - -def readtst(tstfn): - outlist = list() - with open(tstfn) as br: - for aline in br.readlines(): - aline = aline.strip() - 
outlist.append(aline) - return outlist - -def split_train_tests_xml(xmlpath, test1fn, test2fn, test3fn): - test1list = readtst(test1fn) - test2list = readtst(test2fn) - test3list = readtst(test3fn) - - outtrainlist = list() # full path ".xml.simp" files - outt1list = list() # test 1, full path ".xml.simp" files - outt2list = list() - outt3list = list() - - for afile in os.listdir(xmlpath): - if not afile.endswith('.xml.simp'): - continue - afile2 = xmlpath + '/' + afile - aid = afile.split('.')[0] - if aid in test1list: - outt1list.append(afile2) - elif aid in test2list: - outt2list.append(afile2) - elif aid in test3list: - outt3list.append(afile2) - else: - outtrainlist.append(afile2) - - return outtrainlist, outt1list, outt2list, outt3list - -def all_wavs(wavpath): - wavlist = list() - for afile in os.listdir(wavpath): - if not afile.endswith('.wav'): - continue - afile2 = wavpath + '/' + afile - wavlist.append(afile2) - return wavlist - -def gen_text(xmllist, outpath): - # id \t text - # e.g., /workspace/asr/wenet/examples/csj/s0/data/xml/S11M1689.xml.simp - # ID = S11M1689_stime_etime - outtxtfn = os.path.join(outpath, 'text') - with open(outtxtfn, 'w') as bw: - for xmlfn in xmllist: - aid = xmlfn.split('/')[-1] - aid2 = aid.split('.')[0] - - with open(xmlfn) as br: - for aline in br.readlines(): - aline = aline.strip() - # stime \t etime \t text1 \t text2 \t text3 \t text4 \t text5 - cols = aline.split('\t') - # TODO different between "< 7" and "< 4"? strange - # -> use "< 4", DO NOT use "< 7" ! - if len(cols) < 4: - continue - - stime = cols[0] - etime = cols[1] - atxt = cols[3].replace(' ', '') - - afullid = '{}_{}_{}'.format(aid2, stime, etime) - aoutline = '{}\t{}\n'.format(afullid, atxt) - bw.write(aoutline) - -def parse_xml_set(xmllist): - outset = set() - for xml in xmllist: - aid = xml.split('/')[-1] - aid2 = aid.split('.')[0] - outset.add(aid2) - return outset - -def gen_wav_scp(xmllist, wavlist, outpath): - # xmlset = pure id set, alike 'S04F1228' - # can be from train, test1, test2, or test3 - xmlset = parse_xml_set(xmllist) - - outwavscpfn = os.path.join(outpath, 'wav.scp') - with open(outwavscpfn, 'w') as bw: - for wav in wavlist: - # wav is alike "/workspace/asr/wenet/examples/csj/s0/data - # /wav/S04F1228.wav_00458.875_00459.209.wav" - aid = wav.split('/')[-1] - cols = aid.split('_') - - aid2 = cols[0].split('.')[0] - if aid2 not in xmlset: - continue - - stime = cols[1] - etime = cols[2].replace('.wav', '') - - afullid = '{}_{}_{}'.format(aid2, stime, etime) - - wavabspath = os.path.abspath(wav) - aoutline = '{}\t{}\n'.format(afullid, wavabspath) - bw.write(aoutline) - - -def prep_text_wavscp( - xmlpath, wavpath, test1fn, test2fn, test3fn, - outtrainpath, out1path, out2path, out3path): - - trainlist, t1list, t2list, t3list = split_train_tests_xml( - xmlpath, - test1fn, - test2fn, - test3fn) - wavlist = all_wavs(wavpath) - - gen_text(trainlist, outtrainpath) - gen_text(t1list, out1path) - gen_text(t2list, out2path) - gen_text(t3list, out3path) - - gen_wav_scp(trainlist, wavlist, outtrainpath) - gen_wav_scp(t1list, wavlist, out1path) - gen_wav_scp(t2list, wavlist, out2path) - gen_wav_scp(t3list, wavlist, out3path) - -if __name__ == '__main__': - if len(sys.argv) < 10: - print( - "Usage: {}".format(sys.argv[0]) + " " + - " " + - " ") - exit(1) - - xmlpath = sys.argv[1] - wavpath = sys.argv[2] - test1fn = sys.argv[3] - test2fn = sys.argv[4] - test3fn = sys.argv[5] - - outtrainpath = sys.argv[6] - out1path = sys.argv[7] - out2path = sys.argv[8] - out3path = sys.argv[9] - - 
prep_text_wavscp(xmlpath, wavpath, test1fn, - test2fn, test3fn, outtrainpath, - out1path, out2path, out3path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.3.mincut.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.3.mincut.py deleted file mode 100644 index 39e8b8659f722ac430e74c297be67ccf9dd4e818..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.3.mincut.py +++ /dev/null @@ -1,27 +0,0 @@ -import librosa -# import os -import sys - -def mincut(wavscpfn, minsec): - outfn = wavscpfn + "_" + str(minsec) - - with open(outfn, 'w') as bw: - with open(wavscpfn) as br: - for aline in br.readlines(): - aline = aline.strip() - afn = aline.split('\t')[1] - # print(afn) - dur = librosa.get_duration(filename=afn) - if dur >= minsec: - bw.write(aline + '\n') - -# wn.3.mincut.py -if __name__ == '__main__': - if len(sys.argv) < 3: - print('{} '.format(sys.argv[0])) - exit() - - wavscpfn = sys.argv[1] - minsec = float(sys.argv[2]) - - mincut(wavscpfn, minsec) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.4.make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.4.make_raw_list.py deleted file mode 100644 index eb5aac28b0985e60b3597fba8b8bef11a0ec9614..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/csj_tools/wn.4.make_raw_list.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
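The make_raw_list script removed here turns a Kaldi-style wav.scp plus text file (and, optionally, a segments file) into the JSON-lines data.list that WeNet reads. A hedged sketch of the record it writes for a single utterance follows; the key, path, and transcript are invented for illustration and only mirror the id_start_end naming used by the preparation scripts above.

```python
import json

# Hypothetical entries from wav.scp ("key path") and text ("key transcript").
key = "S11M1689_0001.230_0003.450"
wav = "/data/csj/wav/S11M1689.wav_0001.230_0003.450.wav"
txt = "良い天気ですね"

record = dict(key=key, wav=wav, txt=txt)
print(json.dumps(record, ensure_ascii=False))
# {"key": "S11M1689_0001.230_0003.450", "wav": "/data/csj/...", "txt": "良い天気ですね"}
```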
- -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - # assert key in wav_table - if key in wav_table: - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - line = None - else: - # assert key in segments_table - if key in segments_table: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - else: - line = None - if line: - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/2ch.id.list b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/2ch.id.list deleted file mode 100644 index a516ada0503ffcc1203fc6359dbb131854d32e27..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/2ch.id.list +++ /dev/null @@ -1,58 +0,0 @@ -D01F0002 -D01F0003 -D01F0023 -D01F0030 -D01F0046 -D01F0049 -D01F0055 -D01F0057 -D01M0005 -D01M0009 -D01M0012 -D01M0019 -D01M0020 -D01M0042 -D01M0043 -D01M0047 -D02F0015 -D02F0018 -D02F0025 -D02F0027 -D02F0031 -D02F0032 -D02F0033 -D02F0054 -D02M0014 -D02M0016 -D02M0024 -D02M0026 -D02M0028 -D02M0035 -D02M0039 -D02M0051 -D03F0001 -D03F0006 -D03F0008 -D03F0034 -D03F0036 -D03F0040 -D03F0045 -D03F0058 -D03M0004 -D03M0007 -D03M0013 -D03M0017 -D03M0037 -D03M0038 -D03M0048 -D03M0053 -D04F0011 -D04F0022 -D04F0029 -D04F0044 -D04F0050 -D04M0010 -D04M0021 -D04M0041 -D04M0052 -D04M0056 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.1.list b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.1.list deleted file mode 100644 index b3293661df4df5b25580fcf01c53dc52b239c369..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.1.list +++ /dev/null @@ -1,11 +0,0 @@ -A01M0097 -A04M0051 -A04M0121 -A03M0156 -A03M0112 -A01M0110 -A05M0011 -A03M0106 -A01M0137 -A04M0123 - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.123.list b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.123.list deleted file mode 100644 index cedf88f84006a083327a7b0d755c96770f1a11b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.123.list +++ /dev/null @@ -1,33 +0,0 
@@ -A01M0097 -A04M0051 -A04M0121 -A03M0156 -A03M0112 -A01M0110 -A05M0011 -A03M0106 -A01M0137 -A04M0123 - -A01F0063 -A01M0056 -A06F0135 -A02M0012 -A06M0064 -A01M0141 -A01F0034 -A03M0016 -A03F0072 -A01F0001 - -S00F0066 -S00M0213 -S00M0070 -S00M0008 -S01F0105 -S00F0148 -S00F0019 -S00M0112 -S00F0152 -S00M0079 - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.2.list b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.2.list deleted file mode 100644 index 7fd7de849da81debd842da04093584adab69d491..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.2.list +++ /dev/null @@ -1,11 +0,0 @@ -A01F0063 -A01M0056 -A06F0135 -A02M0012 -A06M0064 -A01M0141 -A01F0034 -A03M0016 -A03F0072 -A01F0001 - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.3.list b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.3.list deleted file mode 100644 index c1fec392e41f167eda9d63fb2bbe4a16af17389d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/list_files/test.set.3.list +++ /dev/null @@ -1,11 +0,0 @@ -S00F0066 -S00M0213 -S00M0070 -S00M0008 -S01F0105 -S00F0148 -S00F0019 -S00M0112 -S00F0152 -S00M0079 - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/run.sh deleted file mode 100644 index 39c91e41066348da0d89b6392ea57596327f29a4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/run.sh +++ /dev/null @@ -1,278 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. - -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" - -# 1. xml split by sentences -# 2. wav split by xml.simp's guidance -# 3. generate "text" and "wav.scp" files as required by wenet -# 4. compute cmvn, better wav.len >= 0.1s, otherwise bug happens... -# 5. sentence piece's bpe vocabulary -# 6. make "data.list" files -# 7. 
train -> 50 epochs - -stage=1 # train -> 50 epochs -stop_stage=8 # - -# data -#data_url=www.openslr.org/resources/12 -# TODO use your own data path -datadir=/workspace/asr/csj - -# output wav data dir -wave_data=data # wave file path -# Optional train_config -train_config=conf/train_conformer.yaml -checkpoint= -cmvn=true # cmvn is for mean, variance, frame_number statistics -do_delta=false # not used... - -dir=exp/sp_spec_aug # model's dir (output dir) - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -# maybe you can try to adjust it if you can not get close results as README.md -average_num=10 -decode_modes="attention_rescoring ctc_greedy_search ctc_prefix_beam_search attention" - -. tools/parse_options.sh || exit 1; - -# bpemode (unigram or bpe) -nbpe=4096 # TODO -> you can change this value to 5000, 100000 and so on -bpemode=bpe #unigram # TODO -> you can use unigram and other methods - -set -e # if any line's exex result is not true, bash stops -set -u # show the error line when stops (failed) -set -o pipefail # return value of the whole bash = final line executed's result - -train_set=train -dev_set=dev -recog_set="test1 test2 test3" - -### CSJ data is not free! -# buying URL: https://ccd.ninjal.ac.jp/csj/en/ - -### data preparing - split xml by sentences ### -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - ### I did not check espnet nor kaldi for the pre-processing, - ### I developed my own ways. so, use at your own risks. - echo "stage 1: Data preparation -> xml preprocessing " - echo " -> extract [start.time, end.time, text] from raw xml files" - python ./csj_tools/wn.0.parse.py $datadir ${wave_data} -fi - -in_wav_path=$datadir/WAV -xml_simp_path=${wave_data}/xml -#wav_split_path=${wave_data}/wav.2 -wav_split_path=${wave_data}/wav -mkdir -p ${wav_split_path} - -### data preparing - split wav by xml.simp's guidance ### -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - echo "stage 2: Data preparation -> wav preprocessing " - echo " -> split wav file by xml.simp's [start.time, end.time, text] format" - # in addition, 2ch to 1ch! - - python ./csj_tools/wn.1.split_wav.py ${in_wav_path} ${xml_simp_path} ${wav_split_path} -fi - -### data preparing - generate "text" and "wav.scp" files ### -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "stage 3: prepare text and wav.scp for train/test1/test2/test3 from wav and xml folders" - - t1fn='list_files/test.set.1.list' - t2fn='list_files/test.set.2.list' - t3fn='list_files/test.set.3.list' - - outtrain=${wave_data}/train - outt1=${wave_data}/test1 - outt2=${wave_data}/test2 - outt3=${wave_data}/test3 - - mkdir -p $outtrain - mkdir -p $outt1 - mkdir -p $outt2 - mkdir -p $outt3 - - python ./csj_tools/wn.2.prep.text.py \ - ${xml_simp_path} ${wav_split_path} \ - $t1fn $t2fn $t3fn \ - $outtrain $outt1 $outt2 $outt3 -fi - -minsec=0.1 - -### compute static info: mean, variance, frame_num ### -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - echo "stage 4: Feature Generation" - # TODO if failed, then please make sure your wav files are all >= 0.1s ... 
- - mkdir -p $wave_data/dev - # merge total dev data - for set in test1 test2 test3; do - for f in `ls $wave_data/$set`; do - cat $wave_data/$set/$f >> $wave_data/$dev_set/$f - done - done - - python ./csj_tools/wn.3.mincut.py $wave_data/$train_set/wav.scp $minsec - - tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \ - --in_scp $wave_data/$train_set/wav.scp_$minsec \ - --out_cmvn $wave_data/$train_set/global_cmvn -fi - -### use sentence piece to construct subword vocabulary ### -dict=$wave_data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt -bpemodel=$wave_data/lang_char/${train_set}_${bpemode}${nbpe} -echo "dictionary: ${dict}" -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - ### Task dependent. You have to check non-linguistic symbols used in the corpus. - echo "stage 5: Dictionary and Json Data Preparation" - mkdir -p data/lang_char/ - - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - # we borrowed these code and scripts which are related bpe from ESPnet. - cut -f 2- -d" " $wave_data/${train_set}/text > $wave_data/lang_char/input.txt - tools/spm_train \ - --input=$wave_data/lang_char/input.txt \ - --vocab_size=${nbpe} \ - --model_type=${bpemode} \ - --model_prefix=${bpemodel} \ - --input_sentence_size=100000000 - - tools/spm_encode \ - --model=${bpemodel}.model \ - --output_format=piece < $wave_data/lang_char/input.txt | \ - tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict} - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # - wc -l ${dict} -fi - - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Prepare wenet required data - echo "Prepare data, prepare required format" - for x in $train_set ; do - python csj_tools/wn.4.make_raw_list.py $wave_data/$x/wav.scp_$minsec $wave_data/$x/text \ - $wave_data/$x/data.list - done - for x in $dev_set ${recog_set} ; do - python csj_tools/wn.4.make_raw_list.py $wave_data/$x/wav.scp $wave_data/$x/text \ - $wave_data/$x/data.list - done -fi - -### Training! 
### - -if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then - # Training - mkdir -p $dir - INIT_FILE=$dir/ddp_init - rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - cmvn_opts= - $cmvn && cmvn_opts="--cmvn $wave_data/${train_set}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type raw \ - --symbol_table $dict \ - --train_data $wave_data/$train_set/data.list \ - --cv_data $wave_data/$dev_set/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $num_gpus \ - --ddp.rank $i \ - --ddp.dist_backend $dist_backend \ - --num_workers 1 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -### test model ### - -if [ ${stage} -le 8 ] && [ ${stop_stage} -ge 8 ]; then - # Test model, please specify the model you want to test by --checkpoint - cmvn_opts= - $cmvn && cmvn_opts="--cmvn data/${train_set}/global_cmvn" - mkdir -p $dir/test - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size=-1 - ctc_weight=0.5 - # Polling GPU id begin with index 0 - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - idx=0 - for test in $recog_set; do - for mode in ${decode_modes}; do - { - { - test_dir=$dir/${test}_${mode} - mkdir -p $test_dir - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$idx+1]) - python wenet/bin/recognize.py --gpu $gpu_id \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type raw \ - --test_data $wave_data/$test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --result_file $test_dir/text_bpe \ - --ctc_weight $ctc_weight \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - - cut -f2- -d " " $test_dir/text_bpe > $test_dir/text_bpe_value_tmp - cut -f1 -d " " $test_dir/text_bpe > $test_dir/text_bpe_key_tmp - tools/spm_decode --model=${bpemodel}.model --input_format=piece \ - < $test_dir/text_bpe_value_tmp | sed -e "s/▁/ /g" > $test_dir/text_value_tmp - paste -d " " $test_dir/text_bpe_key_tmp $test_dir/text_value_tmp > $test_dir/text - - python tools/compute-wer.py --char=1 --v=1 \ - $wave_data/$test/text $test_dir/text > $test_dir/wer - } & - - ((idx+=1)) - if [ $idx -eq $num_gpus ]; then - idx=0 - fi - } - done - done - wait -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. 
./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. - elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in 
range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, 
ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! 
cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. -has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! 
$skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - 
min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. 
- format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - 
for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return 
sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . / - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if 
sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - 
print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... 
- else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." 
- parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 
Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. Will be used in lexicon and language model FST compiling. 
-cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n + 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with 
open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 else 
sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Parse command-line options. 
-# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
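perturb_data_dir_speed.sh (below) never resamples audio on disk: it prefixes every utterance and speaker ID with `sp<factor>-` and rewrites `wav.scp` so that `sox ... speed <factor>` is applied on the fly when the entry is read. A minimal Python sketch of that rewrite for the plain-filename case; the IDs, path, and 0.9 factor here are illustrative, not taken from this repo:

```python
# Sketch of the wav.scp / utt2spk rewrite performed by perturb_data_dir_speed.sh
# for the simple "utt-id wav-path" case (no segments file). Values are made up.

def perturb_entries(wav_scp_lines, utt2spk_lines, factor=0.9):
    prefix = f"sp{factor}-"
    new_wav_scp, new_utt2spk = [], []
    for line in wav_scp_lines:
        utt, path = line.split(maxsplit=1)
        # sox changes playback speed on the fly; nothing is written to disk
        new_wav_scp.append(f"{prefix}{utt} sox {path} -t wav - speed {factor} |")
    for line in utt2spk_lines:
        utt, spk = line.split()
        new_utt2spk.append(f"{prefix}{utt} {prefix}{spk}")
    return new_wav_scp, new_utt2spk

wav, u2s = perturb_entries(["utt1 /data/wav/utt1.wav"], ["utt1 spkA"])
print(wav[0])  # sp0.9-utt1 sox /data/wav/utt1.wav -t wav - speed 0.9 |
print(u2s[0])  # sp0.9-utt1 sp0.9-spkA
```

When a `segments` file exists, the script additionally divides each start/end time by the same factor, which is why it rewrites `segments` as well.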
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. 
$(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! -x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, "w", encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) - - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." 
- exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. -[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. 
- awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . 
- "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . 
- "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. (Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. 
e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. 
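The Perl that follows builds the speaker-to-utterance map while keeping speakers in their order of first appearance. The same grouping, sketched in Python purely to illustrate the expected `spk2utt` line format (the IDs are made up):

```python
# Sketch of the utt2spk -> spk2utt inversion done by utt2spk_to_spk2utt.pl.
# Speaker order of first appearance is preserved (dicts keep insertion order).

def utt2spk_to_spk2utt(utt2spk_lines):
    spk2utt = {}
    for line in utt2spk_lines:
        utt, spk = line.split()
        spk2utt.setdefault(spk, []).append(utt)
    return [f"{spk} {' '.join(utts)}" for spk, utts in spk2utt.items()]

print(utt2spk_to_spk2utt(["utt1 spkA", "utt2 spkA", "utt3 spkB"]))
# ['spkA utt1 utt2', 'spkB utt3']
```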
- -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! -d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! 
cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." - exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! 
cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! 
cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. 
-# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. 
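The Perl helpers above accept only a short list of whitespace characters and treat anything else (including a bare CR) as an error. A standalone Python sketch of the same rule, assuming a UTF-8 text file whose first field is an utterance id — this is an illustration, not a replacement shipped with the recipe:

```python
# Sketch only: flag lines containing Unicode whitespace other than TAB, LF, SPACE.
import sys

ALLOWED = {"\t", "\n", " "}  # CR is rejected, mirroring the explicit 0x0D check above

def has_disallowed_whitespace(line: str) -> bool:
    """True if the line contains a whitespace character outside ALLOWED."""
    return any(ch.isspace() and ch not in ALLOWED for ch in line)

def validate(path: str) -> int:
    bad = 0
    with open(path, encoding="utf-8", errors="strict") as f:
        for lineno, line in enumerate(f, 1):
            if has_disallowed_whitespace(line):
                fields = line.split()
                utt_id = fields[0] if fields else "<empty>"
                print(f"{path}:{lineno}: disallowed whitespace in line for {utt_id}",
                      file=sys.stderr)
                bad += 1
    return bad

if __name__ == "__main__":
    sys.exit(1 if validate(sys.argv[1]) else 0)
```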
- -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! 
s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python 
../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - 
labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
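The alignment tool above walks a frame-level CTC alignment and emits `<begin> <end> <token>` rows with a frame shift of 10 ms times the subsampling factor. A simplified standalone sketch of that conversion — unlike the original it spans only the non-blank frames of each token run, rather than folding the preceding blank frames into the token's duration:

```python
# Sketch only: frame-level CTC alignment (0 = blank) -> timed token segments.
from typing import Dict, List, Tuple

def ctc_alignment_to_segments(alignment: List[int],
                              char_dict: Dict[int, str],
                              subsample: int = 4,
                              frame_shift: float = 0.01) -> List[Tuple[float, float, str]]:
    segments = []
    step = frame_shift * subsample   # seconds per encoder frame
    start = 0
    for i, tok in enumerate(alignment):
        if tok != 0 and (i == 0 or alignment[i - 1] != tok):
            start = i                # first frame of a new token run
        run_ends = tok != 0 and (i + 1 == len(alignment) or alignment[i + 1] != tok)
        if run_ends:
            segments.append((start * step, (i + 1) * step, char_dict[tok]))
    return segments

if __name__ == "__main__":
    demo = [0, 0, 5, 5, 0, 7, 0]     # toy alignment with two tokens
    for begin, end, token in ctc_alignment_to_segments(demo, {5: "ni", 7: "hao"}):
        print(f"{begin:.2f} {end:.2f} {token}")
```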
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
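The removed average_model.py boils down to an element-wise mean over N checkpoint state dicts. A minimal standalone sketch of that core step, assuming plain `.pt` files that hold a state_dict (the example paths are hypothetical):

```python
# Sketch only: element-wise average of several PyTorch checkpoints.
from typing import List
import torch

def average_checkpoints(paths: List[str], dst_path: str) -> None:
    avg = None
    for path in paths:
        states = torch.load(path, map_location="cpu")
        if avg is None:
            avg = states
        else:
            for k in avg:
                avg[k] = avg[k] + states[k]
    for k in avg:
        # true_divide returns a float result even for integer buffers,
        # matching the behaviour of the original averaging script
        avg[k] = torch.true_divide(avg[k], len(paths))
    torch.save(avg, dst_path)

if __name__ == "__main__":
    # hypothetical checkpoint paths for illustration
    average_checkpoints(["exp/10.pt", "exp/11.pt", "exp/12.pt"], "exp/avg_3.pt")
```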
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
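# (Editorial sketch, not part of the original scripts: a quick sanity check of the
#  decoding_window formula used by the export entry points. The subsampling rate
#  and right context below are illustrative values for a 4x conv front-end; the
#  real scripts read them from model.encoder.embed.)
chunk_size, subsampling_rate, right_context = 16, 4, 6
decoding_window = (chunk_size - 1) * subsampling_rate + right_context + 1
assert decoding_window == 67  # matches the "hardcode to 67" fallback further down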
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
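# (Editorial sketch, not part of the original script: the same pruning of unused
#  feeds can be done straight from the runtime session instead of the parsed graph:
#      declared = {inp.name for inp in ort_session.get_inputs()}
#      ort_inputs = {k: v for k, v in ort_inputs.items() if k in declared}
#  InferenceSession.get_inputs() is standard onnxruntime API; `ort_session` and
#  `ort_inputs` refer to the variables defined just above.)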
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
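# (Editorial note: the 200/10/20 sizes below are only tracing placeholders; since
#  Stage-3.2 declares dynamic_axes for 'hyps' (NBEST, L), 'hyps_lens' (NBEST) and
#  'encoder_out' (T), the exported decoder.onnx also accepts other sizes at runtime,
#  e.g. hyps of shape (5, 12) together with encoder_out of shape (1, 150, output_size).)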
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
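# (Illustrative arithmetic, not from the original file: with hypothetical sizes
#  xs.size(1) = 16, att_cache.size(3) = 64, factor = 2 and pos_emb.size(1) = 47,
#  the guard 16 + 64 / 2 > 47 holds and att_cache_trunc = 16 + 32 - 47 + 1 = 2,
#  i.e. the two oldest strided cache frames are sliced off before attention.)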
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. 
- self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - 
static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return padding * pad_value + input * 
(1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 @@ -from 
typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - assert batch_size 
== 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
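Reviewer note: stepping back to the `ConvolutionModule.forward` just above, the data flow is pointwise conv (doubling channels), GLU (halving them again), a depthwise conv with either symmetric or causal left-only padding, norm plus activation, and a final pointwise conv. A self-contained sketch of that flow with illustrative sizes (not the wenet module itself):

```python
import torch
from torch import nn

channels, kernel_size, causal = 16, 15, True
batch, time = 2, 50

x = torch.randn(batch, channels, time)            # (batch, channels, time)

pointwise_conv1 = nn.Conv1d(channels, 2 * channels, kernel_size=1)
depthwise_conv = nn.Conv1d(channels, channels, kernel_size,
                           padding=0 if causal else (kernel_size - 1) // 2,
                           groups=channels)
norm = nn.BatchNorm1d(channels)
pointwise_conv2 = nn.Conv1d(channels, channels, kernel_size=1)

lorder = kernel_size - 1 if causal else 0
if lorder > 0:
    # Causal case: pad only on the left so no future frames are consumed.
    x = nn.functional.pad(x, (lorder, 0), 'constant', 0.0)

y = pointwise_conv1(x)                            # (batch, 2*channels, ...)
y = nn.functional.glu(y, dim=1)                   # GLU halves the channels
y = depthwise_conv(y)                             # back to (batch, channels, time)
y = torch.relu(norm(y))
y = pointwise_conv2(y)
print(y.shape)                                    # torch.Size([2, 16, 50])
```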
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
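Reviewer note: as a sanity check of the `(L, B, D)` layout the deleted CTC module above feeds to `torch.nn.CTCLoss`, here is a standalone sketch with random activations (batch size, frame count and vocabulary size are made up):

```python
import torch

batch, max_frames, vocab = 4, 100, 30            # illustrative sizes
hs = torch.randn(batch, max_frames, vocab)       # pretend encoder projections (B, T, V)

# CTCLoss expects log-probs shaped (T, B, V) plus per-utterance lengths.
log_probs = hs.transpose(0, 1).log_softmax(2)    # (T, B, V)
hlens = torch.full((batch,), max_frames, dtype=torch.long)
ys_lens = torch.tensor([12, 9, 15, 7])
ys_pad = torch.randint(1, vocab, (batch, int(ys_lens.max())))  # 0 is reserved for blank

ctc_loss = torch.nn.CTCLoss(reduction="sum")
loss = ctc_loss(log_probs, ys_pad, hlens, ys_lens) / batch     # batch-size average
print(loss.item())
```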
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
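Reviewer note: the deleted `TransformerDecoder.forward` above ANDs a padding mask with a lower-triangular `subsequent_mask`, so each target position attends only to itself and earlier, non-padded tokens. A small sketch of that mask construction using simplified stand-ins for wenet's `make_pad_mask`/`subsequent_mask` helpers (assumed semantics, not the originals):

```python
import torch

ys_in_lens = torch.tensor([5, 3])        # two target sequences of length 5 and 3
maxlen = int(ys_in_lens.max())

# Padding mask: True where a real token exists, shape (B, 1, L).
positions = torch.arange(maxlen).unsqueeze(0)                 # (1, L)
tgt_mask = (positions < ys_in_lens.unsqueeze(1)).unsqueeze(1)

# Subsequent (causal) mask: True at or below the diagonal, shape (1, L, L).
m = torch.tril(torch.ones(maxlen, maxlen, dtype=torch.bool)).unsqueeze(0)

# Combined mask, shape (B, L, L): position i may attend to j only if
# j <= i and position j is not padding.
tgt_mask = tgt_mask & m
print(tgt_mask.shape)     # torch.Size([2, 5, 5])
print(tgt_mask[1].int())  # columns 3 and 4 are masked for the shorter sample
```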
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. 
- normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
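Reviewer note: the `normalize_before` flag documented above selects between pre-norm (LayerNorm inside the residual branch) and post-norm (LayerNorm after the residual add). A compact sketch of the two orderings for a single sub-block, with a plain linear layer standing in for the attention or feed-forward module (illustrative only):

```python
import torch
from torch import nn

size = 8
x = torch.randn(2, 10, size)
norm = nn.LayerNorm(size, eps=1e-5)
sublayer = nn.Linear(size, size)     # stand-in for self-attention / feed-forward
dropout = nn.Dropout(0.1)

# Pre-norm (normalize_before=True): x -> x + Dropout(Sublayer(LayerNorm(x)))
pre = x + dropout(sublayer(norm(x)))

# Post-norm (normalize_before=False): x -> LayerNorm(x + Dropout(Sublayer(x)))
post = norm(x + dropout(sublayer(x)))

print(pre.shape, post.shape)         # both torch.Size([2, 10, 8])
```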
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
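Reviewer note: a standalone sketch of the sinusoidal table the deleted `PositionalEncoding` above precomputes, PE(pos, 2i) = sin(pos / 10000^(2i/d_model)) and PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model)), plus the streaming-style slice done by `position_encoding`. Sizes are toy values, not the model's:

```python
import math
import torch

d_model, max_len = 8, 16                      # toy sizes
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
div_term = torch.exp(
    torch.arange(0, d_model, 2, dtype=torch.float32)
    * -(math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)  # even dimensions
pe[:, 1::2] = torch.cos(position * div_term)  # odd dimensions
pe = pe.unsqueeze(0)                          # (1, max_len, d_model)

# Streaming lookup: take the block starting at `offset` for the next `size` frames.
offset, size = 4, 6
pos_emb = pe[:, offset:offset + size]
print(pos_emb.shape)                          # torch.Size([1, 6, 8])
```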
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
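Reviewer note: the encoder `forward` above derives its `(B, 1, T)` batch padding mask from the utterance lengths via `make_pad_mask` and a `~` inversion. A minimal stand-in showing the assumed semantics (True marks real frames after inversion); this is not wenet's implementation:

```python
import torch

def make_pad_mask(lengths: torch.Tensor, max_len: int) -> torch.Tensor:
    """Simplified stand-in: True where a frame is padding, shape (B, T)."""
    positions = torch.arange(max_len, device=lengths.device).unsqueeze(0)
    return positions >= lengths.unsqueeze(1)

xs_lens = torch.tensor([7, 4, 6])                 # three utterances in the batch
T = 8                                             # padded feature length
masks = ~make_pad_mask(xs_lens, T).unsqueeze(1)   # (B, 1, T), True = real frame
print(masks.int())                                # rows show 7, 4 and 6 leading ones
```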
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
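Reviewer note: the streaming loop in `forward_chunk_by_chunk` above slides an input window of `(chunk_size - 1) * subsampling + context` frames with a stride of `subsampling * chunk_size`. A sketch of only that windowing arithmetic; `subsampling=4` mirrors the 1/4 conv frontend and `right_context=6` is an assumption for illustration:

```python
# Windowing arithmetic only; all concrete numbers are illustrative.
subsampling = 4
right_context = 6                    # assumed value for a conv2d (1/4) frontend
context = right_context + 1          # add the current frame
decoding_chunk_size = 16             # encoder output frames per chunk
num_frames = 200                     # total input feature frames

stride = subsampling * decoding_chunk_size                            # 64 input frames
decoding_window = (decoding_chunk_size - 1) * subsampling + context   # 67 input frames

for cur in range(0, num_frames - context + 1, stride):
    end = min(cur + decoding_window, num_frames)
    print(f"feed input frames [{cur}, {end}) -> one chunk of encoder output")
# feed input frames [0, 67)    -> one chunk of encoder output
# feed input frames [64, 131)  -> one chunk of encoder output
# feed input frames [128, 195) -> one chunk of encoder output
# feed input frames [192, 200) -> one chunk of encoder output
```

Overlapping the windows this way trades a little recomputation for not having to cache the subsampling module's state, which is the design choice explained in the deleted docstring above.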
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
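Reviewer note: the forward body that follows applies the macaron ordering with pre-norm: a half-weighted feed-forward, self-attention, the convolution module, a second half-weighted feed-forward, then a final LayerNorm. A condensed sketch of that ordering with linear layers standing in for the real sub-modules (shapes and sizes are illustrative, not the model's):

```python
import torch
from torch import nn

size = 16
x = torch.randn(2, 20, size)

ff_macaron = nn.Linear(size, size)   # stand-ins for the real sub-modules
self_attn = nn.Linear(size, size)
conv_module = nn.Linear(size, size)
feed_forward = nn.Linear(size, size)
norms = [nn.LayerNorm(size, eps=1e-5) for _ in range(5)]
norm_ff_macaron, norm_mha, norm_conv, norm_ff, norm_final = norms
dropout, ff_scale = nn.Dropout(0.1), 0.5

x = x + ff_scale * dropout(ff_macaron(norm_ff_macaron(x)))   # half-step FFN
x = x + dropout(self_attn(norm_mha(x)))                      # self-attention
x = x + dropout(conv_module(norm_conv(x)))                   # convolution module
x = x + ff_scale * dropout(feed_forward(norm_ff(x)))         # second half-step FFN
x = norm_final(x)                                            # final LayerNorm
print(x.shape)                                               # torch.Size([2, 20, 16])
```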
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. 
- odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/csj/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/README.md deleted file mode 100644 index 6d4c175e362abb0ca45afa177f564a44dec5a60f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/README.md +++ /dev/null @@ -1,70 +0,0 @@ -# GigaSpeech -A Large, modern and evolving dataset for automatic speech recognition. More details about GigaSpeech can be found: https://github.com/SpeechColab/GigaSpeech - -# Performance Record - -## Conformer bidecoder Result - -* Feature info: using fbank feature, dither 1.0, cmvn, 16k -* Training info: conf/train_conformer_bidecoder.yaml, subsample 4, kernel size 31, lr 0.001, batch size 24, 8 gpu, acc_grad 4, 40 epochs -* Decoding info: ctc_weight 0.3, reverse_weight 0.5, average_num 10 -* Git hash: 9a0c270f9f976d7e887f777690e6c358a45a1c27 - -### test set gigaspeech scoring - -| SPKR | # Snt | # Wrd | Corr | Sub | Del | Ins | Err | S.Err | -|-----------|-------|--------|------|-----|-----|-----|------|-------| -| Sum/Avg | 19928 | 390656 | 91.4 | 6.4 | 2.2 | 2.0 | 10.6 | 63.1 | -| Mean | 152.1 | 2982.1 | 91.4 | 6.3 | 2.3 | 1.7 | 10.3 | 63.7 | -| S.D. | 142.2 | 2838.1 | 5.5 | 4.1 | 1.6 | 1.3 | 6.4 | 16.9 | -| Median | 108.0 | 2000.0 | 93.0 | 5.1 | 2.0 | 1.3 | 8.4 | 64.6 | - -### dev set gigaspeech scoring - -| SPKR | # Snt | # Wrd | Corr | Sub | Del | Ins | Err | S.Err | -|-----------|-------|--------|------|-----|-----|-----|------|-------| -| Sum/Avg | 5715 | 127790 | 92.1 | 5.8 | 2.1 | 2.8 | 10.7 | 69.9 | -| Mean | 204.1 | 4563.9 | 92.9 | 5.2 | 1.9 | 2.0 | 9.1 | 69.4 | -| S.D. 
| 269.7 | 4551.6 | 3.4 | 2.7 | 0.9 | 1.7 | 4.6 | 15.9 | -| Median | 151.5 | 3314.0 | 93.8 | 4.4 | 1.6 | 1.7 | 7.9 | 71.6 | - -## Conformer U2++ Result - -* Feature info: using fbank feature, dither 1.0, cmvn, 16k -* Training info: conf/train_u2++_conformer.yaml, subsample 6, kernel size 31, lr 0.001, batch size 28, 8 gpu, acc_grad 1, 50 epochs -* Decoding info: ctc_weight 0.3, reverse_weight 0.5, average_num 10 -* Git hash: 9a0c270f9f976d7e887f777690e6c358a45a1c27 - -### test set gigaspeech scoring, full chunk (non-streaming) - -| SPKR | # Snt | # Wrd | Corr | Sub | Del | Ins | Err | S.Err | -|-----------|-------|--------|------|-----|-----|-----|------|-------| -| Sum/Avg | 19928 | 390656 | 90.7 | 6.8 | 2.6 | 2.0 | 11.3 | 66.9 | -| Mean | 152.1 | 2982.1 | 90.6 | 6.8 | 2.7 | 1.6 | 11.1 | 67.1 | -| S.D. | 142.2 | 2838.1 | 5.8 | 4.3 | 1.9 | 1.2 | 6.7 | 16.5 | -| Median | 108.0 | 2000.0 | 92.1 | 5.7 | 2.2 | 1.3 | 9.0 | 68.9 | - -### test set gigaspeech scoring, chunk 8 (latency range from 0 to 480ms) - -| SPKR | # Snt | # Wrd | Corr | Sub | Del | Ins | Err | S.Err | -|-----------|-------|--------|------|-----|-----|-----|------|-------| -| Sum/Avg | 19928 | 390656 | 89.6 | 7.5 | 2.9 | 2.0 | 12.5 | 70.1 | -| Mean | 152.1 | 2982.1 | 89.3 | 7.6 | 3.1 | 1.7 | 12.4 | 70.6 | -| S.D. | 142.2 | 2838.1 | 6.5 | 4.9 | 2.1 | 1.2 | 7.3 | 15.8 | -| Median | 108.0 | 2000.0 | 91.1 | 6.3 | 2.5 | 1.4 | 10.2 | 72.2 | - -## Conformer Result - -* Feature info: using fbank feature, dither 1.0, no cmvn, 48k -* Training info: conf/train_conformer.yaml, kernel size 31, lr 0.001, batch size 24, 8 gpu, acc_grad 4, 30 epochs -* Decoding info: ctc_weight 0.5, average_num 5 -* Git hash: 9a0c270f9f976d7e887f777690e6c358a45a1c27 - -### test set gigaspeech scoring - -| SPKR | # Snt | # Wrd | Corr | Sub | Del | Ins | Err | S.Err | -|---------------|-------|--------|------|-----|-----|-----|------|-------| -| Sum/Avg | 19930 | 390744 | 90.8 | 6.9 | 2.3 | 2.0 | 11.2 | 65.1 | -| Mean | 152.1 | 2982.8 | 90.6 | 6.9 | 2.5 | 1.7 | 11.1 | 65.7 | -| S.D. 
| 142.3 | 2839.0 | 5.8 | 4.3 | 1.7 | 1.2 | 6.7 | 16.6 | -| Median | 108.0 | 2000.0 | 92.5 | 5.6 | 2.1 | 1.3 | 9.1 | 65.9 | diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/conf/train_conformer.yaml deleted file mode 100644 index ca3eaa5cc27c6cd8bab06162bcdaf44f189ac2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,78 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 512 # dimension of attention - attention_heads: 8 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d6 # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 31 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 8 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 2000 - min_length: 100 - token_max_length: 160 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: false - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 3 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 28 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 30 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 100000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/conf/train_conformer_bidecoder.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/conf/train_conformer_bidecoder.yaml deleted file mode 100644 index 56dc40f70a086e51466e5a3f314197efe7377a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/conf/train_conformer_bidecoder.yaml +++ /dev/null @@ -1,80 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 512 # dimension of attention - attention_heads: 8 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 31 - use_cnn_module: True - cnn_module_norm: 'layer_norm' - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# 
decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 8 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - reverse_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 2000 - min_length: 100 - token_max_length: 160 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: false - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 3 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 20 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 50 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 100000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/conf/train_u2++_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/conf/train_u2++_conformer.yaml deleted file mode 100644 index 4effa4a61687f086707e8ca6466335a3e2879788..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/conf/train_u2++_conformer.yaml +++ /dev/null @@ -1,83 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 512 # dimension of attention - attention_heads: 8 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d6 # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 8 - use_cnn_module: True - causal: true - use_dynamic_chunk: true - cnn_module_norm: 'layer_norm' - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 8 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - reverse_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 2000 - min_length: 100 - token_max_length: 160 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: false - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 3 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 28 - - -grad_clip: 5 -accum_grad: 1 -max_epoch: 50 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 80000 diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/local/extract_meta.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/local/extract_meta.py deleted file mode 100644 index 27803537958703a09dbfcb4bca3088d768eb3216..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/local/extract_meta.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python -# Copyright 2021 Xiaomi Corporation (Author: Yongqing Wang) -# Mobvoi Corporation (Author: Di Wu) - -import sys -import os -import argparse -import json - - -def get_args(): - parser = argparse.ArgumentParser(description=""" - This script is used to process raw json dataset of GigaSpeech, - where the long wav is splitinto segments and - data of wenet format is generated. - """) - parser.add_argument('input_json', help="""Input json file of Gigaspeech""") - parser.add_argument('output_dir', help="""Output dir for prepared data""") - - args = parser.parse_args() - return args - - -def meta_analysis(input_json, output_dir): - input_dir = os.path.dirname(input_json) - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - try: - with open(input_json, 'r') as injson: - json_data = json.load(injson) - except Exception: - sys.exit(f'Failed to load input json file: {input_json}') - else: - if json_data['audios'] is not None: - with open(f'{output_dir}/text', 'w') as utt2text, \ - open(f'{output_dir}/segments', 'w') as segments, \ - open(f'{output_dir}/utt2dur', 'w') as utt2dur, \ - open(f'{output_dir}/wav.scp', 'w') as wavscp, \ - open(f'{output_dir}/utt2subsets', 'w') as utt2subsets, \ - open(f'{output_dir}/reco2dur', 'w') as reco2dur: - for long_audio in json_data['audios']: - try: - long_audio_path = os.path.realpath( - os.path.join(input_dir, long_audio['path'])) - aid = long_audio['aid'] - segments_lists = long_audio['segments'] - duration = long_audio['duration'] - assert (os.path.exists(long_audio_path)) - assert ('opus' == long_audio['format']) - assert (16000 == long_audio['sample_rate']) - except AssertionError: - print(f'Warning: {aid} something is wrong, maybe' - 'AssertionError, skipped') - continue - except Warning: - print(f'Warning: {aid} something is wrong, maybe the' - 'error path: {long_audio_path}, skipped') - continue - else: - wavscp.write(f'{aid}\t{long_audio_path}\n') - reco2dur.write(f'{aid}\t{duration}\n') - for segment_file in segments_lists: - try: - sid = segment_file['sid'] - start_time = segment_file['begin_time'] - end_time = segment_file['end_time'] - dur = end_time - start_time - text = segment_file['text_tn'] - segment_subsets = segment_file["subsets"] - except Warning: - print(f'Warning: {segment_file} something is' - 'wrong, skipped') - continue - else: - utt2text.write(f'{sid}\t{text}\n') - segments.write( - f'{sid}\t{aid}\t{start_time}\t{end_time}\n' - ) - utt2dur.write(f'{sid}\t{dur}\n') - segment_sub_names = " ".join(segment_subsets) - utt2subsets.write( - f'{sid}\t{segment_sub_names}\n') - - -def main(): - args = get_args() - - meta_analysis(args.input_json, args.output_dir) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/local/gigaspeech_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/local/gigaspeech_data_prep.sh deleted file mode 100644 index b639457d7d485874912911945c2dd8e5e9bdc5de..0000000000000000000000000000000000000000 --- 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/local/gigaspeech_data_prep.sh +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Xiaomi Corporation (Author: Yongqing Wang) -# Seasalt AI, Inc (Author: Guoguo Chen) -# Mobvoi Corporation (Author: Di Wu) - -set -e -set -o pipefail - -stage=1 -prefix= -garbage_utterance_tags=" " -punctuation_tags=" " -train_subset=XL - -. ./tools/parse_options.sh || exit 1; - -filter_by_id () { - idlist=$1 - input=$2 - output=$3 - field=1 - if [ $# -eq 4 ]; then - field=$4 - fi - cat $input | perl -se ' - open(F, "<$idlist") || die "Could not open id-list file $idlist"; - while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; - } - while(<>) { - @A = split; - @A > 0 || die "Invalid file line $_"; - @A >= $field || die "Invalid file line $_"; - if ($seen{$A[$field-1]}) { - print $_; - } - }' -- -idlist="$idlist" -field="$field" > $output ||\ - (echo "$0: filter_by_id() error: $input" && exit 1) || exit 1; -} - -subset_data_dir () { - utt_list=$1 - src_dir=$2 - dest_dir=$3 - mkdir -p $dest_dir || exit 1; - # wav.scp text segments utt2dur - filter_by_id $utt_list $src_dir/utt2dur $dest_dir/utt2dur ||\ - (echo "$0: subset_data_dir() error: $src_dir/utt2dur" && exit 1) || exit 1; - filter_by_id $utt_list $src_dir/text $dest_dir/text ||\ - (echo "$0: subset_data_dir() error: $src_dir/text" && exit 1) || exit 1; - filter_by_id $utt_list $src_dir/segments $dest_dir/segments ||\ - (echo "$0: subset_data_dir() error: $src_dir/segments" && exit 1) || exit 1; - awk '{print $2}' $dest_dir/segments | sort | uniq > $dest_dir/reco - filter_by_id $dest_dir/reco $src_dir/wav.scp $dest_dir/wav.scp ||\ - (echo "$0: subset_data_dir() error: $src_dir/wav.scp" && exit 1) || exit 1; - rm -f $dest_dir/reco -} - -if [ $# -ne 2 ]; then - echo "Usage: $0 [options] " - echo " e.g.: $0 --train-subset XL /disk1/audio_data/gigaspeech/ data/" - echo "" - echo "This script takes the GigaSpeech source directory, and prepares the" - echo "WeNet format data directory." - echo " --garbage-utterance-tags # Tags for non-speech." - echo " --prefix # Prefix for output data directory." - echo " --punctuation-tags # Tags for punctuations." - echo " --stage # Processing stage." - echo " --train-subset # Train subset to be created." - exit 1 -fi - -gigaspeech_dir=$1 -data_dir=$2 - -declare -A subsets -subsets=( - [XL]="train_xl" - [L]="train_l" - [M]="train_m" - [S]="train_s" - [XS]="train_xs" - [DEV]="dev" - [TEST]="test") -prefix=${prefix:+${prefix}_} - -corpus_dir=$data_dir/${prefix}corpus/ -if [ $stage -le 1 ]; then - echo "$0: Extract meta into $corpus_dir" - # Sanity check. - [ ! -f $gigaspeech_dir/GigaSpeech.json ] &&\ - echo "$0: Please download $gigaspeech_dir/GigaSpeech.json!" && exit 1; - [ ! -d $gigaspeech_dir/audio ] &&\ - echo "$0: Please download $gigaspeech_dir/audio!" && exit 1; - - [ ! 
-d $corpus_dir ] && mkdir -p $corpus_dir - - # Files to be created: - # wav.scp text segments utt2dur - python3 local/extract_meta.py \ - $gigaspeech_dir/GigaSpeech.json $corpus_dir || exit 1; -fi - -if [ $stage -le 2 ]; then - echo "$0: Filter $corpus_dir/text" - # Delete utterances with garbage meta tags - for tag in $garbage_utterance_tags; do - sed -i "/${tag}/d" $corpus_dir/text - done - - # Delete punctuations in utterances - for tag in $punctuation_tags; do - sed -i "s/${tag}//g" $corpus_dir/text - done - - # Ensure space only appears once and utt is seprated with others by '\t' - sed -i 's/\t/ /g' $corpus_dir/text - sed -i 's/[ ][ ]*/ /g' $corpus_dir/text - sed -i 's/ /\t/' $corpus_dir/text -fi - -if [ $stage -le 3 ]; then - echo "$0: Split data to train, dev and test" - # Split data to train, dev and test. - [ ! -f $corpus_dir/utt2subsets ] &&\ - echo "$0: No such file $corpus_dir/utt2subsets!" && exit 1; - for label in $train_subset DEV TEST; do - if [ ! ${subsets[$label]+set} ]; then - echo "$0: Subset $label is not defined in GigaSpeech.json." && exit 1; - fi - subset=${subsets[$label]} - [ ! -d $data_dir/${prefix}$subset ] && mkdir -p $data_dir/${prefix}$subset - grep "{$label}" $corpus_dir/utt2subsets \ - > $corpus_dir/${prefix}${subset}_utt_list|| exit 1; - subset_data_dir $corpus_dir/${prefix}${subset}_utt_list \ - $corpus_dir $data_dir/${prefix}$subset || exit 1; - done -fi - -echo "$0: Done" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/local/gigaspeech_scoring.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/local/gigaspeech_scoring.py deleted file mode 100644 index e7679f4ab450bf26e472e613ebcaa14b39544187..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/local/gigaspeech_scoring.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -import os -import argparse - -conversational_filler = [ - 'UH', 'UHH', 'UM', 'EH', 'MM', 'HM', 'AH', 'HUH', 'HA', 'ER', 'OOF', 'HEE', - 'ACH', 'EEE', 'EW' -] -unk_tags = ['', ''] -gigaspeech_punctuations = [ - '', '', '', '' -] -gigaspeech_garbage_utterance_tags = ['', '', '', ''] -non_scoring_words = conversational_filler + unk_tags + \ - gigaspeech_punctuations + gigaspeech_garbage_utterance_tags - -def asr_text_post_processing(text): - # 1. convert to uppercase - text = text.upper() - - # 2. remove hyphen - # "E-COMMERCE" -> "E COMMERCE", "STATE-OF-THE-ART" -> "STATE OF THE ART" - text = text.replace('-', ' ') - - # 3. 
remove non-scoring words from evaluation - remaining_words = [] - for word in text.split(): - if word in non_scoring_words: - continue - remaining_words.append(word) - - return ' '.join(remaining_words) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='''This script evaluates GigaSpeech ASR - result via SCTK's tool sclite''') - parser.add_argument( - 'ref', - type=str, - help="sclite's standard transcription(trn) reference file") - parser.add_argument( - 'hyp', - type=str, - help="sclite's standard transcription(trn) hypothesis file") - parser.add_argument('work_dir', type=str, help='working dir') - args = parser.parse_args() - - if not os.path.isdir(args.work_dir): - os.mkdir(args.work_dir) - - REF = os.path.join(args.work_dir, 'REF') - HYP = os.path.join(args.work_dir, 'HYP') - RESULT = os.path.join(args.work_dir, 'RESULT') - - for io in [(args.ref, REF), (args.hyp, HYP)]: - with open(io[0], - 'r', encoding='utf8') as fi, open(io[1], - 'w+', - encoding='utf8') as fo: - for line in fi: - line = line.strip() - if line: - cols = line.split() - text = asr_text_post_processing(' '.join(cols[0:-1])) - uttid_field = cols[-1] - print(F'{text} {uttid_field}', file=fo) - - os.system(F'sclite -r {REF} trn -h {HYP} trn -i swb | tee {RESULT}' - ) # GigaSpeech's uttid comforms to swb diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/run.sh deleted file mode 100644 index dc891bf20350b310580e792009c340db82205abc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/run.sh +++ /dev/null @@ -1,264 +0,0 @@ -#!/bin/bash - -# Copyright 2021 Mobvoi Inc. All Rights Reserved. - -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -stage=0 # start from 0 if you need to start from data preparation -stop_stage=5 - -# The num of nodes or machines used for multi-machine training -# Default 1 for single machine/node -# NFS will be needed if you want run multi-machine training -num_nodes=1 -# The rank of each node or machine, range from 0 to num_nodes -1 -# The first node/machine sets node_rank 0, the second one sets node_rank 1 -# the third one set node_rank 2, and so on. 
Default 0 -node_rank=0 - -# data -# use your own data path, you can contact gigaspeech@speechcolab.orgfor getting data for data information about gigaspeech -# the preparation of gigaspeech dataset for wenet can be found https://github.com/SpeechColab/GigaSpeech -giga_data_dir=/export/expts6/corpus/data/en-asr-data/16k/GigaSpeech -shards_dir=/ssd/nfs06/unified_data/giga_shards -# gigaspeech training set -set=XL -train_set=train_`echo $set |tr 'A-Z' 'a-z'` -train_dev=dev -recog_set=test -# wav data dir -data=data -nj=16 -# Optional train_config -# 1. conf/train_transformer.yaml: Standard Conformer -# 2. conf/train_transformer_bidecoder.yaml: Bidecoder Conformer -train_config=conf/train_conformer_bidecoder.yaml -checkpoint= -cmvn=false -do_delta=false -dir=exp/sp_spec_aug - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -# maybe you can try to adjust it if you can not get close results as README.md -average_num=3 -decode_modes="attention_rescoring ctc_greedy_search" - -. tools/parse_options.sh || exit 1; - -# bpemode (unigram or bpe) -nbpe=5000 -bpemode=unigram - -set -e -set -u -set -o pipefail - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - ### Task dependent. You have to make data the following preparation part by yourself. - ### But you can utilize Kaldi recipes in most cases - echo "stage 0: Data preparation" - local/gigaspeech_data_prep.sh --train-subset $set --stage 1 $giga_data_dir $data - sed -i "s/\t/ /g" $data/${train_set}/text - sed -i "s/\t/ /g" $data/${train_dev}/text - sed -i "s/\t/ /g" $data/${recog_set}/text - for x in $train_dev $train_set $recog_set; do - paste -d " " <(cut -f1 -d " " $data/$x/text) <(cut -f1 -d " " $data/$x/text) > $data/$x/spk2utt - cp $data/$x/spk2utt $data/$x/utt2spk - tools/fix_data_dir.sh $data/$x - done -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - ### Task dependent. You have to design training and dev sets by yourself. - echo "stage 1: generate segmented wav.scp and compute cmvn" - # the format of wav.segment.scp is: - # POD1000000004_S0000000 /GigaSpeech/audio/podcast/P0000/POD1000000004.opus,0.0,10.197 - # 0.0 is start time, 10.197 is end time (second) - for x in $train_dev $train_set $recog_set; do - python tools/segment.py --segments $data/$x/segments \ - --input $data/$x/wav.scp \ - --output $data/$x/wav.segment.scp - done - - # optional - # compute cmvn, perhaps you can sample some segmented examples fron wav.scp for cmvn computation - python tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \ - --in_scp $data/$train_set/wav.segment.scp \ - --out_cmvn $data/$train_set/global_cmvn -fi - - -dict=$data/lang_char_$set/${train_set}_${bpemode}${nbpe}_units.txt -bpemodel=$data/lang_char_$set/${train_set}_${bpemode}${nbpe} -echo "dictionary: ${dict}" -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - ### Task dependent. You have to check non-linguistic symbols used in the corpus. - echo "stage 2: Dictionary and Json Data Preparation" - mkdir -p $data/lang_char_$set/ - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - # we borrowed these code and scripts which are related bpe from ESPnet. 
- cut -f 2- -d" " $data/${train_set}/text > $data/lang_char_$set/input.txt - tools/spm_train --input=$data/lang_char_$set/input.txt --vocab_size=${nbpe} \ - --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000 - tools/spm_encode --model=${bpemodel}.model --output_format=piece \ - < $data/lang_char_$set/input.txt | \ - tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict} - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # - wc -l ${dict} -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "Making shards, please wait..." - RED='\033[0;31m' - NOCOLOR='\033[0m' - echo -e "It requires ${RED}1.2T ${NOCOLOR}space for $shards_dir, please make sure you have enough space" - echo -e "It takes about ${RED}12 ${NOCOLOR}hours with 32 threads" - - for x in $train_dev $train_set $recog_set; do - dst=$shards_dir/$x - mkdir -p $dst - tools/make_shard_list.py --resample 16000 --num_utts_per_shard 1000 \ - --num_threads 32 --segments data/$x/segments \ - data/$x/wav.scp data/$x/text \ - $(realpath $dst) data/$x/data.list - done -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - # Training - mkdir -p $dir - INIT_FILE=$dir/ddp_init - rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="nccl" - # The total number of processes/gpus, so that the master knows - # how many workers to wait for. - # More details about ddp can be found in - # https://pytorch.org/tutorials/intermediate/dist_tuto.html - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp ${feat_dir}/${train_set}/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. 
- rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type "shard" \ - --symbol_table $dict \ - --bpe_model $bpemodel.model \ - --train_data $data/$train_set/data.list \ - --cv_data $data/$train_dev/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 16 \ - $cmvn_opts - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - cmvn_opts= - $cmvn && cmvn_opts="--cmvn data/${train_set}/global_cmvn" - # TODO, Add model average here - mkdir -p $dir/test - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size= - ctc_weight=0.5 - # Polling GPU id begin with index 0 - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - idx=0 - for test in $recog_set; do - for mode in ${decode_modes}; do - { - { - test_dir=$dir/${test}_${mode} - mkdir -p $test_dir - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$idx+1]) - python wenet/bin/recognize.py --gpu $gpu_id \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type "shard" \ - --symbol_table $dict \ - --bpe_model $bpemodel.model \ - --test_data $data/$test/format.data \ - --checkpoint $decode_checkpoint \ - --beam_size 20 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --result_file $test_dir/text_bpe \ - --ctc_weight $ctc_weight \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - - cut -f2- -d " " $test_dir/text_bpe > $test_dir/text_bpe_value_tmp - cut -f1 -d " " $test_dir/text_bpe > $test_dir/text_bpe_key_tmp - - tools/spm_decode --model=${bpemodel}.model --input_format=piece \ - < $test_dir/text_bpe_value_tmp | sed -e "s/▁/ /g" > $test_dir/text_value - paste -d " " $test_dir/text_bpe_key_tmp $test_dir/text_value > $test_dir/text - # a raw version wer without refining processs - python tools/compute-wer.py --char=1 --v=1 \ - $data/$test/text $test_dir/text > $test_dir/wer - - # for gigaspeech scoring - cat $test_dir/text_bpe_key_tmp | sed -e "s/^/(/g" | sed -e "s/$/)/g" > $test_dir/hyp_key - paste -d " " $test_dir/text_value $test_dir/hyp_key > $test_dir/hyp - paste -d " " <(cut -f2- -d " " $data/$test/text) \ - <(cut -f1 -d " " $data/$test/text | \ - sed -e "s/^/(/g" | sed -e "s/$/)/g") > $data/$test/ref - local/gigaspeech_scoring.py $data/$test/ref $test_dir/hyp $test_dir - } & - - ((idx+=1)) - if [ $idx -eq $num_gpus ]; then - idx=0 - fi - } - done - done - wait -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. 
./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. - elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in 
range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, 
ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! 
cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. -has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! 
$skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - 
min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. 
- format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - 
parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University 
(author: Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def 
il(n): - return n + 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not 
use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if 
PY2 else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - 
stack.enter_context(open(output, "w", encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - 
non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = 
feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
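# NOTE(editor): both the BPU exporter above and the CPU exporter below derive the
# number of raw feature frames fed per chunk from the post-subsampling chunk size as
#   decoding_window = (chunk_size - 1) * subsampling_rate + right_context + 1
# A small worked sketch of that arithmetic (not part of the original scripts),
# assuming the usual Conv2dSubsampling4 front-end with subsampling_rate=4 and
# right_context=6; it also explains the hardcoded 67 used further down when
# chunk_size == -1:

def decoding_window(chunk_size, subsampling_rate=4, right_context=6):
    """Input fbank frames needed to produce `chunk_size` encoder frames."""
    return (chunk_size - 1) * subsampling_rate + right_context + 1

print(decoding_window(16))  # 67 -> 67 fbank frames per 16-frame encoder chunk
print(decoding_window(8))   # 35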
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
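# NOTE(editor): the CTC and decoder exports in this file rely on `dynamic_axes`:
# any axis named in that dict stays symbolic in the ONNX graph, so one exported
# file accepts different sequence lengths at run time. A minimal self-contained
# sketch of the idea (TinyHead and tiny_head.onnx are illustrative names, not
# part of the original script):

import onnxruntime
import torch


class TinyHead(torch.nn.Module):
    def forward(self, hidden):
        # stand-in for ctc.log_softmax
        return hidden.log_softmax(dim=-1)


torch.onnx.export(TinyHead().eval(), torch.randn(1, 16, 8), "tiny_head.onnx",
                  opset_version=13, input_names=['hidden'],
                  output_names=['probs'],
                  dynamic_axes={'hidden': {1: 'T'}, 'probs': {1: 'T'}})

sess = onnxruntime.InferenceSession("tiny_head.onnx",
                                    providers=["CPUExecutionProvider"])
for t in (16, 67):  # both lengths run against the same exported graph
    (probs,) = sess.run(None, {'hidden': torch.randn(1, t, 8).numpy()})
    print(probs.shape)  # (1, 16, 8) then (1, 67, 8)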
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
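# NOTE(editor): a small worked example of calculate_downsampling_factor() above
# (not from the original file), assuming an illustrative Efficient Conformer
# config with stride layers at indices 3 and 7, each halving the time axis:

def downsampling_factor(i, stride_layer_idx=(3, 7), stride=(2, 2)):
    """Product of the strides of all stride layers placed before layer i."""
    factor = 1
    for idx, stride_idx in enumerate(stride_layer_idx):
        if i > stride_idx:
            factor *= stride[idx]
    return factor

print([downsampling_factor(i) for i in range(10)])
# [1, 1, 1, 1, 2, 2, 2, 2, 4, 4] -> later layers see 2x/4x fewer cache frames,
# which is why att_cache is sliced with ::factor and truncated below.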
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
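# NOTE(editor): the Decoder wrapper exported here boils attention rescoring down
# to a per-hypothesis score combination followed by an argmax over the beam. A
# tiny numeric sketch of that final step (all scores are made-up
# log-probabilities; ctc_weight and reverse_weight mirror the wrapper's
# arguments, not values from the shipped config):

import torch

ctc_weight, reverse_weight = 0.5, 0.3
att_score   = torch.tensor([[-4.2, -3.9, -5.1]])  # (B=1, beam=3) left-to-right decoder
r_att_score = torch.tensor([[-4.0, -4.4, -5.0]])  # right-to-left decoder
ctc_score   = torch.tensor([[-6.0, -5.2, -7.3]])  # from CTC prefix beam search

score = att_score * (1 - reverse_weight) + reverse_weight * r_att_score
score = score + ctc_weight * ctc_score
best_index = torch.argmax(score, dim=1)
print(score)       # ~ [[-7.14, -6.65, -8.72]]
print(best_index)  # tensor([1]) -> the second hypothesis wins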
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
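# NOTE(editor): the GPU export main() above optionally rewrites the exported
# graphs as FP16 with onnxmltools before saving encoder_fp16.onnx and
# decoder_fp16.onnx. A minimal sketch of that step (assumes an already exported
# encoder.onnx in the working directory; the file names are placeholders):

import onnxmltools
from onnxmltools.utils.float16_converter import convert_float_to_float16

fp32_model = onnxmltools.utils.load_model("encoder.onnx")
fp16_model = convert_float_to_float16(fp32_model)  # cast the graph's fp32 tensors to fp16
onnxmltools.utils.save_model(fp16_model, "encoder_fp16.onnx")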
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. 
- Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
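A quick way to see why the `Conv2dSubsampling2` front end removed earlier in this hunk projects from `odim * ((idim - 1) // 2)` features: a single 3x3, stride-2 conv with no padding maps `(time, idim)` inputs to roughly `((time - 1) // 2, (idim - 1) // 2)`. The sketch below is illustrative only; the sizes 80/256/101 are assumptions, not values from this diff.

```python
import torch

# Assumed sizes for illustration; mirrors the conv stack in Conv2dSubsampling2 above.
idim, odim, t = 80, 256, 101
conv = torch.nn.Sequential(torch.nn.Conv2d(1, odim, 3, 2), torch.nn.ReLU())

x = torch.randn(1, 1, t, idim)            # (batch, channel=1, time, feat)
b, c, t_sub, f = conv(x).size()
assert t_sub == (t - 1) // 2              # time is roughly halved
assert f == (idim - 1) // 2               # feature dim feeding Linear(odim * f, odim)
```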
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
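The `NOTE(xcsong)` block above leans on the fact that concatenating and splitting zero-shaped tensors is well defined, which lets the ONNX and JIT paths share one cache-handling branch. Below is a standalone sketch of the `(1, head, cache_t, d_k * 2)` cache layout it describes; the sizes are assumptions for illustration, not values from this diff.

```python
import torch

head, d_k, chunk = 4, 64, 16
k = torch.randn(1, head, chunk, d_k)
v = torch.randn(1, head, chunk, d_k)

# First chunk: an empty cache with the right trailing dims behaves as a no-op.
cache = torch.zeros(1, head, 0, d_k * 2)
key_cache, value_cache = torch.split(cache, cache.size(-1) // 2, dim=-1)
k2 = torch.cat([key_cache, k], dim=2)
v2 = torch.cat([value_cache, v], dim=2)
assert torch.equal(k2, k) and torch.equal(v2, v)

# Keys and values are stored concatenated on the last dim for the next chunk.
new_cache = torch.cat((k2, v2), dim=-1)    # (1, head, chunk, d_k * 2)
```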
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
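For reference while reading the score computation above: the Transformer-XL style scores are `((q + u) k^T + (q + v) p^T) / sqrt(d_k)`, all shaped `(batch, head, time1, time2)`. A shape-only sketch with assumed sizes (not taken from this diff):

```python
import math
import torch

batch, head, d_k, time1, time2 = 1, 4, 64, 16, 48     # time2 = cache_t + time1
q = torch.randn(batch, time1, head, d_k)              # layout before the bias add, as above
k = torch.randn(batch, head, time2, d_k)
p = torch.randn(batch, head, time2, d_k)              # projected positional embedding
u = torch.randn(head, d_k)                            # pos_bias_u
v = torch.randn(head, d_k)                            # pos_bias_v

q_with_bias_u = (q + u).transpose(1, 2)               # (batch, head, time1, d_k)
q_with_bias_v = (q + v).transpose(1, 2)
matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1))   # content terms
matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1))   # position terms
scores = (matrix_ac + matrix_bd) / math.sqrt(d_k)
assert scores.shape == (batch, head, time1, time2)
```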
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
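The `lorder` logic above is the usual causal-convolution trick: zero-pad the left by `kernel_size - 1` on the first chunk, or prepend the cached left context on later chunks, so a depthwise conv with `padding=0` keeps the time length and never looks at future frames. A minimal sketch with assumed sizes (illustrative, not part of this diff):

```python
import torch
import torch.nn as nn

channels, kernel_size, t = 8, 15, 20
lorder = kernel_size - 1
conv = nn.Conv1d(channels, channels, kernel_size, padding=0, groups=channels)

x = torch.randn(1, channels, t)
y = conv(nn.functional.pad(x, (lorder, 0)))          # first chunk: zero left context
assert y.shape == (1, channels, t)                   # time length preserved

cache = x[:, :, -lorder:]                            # keep lorder frames as left context
x_next = torch.randn(1, channels, t)
y_next = conv(torch.cat((cache, x_next), dim=2))     # later chunk: real left context
assert y_next.shape == (1, channels, t)
```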
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = 
False, - static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
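Concretely, the overlapping-window scheme motivated by points 1-3 above trades a little recomputation in the subsampling front end for a much simpler cache. With assumed values (subsampling rate 4 and right context 6, matching the two stacked stride-2 convs elsewhere in this diff, and a chunk size of 16), the window arithmetic works out as follows:

```python
# Illustrative window arithmetic for chunk-by-chunk decoding; the numbers are assumptions.
subsampling, right_context = 4, 6
decoding_chunk_size = 16
context = right_context + 1                                      # add the current frame
stride = subsampling * decoding_chunk_size                       # advance 64 input frames per step
decoding_window = (decoding_chunk_size - 1) * subsampling + context   # feed 67 frames per step

num_frames = 200
chunk_starts = list(range(0, num_frames - context + 1, stride))  # [0, 64, 128, 192]
overlap = decoding_window - stride                               # 3 frames recomputed each step
print(stride, decoding_window, chunk_starts, overlap)
```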
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - 
""" - return padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ 
-1,54 +0,0 @@ -from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - 
batch_size = speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
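Before the next removed file, a small numeric sketch of how `transducer_attention_rescoring` above combines scores per hypothesis: the attention decoder log-probabilities (including the EOS step) are scaled by `attn_weight`, the search score by `ctc_weight`, and the negated RNN-T loss by `transducer_weight`, and the hypothesis with the highest combined score wins. All tensors and numbers below are invented for illustration, and the right-to-left `reverse_weight` blend is left out for brevity.

```python
import torch

torch.manual_seed(0)
beam_size, max_len, vocab, eos = 3, 4, 6, 5      # made-up dimensions
hyps = [[1, 2], [1, 3, 2], [4]]                  # fake beam of token sequences
decoder_out = torch.log_softmax(torch.randn(beam_size, max_len + 1, vocab), dim=-1)
beam_score = [-1.2, -0.9, -2.5]                  # scores from the search pass
td_score = [-3.0, -2.4, -4.1]                    # negated RNN-T losses
attn_weight, ctc_weight, transducer_weight = 0.5, 0.3, 0.2

best_score, best_index = -float("inf"), 0
for i, hyp in enumerate(hyps):
    # Sum the decoder log-probs along the hypothesis, then add the EOS step.
    score = sum(decoder_out[i][j][w].item() for j, w in enumerate(hyp))
    score += decoder_out[i][len(hyp)][eos].item()
    score = (score * attn_weight
             + beam_score[i] * ctc_weight
             + td_score[i] * transducer_weight)
    if score > best_score:
        best_score, best_index = score, i
print(best_index, best_score)
```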
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
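For reference, the `_ctc_prefix_beam_search` removed a little further up keeps two log-probabilities per prefix (prefix ending in blank vs. ending in a non-blank) and merges competing extensions with `log_add`. A minimal standalone sketch of that recursion, assuming `log_probs` is a plain (T, vocab) table of per-frame log-probabilities and blank has id 0 as in the removed code; the function and argument names here are illustrative only:

```python
import math
from collections import defaultdict

def log_add(args):
    """log(sum(exp(a) for a in args)), guarded against all -inf inputs."""
    if all(a == -float('inf') for a in args):
        return -float('inf')
    m = max(args)
    return m + math.log(sum(math.exp(a - m) for a in args))

def ctc_prefix_beam_search(log_probs, beam_size=10, blank=0):
    # prefix -> (log P(prefix, ends in blank), log P(prefix, ends in non-blank))
    cur_hyps = [(tuple(), (0.0, -float('inf')))]
    for frame in log_probs:
        next_hyps = defaultdict(lambda: (-float('inf'), -float('inf')))
        # first prune: only consider the top-k symbols of this frame
        top_k = sorted(range(len(frame)), key=lambda i: frame[i], reverse=True)[:beam_size]
        for s in top_k:
            ps = frame[s]
            for prefix, (pb, pnb) in cur_hyps:
                last = prefix[-1] if prefix else None
                if s == blank:
                    n_pb, n_pnb = next_hyps[prefix]
                    next_hyps[prefix] = (log_add([n_pb, pb + ps, pnb + ps]), n_pnb)
                elif s == last:
                    # repeated symbol: either merge into the same prefix (*a + a -> *a) ...
                    n_pb, n_pnb = next_hyps[prefix]
                    next_hyps[prefix] = (n_pb, log_add([n_pnb, pnb + ps]))
                    # ... or start a new occurrence across a blank (*a- + a -> *aa)
                    n_prefix = prefix + (s,)
                    n_pb, n_pnb = next_hyps[n_prefix]
                    next_hyps[n_prefix] = (n_pb, log_add([n_pnb, pb + ps]))
                else:
                    n_prefix = prefix + (s,)
                    n_pb, n_pnb = next_hyps[n_prefix]
                    next_hyps[n_prefix] = (n_pb, log_add([n_pnb, pb + ps, pnb + ps]))
        # second prune: keep the best beam_size prefixes by total score
        cur_hyps = sorted(next_hyps.items(),
                          key=lambda x: log_add(list(x[1])), reverse=True)[:beam_size]
    return [(prefix, log_add(list(pair))) for prefix, pair in cur_hyps]
```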
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
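The rescoring arithmetic used by `attention_rescoring` above (and, with extra lattice terms, by `hlg_rescore`) reduces to a small per-hypothesis formula. A hedged sketch, assuming `decoder_out` and `r_decoder_out` are the (max_len + 1, vocab) log-softmax outputs for one hypothesis and are indexable like tensors or arrays; the helper name and argument layout are illustrative, not part of the removed code:

```python
def rescore_hypothesis(decoder_out, r_decoder_out, hyp, ctc_score, eos,
                       reverse_weight=0.0, ctc_weight=0.5):
    """Combine attention-decoder and CTC scores for one n-best hypothesis.

    hyp is the token id list without sos/eos; ctc_score is its CTC prefix score.
    """
    # left-to-right decoder: log-prob of every token, plus the final eos
    score = sum(decoder_out[j, w] for j, w in enumerate(hyp))
    score = score + decoder_out[len(hyp), eos]
    if reverse_weight > 0:
        # right-to-left decoder reads the hypothesis backwards
        r_score = sum(r_decoder_out[len(hyp) - j - 1, w] for j, w in enumerate(hyp))
        r_score = r_score + r_decoder_out[len(hyp), eos]
        score = score * (1 - reverse_weight) + r_score * reverse_weight
    # finally interpolate with the CTC prefix score
    return score + ctc_weight * ctc_score
```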
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
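A caller-side sketch of how the exported `forward_encoder_chunk` interface described above is typically driven, chunk by chunk. The framing constants assume the usual Conv2d subsampling (factor 4, right context 6, hence `context = 7`); those constants and the driver function itself are assumptions for illustration, not part of the removed file:

```python
import torch

@torch.no_grad()
def stream_encode(model, feats, chunk_size=16, num_left_chunks=4,
                  subsampling=4, context=7):
    """Feed (1, T, feat_dim) features through forward_encoder_chunk in a streaming loop."""
    stride = subsampling * chunk_size                   # raw frames consumed per step
    window = (chunk_size - 1) * subsampling + context   # raw frames fed per step
    required_cache_size = chunk_size * num_left_chunks
    att_cache = torch.zeros(0, 0, 0, 0)                 # empty caches for the first chunk
    cnn_cache = torch.zeros(0, 0, 0, 0)
    offset, outputs = 0, []
    for cur in range(0, feats.size(1) - context + 1, stride):
        chunk = feats[:, cur:cur + window, :]
        y, att_cache, cnn_cache = model.forward_encoder_chunk(
            chunk, offset, required_cache_size, att_cache, cnn_cache)
        outputs.append(y)
        offset += y.size(1)
    return torch.cat(outputs, dim=1)                    # (1, total encoder frames, dim)
```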
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
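Stripped of heads, masks and the relative-position terms, the key/value cache handling that both removed attention classes share is just a concat-before / split-after around the attention product. A minimal sketch; the helper name is illustrative, and masking and dropout are omitted:

```python
import torch

def attend_with_cache(q, k, v, cache):
    """q, k, v: (batch, head, time, d_k); cache: (batch, head, cache_t, 2 * d_k),
    or an empty (0, 0, 0, 0) tensor for the first chunk."""
    if cache.size(0) > 0:
        key_cache, value_cache = torch.split(cache, cache.size(-1) // 2, dim=-1)
        k = torch.cat([key_cache, k], dim=2)     # prepend cached keys along time
        v = torch.cat([value_cache, v], dim=2)
    new_cache = torch.cat((k, v), dim=-1)        # stored by the caller for the next chunk
    scores = torch.matmul(q, k.transpose(-2, -1)) / (q.size(-1) ** 0.5)
    attn = torch.softmax(scores, dim=-1)
    return torch.matmul(attn, v), new_cache
```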
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
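The causal variant described above (padding = 0, `lorder = kernel_size - 1`) gets its left context either from zero padding on the first chunk or from a cache holding the previous chunk's last `lorder` frames. A condensed sketch of just that step, with an illustrative helper name; the real module also applies the GLU, the norm and the second pointwise conv:

```python
import torch
from torch import nn

def causal_depthwise_step(x, depthwise_conv, lorder, cache):
    """x: (batch, channels, time); cache: (batch, channels, cache_t) or a (0, 0, 0) tensor."""
    if cache.size(2) == 0:
        # first chunk: fake the missing left context with zeros
        x = nn.functional.pad(x, (lorder, 0), 'constant', 0.0)
    else:
        x = torch.cat((cache, x), dim=2)   # real left context from the previous chunk
    new_cache = x[:, :, -lorder:]          # keep the rightmost lorder frames for next time
    return depthwise_conv(x), new_cache
```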
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
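The `normalize_before` / `concat_after` wiring documented above repeats three times inside the removed layer (self-attention, source attention, feed-forward). Reduced to one residual sub-block, its shape looks roughly like the wrapper below, which is a condensed illustration and not a class from the repo:

```python
from torch import nn

class PreNormSublayer(nn.Module):
    """One residual sub-block in the pre-norm (normalize_before=True) convention."""

    def __init__(self, size, sublayer, dropout_rate=0.1, normalize_before=True):
        super().__init__()
        self.norm = nn.LayerNorm(size, eps=1e-5)
        self.sublayer = sublayer                 # e.g. an attention or feed-forward module
        self.dropout = nn.Dropout(dropout_rate)
        self.normalize_before = normalize_before

    def forward(self, x):
        residual = x
        if self.normalize_before:                # pre-norm: norm -> sublayer -> dropout -> add
            x = self.norm(x)
        x = residual + self.dropout(self.sublayer(x))
        if not self.normalize_before:            # post-norm: sublayer -> add -> norm
            x = self.norm(x)
        return x
```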
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
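The embedding.py deleted above precomputes the standard sinusoidal table its docstring describes, PE(pos, 2i) = sin(pos / 10000^(2i/d_model)) and PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model)), and then slices it per utterance. As a reference alongside the deletion, here is a minimal standalone sketch of that table, assuming only torch; `sinusoidal_table` is an illustrative name, not an API of this repository:

```python
# Minimal sketch (not part of this diff): the sinusoidal table that the removed
# PositionalEncoding class precomputes once and then slices per utterance.
import math
import torch

def sinusoidal_table(max_len: int, d_model: int) -> torch.Tensor:
    """Return a (1, max_len, d_model) tensor with sin on even dims, cos on odd dims."""
    pe = torch.zeros(max_len, d_model)
    position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)   # (max_len, 1)
    div_term = torch.exp(
        torch.arange(0, d_model, 2, dtype=torch.float32)
        * -(math.log(10000.0) / d_model))                                   # (d_model // 2,)
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    return pe.unsqueeze(0)

# e.g. pos_emb = sinusoidal_table(5000, 256)[:, offset:offset + seq_len]
```

The removed RelPositionalEncoding reuses the same table but returns the positional embedding separately instead of adding it to the scaled input.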
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
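The checkpoint.py removed above pairs each saved `.pt` state dict with a sibling `.yaml` carrying metadata such as the save time. A minimal sketch of that save pattern, assuming torch and PyYAML; `save_checkpoint_sketch` is an illustrative name, not part of this codebase:

```python
# Minimal sketch (illustrative only): the save pattern of the removed checkpoint.py.
# Weights go to <name>.pt, human-readable metadata to a sibling <name>.yaml.
import datetime
import re

import torch
import yaml

def save_checkpoint_sketch(model: torch.nn.Module, path: str, infos: dict = None) -> None:
    # Unwrap DataParallel / DistributedDataParallel before saving, as the removed code does.
    state_dict = model.module.state_dict() if hasattr(model, "module") else model.state_dict()
    torch.save(state_dict, path)
    infos = dict(infos or {})
    infos["save_time"] = datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    with open(re.sub(r"\.pt$", ".yaml", path), "w") as fout:
        fout.write(yaml.dump(infos))
```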
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
-        pad_value (float): Value for padding.
-
-    Returns:
-        Tensor: Padded tensor (B, Tmax, `*`).
-
-    Examples:
-        >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)]
-        >>> x
-        [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])]
-        >>> pad_list(x, 0)
-        tensor([[1., 1., 1., 1.],
-                [1., 1., 0., 0.],
-                [1., 0., 0., 0.]])
-
-    """
-    n_batch = len(xs)
-    max_len = max([x.size(0) for x in xs])
-    pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device)
-    pad = pad.fill_(pad_value)
-    for i in range(n_batch):
-        pad[i, :xs[i].size(0)] = xs[i]
-
-    return pad
-
-
-def add_blank(ys_pad: torch.Tensor, blank: int,
-              ignore_id: int) -> torch.Tensor:
-    """ Prepad blank for transducer predictor
-
-    Args:
-        ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax)
-        blank (int): index of <blank>
-
-    Returns:
-        ys_in (torch.Tensor) : (B, Lmax + 1)
-
-    Examples:
-        >>> blank = 0
-        >>> ignore_id = -1
-        >>> ys_pad
-        tensor([[ 1, 2, 3, 4, 5],
-                [ 4, 5, 6, -1, -1],
-                [ 7, 8, 9, -1, -1]], dtype=torch.int32)
-        >>> ys_in = add_blank(ys_pad, 0, -1)
-        >>> ys_in
-        tensor([[0, 1, 2, 3, 4, 5],
-                [0, 4, 5, 6, 0, 0],
-                [0, 7, 8, 9, 0, 0]])
-    """
-    bs = ys_pad.size(0)
-    _blank = torch.tensor([blank],
-                          dtype=torch.long,
-                          requires_grad=False,
-                          device=ys_pad.device)
-    _blank = _blank.repeat(bs).unsqueeze(1)  # [bs,1]
-    out = torch.cat([_blank, ys_pad], dim=1)  # [bs, Lmax+1]
-    return torch.where(out == ignore_id, blank, out)
-
-
-def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int,
-                ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]:
-    """Add <sos> and <eos> labels.
-
-    Args:
-        ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax)
-        sos (int): index of <sos>
-        eos (int): index of <eos>
-        ignore_id (int): index of padding
-
-    Returns:
-        ys_in (torch.Tensor) : (B, Lmax + 1)
-        ys_out (torch.Tensor) : (B, Lmax + 1)
-
-    Examples:
-        >>> sos_id = 10
-        >>> eos_id = 11
-        >>> ignore_id = -1
-        >>> ys_pad
-        tensor([[ 1, 2, 3, 4, 5],
-                [ 4, 5, 6, -1, -1],
-                [ 7, 8, 9, -1, -1]], dtype=torch.int32)
-        >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id)
-        >>> ys_in
-        tensor([[10, 1, 2, 3, 4, 5],
-                [10, 4, 5, 6, 11, 11],
-                [10, 7, 8, 9, 11, 11]])
-        >>> ys_out
-        tensor([[ 1, 2, 3, 4, 5, 11],
-                [ 4, 5, 6, 11, -1, -1],
-                [ 7, 8, 9, 11, -1, -1]])
-    """
-    _sos = torch.tensor([sos],
-                        dtype=torch.long,
-                        requires_grad=False,
-                        device=ys_pad.device)
-    _eos = torch.tensor([eos],
-                        dtype=torch.long,
-                        requires_grad=False,
-                        device=ys_pad.device)
-    ys = [y[y != ignore_id] for y in ys_pad]  # parse padded ys
-    ys_in = [torch.cat([_sos, y], dim=0) for y in ys]
-    ys_out = [torch.cat([y, _eos], dim=0) for y in ys]
-    return pad_list(ys_in, eos), pad_list(ys_out, ignore_id)
-
-
-def reverse_pad_list(ys_pad: torch.Tensor,
-                     ys_lens: torch.Tensor,
-                     pad_value: float = -1.0) -> torch.Tensor:
-    """Reverse padding for the list of tensors.
-
-    Args:
-        ys_pad (tensor): The padded tensor (B, Tokenmax).
-        ys_lens (tensor): The lens of token seqs (B)
-        pad_value (int): Value for padding.
-
-    Returns:
-        Tensor: Padded tensor (B, Tokenmax).
-
-    Examples:
-        >>> x
-        tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]])
-        >>> pad_list(x, 0)
-        tensor([[4, 3, 2, 1],
-                [7, 6, 5, 0],
-                [9, 8, 0, 0]])
-
-    """
-    r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0]))
-                             for y, i in zip(ys_pad, ys_lens)], True,
-                            pad_value)
-    return r_ys_pad
-
-
-def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor,
-                ignore_label: int) -> float:
-    """Calculate accuracy.
-
-    Args:
-        pad_outputs (Tensor): Prediction tensors (B * Lmax, D).
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
-
-    """
-    if non_lang_sym_path is None:
-        return None
-    else:
-        syms = read_lists(non_lang_sym_path)
-        non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})")
-        for sym in syms:
-            if non_lang_syms_pattern.fullmatch(sym) is None:
-                class BadSymbolFormat(Exception):
-                    pass
-                raise BadSymbolFormat(
-                    "Non-linguistic symbols should be "
-                    "formatted in {xxx}/<xxx>/[xxx], consider"
-                    " modify '%s' to meet the requirment. "
-                    "More details can be found in discussions here : "
-                    "https://github.com/wenet-e2e/wenet/pull/819" % (sym))
-        return syms
-
-
-def read_symbol_table(symbol_table_file):
-    symbol_table = {}
-    with open(symbol_table_file, 'r', encoding='utf8') as fin:
-        for line in fin:
-            arr = line.strip().split()
-            assert len(arr) == 2
-            symbol_table[arr[0]] = int(arr[1])
-    return symbol_table
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/init_model.py
deleted file mode 100644
index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/init_model.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
-    """
-    beam_size = score.size(-1)
-    zero_mask = torch.zeros_like(flag, dtype=torch.bool)
-    if beam_size > 1:
-        unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])),
-                               dim=1)
-        finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])),
-                             dim=1)
-    else:
-        unfinished = zero_mask
-        finished = flag
-    score.masked_fill_(unfinished, -float('inf'))
-    score.masked_fill_(finished, 0)
-    return score
-
-
-def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor,
-                        eos: int) -> torch.Tensor:
-    """
-    If a sequence is finished, all of its branch should be <eos>
-
-    Args:
-        pred (torch.Tensor): A int array with shape
-            (batch_size * beam_size, beam_size).
-        flag (torch.Tensor): A bool array with shape
-            (batch_size * beam_size, 1).
-
-    Returns:
-        torch.Tensor: (batch_size * beam_size).
-    """
-    beam_size = pred.size(-1)
-    finished = flag.repeat([1, beam_size])
-    return pred.masked_fill_(finished, eos)
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/scheduler.py
deleted file mode 100644
index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/gigaspeech/s0/wenet/utils/scheduler.py
+++ /dev/null
@@ -1,670 +0,0 @@
-# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
-# 2022 Ximalaya Inc (Yuguang Yang)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Modified from ESPnet(https://github.com/espnet/espnet)
-# NeMo(https://github.com/NVIDIA/NeMo)
-
-from typing import Union
-
-import math
-import warnings
-import torch
-from torch.optim.lr_scheduler import _LRScheduler
-
-from typeguard import check_argument_types
-
-
-class WarmupLR(_LRScheduler):
-    """The WarmupLR scheduler
-
-    This scheduler is almost same as NoamLR Scheduler except for following
-    difference:
-
-    NoamLR:
-        lr = optimizer.lr * model_size ** -0.5
-             * min(step ** -0.5, step * warmup_step ** -1.5)
-    WarmupLR:
-        lr = optimizer.lr * warmup_step ** 0.5
-             * min(step ** -0.5, step * warmup_step ** -1.5)
-
-    Note that the maximum lr equals to optimizer.lr in this scheduler.
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/README.md deleted file mode 100644 index 2d83cec06ec0778509eeeae85cb2a8b0c819b698..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# Performance Record - -## Conformer Result (Old IO) - -* Feature info: using fbank feature, with cmvn, with speed perturb. -* Training info: lr 0.002, batch size 16, 1 machines, 1*4 = 4 gpu, acc_grad 4, 240 epochs, dither 0.1 -* Decoding info: ctc_weight 0.5, average_num 30 - -| decoding mode | | -|--------------------------|-------| -| attention decoder | 21.9 | -| ctc greedy search | 21.15 | -| ctc prefix beam search | 21.13 | -| attention rescoring | 20.47 | - -## Conformer Result (New IO) - -* Feature info: using fbank feature, with cmvn, with speed perturb. 
-* Training info: lr 0.002, batch size 16, 1 machines, 1*4 = 4 gpu, acc_grad 4, 133 epochs, dither 0.1 -* Decoding info: ctc_weight 0.5, average_num 30 - -| decoding mode | | -|--------------------------|-------| -| attention decoder | 21.42 | -| ctc greedy search | 21.16 | -| ctc prefix beam search | 21.18 | -| attention rescoring | 20.42 | diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/conf/train_960_unigram5000.model b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/conf/train_960_unigram5000.model deleted file mode 100644 index 8419aa7bac81d9b02f9644e9cf8929b73765a3af..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/conf/train_960_unigram5000.model and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/conf/train_conformer.yaml deleted file mode 100644 index 81c8571e2798f80d564f0650ce94266193cd8a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -# feature extraction -dataset_conf: - filter_conf: - max_length: 2000 - min_length: 50 - token_max_length: 400 - token_min_length: 1 - max_output_input_ratio: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 240 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/local/hkust_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/local/hkust_data_prep.sh deleted file mode 100644 index cebdef1daf476c6e602b5cbe9a11fdb00521aced..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/local/hkust_data_prep.sh +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env bash - -. 
./path.sh || exit 1; - -if [ $# != 2 ]; then - echo "Usage: $0 " - echo " $0 /export/corpora/LDC03S04 /export/corpora/LDC03T19" - exit 1; -fi - -hkust_audio_dir=$1 -hkust_text_dir=$2 - -train_dir=data/local/train -dev_dir=data/local/dev -train_dev=train_dev -train_nodev=train_nodev - -nj=16 - -mkdir -p $train_dir -mkdir -p $dev_dir - -#data directory check -if [ ! -d $hkust_audio_dir ] || [ ! -d $hkust_text_dir ]; then - echo "Error: $0 requires two directory arguments" - exit 1; -fi - -#find sph audio file for train dev resp. -find $hkust_audio_dir -iname "*.sph" | grep -i "audio/train" > $train_dir/sph.flist || exit 1; -find $hkust_audio_dir -iname "*.sph" | grep -i "audio/dev" > $dev_dir/sph.flist || exit 1; - -n=`cat $train_dir/sph.flist $dev_dir/sph.flist | wc -l` -[ $n -ne 897 ] && \ - echo Warning: expected 897 data data files, found $n - -#Transcriptions preparation - -#collect all trans, convert encodings to utf-8, -find $hkust_text_dir -iname "*.txt" | grep -i "trans/train" | xargs cat |\ - iconv -f GBK -t UTF-8 | perl -e ' - while () { - @A = split(" ", $_); - if (@A <= 1) { next; } - if ($A[0] eq "#") { $utt_id = $A[1]; } - if (@A >= 3) { - $A[2] =~ s:^([AB])\:$:$1:; - printf "%s-%s-%06.0f-%06.0f", $utt_id, $A[2], 100*$A[0] + 0.5, 100*$A[1] + 0.5; - for($n = 3; $n < @A; $n++) { print " $A[$n]" }; - print "\n"; - } - } - ' | sort -k1 > $train_dir/transcripts.txt || exit 1; - -find $hkust_text_dir -iname "*.txt" | grep -i "trans/dev" | xargs cat |\ - iconv -f GBK -t UTF-8 | perl -e ' - while () { - @A = split(" ", $_); - if (@A <= 1) { next; } - if ($A[0] eq "#") { $utt_id = $A[1]; } - if (@A >= 3) { - $A[2] =~ s:^([AB])\:$:$1:; - printf "%s-%s-%06.0f-%06.0f", $utt_id, $A[2], 100*$A[0] + 0.5, 100*$A[1] + 0.5; - for($n = 3; $n < @A; $n++) { print " $A[$n]" }; - print "\n"; - } - } - ' | sort -k1 > $dev_dir/transcripts.txt || exit 1; - -#transcripts normalization and segmentation -cat $train_dir/transcripts.txt |\ - sed -e 's// /g' |\ - sed -e 's/<\/foreign>/ /g' |\ - sed -e 's/\(.\+\)<\/noise>/\1/g' |\ - sed -e 's/<\/noise>//g' |\ - sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\ - sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\ - awk '{if (NF > 1) print $0;}' |\ - local/hkust_normalize.pl |\ - awk '{if (NF > 0) print $0;}' > $train_dir/text || exit 1; - -cat $dev_dir/transcripts.txt |\ - sed -e 's// /g' |\ - sed -e 's/<\/foreign>/ /g' |\ - sed -e 's/\(.\+\)<\/noise>/\1/g' |\ - sed -e 's/<\/noise>//g' |\ - sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\ - sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\ - awk '{if (NF > 1) print $0;}' |\ - local/hkust_normalize.pl |\ - awk '{if (NF > 0) print $0;}' > $dev_dir/text || exit 1; - -# some data is corrupted. 
Delete them -cat $train_dir/text | grep -v 20040527_210939_A901153_B901154-A-035691-035691 | egrep -v "A:|B:" > tmp -mv tmp $train_dir/text || exit 1; - -#Make segment files from transcript -#segments file format is: utt-id side-id start-time end-time, e.g.: -#sw02001-A_000098-001156 sw02001-A 0.98 11.56 - -awk '{ segment=$1; split(segment,S,"-"); side=S[2]; audioname=S[1];startf=S[3];endf=S[4]; - print segment " " audioname "-" side " " startf/100 " " endf/100}' <$train_dir/text > $train_dir/segments -awk '{name = $0; gsub(".sph$","",name); gsub(".*/","",name); print(name " " $0)}' $train_dir/sph.flist > $train_dir/sph.scp - -awk '{ segment=$1; split(segment,S,"-"); side=S[2]; audioname=S[1];startf=S[3];endf=S[4]; - print segment " " audioname "-" side " " startf/100 " " endf/100}' <$dev_dir/text > $dev_dir/segments -awk '{name = $0; gsub(".sph$","",name); gsub(".*/","",name); print(name " " $0)}' $dev_dir/sph.flist > $dev_dir/sph.scp - -bash tools/sph2wav.sh --nj ${nj} $train_dir/sph.scp $train_dir/segments $train_dir/wav.scp -bash tools/sph2wav.sh --nj ${nj} $dev_dir/sph.scp $dev_dir/segments $dev_dir/wav.scp - -#side A - channel 1, side B - channel 2 - -# this file reco2file_and_channel maps recording-id (e.g. sw02001-A) -# to the file name sw02001 and the A, e.g. -# sw02001-A sw02001 A -# In this case it's trivial, but in other corpora the information might -# be less obvious. Later it will be needed for ctm scoring. -cat $train_dir/wav_ori.scp | awk '{print $1}' | \ - perl -ane '$_ =~ m:^(\S+)-([AB])$: || die "bad label $_"; print "$1-$2 $1 $2\n"; ' \ - > $train_dir/reco2file_and_channel || exit 1; -cat $dev_dir/wav_ori.scp | awk '{print $1}' | \ - perl -ane '$_ =~ m:^(\S+)-([AB])$: || die "bad label $_"; print "$1-$2 $1 $2\n"; ' \ - > $dev_dir/reco2file_and_channel || exit 1; - - -cat $train_dir/segments | awk '{spk=substr($1,1,33); print $1 " " spk}' > $train_dir/utt2spk || exit 1; -cat $train_dir/utt2spk | sort -k 2 | tools/utt2spk_to_spk2utt.pl > $train_dir/spk2utt || exit 1; - -cat $dev_dir/segments | awk '{spk=substr($1,1,33); print $1 " " spk}' > $dev_dir/utt2spk || exit 1; -cat $dev_dir/utt2spk | sort -k 2 | tools/utt2spk_to_spk2utt.pl > $dev_dir/spk2utt || exit 1; - -mkdir -p data/train data/dev - -for f in spk2utt utt2spk wav.scp text segments reco2file_and_channel; do - cp data/local/train/$f data/train/$f || exit 1; -done - -for f in spk2utt utt2spk wav.scp text segments reco2file_and_channel; do - cp data/local/dev/$f data/dev/$f || exit 1; -done - -tools/subset_data_dir.sh --first data/train 4001 data/${train_dev} -n=$(($(wc -l < data/train/segments) - 4001)) -tools/subset_data_dir.sh --last data/train ${n} data/${train_nodev} - -echo "$0: HKUST data preparation succeeded" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/local/hkust_normalize.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/local/hkust_normalize.pl deleted file mode 100644 index ff2d3eaad6b1d5ca72c0e43ebf251dfcb4c953d8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/local/hkust_normalize.pl +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright Chao Weng - -# normalizations for hkust trascript -# see the docs/trans-guidelines.pdf for details - -while () { - @A = split(" ", $_); - print "$A[0] "; - for ($n = 1; $n < @A; $n++) { - $a = $A[$n]; - if (($a eq "{breath}")||($a eq "{cough}")||($a eq 
"{sneeze}") - || ($a eq "{lipsmack}")) {next;} - if (($a eq "{laugh}")) {next;} - if (($a eq "")) {next;} - $tmp = $a; - if ($tmp =~ /[^.,?+-]{0,}[.,?+-]+/) { $tmp =~ s:([^.,?+-]{0,})[.,?+-]+:$1:g; } - if ($tmp =~ /\~[A-Z]/) { $tmp =~ s:\~([A-Z]):$1:; } - if ($tmp =~ /%\S/) { $tmp =~ s:%(\S):$1:; } - if ($tmp =~ /[a-zA-Z]/) {$tmp=uc($tmp);} - print "$tmp "; - } - print "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/run.sh deleted file mode 100644 index 612bddeaf693ba4fc7a9897d4835d3844e711404..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/run.sh +++ /dev/null @@ -1,247 +0,0 @@ -#!/bin/bash - -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3" -stage=4 # start from 0 if you need to start from data preparation -stop_stage=4 - -# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl -# communication. More details can be found in -# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html -# export NCCL_SOCKET_IFNAME=ens4f1 -# The num of nodes or machines used for multi-machine training -# Default 1 for single machine/node -# NFS will be needed if you want run multi-machine training -num_nodes=1 -# The rank of each node or machine, range from 0 to num_nodes -1 -# The first node/machine sets node_rank 0, the second one sets node_rank 1 -# the third one set node_rank 2, and so on. Default 0 -node_rank=0 - -nj=16 -feat_dir=raw_wav - -data_type=raw -num_utts_per_shard=1000 -prefetch=100 - -train_set=train_nodev -dev_set=train_dev - -# Optional train_config -# 1. conf/train_transformer.yaml: Standard transformer -# 2. conf/train_conformer.yaml: Standard conformer -# 3. conf/train_unified_conformer.yaml: Unified dynamic chunk causal conformer -# 4. conf/train_unified_transformer.yaml: Unified dynamic chunk transformer -train_config=conf/train_conformer.yaml -# English modeling unit -# Optional 1. bpe 2. char -en_modeling_unit=bpe -dict=data/dict_$en_modeling_unit/lang_char.txt -cmvn=true -debug=false -num_workers=2 -dir=exp/conformer -checkpoint= - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=30 -decode_modes="ctc_greedy_search ctc_prefix_beam_search - attention attention_rescoring" - -. 
tools/parse_options.sh || exit 1; - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Data preparation - local/hkust_data_prep.sh /mnt/cfs/database/hkust/LDC2005S15/ \ - /mnt/cfs/database/hkust/LDC2005T32/ || exit 1; -fi - - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - # For wav feature, just copy the data. Fbank extraction is done in training - mkdir -p ${feat_dir}_${en_modeling_unit} - for x in ${train_set} ${dev_set}; do - cp -r data/$x ${feat_dir}_${en_modeling_unit} - done - - cp -r data/dev ${feat_dir}_${en_modeling_unit}/test - - tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \ - --in_scp data/${train_set}/wav.scp \ - --out_cmvn ${feat_dir}_${en_modeling_unit}/$train_set/global_cmvn - -fi - -# This bpe model is trained on librispeech training data set. -bpecode=conf/train_960_unigram5000.model -trans_type_ops= -bpe_ops= -if [ $en_modeling_unit = "bpe" ]; then - trans_type_ops="--trans_type cn_char_en_bpe" - bpe_ops="--bpecode ${bpecode}" -fi - -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - # Make train dict - echo "Make a dictionary" - mkdir -p $(dirname $dict) - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - paste -d " " \ - <(cut -f 1 -d" " ${feat_dir}_${en_modeling_unit}/${train_set}/text) \ - <(cut -f 2- -d" " ${feat_dir}_${en_modeling_unit}/${train_set}/text \ - | tr 'a-z' 'A-Z' | sed 's/\([A-Z]\) \([A-Z]\)/\1▁\2/g' \ - | sed 's/\([A-Z]\) \([A-Z]\)/\1▁\2/g' | tr -d " " ) \ - > ${feat_dir}_${en_modeling_unit}/${train_set}/text4dict - sed -i 's/\xEF\xBB\xBF//' \ - ${feat_dir}_${en_modeling_unit}/${train_set}/text4dict - - tools/text2token.py -s 1 -n 1 -m ${bpecode} \ - ${feat_dir}_${en_modeling_unit}/${train_set}/text4dict ${trans_type_ops} \ - | cut -f 2- -d" " | tr " " "\n" \ - | sort | uniq | grep -a -v -e '^\s*$' \ - | grep -v '·' | grep -v '“' | grep -v "”" | grep -v "\[" | grep -v "\]" \ - | grep -v "…" \ - | awk '{print $0 " " NR+1}' >> ${dict} - - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - # Prepare wenet required data - echo "Prepare data, prepare required format" - for x in ${dev_set} ${train_set} test; do - if [ $data_type == "shard" ]; then - tools/make_shard_list.py --num_utts_per_shard $num_utts_per_shard \ - --num_threads 16 ${feat_dir}_${en_modeling_unit}/$x/wav.scp \ - ${feat_dir}_${en_modeling_unit}/$x/text \ - $(realpath ${feat_dir}_${en_modeling_unit}/$x/shards) \ - ${feat_dir}_${en_modeling_unit}/$x/data.list - else - tools/make_raw_list.py ${feat_dir}_${en_modeling_unit}/$x/wav.scp \ - ${feat_dir}_${en_modeling_unit}/$x/text \ - ${feat_dir}_${en_modeling_unit}/$x/data.list - fi - done -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - # Training - mkdir -p $dir - INIT_FILE=$dir/ddp_init - # You had better rm it manually before you start run.sh on first node. - # rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - # The total number of processes/gpus, so that the master knows - # how many workers to wait for. 
- # More details about ddp can be found in - # https://pytorch.org/tutorials/intermediate/dist_tuto.html - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp ${feat_dir}_${en_modeling_unit}/$train_set/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. - rank=`expr $node_rank \* $num_gpus + $i` - - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --prefetch $prefetch \ - --train_data ${feat_dir}_${en_modeling_unit}/$train_set/data.list \ - --cv_data ${feat_dir}_${en_modeling_unit}/$dev_set/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 1 \ - $cmvn_opts \ - --pin_memory \ - --bpe_model ${bpecode} - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size=-1 - ctc_weight=0.5 - idx=0 - for mode in ${decode_modes}; do - { - test_dir="$dir/"` - `"test_${mode}${decoding_chunk_size:+_chunk$decoding_chunk_size}/test" - mkdir -p $test_dir - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$idx+1]) - python wenet/bin/recognize.py --gpu $gpu_id \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data ${feat_dir}_${en_modeling_unit}/test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $ctc_weight \ - --result_file $test_dir/text_${en_modeling_unit} \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - if [ $en_modeling_unit == "bpe" ]; then - tools/spm_decode --model=${bpecode} --input_format=piece \ - < $test_dir/text_${en_modeling_unit} | sed -e "s/▁/ /g" > $test_dir/text - else - cat $test_dir/text_${en_modeling_unit} \ - | sed -e "s/▁/ /g" > $test_dir/text - fi - # Cer used to be consistent with kaldi & espnet - python tools/compute-cer.py --char=1 --v=1 \ - ${feat_dir}_${en_modeling_unit}/test/text $test_dir/text > $test_dir/wer - } & - ((idx+=1)) - done - wait -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip \ - --output_quant_file $dir/final_quant.zip -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/alignment.sh deleted file mode 100644 index 
64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. 
./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. - elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in 
range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, 
ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! 
cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. -has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! 
$skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - 
min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. 
- format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 
0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - 
return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . / - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if 
sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - 
print('===========================================================================')
-            print()
-
-    result = calculator.overall()
-    if result['all'] != 0 :
-        wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all']
-    else :
-        wer = 0.0
-    print('Overall -> %4.2f %%' % wer, end = ' ')
-    print('N=%d C=%d S=%d D=%d I=%d' %
-          (result['all'], result['cor'], result['sub'], result['del'], result['ins']))
-    if not verbose:
-        print()
-
-    if verbose:
-        for cluster_id in default_clusters :
-            result = calculator.cluster([ k for k in default_clusters[cluster_id] ])
-            if result['all'] != 0 :
-                wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all']
-            else :
-                wer = 0.0
-            print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ')
-            print('N=%d C=%d S=%d D=%d I=%d' %
-                  (result['all'], result['cor'], result['sub'], result['del'], result['ins']))
-        if len(cluster_file) > 0 : # compute separated WERs for word clusters
-            cluster_id = ''
-            cluster = []
-            for line in open(cluster_file, 'r', encoding='utf-8') :
-                for token in line.decode('utf-8').rstrip('\n').split() :
-                    # end of cluster reached, like </Keyword>
-                    if token[0:2] == '</' and token[len(token)-1] == '>' and \
-                       token.lstrip('</').rstrip('>') == cluster_id :
-                        result = calculator.cluster(cluster)
-                        if result['all'] != 0 :
-                            wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all']
-                        else :
-                            wer = 0.0
-                        print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ')
-                        print('N=%d C=%d S=%d D=%d I=%d' %
-                              (result['all'], result['cor'], result['sub'], result['del'], result['ins']))
-                        cluster_id = ''
-                        cluster = []
-                    # begin of cluster reached, like <Keyword>
-                    elif token[0] == '<' and token[len(token)-1] == '>' and \
-                         cluster_id == '' :
-                        cluster_id = token.lstrip('<').rstrip('>')
-                        cluster = []
-                    # general terms, like WEATHER / CAR / ...
- else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." 
- parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: Daniel 
Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
Will be used in lexicon and language model FST compiling.
-cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk '
-  BEGIN {
-    print "<eps> 0";
-  }
-  {
-    printf("%s %d\n", $1, NR);
-  }
-  END {
-    printf("#0 %d\n", NR+1);
-    printf("<s> %d\n", NR+2);
-    printf("</s> %d\n", NR+3);
-  }' > $dir/words.txt || exit 1;
-
-# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time.
-token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'`
-word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'`
-
-tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \
-  fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \
-  --keep_isymbols=false --keep_osymbols=false | \
-  fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \
-  fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;
-
-echo "Lexicon and token FSTs compiling succeeded"
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/ctc_token_fst.py
deleted file mode 100644
index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/ctc_token_fst.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 1 <eps> <eps>')
-print('1 1 <blank> <eps>')
-print('2 2 <blank> <eps>')
-print('2 0 <eps> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    node = 3
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(1, node, phone, phone))
-            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
-            print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>'))
-            node += 1
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/ctc_token_fst_compact.py
deleted file mode 100644
index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/ctc_token_fst_compact.py
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 0 <blank> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    node = 1
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(0, node, phone, phone))
-            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
-            print('{} {} {} {}'.format(node, 0, '<eps>', '<eps>'))
-            node += 1
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/ctc_token_fst_corrected.py
deleted file mode 100644
index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/ctc_token_fst_corrected.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-
-def il(n):
-    return n + 1
-
-
-def ol(n):
-
return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional).
-
-$pron_probs = 0;
-
-if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) {
-  $pron_probs = 1;
-  shift @ARGV;
-}
-
-if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) {
-  print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n";
-  print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n";
-  print STDERR "Note: ordinarily, each line of lexicon.txt is:\n";
-  print STDERR " word phone1 phone2 ... phoneN;\n";
-  print STDERR "if the --pron-probs option is used, each line is:\n";
-  print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n";
-  print STDERR "The probability 'prob' will typically be between zero and one, and note that\n";
-  print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n";
-  print STDERR "this is your responsibility.\n\n";
-  print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n";
-  print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n";
-  print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n";
-  exit(1);
-}
-
-$lexfn = shift @ARGV;
-if (@ARGV == 0) {
-  $silprob = 0.0;
-} elsif (@ARGV == 2) {
-  ($silprob,$silphone) = @ARGV;
-} else {
-  ($silprob,$silphone,$sildisambig) = @ARGV;
-}
-if ($silprob != 0.0) {
-  $silprob < 1.0 || die "Sil prob cannot be >= 1.0";
-  $silcost = -log($silprob);
-  $nosilcost = -log(1.0 - $silprob);
-}
-
-
-open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
-
-
-if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero.
-  $loopstate = 0;
-  $nextstate = 1; # next unallocated state.
-  while (<L>) {
-    @A = split(" ", $_);
-    @A == 0 && die "Empty lexicon line.";
-    foreach $a (@A) {
-      if ($a eq "<eps>") {
-        die "Bad lexicon line $_ (<eps> is forbidden)";
-      }
-    }
-    $w = shift @A;
-    if (! $pron_probs) {
-      $pron_cost = 0.0;
-    } else {
-      $pron_prob = shift @A;
-      if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) {
-        die "Bad pronunciation probability in line $_";
-      }
-      $pron_cost = -log($pron_prob);
-    }
-    if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; }
-
-    $s = $loopstate;
-    $word_or_eps = $w;
-    while (@A > 0) {
-      $p = shift @A;
-      if (@A > 0) {
-        $ns = $nextstate++;
-      } else {
-        $ns = $loopstate;
-      }
-      print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n";
-      $word_or_eps = "<eps>";
-      $pron_cost_string = ""; # so we only print it on the first arc of the word.
-      $s = $ns;
-    }
-  }
-  print "$loopstate\t0\n"; # final-cost.
-} else { # have silence probs.
-  $startstate = 0;
-  $loopstate = 1;
-  $silstate = 2; # state from where we go to loopstate after emitting silence.
-  print "$startstate\t$loopstate\t<eps>\t<eps>\t$nosilcost\n"; # no silence.
-  if (!defined $sildisambig) {
-    print "$startstate\t$loopstate\t$silphone\t<eps>\t$silcost\n"; # silence.
-    print "$silstate\t$loopstate\t$silphone\t<eps>\n"; # no cost.
-    $nextstate = 3;
-  } else {
-    $disambigstate = 3;
-    $nextstate = 4;
-    print "$startstate\t$disambigstate\t$silphone\t<eps>\t$silcost\n"; # silence.
-    print "$silstate\t$disambigstate\t$silphone\t<eps>\n"; # no cost.
-    print "$disambigstate\t$loopstate\t$sildisambig\t<eps>\n"; # silence disambiguation symbol.
-  }
-  while (<L>) {
-    @A = split(" ", $_);
-    $w = shift @A;
-    if (! $pron_probs) {
-      $pron_cost = 0.0;
-    } else {
-      $pron_prob = shift @A;
-      if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) {
-        die "Bad pronunciation probability in line $_";
-      }
-      $pron_cost = -log($pron_prob);
-    }
-    if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; }
-    $s = $loopstate;
-    $word_or_eps = $w;
-    while (@A > 0) {
-      $p = shift @A;
-      if (@A > 0) {
-        $ns = $nextstate++;
-        print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n";
-        $word_or_eps = "<eps>";
-        $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time.
-        $s = $ns;
-      } elsif (!defined($silphone) || $p ne $silphone) {
-        # This is non-deterministic but relatively compact,
-        # and avoids epsilons.
-        $local_nosilcost = $nosilcost + $pron_cost;
-        $local_silcost = $silcost + $pron_cost;
-        print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n";
-        print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n";
-      } else {
-        # no point putting opt-sil after silence word.
-        print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n";
-      }
-    }
-  }
-  print "$loopstate\t0\n"; # final-cost.
-}
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/make_tlg.sh
deleted file mode 100644
index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/make_tlg.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/bash
-#
-
-if [ -f path.sh ]; then . path.sh; fi
-
-lm_dir=$1
-src_lang=$2
-tgt_lang=$3
-
-arpa_lm=${lm_dir}/lm.arpa
-[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
-
-rm -rf $tgt_lang
-cp -r $src_lang $tgt_lang
-
-# Compose the language model to FST
-cat $arpa_lm | \
-  grep -v '<s> <s>' | \
-  grep -v '</s> <s>' | \
-  grep -v '</s> </s>' | \
-  grep -v -i '<unk>' | \
-  grep -v -i '<spoken_noise>' | \
-  arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \
-  tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \
-    --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-  fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst
-
-
-echo "Checking how stochastic G is (the first of these numbers should be small):"
-fstisstochastic $tgt_lang/G.fst
-
-# Compose the token, lexicon and language-model FST into the final decoding graph
-fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \
-  fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1;
-fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1;
-
-echo "Composing decoding graph TLG.fst succeeded"
-#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/prepare_dict.py
deleted file mode 100644
index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/prepare_dict.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-# encoding: utf-8
-
-import sys
-
-# sys.argv[1]: e2e model unit file(lang_char.txt)
-# sys.argv[2]: raw lexicon file
-# sys.argv[3]: output lexicon file
-# sys.argv[4]: bpemodel
-
-unit_table = set()
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    for line in fin:
-        unit = line.split()[0]
-        unit_table.add(unit)
-
-
-def contain_oov(units):
-    for unit in units:
-        if unit not in unit_table:
-            return True
-    return False
-
-
-bpemode = len(sys.argv) > 4
-if bpemode:
-    import sentencepiece as spm
-    sp = spm.SentencePieceProcessor()
-    sp.Load(sys.argv[4])
-lexicon_table = set()
-with open(sys.argv[2], 'r', encoding='utf8') as fin, \
-        open(sys.argv[3], 'w', encoding='utf8') as fout:
-    for line in fin:
-        word = line.split()[0]
-        if word == 'SIL' and not bpemode:  # `sil` might be a valid piece in bpemodel
-            continue
-        elif word == '<SPOKEN_NOISE>':
-            continue
-        else:
-            # each word only has one pronunciation for e2e system
-            if word in lexicon_table:
-                continue
-            if bpemode:
-                # We assume that the lexicon does not contain code-switch,
-                # i.e. the word contains both English and Chinese.
-                # see PR https://github.com/wenet-e2e/wenet/pull/1693
-                # and Issue https://github.com/wenet-e2e/wenet/issues/1653
-                if word.encode('utf8').isalpha():
-                    pieces = sp.EncodeAsPieces(word)
-                else:
-                    pieces = word
-                if contain_oov(pieces):
-                    print(
-                        'Ignoring words {}, which contains oov unit'.format(
-                            ''.join(word).strip('▁'))
-                    )
-                    continue
-                chars = ' '.join(
-                    [p if p in unit_table else '<unk>' for p in pieces])
-            else:
-                # ignore words with OOV
-                if contain_oov(word):
-                    print('Ignoring words {}, which contains oov unit'.format(word))
-                    continue
-            # Optional, append ▁ in front of english word
-            # we assume the model unit of our e2e system is char now.
-            if word.encode('utf8').isalpha() and '▁' in unit_table:
-                word = '▁' + word
-            chars = ' '.join(word)  # word is a char list
-            fout.write('{} {}\n'.format(word, chars))
-            lexicon_table.add(word)
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/remove_oovs.pl
deleted file mode 100644
index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/remove_oovs.pl
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# This script removes lines that contain these OOVs on either the
-# third or fourth fields of the line. It is intended to remove arcs
-# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in).
-
-if ( @ARGV < 1 && @ARGV > 2) {
-  die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n";
-}
-
-$unklist = shift @ARGV;
-open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n";
-while(<S>){
-  @A = split(" ", $_);
-  @A == 1 || die "Bad line in unknown-symbol list: $_";
-  $unk{$A[0]} = 1;
-}
-
-$num_removed = 0;
-while(<>){
-  @A = split(" ", $_);
-  if(defined $unk{$A[2]} || defined $unk{$A[3]}) {
-    $num_removed++;
-  } else {
-    print;
-  }
-}
-print STDERR "remove_oovs.pl: removed $num_removed lines.\n";
-
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/rnnt_token_fst.py
deleted file mode 100644
index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/rnnt_token_fst.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 0 <blank> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(0, 0, phone, phone))
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/s2eps.pl
deleted file mode 100644
index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/fst/s2eps.pl
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# This script replaces <s> and </s> with <eps> (on both input and output sides),
-# for the G.fst acceptor.
-
-while(<>){
-  @A = split(" ", $_);
-  if ( @A >= 4 ) {
-    if ($A[2] eq "<s>" || $A[2] eq "</s>") { $A[2] = "<eps>"; }
-    if ($A[3] eq "<s>" || $A[3] eq "</s>") { $A[3] = "<eps>"; }
-  }
-  print join("\t", @A) . "\n";
-}
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/git-pre-commit
deleted file mode 100644
index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/git-pre-commit
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/bash
-set -e
-
-echo "Running pre-commit flake8"
-python tools/flake8_hook.py
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/install_srilm.sh
deleted file mode 100644
index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/install_srilm.sh
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
-# 2022 Binbin Zhang(binbzha@qq.com)
-
-current_path=`pwd`
-current_dir=`basename "$current_path"`
-
-if [ "tools" != "$current_dir" ]; then
-  echo "You should run this script in tools/ directory!!"
-  exit 1
-fi
-
-! command -v gawk > /dev/null && \
-  echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1;
-
-srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz"
-
-if [ ! -f ./srilm.tar.gz ]; then
-  if ! wget -O ./srilm.tar.gz "$srilm_url"; then
-    echo 'There was a problem downloading the file.'
-    echo 'Check you internet connection and try again.'
-    exit 1
-  fi
-fi
-
-tar -zxvf srilm.tar.gz
-mv SRILM-1.7.3 srilm
-
-# set the SRILM variable in the top-level Makefile to this directory.
-cd srilm
-cp Makefile tmpf
-
-cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \
-  > Makefile || exit 1
-rm tmpf
-
-make || exit
-cd ..
-
-(
-  [ ! -z "${SRILM}" ] && \
-    echo >&2 "SRILM variable is aleady defined. Undefining..." && \
-    unset SRILM
-
-  [ -f ./env.sh ] && . ./env.sh
-
-  [ !
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 else 
sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, "w", 
encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python 
../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - 
char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp))
-
- def forward(self, chunk_xs, chunk_lens, offset,
- att_cache, cnn_cache, cache_mask):
- """Streaming Encoder
- Args:
- xs (torch.Tensor): chunk input, with shape (b, time, mel-dim),
- where `time == (chunk_size - 1) * subsample_rate + \
- subsample.right_context + 1`
- offset (torch.Tensor): offset with shape (b, 1)
- 1 is retained for triton deployment
- required_cache_size (int): cache size required for next chunk
- computation
- > 0: actual cache size
- <= 0: not allowed in streaming gpu encoder
- att_cache (torch.Tensor): cache tensor for KEY & VALUE in
- transformer/conformer attention, with shape
- (b, elayers, head, cache_t1, d_k * 2), where
- `head * d_k == hidden-dim` and
- `cache_t1 == chunk_size * num_decoding_left_chunks`.
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer,
- (b, elayers, hidden-dim, cache_t2), where
- `cache_t2 == cnn.lorder - 1`
- cache_mask (torch.Tensor): cache mask with shape (b, required_cache_size)
- in a batch of requests, each request may have different
- history cache. Cache mask is used to indicate the effective
- cache for each request
- Returns:
- torch.Tensor: log probabilities of ctc output and cutoff by beam size
- with shape (b, chunk_size, beam)
- torch.Tensor: index of top beam size probabilities for each timestep
- with shape (b, chunk_size, beam)
- torch.Tensor: output of current input xs,
- with shape (b, chunk_size, hidden-dim).
- torch.Tensor: new attention cache required for next chunk, with
- same shape (b, elayers, head, cache_t1, d_k * 2)
- as the original att_cache
- torch.Tensor: new conformer cnn cache required for next chunk, with
- same shape as the original cnn_cache.
- torch.Tensor: new cache mask, with same shape as the original
- cache mask
- """
- offset = offset.squeeze(1)
- T = chunk_xs.size(1)
- chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1)
- # B X 1 X T
- chunk_mask = chunk_mask.to(chunk_xs.dtype)
- # transpose batch & num_layers dim
- att_cache = torch.transpose(att_cache, 0, 1)
- cnn_cache = torch.transpose(cnn_cache, 0, 1)
-
- # rewrite encoder.forward_chunk
- # <---------forward_chunk START--------->
- xs = self.global_cmvn(chunk_xs)
- # chunk mask is important for batch inferencing since
- # different sequence in a batch has different length
- xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset)
- elayers, cache_size = att_cache.size(0), att_cache.size(3)
- att_mask = torch.cat((cache_mask, chunk_mask), dim=2)
- index = offset - cache_size
-
- pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1))
- pos_emb = pos_emb.to(dtype=xs.dtype)
-
- next_cache_start = -self.required_cache_size
- r_cache_mask = att_mask[:, :, next_cache_start:]
-
- r_att_cache = []
- r_cnn_cache = []
- mask_pad = torch.ones(1,
- xs.size(1),
- device=xs.device,
- dtype=torch.bool)
- mask_pad = mask_pad.unsqueeze(1)
- max_att_len: int = 0
- recover_activations: \
- List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = []
- index = 0
- xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int)
- xs = self.encoder.preln(xs)
- for i, layer in enumerate(self.encoder.encoders):
- if self.reduce_idx is not None:
- if self.time_reduce is not None and i in self.reduce_idx:
- recover_activations.append(
- (xs, att_mask, pos_emb, mask_pad))
- xs, xs_lens, att_mask, mask_pad = \
- self.encoder.time_reduction_layer(
- xs, xs_lens, att_mask, mask_pad)
- pos_emb = pos_emb[:, ::2, :]
- if self.encoder.pos_enc_layer_type == "rel_pos_repaired":
- pos_emb =
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`.
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer,
- (b, elayers, hidden-dim, cache_t2), where
- `cache_t2 == cnn.lorder - 1`
- cache_mask (torch.Tensor): cache mask with shape (b, required_cache_size)
- in a batch of requests, each request may have different
- history cache. Cache mask is used to indicate the effective
- cache for each request
- Returns:
- torch.Tensor: log probabilities of ctc output and cutoff by beam size
- with shape (b, chunk_size, beam)
- torch.Tensor: index of top beam size probabilities for each timestep
- with shape (b, chunk_size, beam)
- torch.Tensor: output of current input xs,
- with shape (b, chunk_size, hidden-dim).
- torch.Tensor: new attention cache required for next chunk, with
- same shape (b, elayers, head, cache_t1, d_k * 2)
- as the original att_cache
- torch.Tensor: new conformer cnn cache required for next chunk, with
- same shape as the original cnn_cache.
- torch.Tensor: new cache mask, with same shape as the original
- cache mask
- """
- offset = offset.squeeze(1) # (b, )
- offset *= self.calculate_downsampling_factor(self.num_blocks + 1)
-
- T = chunk_xs.size(1)
- chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T)
- # B X 1 X T
- chunk_mask = chunk_mask.to(chunk_xs.dtype)
- # transpose batch & num_layers dim
- # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2)
- # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel)
- att_cache = torch.transpose(att_cache, 0, 1)
- cnn_cache = torch.transpose(cnn_cache, 0, 1)
-
- # rewrite encoder.forward_chunk
- # <---------forward_chunk START--------->
- xs = self.global_cmvn(chunk_xs)
- # chunk mask is important for batch inferencing since
- # different sequence in a batch has different length
- xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset)
- cache_size = att_cache.size(3) # required cache size
- masks = torch.cat((cache_mask, chunk_mask), dim=2)
- att_mask = torch.cat((cache_mask, chunk_mask), dim=2)
- index = offset - cache_size
-
- pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1))
- pos_emb = pos_emb.to(dtype=xs.dtype)
-
- next_cache_start = -self.required_cache_size
- r_cache_mask = masks[:, :, next_cache_start:]
-
- r_att_cache = []
- r_cnn_cache = []
- mask_pad = chunk_mask.to(torch.bool)
- max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache
- for i, layer in enumerate(self.encoder.encoders):
- factor = self.calculate_downsampling_factor(i)
- # NOTE(xcsong): Before layer.forward
- # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2),
- # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2)
- # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ]
- att_cache_trunc = 0
- if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1):
- # The time step is not divisible by the downsampling multiple
- # We propose to double the chunk_size.
- att_cache_trunc = xs.size(1) + \
- att_cache.size(3) // factor - pos_emb.size(1) + 1
- xs, _, new_att_cache, new_cnn_cache = layer(
- xs, att_mask, pos_emb,
- mask_pad=mask_pad,
- att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :],
- cnn_cache=cnn_cache[i, :, :, :]
- if cnn_cache.size(0) > 0 else cnn_cache
- )
-
- if i in self.stride_layer_idx:
- # compute time dimension for next block
- efficient_index = self.stride_layer_idx.index(i)
- att_mask = att_mask[:, ::self.stride[efficient_index],
- ::self.stride[efficient_index]]
- mask_pad = mask_pad[:, ::self.stride[efficient_index],
- ::self.stride[efficient_index]]
- pos_emb = pos_emb[:, ::self.stride[efficient_index], :]
-
- # shape(new_att_cache) = [batch, head, time2, outdim]
- new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :]
- # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2]
- new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID
-
- # use repeat_interleave to new_att_cache
- # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2)
- new_att_cache = new_att_cache.unsqueeze(3). \
- repeat(1, 1, 1, factor, 1).flatten(2, 3)
- # padding new_cnn_cache to cnn.lorder for causal convolution
- new_cnn_cache = F.pad(
- new_cnn_cache,
- (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0))
-
- if i == 0:
- # record length for the first block as max length
- max_att_len = new_att_cache.size(2)
- max_cnn_len = new_cnn_cache.size(3)
-
- # update real shape of att_cache and cnn_cache
- r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1))
- r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:])
-
- if self.encoder.normalize_before:
- chunk_out = self.encoder.after_norm(xs)
- else:
- chunk_out = xs
-
- # shape of r_att_cache: (b, elayers, head, time2, outdim)
- r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx
- # shape of r_cnn_cache: (b, elayers, outdim, cache_t2)
- r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers
-
- # <---------forward_chunk END--------->
-
- log_ctc_probs = self.ctc.log_softmax(chunk_out)
- log_probs, log_probs_idx = torch.topk(log_ctc_probs,
- self.beam_size,
- dim=2)
- log_probs = log_probs.to(chunk_xs.dtype)
-
- r_offset = offset + chunk_out.shape[1]
- # the below ops not supported in Tensorrt
- # chunk_out_lens = torch.div(chunk_lens, subsampling_rate,
- # rounding_mode='floor')
- chunk_out_lens = chunk_lens // self.subsampling_rate // \
- self.calculate_downsampling_factor(self.num_blocks + 1)
- chunk_out_lens += 1
- r_offset = r_offset.unsqueeze(1)
-
- return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \
- r_offset, r_att_cache, r_cnn_cache, r_cache_mask
-
-
-class Decoder(torch.nn.Module):
- def __init__(self,
- decoder: TransformerDecoder,
- ctc_weight: float = 0.5,
- reverse_weight: float = 0.0,
- beam_size: int = 10,
- decoder_fastertransformer: bool = False):
- super().__init__()
- self.decoder = decoder
- self.ctc_weight = ctc_weight
- self.reverse_weight = reverse_weight
- self.beam_size = beam_size
- self.decoder_fastertransformer = decoder_fastertransformer
-
- def forward(self,
- encoder_out: torch.Tensor,
- encoder_lens: torch.Tensor,
- hyps_pad_sos_eos: torch.Tensor,
- hyps_lens_sos: torch.Tensor,
- r_hyps_pad_sos_eos: torch.Tensor,
- ctc_score: torch.Tensor):
- """Decoder
- Args:
- encoder_out: B x T x F
- encoder_lens: B
- hyps_pad_sos_eos: B x beam x (T2+1),
- hyps with sos & eos and padded by ignore id
- hyps_lens_sos: B x beam, length for each hyp with sos
- r_hyps_pad_sos_eos: B
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder,
- (encoder_out, encoder_out_lens,
- hyps_pad_sos_eos, hyps_lens_sos,
- r_hyps_pad_sos_eos, ctc_score),
- decoder_onnx_path,
- export_params=True,
- opset_version=13,
- do_constant_folding=True,
- input_names=input_names,
- output_names=output_names,
- dynamic_axes={'encoder_out': {0: 'B', 1: 'T'},
- 'encoder_out_lens': {0: 'B'},
- 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'},
- 'hyps_lens_sos': {0: 'B'},
- 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'},
- 'ctc_score': {0: 'B'},
- 'best_index': {0: 'B'},
- },
- verbose=False
- )
- with torch.no_grad():
- o0 = decoder(encoder_out,
- encoder_out_lens,
- hyps_pad_sos_eos,
- hyps_lens_sos,
- r_hyps_pad_sos_eos,
- ctc_score)
- providers = ["CUDAExecutionProvider"]
- ort_session = onnxruntime.InferenceSession(decoder_onnx_path,
- providers=providers)
-
- input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos,
- hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score]
- ort_inputs = {}
- input_tensors = to_numpy(input_tensors)
- for idx, name in enumerate(input_names):
- ort_inputs[name] = input_tensors[idx]
-
- # if model.reverse_weight == 0,
- # the r_hyps_pad will be removed
- # from the onnx decoder since it doesn't play any role
- if model.reverse_weight == 0:
- del ort_inputs['r_hyps_pad_sos_eos']
- ort_outs = ort_session.run(None, ort_inputs)
-
- # check decoder output
- if decoder_fastertransformer:
- test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05)
- else:
- test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05)
- logger.info("export to onnx decoder succeed!")
-
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='export x86_gpu model')
- parser.add_argument('--config', required=True, help='config file')
- parser.add_argument('--checkpoint', required=True, help='checkpoint model')
- parser.add_argument('--cmvn_file', required=False, default='', type=str,
- help='global_cmvn file, default path is in config file')
- parser.add_argument('--reverse_weight', default=-1.0, type=float,
- required=False,
- help='reverse weight for bitransformer,' +
- 'default value is in config file')
- parser.add_argument('--ctc_weight', default=-1.0, type=float,
- required=False,
- help='ctc weight, default value is in config file')
- parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size')
- parser.add_argument('--seq_len', default=512, type=int, required=False,
- help="Encoder seq_len")
- parser.add_argument('--beam_size', default=10, type=int, required=False,
- help="beam size would be ctc output size")
- parser.add_argument('--output_onnx_dir',
- default="onnx_model",
- help='output onnx encoder and decoder directory')
- parser.add_argument('--fp16',
- action='store_true',
- help='whether to export fp16 model, default false')
- # arguments for streaming encoder
- parser.add_argument('--streaming',
- action='store_true',
- help="whether to export streaming encoder, default false")
- parser.add_argument('--decoding_chunk_size',
- default=16,
- type=int,
- required=False,
- help='the decoding chunk size, <=0 is not supported')
- parser.add_argument('--num_decoding_left_chunks',
- default=5,
- type=int,
- required=False,
- help="number of left chunks, <= 0 is not supported")
- parser.add_argument('--decoder_fastertransformer',
- action='store_true',
- help='return decoder_out and best_index for ft')
- args = parser.parse_args()
-
- torch.manual_seed(0)
- torch.set_printoptions(precision=10)
-
- with open(args.config, 'r') as fin:
- configs = yaml.load(fin, Loader=yaml.FullLoader)
- if
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
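The grouped attention removed above shortens the attended sequence by padding the time axis to a multiple of `group_size` and folding `group_size` neighbouring frames into the per-head feature dimension. A simplified sketch of that reshape (the deleted `pad4group` interleaves it with the head split and also subsamples the mask):

```python
# Minimal sketch of the grouped-attention reshape (not the wenet code):
# pad the time axis to a multiple of group_size, then fold group_size
# neighbouring frames into the feature dimension of each head.
import torch
import torch.nn.functional as F

def fold_groups(x: torch.Tensor, group_size: int) -> torch.Tensor:
    """(batch, head, time, d_k) -> (batch, head, time/group, d_k*group)."""
    b, h, t, d_k = x.size()
    overflow = t % group_size
    # Branch-free padding amount, same effect as the 1e-17 trick above:
    padding = (group_size - overflow) % group_size
    x = F.pad(x, (0, 0, 0, padding), value=0.0)
    return x.reshape(b, h, -1, d_k * group_size)

q = torch.randn(2, 4, 50, 64)      # 50 frames, group_size = 3
print(fold_groups(q, 3).shape)     # torch.Size([2, 4, 17, 192])
```

With `group_size = 3` the attention matrices shrink from roughly `time x time` to `(time/3) x (time/3)`, which is where the efficiency gain comes from.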
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
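For reference, the convolution module just removed is the standard conformer block: a pointwise expansion to `2 * channels`, a GLU gate back to `channels`, a depthwise convolution, normalization plus activation, and a final pointwise projection. A shape-only sketch, ignoring masks, caches and the causal and stride options:

```python
# Shape-only sketch of the conformer convolution module above
# (illustrative, not the wenet implementation).
import torch
import torch.nn as nn

channels, kernel_size = 256, 15
pointwise1 = nn.Conv1d(channels, 2 * channels, 1)
depthwise = nn.Conv1d(channels, channels, kernel_size,
                      padding=(kernel_size - 1) // 2, groups=channels)
norm = nn.BatchNorm1d(channels)
pointwise2 = nn.Conv1d(channels, channels, 1)
act = nn.SiLU()                          # "swish", as commonly configured here

x = torch.randn(8, 100, channels)        # (batch, time, channels)
y = x.transpose(1, 2)                    # (batch, channels, time)
y = nn.functional.glu(pointwise1(y), dim=1)   # 2C -> C via GLU
y = pointwise2(act(norm(depthwise(y))))  # depthwise + norm + activation
y = y.transpose(1, 2)                    # back to (batch, time, channels)
print(y.shape)                           # torch.Size([8, 100, 256])
```

The causal variant instead pads `kernel_size - 1` frames on the left and keeps those frames as `new_cache` for the next chunk, which is exactly the cache handling shown above.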
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. 
- self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
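The attention score in the module above is the Transformer-XL decomposition: a content term `(q + u) k^T` plus a position term `(q + v) p^T`, scaled by `sqrt(d_k)`, with the optional `rel_shift` applied only to the position term. A minimal tensor-shape sketch, with `u` and `v` standing for `pos_bias_u` and `pos_bias_v`:

```python
# Shape sketch of the relative-position score above (illustrative only;
# u/v stand for pos_bias_u/pos_bias_v, p for the projected pos_emb).
import math
import torch

b, h, t, d_k = 2, 4, 50, 64
q = torch.randn(b, t, h, d_k)          # (batch, time1, head, d_k)
k = torch.randn(b, h, t, d_k)
p = torch.randn(b, h, t, d_k)
u = torch.randn(h, d_k)                # pos_bias_u
v = torch.randn(h, d_k)                # pos_bias_v

matrix_ac = torch.matmul((q + u).transpose(1, 2), k.transpose(-2, -1))
matrix_bd = torch.matmul((q + v).transpose(1, 2), p.transpose(-2, -1))
scores = (matrix_ac + matrix_bd) / math.sqrt(d_k)
print(scores.shape)                    # torch.Size([2, 4, 50, 50])
```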
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - 
static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return padding * 
pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 @@ -from 
typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - assert 
batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
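The prefix beam search above fuses the transducer and CTC distributions by weighting them in probability space and then re-taking the log. The fusion step in isolation (the weights are illustrative, not tuned values):

```python
import torch


def fuse_log_probs(transducer_logp: torch.Tensor,
                   ctc_logp: torch.Tensor,
                   transducer_weight: float = 0.7,
                   ctc_weight: float = 0.3) -> torch.Tensor:
    """Weighted fusion of two log-prob distributions, done in probability space.

    Both inputs are log-probabilities over the vocabulary, shape [N, vocab].
    """
    fused = transducer_weight * transducer_logp.exp() + ctc_weight * ctc_logp.exp()
    return fused.log()


# Sanity check: fusing a distribution with itself (weights summing to 1)
# returns the same distribution.
logp = torch.log_softmax(torch.randn(2, 5), dim=-1)
print(torch.allclose(fuse_log_probs(logp, logp), logp, atol=1e-6))  # True
```

A numerically safer variant adds the log-weights and combines with `torch.logsumexp`; the exp/log form mirrors the deleted code and is acceptable while the fused probabilities stay well above float underflow.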
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
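The training forward pass above reduces to a weighted sum of RNN-T, CTC and attention losses. A toy, runnable sketch of the RNN-T term and the weighting, assuming `torchaudio.functional.rnnt_loss` (which the deleted code also calls) and placeholder scalars standing in for the other two branches:

```python
import torch
import torchaudio

B, T, U, V = 2, 8, 4, 6          # batch, encoder frames, target length, vocab size
blank = 0

# Joint network output: one score per (frame, label position, vocab entry).
joint_out = torch.randn(B, T, U + 1, V, requires_grad=True)
targets = torch.randint(1, V, (B, U), dtype=torch.int32)
logit_lengths = torch.full((B,), T, dtype=torch.int32)
target_lengths = torch.full((B,), U, dtype=torch.int32)

loss_rnnt = torchaudio.functional.rnnt_loss(
    joint_out, targets, logit_lengths, target_lengths,
    blank=blank, reduction="mean")

# Placeholders standing in for the CTC and attention-decoder losses.
loss_ctc = torch.tensor(1.5)
loss_att = torch.tensor(2.0)

transducer_weight, ctc_weight = 0.75, 0.1
attention_weight = 1.0 - transducer_weight - ctc_weight   # weights sum to 1
loss = (transducer_weight * loss_rnnt
        + ctc_weight * loss_ctc
        + attention_weight * loss_att)
loss.backward()
print(loss.item())
```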
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
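The batched attention beam search above flattens batch and beam into `B*N` rows, so after every top-k step it has to recover which parent beam (and therefore which cache row) each surviving hypothesis extends. The index arithmetic in isolation, as a sketch:

```python
import torch

B, N = 2, 3                      # batch size, beam size
scores = torch.randn(B * N, N)   # each of the B*N live beams proposes N tokens

flat = scores.view(B, N * N)                 # regroup proposals per utterance
top_scores, offset = flat.topk(N, dim=-1)    # keep the N best continuations each

parent_beam = offset // N                    # which live beam each winner extends
token_slot = offset % N                      # which of that beam's N proposals won

# Global row index into the original (B*N, ...) tensors, e.g. to reorder caches.
base = torch.arange(B).unsqueeze(1) * N      # (B, 1)
parent_row = (base + parent_beam).view(-1)   # (B*N,)
print(parent_row)
```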
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
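`ctc_greedy_search` above is a per-frame argmax followed by the standard CTC collapse: merge consecutive repeats, then drop blanks. A tiny reference version of the collapse:

```python
from itertools import groupby
from typing import List


def collapse_ctc_path(frame_ids: List[int], blank: int = 0) -> List[int]:
    """Collapse a per-frame argmax path: merge repeats, then remove blanks."""
    deduped = [k for k, _ in groupby(frame_ids)]      # merge consecutive repeats
    return [k for k in deduped if k != blank]         # drop the blank symbol


print(collapse_ctc_path([5, 5, 0, 5, 0, 7, 7]))  # [5, 5, 7]
```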
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
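The `(pb, pnb)` bookkeeping in `_ctc_prefix_beam_search` above is easiest to follow on a toy input. The sketch below keeps the same three update cases (blank, repeated symbol, new symbol) but, unlike the deleted code, skips the per-frame top-k prune for brevity:

```python
import math
from collections import defaultdict
from typing import List, Tuple


def log_add(args: List[float]) -> float:
    """Stable log(sum(exp(args)))."""
    if all(a == -float('inf') for a in args):
        return -float('inf')
    a_max = max(args)
    return a_max + math.log(sum(math.exp(a - a_max) for a in args))


def ctc_prefix_beam_search(log_probs: List[List[float]],
                           beam_size: int = 3,
                           blank: int = 0) -> List[Tuple[tuple, float]]:
    # prefix -> (log prob of paths ending in blank, ending in non-blank)
    cur = [(tuple(), (0.0, -float('inf')))]
    for frame in log_probs:
        nxt = defaultdict(lambda: (-float('inf'), -float('inf')))
        for s, ps in enumerate(frame):
            for prefix, (pb, pnb) in cur:
                last = prefix[-1] if prefix else None
                if s == blank:
                    n_pb, n_pnb = nxt[prefix]
                    nxt[prefix] = (log_add([n_pb, pb + ps, pnb + ps]), n_pnb)
                elif s == last:
                    # *s + s -> *ss only through a blank-ending path ...
                    n_pb, n_pnb = nxt[prefix + (s,)]
                    nxt[prefix + (s,)] = (n_pb, log_add([n_pnb, pb + ps]))
                    # ... otherwise the repeat collapses onto the same prefix.
                    n_pb, n_pnb = nxt[prefix]
                    nxt[prefix] = (n_pb, log_add([n_pnb, pnb + ps]))
                else:
                    n_pb, n_pnb = nxt[prefix + (s,)]
                    nxt[prefix + (s,)] = (n_pb, log_add([n_pnb, pb + ps, pnb + ps]))
        cur = sorted(nxt.items(), key=lambda kv: log_add(list(kv[1])),
                     reverse=True)[:beam_size]
    return [(hyp, log_add(list(p))) for hyp, p in cur]


logp = [[math.log(p) for p in row] for row in
        [[0.6, 0.3, 0.1], [0.2, 0.7, 0.1], [0.5, 0.1, 0.4]]]
print(ctc_prefix_beam_search(logp, beam_size=2))
```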
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
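The gather/where construction above replaces `reverse_pad_list` + `add_sos_eos` with ops that ONNX can export. Stripped of the sos handling, the core trick is reversing each padded row up to its own length; a sketch (it uses `clamp` where the deleted code multiplies by the mask, with the same result):

```python
import torch


def reverse_padded(hyps: torch.Tensor, lens: torch.Tensor,
                   pad_value: int) -> torch.Tensor:
    """Reverse each row of a padded [beam, max_len] tensor up to its length,
    writing pad_value into the padded tail, using only gather/where."""
    index_range = torch.arange(hyps.size(1), device=hyps.device)
    seq_mask = lens.unsqueeze(1) > index_range              # True on real tokens
    index = (lens.unsqueeze(1) - 1 - index_range).clamp(min=0)
    reversed_hyps = torch.gather(hyps, 1, index)
    return torch.where(seq_mask, reversed_hyps, torch.full_like(hyps, pad_value))


hyps = torch.tensor([[1, 2, 3], [9, 8, 4], [2, 0, 0]])
lens = torch.tensor([3, 3, 1])
print(reverse_padded(hyps, lens, pad_value=-1))
# tensor([[ 3,  2,  1],
#         [ 4,  8,  9],
#         [ 2, -1, -1]])
```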
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
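The cache handling above always splits and re-concatenates, even for a zero-length cache, so that ONNX export sees one static graph with no data-dependent branch. The pattern in isolation, as a sketch:

```python
from typing import Tuple

import torch


def update_kv_cache(k: torch.Tensor, v: torch.Tensor,
                    cache: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Prepend cached key/value frames to the current chunk's k/v.

    k, v:  [batch, head, time_chunk, d_k]
    cache: [batch, head, time_cache, d_k * 2], keys and values packed on the
           last dim; a zero-length time_cache flows through the same concat.
    """
    key_cache, value_cache = torch.split(cache, cache.size(-1) // 2, dim=-1)
    k = torch.cat([key_cache, k], dim=2)
    v = torch.cat([value_cache, v], dim=2)
    new_cache = torch.cat((k, v), dim=-1)     # pack back for the next chunk
    return k, v, new_cache


k = torch.randn(1, 4, 16, 64)
v = torch.randn(1, 4, 16, 64)
empty = torch.zeros(1, 4, 0, 128)             # first chunk: empty cache
k2, v2, cache = update_kv_cache(k, v, empty)
print(k2.shape, cache.shape)   # (1, 4, 16, 64) (1, 4, 16, 128)
```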
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
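# --- Editorial sketch (not part of the deleted sources) ---------------------
# Illustrates how the checkpoint helpers removed above are typically used:
# save_checkpoint() writes the state dict plus a sibling .yaml info file, and
# load_checkpoint() restores it and returns that info dict. The toy model and
# the path "demo.pt" are hypothetical names chosen only for this example.
import torch

model = torch.nn.Linear(4, 2)
save_checkpoint(model, "demo.pt", infos={"epoch": 0})
configs = load_checkpoint(model, "demo.pt")  # also reads demo.yaml if present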
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
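# --- Editorial sketch (not part of the deleted sources) ---------------------
# Shows how the helpers removed above fit into greedy CTC decoding: take the
# per-frame argmax and collapse repeats and blanks with
# remove_duplicates_and_blank() (blank id is hard-coded to 0 there).
# Tensor values below are toy examples.
import torch

log_probs = torch.log_softmax(torch.randn(6, 5), dim=-1)  # (T=6 frames, V=5 symbols)
frame_ids = log_probs.argmax(dim=-1).tolist()             # e.g. [0, 3, 3, 0, 2, 2]
hyp = remove_duplicates_and_blank(frame_ids)              # e.g. [3, 2]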
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
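# --- Editorial sketch (not part of the deleted sources) ---------------------
# Usage of the forced_align() helper removed above: it expects per-frame
# log-probabilities of shape (T, D) and a 1-D tensor of label ids, and returns
# one symbol id (label or blank) per frame. Shapes and ids below are toy values.
import torch

ctc_probs = torch.log_softmax(torch.randn(8, 10), dim=-1)  # 8 frames, 10 symbols
labels = torch.tensor([3, 5, 5])                           # hypothetical transcript ids
alignment = forced_align(ctc_probs, labels, blank_id=0)    # list of 8 ids, blanks interleaved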
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
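# --- Editorial sketch (not part of the deleted sources) ---------------------
# Input format expected by read_symbol_table() removed above: one
# "<symbol> <integer id>" pair per line. The file name "units.txt" and the
# entries are hypothetical.
with open("units.txt", "w", encoding="utf8") as f:
    f.write("<blank> 0\n<unk> 1\n你 2\n好 3\n")

symbol_table = read_symbol_table("units.txt")
# -> {'<blank>': 0, '<unk>': 1, '你': 2, '好': 3}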
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/hkust/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
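The annealing helpers above are plain functions of the step index, so their shape is easy to inspect. The following standalone sketch copies the `_cosine_annealing` expression and evaluates it on an assumed toy schedule (peak lr 1e-3, floor 1e-5, 100 decay steps); the numbers are for illustration only.

```python
import math

def cosine_annealing(initial_lr: float, step: int, max_steps: int, min_lr: float) -> float:
    # Same expression as _cosine_annealing above.
    mult = 0.5 * (1 + math.cos(math.pi * step / max_steps))
    return (initial_lr - min_lr) * mult + min_lr

for step in (0, 25, 50, 75, 100):
    print(step, f"{cosine_annealing(1e-3, step, 100, 1e-5):.2e}")
# step 0 returns the full 1e-3, step 100 returns the 1e-5 floor, and the values
# in between follow the half-cosine that CosineAnnealing applies after warmup.
```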
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
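To make the warmup / hold / decay description above concrete, here is a minimal sketch of the resulting curve. The peak lr, warmup length, and hold length are assumptions for illustration; the decay branch reuses the `_noam_hold_annealing` formula defined earlier with decay_rate 0.5, i.e. Noam-style decay.

```python
def noam_hold_annealing(initial_lr, step, warmup_steps, hold_steps, decay_rate, min_lr):
    # Same expression as _noam_hold_annealing above.
    T_warmup_decay = max(1, warmup_steps ** decay_rate)
    T_hold_decay = max(1, (step - hold_steps) ** decay_rate)
    return max(initial_lr * T_warmup_decay / T_hold_decay, min_lr)

peak_lr, warmup, hold_end = 1e-3, 1000, 4000   # assumed values, not from any recipe
for step in (500, 1000, 2500, 4000, 8000, 40000):
    if step <= warmup:
        lr = peak_lr * step / warmup           # linear ramp up to the explicit peak
    elif step <= hold_end:
        lr = peak_lr                           # hold phase: stay at the peak
    else:
        lr = noam_hold_annealing(peak_lr, step, warmup, hold_end - warmup, 0.5, 1e-5)
    print(step, f"{lr:.2e}")
```

The decay resumes exactly at the peak value when the hold phase ends and then falls off as the square root of the elapsed steps, which is the "fast decay after prolonged high LR" behaviour described above.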
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/README.md deleted file mode 100644 index 0a491eeb453db8e87c710fb1c075a76422433eb9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# Performance Record - -## Conformer Bidecoder Transducer Result - -* Feature info: using fbank feature, dither, cmvn, online speed perturb -* Training info: lr 0.001, dynamic batch with max_frames_in_batch 4000, 8 gpu, acc_grad 1, 60 epochs -* Training weight info: transducer_weight 0.75, ctc_weight 0.1, reverse_weight 0.30, average_num 10 -* Predictor type: lstm - -| decoding mode | dev_clean | dev_other | test_clean | test_other | -|-----------------------|------------|-----------|------------|------------| -| rnnt_greedy_search | 3.42% | 8.99% | 3.56% | 9.15% | -| rnnt_beam_search | 3.35% | 8.77% | 3.45% | 8.78% | -| rnnt_beam_att_rescore | 3.25% | 8.66% | 3.41% | 8.68% | - -Pretrained model: https://huggingface.co/yuekai/wenet-asr-librispeech-conformer-transducer-mtl/blob/main/exp/conformer_transducer/avg_10.pt - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/conf/conformer_rnnt.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/conf/conformer_rnnt.yaml deleted file mode 100644 index 8a517cccae85d5d26b5fafefc8bd97f4118060d2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/conf/conformer_rnnt.yaml +++ /dev/null @@ -1,100 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d # encoder input type, you can chose conv2d, 
conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: true - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - - -joint_conf: - join_dim: 512 - prejoin_linear: True - postjoin_linear: false - joint_mode: 'add' - activation: 'tanh' - -predictor: rnn -predictor_conf: - embed_size: 256 - output_size: 256 - embed_dropout: 0.1 - hidden_size: 256 - num_layers: 2 - bias: true - rnn_type: 'lstm' - dropout: 0.1 - -decoder: bitransformer -decoder_conf: - attention_heads: 4 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 3 - positional_dropout_rate: 0.1 - r_num_blocks: 3 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid transducer+ctc+attention -model_conf: - transducer_weight: 0.75 - ctc_weight: 0.1 - attention_weight: 0.15 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -dataset_conf: - filter_conf: - max_length: 1650 - min_length: 10 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'dynamic' # static or dynamic - max_frames_in_batch: 4000 - -grad_clip: 4 -accum_grad: 1 -max_epoch: 140 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/local/data_prep_torchaudio.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/local/data_prep_torchaudio.sh deleted file mode 100644 index c7dc1deb7dec59f571c8f6935fe36c6aea2e8e99..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/local/data_prep_torchaudio.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Vassil Panayotov -# 2014 Johns Hopkins University (author: Daniel Povey) -# Apache 2.0 - -if [ "$#" -ne 2 ]; then - echo "Usage: $0 " - echo "e.g.: $0 /export/a15/vpanayotov/data/LibriSpeech/dev-clean data/dev-clean" - exit 1 -fi - -src=$1 -dst=$2 - -# all utterances are FLAC compressed -if ! which flac >&/dev/null; then - echo "Please install 'flac' on ALL worker nodes!" - exit 1 -fi - -mkdir -p $dst || exit 1 - -[ ! -d $src ] && echo "$0: no such directory $src" && exit 1 - -wav_scp=$dst/wav.scp; [[ -f "$wav_scp" ]] && rm $wav_scp -trans=$dst/text; [[ -f "$trans" ]] && rm $trans - -for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do - reader=$(basename $reader_dir) - if ! [ $reader -eq $reader ]; then # not integer. - echo "$0: unexpected subdirectory name $reader" - exit 1 - fi - - for chapter_dir in $(find -L $reader_dir/ -mindepth 1 -maxdepth 1 -type d | sort); do - chapter=$(basename $chapter_dir) - if ! [ "$chapter" -eq "$chapter" ]; then - echo "$0: unexpected chapter-subdirectory name $chapter" - exit 1 - fi - - find -L $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ - awk -v "dir=$chapter_dir" '{printf "%s %s/%s.flac\n", $0, dir, $0}' >>$wav_scp|| exit 1 - - chapter_trans=$chapter_dir/${reader}-${chapter}.trans.txt - [ ! 
-f $chapter_trans ] && echo "$0: expected file $chapter_trans to exist" && exit 1 - cat $chapter_trans >>$trans - done -done - -echo "$0: successfully prepared data in $dst" - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/local/download_and_untar.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/local/download_and_untar.sh deleted file mode 100644 index cd32fb6b989d7229272f1066a75a1688df2bf06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/local/download_and_untar.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Johns Hopkins University (author: Daniel Povey) -# Apache 2.0 - -remove_archive=false - -if [ "$1" == --remove-archive ]; then - remove_archive=true - shift -fi - -if [ $# -ne 3 ]; then - echo "Usage: $0 [--remove-archive] " - echo "e.g.: $0 /export/a15/vpanayotov/data www.openslr.org/resources/11 dev-clean" - echo "With --remove-archive it will remove the archive after successfully un-tarring it." - echo " can be one of: dev-clean, test-clean, dev-other, test-other," - echo " train-clean-100, train-clean-360, train-other-500." - exit 1 -fi - -data=$1 -url=$2 -part=$3 - -if [ ! -d "$data" ]; then - echo "$0: no such directory $data" - exit 1 -fi - -part_ok=false -list="dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500" -for x in $list; do - if [ "$part" == $x ]; then part_ok=true; fi -done -if ! $part_ok; then - echo "$0: expected to be one of $list, but got '$part'" - exit 1 -fi - -if [ -z "$url" ]; then - echo "$0: empty URL base." - exit 1 -fi - -if [ -f $data/LibriSpeech/$part/.complete ]; then - echo "$0: data part $part was already successfully extracted, nothing to do." - exit 0 -fi - - -# sizes of the archive files in bytes. This is some older versions. -sizes_old="371012589 347390293 379743611 361838298 6420417880 23082659865 30626749128" -# sizes_new is the archive file sizes of the final release. Some of these sizes are of -# things we probably won't download. -sizes_new="337926286 314305928 695964615 297279345 87960560420 33373768 346663984 328757843 6387309499 23049477885 30593501606" - -if [ -f $data/$part.tar.gz ]; then - size=$(/bin/ls -l $data/$part.tar.gz | awk '{print $5}') - size_ok=false - for s in $sizes_old $sizes_new; do if [ $s == $size ]; then size_ok=true; fi; done - if ! $size_ok; then - echo "$0: removing existing file $data/$part.tar.gz because its size in bytes $size" - echo "does not equal the size of one of the archives." - rm $data/$part.tar.gz - else - echo "$data/$part.tar.gz exists and appears to be complete." - fi -fi - -if [ ! -f $data/$part.tar.gz ]; then - if ! which wget >/dev/null; then - echo "$0: wget is not installed." - exit 1 - fi - full_url=$url/$part.tar.gz - echo "$0: downloading data from $full_url. This may take some time, please be patient." - - if ! wget -P $data --no-check-certificate $full_url; then - echo "$0: error executing wget $full_url" - exit 1 - fi -fi - -if ! tar -C $data -xvzf $data/$part.tar.gz; then - echo "$0: error un-tarring archive $data/$part.tar.gz" - exit 1 -fi - -touch $data/LibriSpeech/$part/.complete - -echo "$0: Successfully downloaded and un-tarred $data/$part.tar.gz" - -if $remove_archive; then - echo "$0: removing $data/$part.tar.gz file since --remove-archive option was supplied." 
- rm $data/$part.tar.gz -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/path.sh deleted file mode 100644 index ac1ca08baf5d4540b92ed239b8aa7cd613064a8c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/run.sh deleted file mode 100644 index a7a30cc29aff691e52b79c0cd26384a114c30a04..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/run.sh +++ /dev/null @@ -1,286 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. - -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -stage=-1 # start from 0 if you need to start from data preparation -stop_stage=7 -# data -data_url=www.openslr.org/resources/12 -# data_url=https://us.openslr.org/resources/12 -data_url=https://openslr.elda.org/resources/12 -# use your own data path -datadir= - -# wav data dir -wave_data=data -# Optional train_config -# 1. conf/train_transformer_large.yaml: Standard transformer -train_config=conf/conformer_rnnt.yaml -checkpoint= -cmvn=true -do_delta=false - -dir=exp/conformer_transducer - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -# maybe you can try to adjust it if you can not get close results as README.md -average_num=10 -decode_modes="attention_rescoring ctc_greedy_search ctc_prefix_beam_search attention" - -. tools/parse_options.sh || exit 1; - -# bpemode (unigram or bpe) -nbpe=5000 -bpemode=unigram - -set -e -set -u -set -o pipefail - -train_set=train_960 -dev_set=dev -recog_set="test_clean test_other dev_clean dev_other" - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - echo "stage -1: Data Download" - for part in train-clean-100 train-clean-360 train-other-500; do - local/download_and_untar.sh ${datadir} ${data_url} ${part} - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - ### Task dependent. You have to make data the following preparation part by yourself. - ### But you can utilize Kaldi recipes in most cases - echo "stage 0: Data preparation" - for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do - # use underscore-separated names in data directories. - local/data_prep_torchaudio.sh ${datadir}/LibriSpeech/${part} $wave_data/${part//-/_} - done -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - ### Task dependent. You have to design training and dev sets by yourself. 
- ### But you can utilize Kaldi recipes in most cases - echo "stage 1: Feature Generation" - mkdir -p $wave_data/train_960 - # merge total training data - for set in train_clean_100 train_clean_360 train_other_500; do - for f in `ls $wave_data/$set`; do - cat $wave_data/$set/$f >> $wave_data/train_960/$f - done - done - mkdir -p $wave_data/dev - # merge total dev data - for set in dev_clean dev_other; do - for f in `ls $wave_data/$set`; do - cat $wave_data/$set/$f >> $wave_data/$dev_set/$f - done - done - - tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \ - --in_scp $wave_data/$train_set/wav.scp \ - --out_cmvn $wave_data/$train_set/global_cmvn - -fi - - -dict=$wave_data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt -bpemodel=$wave_data/lang_char/${train_set}_${bpemode}${nbpe} -echo "dictionary: ${dict}" -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - ### Task dependent. You have to check non-linguistic symbols used in the corpus. - echo "stage 2: Dictionary and Json Data Preparation" - mkdir -p data/lang_char/ - - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - # we borrowed these code and scripts which are related bpe from ESPnet. - cut -f 2- -d" " $wave_data/${train_set}/text > $wave_data/lang_char/input.txt - tools/spm_train --input=$wave_data/lang_char/input.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000 - tools/spm_encode --model=${bpemodel}.model --output_format=piece < $wave_data/lang_char/input.txt | tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict} - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # - wc -l ${dict} -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - # Prepare wenet required data - echo "Prepare data, prepare required format" - for x in $dev_set ${recog_set} $train_set ; do - tools/make_raw_list.py $wave_data/$x/wav.scp $wave_data/$x/text \ - $wave_data/$x/data.list - done - -fi - - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - # Training - mkdir -p $dir - INIT_FILE=$dir/ddp_init - rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - dist_backend="nccl" - cmvn_opts= - $cmvn && cmvn_opts="--cmvn $wave_data/${train_set}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - python3 wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type raw \ - --symbol_table $dict \ - --bpe_model ${bpemodel}.model \ - --train_data $wave_data/$train_set/data.list \ - --cv_data $wave_data/$dev_set/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $num_gpus \ - --ddp.rank $i \ - --ddp.dist_backend $dist_backend \ - --num_workers 4 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - cmvn_opts= - $cmvn && cmvn_opts="--cmvn data/${train_set}/global_cmvn" - # TODO, Add model average here - mkdir -p $dir/test - 
if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size= - ctc_weight=0.5 - # Polling GPU id begin with index 0 - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - idx=0 - for test in $recog_set; do - for mode in ${decode_modes}; do - { - { - test_dir=$dir/${test}_${mode} - mkdir -p $test_dir - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$idx+1]) - python wenet/bin/recognize.py --gpu $gpu_id \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type raw \ - --dict $dict \ - --bpe_model ${bpemodel}.model \ - --test_data $wave_data/$test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --result_file $test_dir/text_bpe \ - --ctc_weight $ctc_weight \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - - cut -f2- -d " " $test_dir/text_bpe > $test_dir/text_bpe_value_tmp - cut -f1 -d " " $test_dir/text_bpe > $test_dir/text_bpe_key_tmp - tools/spm_decode --model=${bpemodel}.model --input_format=piece \ - < $test_dir/text_bpe_value_tmp | sed -e "s/▁/ /g" > $test_dir/text_value_tmp - paste -d " " $test_dir/text_bpe_key_tmp $test_dir/text_value_tmp > $test_dir/text - - python tools/compute-wer.py --char=1 --v=1 \ - $wave_data/$test/text $test_dir/text > $test_dir/wer - } & - - ((idx+=1)) - if [ $idx -eq $num_gpus ]; then - idx=0 - fi - } - done - done - wait - -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip -fi - -# Optionally, you can add LM and test it with runtime. -if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then - lm=data/local/lm - lexicon=data/local/dict/lexicon.txt - mkdir -p $lm - mkdir -p data/local/dict - - # 7.1 Download & format LM - which_lm=3-gram.pruned.1e-7.arpa.gz - if [ ! -e ${lm}/${which_lm} ]; then - wget http://www.openslr.org/resources/11/${which_lm} -P ${lm} - fi - echo "unzip lm($which_lm)..." - gunzip -k ${lm}/${which_lm} -c > ${lm}/lm.arpa - echo "Lm saved as ${lm}/lm.arpa" - - # 7.2 Prepare dict - unit_file=$dict - bpemodel=$bpemodel - # use $dir/words.txt (unit_file) and $dir/train_960_unigram5000 (bpemodel) - # if you download pretrained librispeech conformer model - cp $unit_file data/local/dict/units.txt - if [ ! -e ${lm}/librispeech-lexicon.txt ]; then - wget http://www.openslr.org/resources/11/librispeech-lexicon.txt -P ${lm} - fi - echo "build lexicon..." 
- tools/fst/prepare_dict.py $unit_file ${lm}/librispeech-lexicon.txt \ - $lexicon $bpemodel.model - echo "lexicon saved as '$lexicon'" - - # 7.3 Build decoding TLG - tools/fst/compile_lexicon_token_fst.sh \ - data/local/dict data/local/tmp data/local/lang - tools/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1; - - # 7.4 Decoding with runtime - fst_dir=data/lang_test - for test in ${recog_set}; do - ./tools/decode.sh --nj 6 \ - --beam 10.0 --lattice_beam 5 --max_active 7000 --blank_skip_thresh 0.98 \ - --ctc_weight 0.5 --rescoring_weight 1.0 --acoustic_scale 1.2 \ - --fst_path $fst_dir/TLG.fst \ - --dict_path $fst_dir/words.txt \ - data/$test/wav.scp data/$test/text $dir/final.zip $fst_dir/units.txt \ - $dir/lm_with_runtime_${test} - tail $dir/lm_with_runtime_${test}/wer - done -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. ./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. - elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - 
format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) 
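The brief report assembled above is just order statistics over the per-utterance records: sort by a field, read max/P99/P75/P50/P25/min at fixed indices, and report the mean and a population standard deviation. A toy sketch of the same index arithmetic, with made-up duration values, is shown below for illustration.

```python
import math

durations = sorted([1.2, 2.4, 3.1, 4.8, 5.0, 6.7, 7.3, 9.9, 12.0, 15.5])  # toy data
n = len(durations)
avg = sum(durations) / n
std = math.sqrt(sum((d - avg) ** 2 for d in durations) / n)
points = [('max', n - 1), ('P99', int(n * 0.99)), ('P75', int(n * 0.75)),
          ('P50', int(n * 0.50)), ('P25', int(n * 0.25)), ('min', 0)]
for name, idx in points:
    print(f"{name} dur: {durations[idx]:.3f} s")
print(f"avg dur: {avg:.3f} s, std: {std:.3f}")
```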
- - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! 
-f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. -has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! 
$skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist 
: - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. 
- format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
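
The compute-cer.py scorer deleted above (and the near-identical compute-wer.py that follows) rates each hypothesis against its reference with a Levenshtein alignment and reports N/C/S/D/I counts, from which the CER reported in the results table is derived as (S+D+I)/N. The snippet below is only a minimal sketch of that idea in plain Python, assuming pre-tokenized character lists; it deliberately omits the deleted script's per-token bookkeeping, tag stripping, cluster statistics and alignment printing.

```python
def edit_distance_counts(ref, hyp):
    """Return (substitutions, deletions, insertions) for one optimal
    alignment of hyp against ref -- a minimal sketch of the scoring idea
    in the removed compute-cer.py / compute-wer.py, not a reimplementation."""
    # dp[i][j] = minimal edit cost between ref[:i] and hyp[:j]
    dp = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(1, len(ref) + 1):
        dp[i][0] = i                      # only deletions
    for j in range(1, len(hyp) + 1):
        dp[0][j] = j                      # only insertions
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            sub = dp[i - 1][j - 1] + (ref[i - 1] != hyp[j - 1])
            dp[i][j] = min(sub, dp[i - 1][j] + 1, dp[i][j - 1] + 1)

    # Trace back to split the total cost into S / D / I counts.
    i, j, s, d, ins = len(ref), len(hyp), 0, 0, 0
    while i > 0 or j > 0:
        if i > 0 and j > 0 and dp[i][j] == dp[i - 1][j - 1] + (ref[i - 1] != hyp[j - 1]):
            s += ref[i - 1] != hyp[j - 1]
            i, j = i - 1, j - 1
        elif i > 0 and dp[i][j] == dp[i - 1][j] + 1:
            d, i = d + 1, i - 1
        else:
            ins, j = ins + 1, j - 1
    return s, d, ins


if __name__ == "__main__":
    # Character-level scoring, as used for AISHELL (toy strings, not real data).
    ref = list("甚至出现交易几乎停滞的情况")
    hyp = list("甚至出现交易几乎停止的情况")
    s, d, ins = edit_distance_counts(ref, hyp)
    cer = (s + d + ins) / max(len(ref), 1)
    print(f"N={len(ref)} S={s} D={d} I={ins} CER={cer:.4f}")
```
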
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - 
parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
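
The compute_cmvn_stats.py tool deleted above accumulates per-dimension sums, sums of squares and a total frame count over the training fbank features and writes them out as `mean_stat` / `var_stat` / `frame_num` JSON, i.e. the contents of the `global_cmvn` file used elsewhere for this model. Below is a hedged NumPy sketch of that accumulation, together with the usual way such statistics are applied (subtract the mean, scale by the inverse standard deviation). The random toy matrices stand in for real fbank features and are not part of the original script.

```python
import json
import numpy as np


def accumulate_cmvn(feature_batches, feat_dim=80):
    """Accumulate global CMVN statistics in the same spirit as the removed
    compute_cmvn_stats.py: per-dimension sum, sum of squares, frame count."""
    mean_stat = np.zeros(feat_dim, dtype=np.float64)
    var_stat = np.zeros(feat_dim, dtype=np.float64)
    frame_num = 0
    for feats in feature_batches:            # feats: (num_frames, feat_dim)
        mean_stat += feats.sum(axis=0)
        var_stat += np.square(feats).sum(axis=0)
        frame_num += feats.shape[0]
    return {"mean_stat": mean_stat.tolist(),
            "var_stat": var_stat.tolist(),
            "frame_num": frame_num}


def apply_cmvn(feats, cmvn):
    """Normalize features with the stored statistics (typical usage)."""
    mean = np.array(cmvn["mean_stat"]) / cmvn["frame_num"]
    var = np.array(cmvn["var_stat"]) / cmvn["frame_num"] - mean ** 2
    istd = 1.0 / np.sqrt(np.maximum(var, 1e-20))
    return (feats - mean) * istd


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    utts = [rng.normal(size=(100, 80)), rng.normal(size=(250, 80))]  # fake fbank
    cmvn = accumulate_cmvn(utts, feat_dim=80)
    print(json.dumps({"frame_num": cmvn["frame_num"]}))
    normalized = apply_cmvn(utts[0], cmvn)
    print(normalized.mean(), normalized.std())
```
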
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins 
University (author: Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
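
filter_scp.pl, whose deleted body appears just above, keeps only the lines of an scp-style file whose n-th field (the first by default) occurs in a given id list, optionally inverted with `--exclude`; it is used throughout the removed data-dir fixing scripts. A rough Python equivalent is sketched below; argument handling and error behaviour are deliberately simplified compared with the Perl original.

```python
import argparse
import sys


def filter_scp(id_list_path, scp_lines, field=1, exclude=False):
    """Yield scp lines whose `field`-th token (1-based) appears in the id
    list -- a simplified Python rendering of the removed filter_scp.pl."""
    with open(id_list_path, encoding="utf-8") as f:
        wanted = {line.split()[0] for line in f if line.strip()}
    for line in scp_lines:
        parts = line.split()
        if len(parts) < field:
            continue                      # the Perl original dies here instead
        hit = parts[field - 1] in wanted
        if hit != exclude:
            yield line


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="filter scp lines by id list")
    parser.add_argument("--exclude", action="store_true")
    parser.add_argument("-f", "--field", type=int, default=1)
    parser.add_argument("id_list")
    parser.add_argument("scp", nargs="?")
    args = parser.parse_args()
    stream = open(args.scp, encoding="utf-8") if args.scp else sys.stdin
    for kept in filter_scp(args.id_list, stream, args.field, args.exclude):
        sys.stdout.write(kept)
```
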
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . 
- "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . - " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. 
- } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . $cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. 
-cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', 
'')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n + 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . 
- "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! 
defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. - if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the 
"License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = 
codecs.getreader('utf-8')(sys.stdin if PY2 else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
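The parse_options.sh helper removed above implements the Kaldi convention that a flag `--option-name arg` sets the shell variable `option_name` to `arg`, rejecting names that were not pre-declared and stopping at the first positional argument. A rough Python equivalent of that convention, purely as an illustration (the function name and the defaults shown are hypothetical, not part of this repo, and the sketch omits `--help` and `--name=value` handling):

```python
# Illustrative only: mirrors the Kaldi-style "--option-name value" convention
# implemented by the removed parse_options.sh (simplified sketch).
import sys


def parse_kaldi_options(argv, defaults):
    """Apply "--option-name value" pairs to a dict of pre-declared defaults.

    Like parse_options.sh, unknown option names are rejected, and parsing
    stops at the first positional argument.
    """
    opts = dict(defaults)
    args = list(argv)
    while args and args[0].startswith("--"):
        flag = args.pop(0)
        name = flag[2:].replace("-", "_")
        if name not in opts:
            raise SystemExit(f"invalid option {flag}")
        if not args:
            raise SystemExit(f"missing value for {flag}")
        opts[name] = args.pop(0)
    return opts, args  # args now holds the positional arguments


if __name__ == "__main__":
    # Hypothetical defaults; the real shell scripts declare these as variables.
    options, positional = parse_kaldi_options(sys.argv[1:], {"nj": "1", "stage": "0"})
    print(options, positional)
```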
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
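Most of the per-file work in the removed reduce_data_dir.sh boils down to filter_scp.pl: keep only the lines of a Kaldi-style scp file whose first column appears in a reference list such as utt2spk. A minimal Python sketch of that operation, assuming plain `key value` lines (the helper name and example paths are illustrative, not repo code):

```python
# Illustrative sketch, not repo code: the key-based filtering that the removed
# reduce_data_dir.sh delegates to filter_scp.pl.
def filter_scp(id_list_path, scp_path, out_path):
    """Keep only scp lines whose first column appears in the id-list file."""
    with open(id_list_path, encoding="utf-8") as f:
        keep = {line.split(maxsplit=1)[0] for line in f if line.strip()}
    with open(scp_path, encoding="utf-8") as fin, \
            open(out_path, "w", encoding="utf-8") as fout:
        for line in fin:
            if line.strip() and line.split(maxsplit=1)[0] in keep:
                fout.write(line)


# Example usage (paths are hypothetical):
# filter_scp("dest/utt2spk", "src/wav.scp", "dest/wav.scp")
```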
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - 
stack.enter_context(open(output, "w", encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
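The deleted `subset_data_dir.sh` header above documents its mutually exclusive selection modes (`--per-spk`, `--speakers`, `--shortest`, `--first`, `--last`, `--spk-list`, `--utt-list`). A minimal usage sketch of the tool being removed (the data-directory paths and counts here are hypothetical; the flags and argument order follow the comments and usage text above):

```bash
# Default mode: pick 1000 utterances, spread evenly through the utterance list
# (the actual selection is delegated to tools/subset_scp.pl).
tools/subset_data_dir.sh data/train 1000 data/train_1k

# Keep at most 10 utterances per speaker.
tools/subset_data_dir.sh --per-spk data/train 10 data/train_10perspk

# Keep only the utterances named in a list file (one utterance-id per line).
tools/subset_data_dir.sh --utt-list my_utts.txt data/train data/train_subset
```

In each case the destination directory receives filtered copies of `utt2spk`, `spk2utt`, `wav.scp`, `text`, and the other per-utterance and per-speaker files, which is what the filtering section of the script below performs.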
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - 
else: - non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = 
feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
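As a minimal standalone sketch of the pruning described in the note above (hypothetical `encoder.onnx` path, placeholder feed values), the inputs that survive export can be listed and used to filter an ONNX Runtime feed dict:

```python
import onnx

# Hypothetical path; inputs folded into constants during export (e.g.
# required_cache_size and att_mask in 16/-1, -1/-1 or 16/0 mode) no longer
# appear in graph.input.
onnx_model = onnx.load("encoder.onnx")
kept_inputs = {node.name for node in onnx_model.graph.input}
print("graph inputs:", sorted(kept_inputs))

# Placeholder feed dict (real values are numpy arrays); keep only the
# names the exported graph still expects.
all_feeds = {"chunk": ..., "offset": ..., "required_cache_size": ...,
             "att_cache": ..., "cnn_cache": ..., "att_mask": ...}
feeds = {name: value for name, value in all_feeds.items() if name in kept_inputs}
```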
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
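As a rough worked example of this truncation (all sizes hypothetical, not taken from any real config): with a 16-frame chunk, a 64-frame attention cache, a per-layer downsampling factor of 4, and a 30-position positional embedding, the oldest cache frames are dropped as follows:

```python
# Hypothetical sizes for illustration only.
chunk_len = 16   # xs.size(1)
cache_len = 64   # att_cache.size(3)
factor = 4       # downsampling factor of the current layer
pos_len = 30     # pos_emb.size(1)

att_cache_trunc = 0
if chunk_len + cache_len / factor > pos_len:
    # Same formula as below: trim the cache so chunk + cache // factor
    # fits within the available positional embeddings.
    att_cache_trunc = chunk_len + cache_len // factor - pos_len + 1
print(att_cache_trunc)  # 16 + 16 - 30 + 1 = 3
```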
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
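For intuition on the score fusion that the `Decoder` module above performs before this export, a toy numeric sketch (all values made up) of combining left-to-right, right-to-left, and CTC scores and picking the best hypothesis:

```python
import numpy as np

# Made-up scores for one utterance with beam size 3.
score = np.array([[-4.0, -3.5, -5.0]])      # left-to-right attention scores
r_score = np.array([[-4.2, -3.8, -4.9]])    # right-to-left attention scores
ctc_score = np.array([[-6.0, -5.0, -7.0]])  # CTC prefix scores
reverse_weight, ctc_weight = 0.3, 0.5

fused = score * (1 - reverse_weight) + reverse_weight * r_score
fused = fused + ctc_weight * ctc_score
best_index = fused.argmax(axis=1)
print(best_index)  # [1]
```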
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector<vector<tuple<int,float> > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec<vec<int,float>>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector<vector<tuple<int,float> > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec<tuple<float,float>>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector<tuple<float,float> >. - (begin/end times of bins at the confusion network). - - Binary layout is '<num-bins> <beg1> <end1> <beg2> <end2> ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : '<utt> <rec> <t-beg> <t-end>' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
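To make the thresholds in `filter()` above concrete: durations are counted in 10 ms frames (`samples / sample_rate * 100`), and an utterance survives only if both its frame count and its token count fall inside the configured bounds, including the token/frame ratio. The snippet below is a hypothetical stand-alone restatement, not repository code; `keep_utterance` is an invented name and the inclusive bounds are a simplification of the strict comparisons above.

```python
# Hedged sketch of the pruning rule applied by the filter() stage.
def keep_utterance(num_samples, sample_rate, num_tokens,
                   max_length=10240, min_length=10,
                   token_max_length=200, token_min_length=1,
                   min_ratio=0.0005, max_ratio=1.0):
    num_frames = num_samples / sample_rate * 100        # duration in 10 ms frames
    if not (min_length <= num_frames <= max_length):
        return False                                    # too short or too long
    if not (token_min_length <= num_tokens <= token_max_length):
        return False                                    # transcript length out of range
    ratio = num_tokens / num_frames                     # labels per feature frame
    return min_ratio <= ratio <= max_ratio

# 5 s of 16 kHz audio with a 12-character transcript passes the defaults
print(keep_utterance(num_samples=80000, sample_rate=16000, num_tokens=12))  # True
```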
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
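The CJK-splitting regex in `__tokenize_by_bpe_model()` above does most of the work: each CJK character becomes its own token, while Latin spans are left intact for the BPE model. This tiny stand-alone snippet (not repository code, example text is my own) shows what the split produces for mixed Chinese/English input.

```python
# Demonstrates the capture-group split used before BPE encoding.
import re

pattern = re.compile(r'([\u4e00-\u9fff])')          # CJK Unified Ideographs block
txt = "你好 IT'S OKAY 的"
chars = pattern.split(txt.upper())                   # capture groups keep the CJK chars
mix_chars = [w for w in chars if len(w.strip()) > 0]
print(mix_chars)                                     # ['你', '好', " IT'S OKAY ", '的']
```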
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
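Stepping back to the `padding()` stage shown just before this file: it rounds each batch's time dimension up to one of a few fixed lengths (384, 512, ..., 1280), presumably so the statically shaped encoder only ever sees a handful of input shapes. The sketch below is an illustrative reimplementation, not repository code; the bucket list is read off the if/elif chain above and `pad_to_bucket` is an invented name.

```python
# Hedged sketch of the sequence-length bucketing done at the end of padding().
import torch
from torch.nn.utils.rnn import pad_sequence

BUCKETS = [384, 512, 640, 768, 896, 1024, 1280]

def pad_to_bucket(feats):
    """feats: list of (T_i, feat_dim) tensors belonging to one batch."""
    padded = pad_sequence(feats, batch_first=True, padding_value=0)   # (B, T_max, D)
    t_max = padded.shape[1]
    target = next((b for b in BUCKETS if t_max <= b), t_max)          # no padding past the last bucket
    return torch.nn.functional.pad(padded, (0, 0, 0, target - t_max)) # pad only the time axis

batch = [torch.randn(t, 80) for t in (312, 371, 298)]
out = pad_to_bucket(batch)
assert out.shape == (3, 384, 80)
```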
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
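One detail of `pad4group()` above that is easy to misread is the `overflow // (overflow + 0.00000000000000001)` expression: it turns "pad up to a multiple of group_size, but pad nothing when already aligned" into branch-free arithmetic that survives ONNX export. A stand-alone sketch of just that arithmetic (invented function name, not repository code):

```python
# int(overflow // (overflow + eps)) is 0 when overflow == 0, otherwise 1.
def group_padding(time_steps: int, group_size: int = 3) -> int:
    overflow = time_steps % group_size
    return (group_size - overflow) * int(overflow // (overflow + 0.00000000000000001))

for t in (96, 97, 98, 99):
    print(t, "->", t + group_padding(t))   # 96 -> 96, 97 -> 99, 98 -> 99, 99 -> 99
```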
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
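For the `ConvolutionModule` above, the causal branch's `lorder = kernel_size - 1` left padding is what keeps the depthwise convolution from seeing future frames while still preserving the sequence length. A minimal stand-alone sketch (not repository code; the sizes are arbitrary assumptions):

```python
# Left-only padding keeps the depthwise conv strictly causal and length-preserving.
import torch
import torch.nn as nn

channels, kernel_size, time = 4, 15, 100
lorder = kernel_size - 1
depthwise = nn.Conv1d(channels, channels, kernel_size, padding=0, groups=channels)

x = torch.randn(1, channels, time)            # (batch, channels, time)
x_padded = nn.functional.pad(x, (lorder, 0))  # pad lorder frames on the left only
y = depthwise(x_padded)
assert y.shape[-1] == time                    # output length equals input length
```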
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
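Two pieces of bookkeeping described in the Args above are worth spelling out: how the depthwise kernel size shrinks at each stride layer when `stride_kernel=True`, and how the cumulative downsampling factor follows from `stride_layer_idx` and `stride`. The following is a stand-alone sketch with invented helper names that mirrors the logic in `__init__` and `calculate_downsampling_factor` later in this class; it is not the repository code itself.

```python
# Hedged sketch of the stride/kernel bookkeeping in the efficient Conformer encoder.
def plan_kernels(cnn_module_kernel, stride_layer_idx, stride, stride_kernel=True):
    kernels = [cnn_module_kernel]
    for s in stride:                                   # one extra entry per StrideConv block
        kernels.append(kernels[-1] // s if stride_kernel else kernels[-1])
    return kernels

def downsampling_factor(layer_idx, stride_layer_idx, stride):
    factor = 1
    for idx, s_idx in enumerate(stride_layer_idx):     # strides accumulate for layers after each StrideConv
        if layer_idx > s_idx:
            factor *= stride[idx]
    return factor

# Example config: StrideConv at block 3 with stride 2, cnn_module_kernel 15
print(plan_kernels(15, [3], [2]))                              # [15, 7]
print([downsampling_factor(i, [3], [2]) for i in range(6)])    # [1, 1, 1, 1, 2, 2]
```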
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. 
- Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: 
bool = False, - static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - 
self.encoders = torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= 
rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 
1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
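DepthwiseConv2dSubsampling4 above shrinks the time axis to roughly one quarter with two stride-2 valid convolutions, and the mask is sliced with `[:, :, :-2:2]` twice to stay in step. A small check under an assumed frame count, showing that the convolution output length, the closed-form `((T - 1) // 2 - 1) // 2`, and the sliced mask all agree:

```python
import torch

T = 101                                            # assumed frame count
x = torch.randn(1, 1, T, 80)                       # (batch, channel=1, time, feature)
conv1 = torch.nn.Conv2d(1, 4, kernel_size=3, stride=2)   # stand-in channel sizes
conv2 = torch.nn.Conv2d(4, 4, kernel_size=3, stride=2)
t_out = conv2(conv1(x)).shape[2]

mask = torch.ones(1, 1, T, dtype=torch.bool)
mask_out = mask[:, :, :-2:2][:, :, :-2:2]          # same slicing as the forward above

assert t_out == ((T - 1) // 2 - 1) // 2 == mask_out.size(-1) == 24
```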
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
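Before the truncate-or-pad step just below, it may help to see the length bookkeeping the time-reduction layers above share: lengths are halved with truncating division while padding masks are halved by stride-2 slicing, and the two always land on the same target length, to which the convolution output is then trimmed or zero-padded. A toy check under an assumed length:

```python
import torch

T = 7                                                         # assumed sequence length
xs_lens = torch.tensor([T])
mask_pad = torch.ones(1, 1, T, dtype=torch.bool)

new_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc')   # (T + 1) // 2
new_mask_pad = mask_pad[:, :, ::2]                            # keeps ceil(T / 2) frames

assert new_lens.item() == new_mask_pad.size(-1) == 4
```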
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, 
max_time_step] - """ - return padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
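RNNPredictor keeps its LSTM state as a pair of `[num_layers, batch, hidden]` tensors; `batch_to_cache` splits that pair into one small cache per beam hypothesis and `cache_to_batch` merges them back. A toy round-trip with assumed sizes:

```python
import torch

num_layers, beam, hidden = 2, 3, 8                     # assumed toy sizes
state_m = torch.randn(num_layers, beam, hidden)
state_c = torch.randn(num_layers, beam, hidden)

# batch -> per-hypothesis caches (the batch_to_cache direction)
per_hyp = [[m, c] for m, c in zip(torch.split(state_m, 1, dim=1),
                                  torch.split(state_c, 1, dim=1))]
assert len(per_hyp) == beam and per_hyp[0][0].shape == (num_layers, 1, hidden)

# per-hypothesis caches -> batch (the cache_to_batch direction)
merged_m = torch.cat([s[0] for s in per_hyp], dim=1)
merged_c = torch.cat([s[1] for s in per_hyp], dim=1)
assert torch.equal(merged_m, state_m) and torch.equal(merged_c, state_c)
```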
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/search/greedy_search.py +++ 
/dev/null @@ -1,54 +0,0 @@ -from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = 
speech.device - batch_size = speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
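Ahead of the ASRModel decoding methods further down, a toy re-implementation of the CTC greedy-search post-processing (the repo itself imports `remove_duplicates_and_blank` from `wenet.utils.common`; this stand-alone version only illustrates the idea): per-frame argmax ids are collapsed so that consecutive repeats merge and blank ids (0) are dropped.

```python
def collapse_ctc(frame_ids, blank_id=0):
    """Collapse repeated ids and drop blanks -- illustrative only."""
    out, prev = [], None
    for tok in frame_ids:
        if tok != prev and tok != blank_id:
            out.append(tok)
        prev = tok
    return out

# e.g. frame-level argmax [0, 3, 3, 0, 0, 5, 5, 5, 0, 2] decodes to [3, 5, 2]
print(collapse_ctc([0, 3, 3, 0, 0, 5, 5, 5, 0, 2]))
```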
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
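`ctc_greedy_search` above reduces to an argmax per frame followed by collapsing repeats and dropping blanks. A self-contained sketch of that post-processing on random log-probabilities (blank id 0 and the toy shapes are assumptions):

```python
import torch

blank_id = 0
ctc_probs = torch.randn(1, 6, 10).log_softmax(dim=2)   # (batch, maxlen, vocab_size)
frame_tokens = ctc_probs.argmax(dim=2)[0].tolist()     # best token per frame

# collapse consecutive repeats, then drop blanks (what remove_duplicates_and_blank does)
hyp, prev = [], None
for tok in frame_tokens:
    if tok != prev and tok != blank_id:
        hyp.append(tok)
    prev = tok
print(hyp)
```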
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
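The `_ctc_prefix_beam_search` loop above keeps, for every surviving prefix, a blank-ending and a non-blank-ending log-probability and merges hypotheses with `log_add`. A standalone sketch of the same recursion for a single utterance (the random log-probabilities and small sizes are placeholders; blank id 0 follows the code above):

```python
import math
from collections import defaultdict

import torch


def log_add(args):
    """Stable log-sum-exp over a list of log-probabilities."""
    if all(a == -float('inf') for a in args):
        return -float('inf')
    a_max = max(args)
    return a_max + math.log(sum(math.exp(a - a_max) for a in args))


def ctc_prefix_beam_search(ctc_probs, beam_size):
    """ctc_probs: (maxlen, vocab_size) log-probabilities for one utterance."""
    cur_hyps = [(tuple(), (0.0, -float('inf')))]        # prefix -> (p_blank, p_nonblank)
    for t in range(ctc_probs.size(0)):
        logp = ctc_probs[t]
        next_hyps = defaultdict(lambda: (-float('inf'), -float('inf')))
        top_k_logp, top_k_index = logp.topk(beam_size)  # first prune: topk tokens
        for s in top_k_index.tolist():
            ps = logp[s].item()
            for prefix, (pb, pnb) in cur_hyps:
                last = prefix[-1] if prefix else None
                if s == 0:                               # blank: prefix unchanged
                    n_pb, n_pnb = next_hyps[prefix]
                    next_hyps[prefix] = (log_add([n_pb, pb + ps, pnb + ps]), n_pnb)
                elif s == last:                          # repeated token
                    n_pb, n_pnb = next_hyps[prefix]
                    next_hyps[prefix] = (n_pb, log_add([n_pnb, pnb + ps]))
                    n_prefix = prefix + (s,)             # ...or a new token after a blank
                    n_pb, n_pnb = next_hyps[n_prefix]
                    next_hyps[n_prefix] = (n_pb, log_add([n_pnb, pb + ps]))
                else:                                    # new token extends the prefix
                    n_prefix = prefix + (s,)
                    n_pb, n_pnb = next_hyps[n_prefix]
                    next_hyps[n_prefix] = (n_pb, log_add([n_pnb, pb + ps, pnb + ps]))
        # second prune: keep the beam_size best prefixes
        next_hyps = sorted(next_hyps.items(), key=lambda x: log_add(list(x[1])), reverse=True)
        cur_hyps = next_hyps[:beam_size]
    return [(prefix, log_add(list(p))) for prefix, p in cur_hyps]


hyps = ctc_prefix_beam_search(torch.randn(8, 10).log_softmax(dim=-1), beam_size=3)
print(hyps)
```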
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
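The rescoring loop above sums the attention decoder's log-probability of each CTC n-best hypothesis (plus the probability of emitting `<eos>` afterwards) and interpolates it with the CTC score. A compact sketch with placeholder hypotheses, vocabulary size and scores:

```python
import torch

# toy n-best from CTC prefix beam search: (token ids, ctc log score) -- made up
hyps = [((12, 7, 903), -3.2), ((12, 7), -4.1)]
vocab_size, eos, ctc_weight = 4233, 4232, 0.5
# stand-in for the attention decoder output over the padded hypotheses
decoder_out = torch.randn(len(hyps), 5, vocab_size).log_softmax(dim=-1)

best_score, best_index = -float('inf'), 0
for i, (tokens, ctc_score) in enumerate(hyps):
    score = sum(decoder_out[i, j, w].item() for j, w in enumerate(tokens))
    score += decoder_out[i, len(tokens), eos].item()   # probability of stopping here
    score += ctc_weight * ctc_score                    # interpolate with the CTC score
    if score > best_score:
        best_score, best_index = score, i
print(hyps[best_index][0], best_score)
```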
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
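The NOTE above points out that concatenating a zero-length cache is a no-op, which is what lets the ONNX export always take the split-and-concat path. The snippet below reproduces the doctest-style examples from that comment, plus the KV-cache packing with `d_k * 2` in the last dimension:

```python
import torch

a = torch.ones((1, 2, 0, 4))        # "empty" cache fed for the first chunk
b = torch.ones((1, 2, 3, 4))        # keys (or values) computed for this chunk
c = torch.cat((a, b), dim=2)
print(torch.equal(b, c))            # True: concatenating the empty cache changes nothing

cache = torch.cat((b, b), dim=-1)   # pack K and V as (1, head, cache_t, d_k * 2)
key_cache, value_cache = torch.split(cache, cache.size(-1) // 2, dim=-1)
print(key_cache.shape, value_cache.shape)   # both torch.Size([1, 2, 3, 4])
```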
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
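The `CTC` module above projects encoder frames to the vocabulary, log-softmaxes, transposes to (Tmax, B, vocab) and feeds `torch.nn.CTCLoss` with a sum reduction averaged over the batch. A toy call with random tensors (sizes and the vocabulary are placeholders):

```python
import torch

B, T, V, L = 2, 50, 4233, 10                          # toy batch, frames, vocab, label length
log_probs = torch.randn(T, B, V).log_softmax(dim=2)   # (Tmax, B, vocab), as CTCLoss expects
targets = torch.randint(1, V, (B, L))                 # 0 is reserved for <blank>
input_lengths = torch.full((B,), T, dtype=torch.long)
target_lengths = torch.full((B,), L, dtype=torch.long)

ctc_loss = torch.nn.CTCLoss(reduction="sum")
loss = ctc_loss(log_probs, targets, input_lengths, target_lengths) / B   # batch average
print(loss.item())
```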
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
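`TransformerDecoder.forward` above builds the decoder self-attention mask by combining a padding mask with a causal (`subsequent`) mask. A sketch of the same combination using plain `torch` ops in place of `make_pad_mask`/`subsequent_mask` (the lengths are made up, and the padding mask is built directly in "valid = True" form):

```python
import torch

ys_in_lens = torch.tensor([4, 2])                     # toy target lengths
L = int(ys_in_lens.max())

# (B, 1, L): True on real tokens, i.e. ~make_pad_mask(ys_in_lens, L).unsqueeze(1)
pad_mask = (torch.arange(L).unsqueeze(0) < ys_in_lens.unsqueeze(1)).unsqueeze(1)
# (1, L, L): lower-triangular causal mask, i.e. subsequent_mask(L).unsqueeze(0)
causal = torch.tril(torch.ones(L, L, dtype=torch.bool)).unsqueeze(0)

tgt_mask = pad_mask & causal                          # (B, L, L)
print(tgt_mask[1])                                    # second row masks everything past step 2
```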
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
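The buffer built in `__init__` above is the classic sinusoidal table. A small, self-contained check (with assumed `d_model=8`, `max_len=16`) that the even/odd columns really match PE(pos, 2i) = sin(pos / 10000^(2i/d_model)) and its cosine counterpart:

```python
import math
import torch

d_model, max_len = 8, 16  # assumed values for illustration

# Rebuild the table the same way the deleted PositionalEncoding.__init__ does
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float32)
                     * -(math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)

# Spot-check one entry against PE(pos, 2i) = sin(pos / 10000^(2i / d_model))
pos, i = 5, 2
expected = math.sin(pos / (10000 ** (2 * i / d_model)))
assert abs(pe[pos, 2 * i].item() - expected) < 1e-6
```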
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
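The window arithmetic driving the loop above is easy to sanity-check in isolation. A minimal sketch, assuming a `Conv2dSubsampling4` front-end (`subsampling_rate=4`, `right_context=6`) and `decoding_chunk_size=16`, mirroring the stride/window formulas in `forward_chunk_by_chunk`:

```python
# Assumed values for illustration: Conv2dSubsampling4 front-end
subsampling = 4            # embed.subsampling_rate
right_context = 6          # embed.right_context
decoding_chunk_size = 16   # encoder frames produced per chunk

context = right_context + 1                          # add current frame
stride = subsampling * decoding_chunk_size           # input frames consumed per step
decoding_window = (decoding_chunk_size - 1) * subsampling + context

num_frames = 640  # e.g. 6.4 s of 10 ms frames, also an assumed value
chunk_starts = list(range(0, num_frames - context + 1, stride))
print(stride, decoding_window, len(chunk_starts))    # 64 67 10
```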
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
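To make the 0.9 / 0.05 split described above concrete, here is a short, self-contained reproduction of the smoothed target construction from `forward` (vocab size 3, smoothing 0.1, no padding labels), followed by the same KL-divergence call for shape sanity:

```python
import torch

size, smoothing = 3, 0.1
confidence = 1.0 - smoothing

target = torch.tensor([0, 1, 2])   # (batch * seqlen,)
x = torch.randn(1, 3, size)        # dummy logits (batch, seqlen, class)

true_dist = torch.zeros(3, size)
true_dist.fill_(smoothing / (size - 1))                  # 0.05 everywhere
true_dist.scatter_(1, target.unsqueeze(1), confidence)   # 0.9 on the true label
print(true_dist)
# tensor([[0.9000, 0.0500, 0.0500],
#         [0.0500, 0.9000, 0.0500],
#         [0.0500, 0.0500, 0.9000]])

kl = torch.nn.KLDivLoss(reduction="none")(
    torch.log_softmax(x.view(-1, size), dim=1), true_dist)
print(kl.shape)  # torch.Size([3, 3])
```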
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
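A small sketch of the 1/4-length arithmetic in `Conv2dSubsampling4` above: two kernel-3, stride-2 convolutions shrink the time axis to `((T - 1) // 2 - 1) // 2`, and the `[:, :, 2::2][:, :, 2::2]` mask slicing yields the same count. The input length `T=103` is an arbitrary assumed value:

```python
import torch

T = 103  # assumed number of input frames

# Time axis after two Conv2d(kernel=3, stride=2) layers, as in Conv2dSubsampling4
t_out = ((T - 1) // 2 - 1) // 2

# The padding mask is subsampled with the same stride pattern as the features
mask = torch.ones(1, 1, T, dtype=torch.bool)
mask_out = mask[:, :, 2::2][:, :, 2::2]

print(t_out, mask_out.size(2))  # both 25
```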
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
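A condensed, runnable sketch of the round trip performed by `save_checkpoint` / `load_checkpoint` above, using a throwaway linear model, a temporary directory, and made-up info fields (the real helpers additionally handle `DataParallel` / `DistributedDataParallel` wrappers and GPU loading):

```python
import datetime
import os
import re
import tempfile

import torch
import yaml

model = torch.nn.Linear(4, 2)                      # stand-in for the ASR model
path = os.path.join(tempfile.mkdtemp(), "demo.pt")

# What save_checkpoint does: dump the state_dict plus a sibling .yaml of infos
torch.save(model.state_dict(), path)
infos = {"epoch": 3,
         "save_time": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S")}
with open(re.sub(".pt$", ".yaml", path), "w") as fout:
    fout.write(yaml.dump(infos))

# What load_checkpoint does: restore weights (strict=False) and return the yaml dict
model.load_state_dict(torch.load(path, map_location="cpu"), strict=False)
with open(re.sub(".pt$", ".yaml", path)) as fin:
    print(yaml.load(fin, Loader=yaml.FullLoader))
```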
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
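To make the CMVN arithmetic above concrete: the stats file holds per-dimension sums, squared sums, and a frame count, and the loader turns them into a mean and an inverse standard deviation. A tiny numeric sketch with made-up two-dimensional stats (the `GlobalCMVN` module, not shown in this hunk, then applies `(x - mean) * istd`):

```python
import math
import numpy as np

# Made-up stats in the spirit of a json cmvn file: 2 frames, 2 feature dims
cmvn_stats = {"mean_stat": [10.0, 40.0], "var_stat": [52.0, 1000.0], "frame_num": 2}

means = list(cmvn_stats["mean_stat"])
variance = list(cmvn_stats["var_stat"])
count = cmvn_stats["frame_num"]
for i in range(len(means)):
    means[i] /= count                                         # E[x]
    variance[i] = variance[i] / count - means[i] ** 2         # E[x^2] - E[x]^2
    variance[i] = 1.0 / math.sqrt(max(variance[i], 1.0e-20))  # istd, floored
print(means, variance)   # [5.0, 20.0] [1.0, 0.1]

x = np.array([[4.0, 10.0], [6.0, 30.0]])
print((x - np.array(means)) * np.array(variance))  # [[-1. -1.] [ 1.  1.]]
```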
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/rnnt/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/README.md deleted file mode 100644 index 484de3773aa5b05a089b5be3f329037fc7a499b2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/README.md +++ /dev/null @@ -1,291 +0,0 @@ -# Performance Record - -## Conformer Result Bidecoder (large) - -* Encoder FLOPs(30s): 96,238,430,720, params: 85,709,704 -* Feature info: using fbank feature, cmvn, dither, online speed perturb -* Training info: train_conformer_bidecoder_large.yaml, kernel size 31, lr 0.002, batch size 12, 8 gpu, acc_grad 4, 120 epochs, dither 1.0 -* Decoding info: ctc_weight 0.3, reverse weight 0.5, average_num 30 -* Git hash: 65270043fc8c2476d1ab95e7c39f730017a670e0 -* LM-tgmed: [3-gram.pruned.1e-7.arpa.gz](http://www.openslr.org/resources/11/3-gram.pruned.1e-7.arpa.gz) -* LM-tglarge: [3-gram.arpa.gz](http://www.openslr.org/resources/11/3-gram.arpa.gz) -* LM-fglarge: [4-gram.arpa.gz](http://www.openslr.org/resources/11/4-gram.arpa.gz) - -| decoding mode | test clean | test other | -|----------------------------------|------------|------------| -| ctc prefix beam search | 2.96 | 7.14 | -| attention rescoring | 2.66 | 6.53 | -| LM-tgmed + attention rescoring | 2.78 | 6.32 | -| LM-tglarge + attention rescoring | 2.68 | 6.10 | -| LM-fglarge + attention rescoring | 2.65 | 5.98 | - -## SqueezeFormer Result (U2++, FFN:2048) - -* Encoder info: - * SM12, reduce_idx 5, recover_idx 11, conv1d, batch_norm, syncbn - * encoder_dim 512, output_size 512, head 8, ffn_dim 512*4=2048 - * Encoder FLOPs(30s): 82,283,704,832, params: 85,984,648 -* Feature info: - * using fbank feature, cmvn, dither, online speed perturb, spec_aug -* Training info: - * train_squeezeformer_bidecoder_large.yaml, kernel size 31 - * batch size 12, 8 gpu, acc_grad 4, 120 epochs, dither 1.0 - * adamw, lr 8e-4, NoamHold, warmup 0.2, hold 0.3, lr_decay 1.0 -* Decoding info: - * ctc_weight 0.3, reverse weight 0.5, average_num 30 - -| decoding mode | dev clean | 
dev other | test clean | test other | -|----------------------------------|-----------|-----------|------------|------------| -| ctc greedy search | 2.55 | 6.62 | 2.73 | 6.59 | -| ctc prefix beam search | 2.53 | 6.60 | 2.72 | 6.52 | -| attention decoder | 2.93 | 6.56 | 3.31 | 6.47 | -| attention rescoring | 2.19 | 6.06 | 2.45 | 5.85 | - -## Conformer Result - -* Encoder FLOPs(30s): 34,085,088,512, params: 34,761,608 -* Feature info: using fbank feature, cmvn, dither, online speed perturb -* Training info: train_conformer.yaml, kernel size 31, lr 0.004, batch size 12, 8 gpu, acc_grad 4, 120 epochs, dither 0.1 -* Decoding info: ctc_weight 0.5, average_num 30 -* Git hash: 90d9a559840e765e82119ab72a11a1f7c1a01b78 -* LM-fglarge: [4-gram.arpa.gz](http://www.openslr.org/resources/11/4-gram.arpa.gz) - -| decoding mode | test clean | test other | -|----------------------------------|------------|------------| -| ctc greedy search | 3.51 | 9.57 | -| ctc prefix beam search | 3.51 | 9.56 | -| attention decoder | 3.05 | 8.36 | -| attention rescoring | 3.18 | 8.72 | -| attention rescoring (beam 50) | 3.12 | 8.55 | -| LM-fglarge + attention rescoring | 3.09 | 7.40 | - -## Conformer Result (12 layers, FFN:2048) -* Encoder FLOPs(30s): 34,085,088,512, params: 34,761,608 -* Feature info: using fbank feature, cmvn, dither, online speed perturb -* Training info: train_squeezeformer.yaml, kernel size 31, -* batch size 12, 8 gpu, acc_grad 4, 120 epochs, dither 0.1 -* AdamW, lr 1e-3, NoamHold, warmup 0.2, hold 0.3, lr_decay 1.0 -* Decoding info: ctc_weight 0.3, reverse weight 0.5, average_num 30 - -| decoding mode | dev clean | dev other | test clean | test other | -|----------------------------------|-----------|-----------|------------|------------| -| ctc greedy search | 3.49 | 9.59 | 3.66 | 9.59 | -| ctc prefix beam search | 3.49 | 9.61 | 3.66 | 9.55 | -| attention decoder | 3.52 | 9.04 | 3.85 | 8.97 | -| attention rescoring | 3.10 | 8.91 | 3.29 | 8.81 | - -## SqueezeFormer Result (SM12, FFN:1024) -* Encoder info: - * SM12, reduce_idx 5, recover_idx 11, conv2d, w/o syncbn - * encoder_dim 256, output_size 256, head 4, ffn_dim 256*4=1024 - * Encoder FLOPs(30s): 21,158,877,440, params: 22,219,912 -* Feature info: - * using fbank feature, cmvn, dither, online speed perturb -* Training info: - * train_squeezeformer.yaml, kernel size 31, - * batch size 12, 8 gpu, acc_grad 4, 120 epochs, dither 0.1 - * adamw, lr=1e-3, noamhold, warmup=0.2, hold=0.3, lr_decay=1.0 -* Decoding info: ctc_weight 0.3, reverse weight 0.5, average_num 30 - -| decoding mode | dev clean | dev other | test clean | test other | -|----------------------------------|-----------|-----------|------------|------------| -| ctc greedy search | 3.49 | 9.24 | 3.51 | 9.28 | -| ctc prefix beam search | 3.44 | 9.23 | 3.51 | 9.25 | -| attention decoder | 3.59 | 8.74 | 3.75 | 8.70 | -| attention rescoring | 2.97 | 8.48 | 3.07 | 8.44 | - -## SqueezeFormer Result (SM12, FFN:2048) -* Encoder info: - * SM12, reduce_idx 5, recover_idx 11, conv2d, w/o syncbn - * encoder_dim 256, output_size 256, head 4, ffn_dim 256*8=2048 - * encoder FLOPs(30s): 28,230,473,984, params: 34,827,400 -* Feature info: using fbank feature, cmvn, dither, online speed perturb -* Training info: - * train_squeezeformer.yaml, kernel size 31 - * batch size 12, 8 gpu, acc_grad 4, 120 epochs, dither 0.1 - * adamw, lr 1e-3, noamhold, warmup 0.2, hold 0.3, lr_decay 1.0 -* Decoding info: - * ctc_weight 0.3, reverse weight 0.5, average_num 30 - -| decoding mode | dev clean | dev other | test clean 
| test other | -|----------------------------------|-----------|-----------|------------|------------| -| ctc greedy search | 3.34 | 9.01 | 3.47 | 8.85 | -| ctc prefix beam search | 3.33 | 9.02 | 3.46 | 8.81 | -| attention decoder | 3.64 | 8.62 | 3.91 | 8.33 | -| attention rescoring | 2.89 | 8.34 | 3.10 | 8.03 | - -## SqueezeFormer Result (SM12, FFN:1312) -* Encoder info: - * SM12, reduce_idx 5, recover_idx 11, conv1d, w/o syncbn - * encoder_dim 328, output_size 256, head 4, ffn_dim 328*4=1312 - * encoder FLOPs(30s): 34,103,960,008, params: 35,678,352 -* Feature info: - * using fbank feature, cmvn, dither, online speed perturb -* Training info: - * train_squeezeformer.yaml, kernel size 31, - * batch size 12, 8 gpu, acc_grad 4, 120 epochs, dither 1.0 - * adamw, lr 1e-3, noamhold, warmup 0.2, hold 0.3, lr_decay 1.0 -* Decoding info: - * ctc_weight 0.3, reverse weight 0.5, average_num 30 - -| decoding mode | dev clean | dev other | test clean | test other | -|----------------------------------|-----------|-----------|------------|------------| -| ctc greedy search | 3.20 | 8.46 | 3.30 | 8.58 | -| ctc prefix beam search | 3.18 | 8.44 | 3.30 | 8.55 | -| attention decoder | 3.38 | 8.31 | 3.89 | 8.32 | -| attention rescoring | 2.81 | 7.86 | 2.96 | 7.91 | - -## Conformer U2++ Result - -* Feature info: using fbank feature, cmvn, no speed perturb, dither -* Training info: train_u2++_conformer.yaml lr 0.001, batch size 24, 8 gpu, acc_grad 1, 120 epochs, dither 1.0 -* Decoding info: ctc_weight 0.3, reverse weight 0.5, average_num 30 -* Git hash: 65270043fc8c2476d1ab95e7c39f730017a670e0 - -test clean - -| decoding mode | full | 16 | -|--------------------------------|------|------| -| ctc prefix beam search | 3.76 | 4.54 | -| attention rescoring | 3.32 | 3.80 | - -test other - -| decoding mode | full | 16 | -|--------------------------------|-------|-------| -| ctc prefix beam search | 9.50 | 11.52 | -| attention rescoring | 8.67 | 10.38 | - -## SqueezeFormer Result (U2++, FFN:2048) - -* Encoder info: - * SM12, reduce_idx 5, recover_idx 11, conv1d, layer_norm - * do_rel_shift false, warp_for_time, syncbn - * encoder_dim 256, output_size 256, head 4, ffn_dim 256*8=2048 - * Encoder FLOPs(30s): 28,255,337,984, params: 34,893,704 -* Feature info: - * using fbank feature, cmvn, dither, online speed perturb -* Training info: - * train_squeezeformer.yaml, kernel size 31 - * batch size 12, 8 gpu, acc_grad 2, 120 epochs, dither 1.0 - * adamw, lr 8e-4, NoamHold, warmup 0.2, hold 0.3, lr_decay 1.0 -* Decoding info: - * ctc_weight 0.3, reverse weight 0.5, average_num 30 - -test clean - -| decoding mode | full | 16 | -|--------------------------------|------|------| -| ctc prefix beam search | 3.45 | 4.34 | -| attention rescoring | 3.07 | 3.71 | - -test other - -| decoding mode | full | 16 | -|--------------------------------|-------|-------| -| ctc prefix beam search | 8.29 | 10.60 | -| attention rescoring | 7.58 | 9.60 | - -## Conformer U2 Result - -* Feature info: using fbank feature, cmvn, speed perturb, dither -* Training info: train_unified_conformer.yaml lr 0.001, batch size 10, 8 gpu, acc_grad 1, 120 epochs, dither 1.0 -* Decoding info: ctc_weight 0.5, average_num 30 -* Git hash: 90d9a559840e765e82119ab72a11a1f7c1a01b78 -* LM-tgmed: [3-gram.pruned.1e-7.arpa.gz](http://www.openslr.org/resources/11/3-gram.pruned.1e-7.arpa.gz) -* LM-tglarge: [3-gram.arpa.gz](http://www.openslr.org/resources/11/3-gram.arpa.gz) -* LM-fglarge: [4-gram.arpa.gz](http://www.openslr.org/resources/11/4-gram.arpa.gz) - -test clean - -| 
decoding mode | full | 16 | -|----------------------------------|------|------| -| ctc prefix beam search | 4.26 | 5.00 | -| attention decoder | 3.05 | 3.44 | -| attention rescoring | 3.72 | 4.10 | -| attention rescoring (beam 50) | 3.57 | 3.95 | -| LM-tgmed + attention rescoring | 3.56 | 4.02 | -| LM-tglarge + attention rescoring | 3.40 | 3.82 | -| LM-fglarge + attention rescoring | 3.38 | 3.74 | - -test other - -| decoding mode | full | 16 | -|----------------------------------|-------|-------| -| ctc prefix beam search | 10.87 | 12.87 | -| attention decoder | 9.07 | 10.44 | -| attention rescoring | 9.74 | 11.61 | -| attention rescoring (beam 50) | 9.34 | 11.13 | -| LM-tgmed + attention rescoring | 8.78 | 10.26 | -| LM-tglarge + attention rescoring | 8.34 | 9.74 | -| LM-fglarge + attention rescoring | 8.17 | 9.44 | - - -## Efficient Conformer V1 Result - -* Feature info: - * using fbank feature, cmvn, speed perturb, dither -* Training info: - * train_u2++_efficonformer_v1.yaml - * 8 gpu, batch size 16, acc_grad 1, 120 epochs - * lr 0.001, warmup_steps 35000 -* Model info: - * Model Params: 49,474,974 - * Downsample rate: 1/4 (conv2d) * 1/2 (efficonformer block) - * encoder_dim 256, output_size 256, head 8, linear_units 2048 - * num_blocks 12, cnn_module_kernel 15, group_size 3 -* Decoding info: - * ctc_weight 0.5, reverse_weight 0.3, average_num 20 - -test clean - -| decoding mode | full | 18 | 16 | -|------------------------|------|------|------| -| attention decoder | 3.65 | 3.88 | 3.87 | -| ctc_greedy_search | 3.46 | 3.79 | 3.77 | -| ctc prefix beam search | 3.44 | 3.75 | 3.74 | -| attention rescoring | 3.17 | 3.44 | 3.41 | - -test other - -| decoding mode | full | 18 | 16 | -|------------------------|------|-------|-------| -| attention decoder | 8.51 | 9.24 | 9.25 | -| ctc_greedy_search | 8.94 | 10.04 | 10.06 | -| ctc prefix beam search | 8.91 | 10 | 10.01 | -| attention rescoring | 8.21 | 9.25 | 9.25 | - - -## Efficient Conformer V2 Result - -* Feature info: - * using fbank feature, cmvn, speed perturb, dither -* Training info: - * train_u2++_efficonformer_v2.yaml - * 8 gpu, batch size 16, acc_grad 1, 120 epochs - * lr 0.001, warmup_steps 35000 -* Model info: - * Model Params: 50,341,278 - * Downsample rate: 1/2 (conv2d2) * 1/4 (efficonformer block) - * encoder_dim 256, output_size 256, head 8, linear_units 2048 - * num_blocks 12, cnn_module_kernel 15, group_size 3 -* Decoding info: - * ctc_weight 0.5, reverse_weight 0.3, average_num 20 - -test clean - -| decoding mode | full | 18 | 16 | -|------------------------|------|------|------| -| attention decoder | 3.49 | 3.71 | 3.72 | -| ctc_greedy_search | 3.49 | 3.74 | 3.77 | -| ctc prefix beam search | 3.47 | 3.72 | 3.74 | -| attention rescoring | 3.12 | 3.38 | 3.36 | - -test other - -| decoding mode | full | 18 | 16 | -|------------------------|------|------|------| -| attention decoder | 8.15 | 9.05 | 9.03 | -| ctc_greedy_search | 8.73 | 9.82 | 9.83 | -| ctc prefix beam search | 8.70 | 9.81 | 9.79 | -| attention rescoring | 8.05 | 9.08 | 9.10 | diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_conformer.yaml deleted file mode 100644 index a1298d8a4c90eed0d704c3839ab5bd71c84d8593..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,80 +0,0 @@ -# network 
architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -# dataset related -dataset_conf: - filter_conf: - max_length: 2000 - min_length: 50 - token_max_length: 400 - token_min_length: 1 - min_output_input_ratio: 0.0005 - max_output_input_ratio: 0.1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 12 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 70 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.004 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_conformer_bidecoder_large.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_conformer_bidecoder_large.yaml deleted file mode 100644 index 28b218855cf7ac83b21deb027f9c4420f3dbaecb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_conformer_bidecoder_large.yaml +++ /dev/null @@ -1,83 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 512 # dimension of attention - attention_heads: 8 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 31 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - cnn_module_norm: 'layer_norm' - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 8 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -# dataset related -dataset_conf: - filter_conf: - max_length: 2000 - min_length: 50 - token_max_length: 400 - token_min_length: 1 - min_output_input_ratio: 0.0005 - 
max_output_input_ratio: 0.1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 3 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 12 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 120 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 50000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_squeezeformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_squeezeformer.yaml deleted file mode 100644 index 15dd2d33ba1483747753f33f3afc73bc61c01b6e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_squeezeformer.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# network architecture -# encoder related -encoder: squeezeformer -encoder_conf: - encoder_dim: 256 - output_size: 256 # dimension of attention - attention_heads: 4 - num_blocks: 12 # the number of encoder blocks - reduce_idx: 5 - recover_idx: 11 - pos_enc_layer_type: 'rel_pos' - time_reduction_layer_type: 'conv1d' - feed_forward_expansion_factor: 4 - input_dropout_rate: 0.1 - feed_forward_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - cnn_module_kernel: 31 - cnn_norm_type: layer_norm - adaptive_scale: true - normalize_before: false - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -# dataset related -dataset_conf: - filter_conf: - max_length: 2000 - min_length: 50 - token_max_length: 400 - token_min_length: 1 - min_output_input_ratio: 0.0005 - max_output_input_ratio: 0.1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 12 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 120 -log_interval: 100 - -optim: adamw -optim_conf: - lr: 1.e-3 - weight_decay: 4.e-5 - -scheduler: NoamHoldAnnealing -scheduler_conf: - warmup_ratio: 0.2 - hold_ratio: 0.3 - max_steps: 87960 - decay_rate: 1.0 - min_lr: 1.e-5 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_squeezeformer_bidecoder_large.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_squeezeformer_bidecoder_large.yaml deleted file mode 100644 index 6d81b2a545719b9e90ff0bc04f2e56d9d9d0c3bc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_squeezeformer_bidecoder_large.yaml +++ /dev/null @@ -1,96 +0,0 @@ -# network architecture -# 
encoder related -encoder: squeezeformer -encoder_conf: - encoder_dim: 512 - output_size: 512 # dimension of attention - attention_heads: 8 - num_blocks: 12 # the number of encoder blocks - reduce_idx: 5 - recover_idx: 11 - feed_forward_expansion_factor: 4 - input_dropout_rate: 0.1 - feed_forward_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - cnn_module_kernel: 31 - cnn_norm_type: batch_norm - adaptive_scale: true - normalize_before: false - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 8 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -# dataset related -dataset_conf: - syncbn: true - filter_conf: - max_length: 2000 - min_length: 50 - token_max_length: 400 - token_min_length: 1 - min_output_input_ratio: 0.0005 - max_output_input_ratio: 0.1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 3 - num_f_mask: 2 - max_t: 100 - max_f: 27 - max_w: 80 -# warp_for_time: true - spec_sub: true - spec_sub_conf: - num_t_sub: 3 - max_t: 30 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 12 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 120 -log_interval: 100 - -optim: adamw -optim_conf: - lr: 1.e-3 - weight_decay: 4.e-5 - -scheduler: NoamHoldAnnealing -scheduler_conf: - warmup_ratio: 0.2 - hold_ratio: 0.3 - max_steps: 87960 - decay_rate: 1.0 - min_lr: 1.e-5 - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_conformer.yaml deleted file mode 100644 index 97928fe7d77c32f169d9a66c3cb78634abd1c4fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_conformer.yaml +++ /dev/null @@ -1,91 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - causal: true - use_dynamic_chunk: true - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - use_dynamic_left_chunk: false - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -# 
dataset related -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 400 - token_min_length: 1 - # min_output_input_ratio: 0.0005 - # max_output_input_ratio: 0.1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - warp_for_time: true - spec_sub: true - spec_sub_conf: - num_t_sub: 3 - max_t: 30 - shuffle: true - shuffle_conf: - shuffle_size: 10000 - sort: true - sort_conf: - sort_size: 2000 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 24 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 120 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_efficonformer_v1.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_efficonformer_v1.yaml deleted file mode 100644 index 451409abb0e5d739e792cbd5de820a2790e1fd5f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_efficonformer_v1.yaml +++ /dev/null @@ -1,96 +0,0 @@ -# network architecture -# encoder related -encoder: efficientConformer -encoder_conf: - activation_type: 'swish' - attention_heads: 8 - causal: false - cnn_module_kernel: 15 - cnn_module_norm: 'layer_norm' - dropout_rate: 0.1 - input_layer: conv2d - linear_units: 2048 - normalize_before: true - num_blocks: 12 - output_size: 256 - pos_enc_layer_type: 'rel_pos' - attention_dropout_rate: 0.1 - positional_dropout_rate: 0.1 - use_cnn_module: true - use_dynamic_chunk: true - use_dynamic_left_chunk: false - efficient_conf: - stride_layer_idx: [3] # layer id with StrideConv - stride: [2] # stride size of each StrideConv - group_layer_idx: [0, 1, 2, 3] # layer id with GroupedAttention - group_size: 3 # group size of every GroupedAttention layer - stride_kernel: true # true: recompute cnn kernels with stride - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 8 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -# dataset related -dataset_conf: - batch_conf: - batch_size: 16 - batch_type: 'static' - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - spec_sub: true - spec_sub_conf: - num_t_sub: 3 - max_t: 30 - spec_trim: false - spec_trim_conf: - max_t: 50 - speed_perturb: true - -grad_clip: 5 -accum_grad: 1 -max_epoch: 120 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 35000 diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_efficonformer_v2.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_efficonformer_v2.yaml deleted file mode 100644 index 1ba165953b671e5d657c3d5ad3261ca5db188c60..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_efficonformer_v2.yaml +++ /dev/null @@ -1,98 +0,0 @@ -# network architecture -# encoder related -encoder: efficientConformer -encoder_conf: - activation_type: 'swish' - attention_heads: 8 - causal: false - cnn_module_kernel: 15 - cnn_module_norm: 'layer_norm' - dropout_rate: 0.1 - input_layer: conv2d2 - linear_units: 2048 - normalize_before: true - num_blocks: 12 - output_size: 256 - pos_enc_layer_type: 'rel_pos' - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - use_cnn_module: true - use_dynamic_chunk: true - use_dynamic_left_chunk: false - efficient_conf: - stride_layer_idx: [3, 7] # layer id with StrideConv - stride: [2, 2] # stride size of each StrideConv - group_layer_idx: [3, 7] # layer id with GroupedAttention - group_size: 3 # group size of every GroupedAttention layer - stride_kernel: false # true: recompute cnn kernels with stride - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 8 - dropout_rate: 0.1 - linear_units: 2048 - num_blocks: 3 - positional_dropout_rate: 0.1 - r_num_blocks: 3 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -# dataset related -dataset_conf: - batch_conf: - batch_size: 10 - batch_type: 'static' - fbank_conf: - dither: 1.0 - frame_length: 25 - frame_shift: 10 - num_mel_bins: 80 - filter_conf: - max_length: 40960 - min_length: 0 - max_output_input_ratio: 0.1 - min_output_input_ratio: 0.005 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - spec_sub: true - spec_sub_conf: - num_t_sub: 3 - max_t: 30 - spec_trim: false - spec_trim_conf: - max_t: 50 - speed_perturb: true - -grad_clip: 5 -accum_grad: 2 -max_epoch: 120 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 28000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_squeezeformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_squeezeformer.yaml deleted file mode 100644 index 2cb6204bd67d51d1e9d796bb4f8ab77bbe55610e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_u2++_squeezeformer.yaml +++ /dev/null @@ -1,98 +0,0 @@ -# network architecture -# encoder related -encoder: squeezeformer -encoder_conf: - encoder_dim: 256 - output_size: 256 # dimension of attention - attention_heads: 4 - num_blocks: 12 # the number of encoder blocks - reduce_idx: 5 - recover_idx: 11 - time_reduction_layer_type: "stream" - feed_forward_expansion_factor: 8 - input_dropout_rate: 0.1 - feed_forward_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - cnn_module_kernel: 31 - 
do_rel_shift: false - cnn_norm_type: layer_norm - adaptive_scale: true - normalize_before: false - causal: true - use_dynamic_chunk: true - use_dynamic_left_chunk: false - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -# dataset related -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 400 - token_min_length: 1 - # min_output_input_ratio: 0.0005 - # max_output_input_ratio: 0.1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 -# warp_for_time: true - spec_sub: true - spec_sub_conf: - num_t_sub: 3 - max_t: 30 - shuffle: true - shuffle_conf: - shuffle_size: 10000 - sort: true - sort_conf: - sort_size: 2000 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 12 - -grad_clip: 5 -accum_grad: 2 -max_epoch: 120 -log_interval: 100 - -optim: adamw -optim_conf: - lr: 8.e-4 - weight_decay: 4.e-5 - -scheduler: NoamHoldAnnealing -scheduler_conf: - warmup_ratio: 0.2 - hold_ratio: 0.3 - max_steps: 175680 - decay_rate: 1.0 - min_lr: 1.e-5 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_unified_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_unified_conformer.yaml deleted file mode 100644 index 288687e6161eb8a4acb0ca3e6006ad1a5e51df54..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/conf/train_unified_conformer.yaml +++ /dev/null @@ -1,83 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - causal: true - use_dynamic_chunk: true - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -# dataset related -dataset_conf: - filter_conf: - max_length: 2000 - min_length: 50 - token_max_length: 400 - token_min_length: 1 - min_output_input_ratio: 0.0005 - max_output_input_ratio: 0.1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.0 - spec_aug: true - 
spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 120 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/local/data_prep_torchaudio.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/local/data_prep_torchaudio.sh deleted file mode 100644 index c7dc1deb7dec59f571c8f6935fe36c6aea2e8e99..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/local/data_prep_torchaudio.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Vassil Panayotov -# 2014 Johns Hopkins University (author: Daniel Povey) -# Apache 2.0 - -if [ "$#" -ne 2 ]; then - echo "Usage: $0 " - echo "e.g.: $0 /export/a15/vpanayotov/data/LibriSpeech/dev-clean data/dev-clean" - exit 1 -fi - -src=$1 -dst=$2 - -# all utterances are FLAC compressed -if ! which flac >&/dev/null; then - echo "Please install 'flac' on ALL worker nodes!" - exit 1 -fi - -mkdir -p $dst || exit 1 - -[ ! -d $src ] && echo "$0: no such directory $src" && exit 1 - -wav_scp=$dst/wav.scp; [[ -f "$wav_scp" ]] && rm $wav_scp -trans=$dst/text; [[ -f "$trans" ]] && rm $trans - -for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do - reader=$(basename $reader_dir) - if ! [ $reader -eq $reader ]; then # not integer. - echo "$0: unexpected subdirectory name $reader" - exit 1 - fi - - for chapter_dir in $(find -L $reader_dir/ -mindepth 1 -maxdepth 1 -type d | sort); do - chapter=$(basename $chapter_dir) - if ! [ "$chapter" -eq "$chapter" ]; then - echo "$0: unexpected chapter-subdirectory name $chapter" - exit 1 - fi - - find -L $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ - awk -v "dir=$chapter_dir" '{printf "%s %s/%s.flac\n", $0, dir, $0}' >>$wav_scp|| exit 1 - - chapter_trans=$chapter_dir/${reader}-${chapter}.trans.txt - [ ! -f $chapter_trans ] && echo "$0: expected file $chapter_trans to exist" && exit 1 - cat $chapter_trans >>$trans - done -done - -echo "$0: successfully prepared data in $dst" - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/local/download_and_untar.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/local/download_and_untar.sh deleted file mode 100644 index cd32fb6b989d7229272f1066a75a1688df2bf06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/local/download_and_untar.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Johns Hopkins University (author: Daniel Povey) -# Apache 2.0 - -remove_archive=false - -if [ "$1" == --remove-archive ]; then - remove_archive=true - shift -fi - -if [ $# -ne 3 ]; then - echo "Usage: $0 [--remove-archive] " - echo "e.g.: $0 /export/a15/vpanayotov/data www.openslr.org/resources/11 dev-clean" - echo "With --remove-archive it will remove the archive after successfully un-tarring it." - echo " can be one of: dev-clean, test-clean, dev-other, test-other," - echo " train-clean-100, train-clean-360, train-other-500." 
- exit 1 -fi - -data=$1 -url=$2 -part=$3 - -if [ ! -d "$data" ]; then - echo "$0: no such directory $data" - exit 1 -fi - -part_ok=false -list="dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500" -for x in $list; do - if [ "$part" == $x ]; then part_ok=true; fi -done -if ! $part_ok; then - echo "$0: expected to be one of $list, but got '$part'" - exit 1 -fi - -if [ -z "$url" ]; then - echo "$0: empty URL base." - exit 1 -fi - -if [ -f $data/LibriSpeech/$part/.complete ]; then - echo "$0: data part $part was already successfully extracted, nothing to do." - exit 0 -fi - - -# sizes of the archive files in bytes. This is some older versions. -sizes_old="371012589 347390293 379743611 361838298 6420417880 23082659865 30626749128" -# sizes_new is the archive file sizes of the final release. Some of these sizes are of -# things we probably won't download. -sizes_new="337926286 314305928 695964615 297279345 87960560420 33373768 346663984 328757843 6387309499 23049477885 30593501606" - -if [ -f $data/$part.tar.gz ]; then - size=$(/bin/ls -l $data/$part.tar.gz | awk '{print $5}') - size_ok=false - for s in $sizes_old $sizes_new; do if [ $s == $size ]; then size_ok=true; fi; done - if ! $size_ok; then - echo "$0: removing existing file $data/$part.tar.gz because its size in bytes $size" - echo "does not equal the size of one of the archives." - rm $data/$part.tar.gz - else - echo "$data/$part.tar.gz exists and appears to be complete." - fi -fi - -if [ ! -f $data/$part.tar.gz ]; then - if ! which wget >/dev/null; then - echo "$0: wget is not installed." - exit 1 - fi - full_url=$url/$part.tar.gz - echo "$0: downloading data from $full_url. This may take some time, please be patient." - - if ! wget -P $data --no-check-certificate $full_url; then - echo "$0: error executing wget $full_url" - exit 1 - fi -fi - -if ! tar -C $data -xvzf $data/$part.tar.gz; then - echo "$0: error un-tarring archive $data/$part.tar.gz" - exit 1 -fi - -touch $data/LibriSpeech/$part/.complete - -echo "$0: Successfully downloaded and un-tarred $data/$part.tar.gz" - -if $remove_archive; then - echo "$0: removing $data/$part.tar.gz file since --remove-archive option was supplied." - rm $data/$part.tar.gz -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/path.sh deleted file mode 100644 index ac1ca08baf5d4540b92ed239b8aa7cd613064a8c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/run.sh deleted file mode 100644 index ede3922c1c457ea9ef2a9de7070867708617bb34..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/run.sh +++ /dev/null @@ -1,282 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. - -. 
./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -stage=0 # start from 0 if you need to start from data preparation -stop_stage=5 -# data -data_url=www.openslr.org/resources/12 -# use your own data path -datadir=/export/data/en-asr-data/OpenSLR -# wav data dir -wave_data=data -# Optional train_config -# 1. conf/train_transformer_large.yaml: Standard transformer -train_config=conf/train_conformer.yaml -checkpoint= -cmvn=true -do_delta=false - -dir=exp/sp_spec_aug - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -# maybe you can try to adjust it if you can not get close results as README.md -average_num=10 -decode_modes="attention_rescoring ctc_greedy_search ctc_prefix_beam_search attention" - -. tools/parse_options.sh || exit 1; - -# bpemode (unigram or bpe) -nbpe=5000 -bpemode=unigram - -set -e -set -u -set -o pipefail - -train_set=train_960 -dev_set=dev -recog_set="test_clean test_other dev_clean dev_other" - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - echo "stage -1: Data Download" - for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do - local/download_and_untar.sh ${datadir} ${data_url} ${part} - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - ### Task dependent. You have to make data the following preparation part by yourself. - ### But you can utilize Kaldi recipes in most cases - echo "stage 0: Data preparation" - for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do - # use underscore-separated names in data directories. - local/data_prep_torchaudio.sh ${datadir}/LibriSpeech/${part} $wave_data/${part//-/_} - done -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - ### Task dependent. You have to design training and dev sets by yourself. - ### But you can utilize Kaldi recipes in most cases - echo "stage 1: Feature Generation" - mkdir -p $wave_data/train_960 - # merge total training data - for set in train_clean_100 train_clean_360 train_other_500; do - for f in `ls $wave_data/$set`; do - cat $wave_data/$set/$f >> $wave_data/train_960/$f - done - done - mkdir -p $wave_data/dev - # merge total dev data - for set in dev_clean dev_other; do - for f in `ls $wave_data/$set`; do - cat $wave_data/$set/$f >> $wave_data/$dev_set/$f - done - done - - tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \ - --in_scp $wave_data/$train_set/wav.scp \ - --out_cmvn $wave_data/$train_set/global_cmvn - -fi - - -dict=$wave_data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt -bpemodel=$wave_data/lang_char/${train_set}_${bpemode}${nbpe} -echo "dictionary: ${dict}" -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - ### Task dependent. You have to check non-linguistic symbols used in the corpus. - echo "stage 2: Dictionary and Json Data Preparation" - mkdir -p data/lang_char/ - - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - # we borrowed these code and scripts which are related bpe from ESPnet. 
- cut -f 2- -d" " $wave_data/${train_set}/text > $wave_data/lang_char/input.txt - tools/spm_train --input=$wave_data/lang_char/input.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000 - tools/spm_encode --model=${bpemodel}.model --output_format=piece < $wave_data/lang_char/input.txt | tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict} - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # - wc -l ${dict} -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - # Prepare wenet required data - echo "Prepare data, prepare required format" - for x in $dev_set ${recog_set} $train_set ; do - tools/make_raw_list.py $wave_data/$x/wav.scp $wave_data/$x/text \ - $wave_data/$x/data.list - done - -fi - - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - # Training - mkdir -p $dir - INIT_FILE=$dir/ddp_init - rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - cmvn_opts= - $cmvn && cmvn_opts="--cmvn $wave_data/${train_set}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type raw \ - --symbol_table $dict \ - --bpe_model ${bpemodel}.model \ - --train_data $wave_data/$train_set/data.list \ - --cv_data $wave_data/$dev_set/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $num_gpus \ - --ddp.rank $i \ - --ddp.dist_backend $dist_backend \ - --num_workers 1 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - cmvn_opts= - $cmvn && cmvn_opts="--cmvn data/${train_set}/global_cmvn" - # TODO, Add model average here - mkdir -p $dir/test - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size= - ctc_weight=0.5 - # Polling GPU id begin with index 0 - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - idx=0 - for test in $recog_set; do - for mode in ${decode_modes}; do - { - { - test_dir=$dir/${test}_${mode} - mkdir -p $test_dir - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$idx+1]) - python wenet/bin/recognize.py --gpu $gpu_id \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type raw \ - --dict $dict \ - --bpe_model ${bpemodel}.model \ - --test_data $wave_data/$test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --result_file $test_dir/text_bpe \ - --ctc_weight $ctc_weight \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - - cut -f2- -d " " $test_dir/text_bpe > $test_dir/text_bpe_value_tmp - cut -f1 -d " " 
$test_dir/text_bpe > $test_dir/text_bpe_key_tmp - tools/spm_decode --model=${bpemodel}.model --input_format=piece \ - < $test_dir/text_bpe_value_tmp | sed -e "s/▁/ /g" > $test_dir/text_value_tmp - paste -d " " $test_dir/text_bpe_key_tmp $test_dir/text_value_tmp > $test_dir/text - - python tools/compute-wer.py --char=1 --v=1 \ - $wave_data/$test/text $test_dir/text > $test_dir/wer - } & - - ((idx+=1)) - if [ $idx -eq $num_gpus ]; then - idx=0 - fi - } - done - done - wait - -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip -fi - -# Optionally, you can add LM and test it with runtime. -if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then - lm=data/local/lm - lexicon=data/local/dict/lexicon.txt - mkdir -p $lm - mkdir -p data/local/dict - - # 7.1 Download & format LM - which_lm=3-gram.pruned.1e-7.arpa.gz - if [ ! -e ${lm}/${which_lm} ]; then - wget http://www.openslr.org/resources/11/${which_lm} -P ${lm} - fi - echo "unzip lm($which_lm)..." - gunzip -k ${lm}/${which_lm} -c > ${lm}/lm.arpa - echo "Lm saved as ${lm}/lm.arpa" - - # 7.2 Prepare dict - unit_file=$dict - bpemodel=$bpemodel - # use $dir/words.txt (unit_file) and $dir/train_960_unigram5000 (bpemodel) - # if you download pretrained librispeech conformer model - cp $unit_file data/local/dict/units.txt - if [ ! -e ${lm}/librispeech-lexicon.txt ]; then - wget http://www.openslr.org/resources/11/librispeech-lexicon.txt -P ${lm} - fi - echo "build lexicon..." - tools/fst/prepare_dict.py $unit_file ${lm}/librispeech-lexicon.txt \ - $lexicon $bpemodel.model - echo "lexicon saved as '$lexicon'" - - # 7.3 Build decoding TLG - tools/fst/compile_lexicon_token_fst.sh \ - data/local/dict data/local/tmp data/local/lang - tools/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1; - - # 7.4 Decoding with runtime - fst_dir=data/lang_test - for test in ${recog_set}; do - ./tools/decode.sh --nj 6 \ - --beam 10.0 --lattice_beam 5 --max_active 7000 --blank_skip_thresh 0.98 \ - --ctc_weight 0.5 --rescoring_weight 1.0 --acoustic_scale 1.2 \ - --fst_path $fst_dir/TLG.fst \ - --dict_path $fst_dir/words.txt \ - data/$test/wav.scp data/$test/text $dir/final.zip $fst_dir/units.txt \ - $dir/lm_with_runtime_${test} - tail $dir/lm_with_runtime_${test}/wer - done -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. 
tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. ./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. 
- elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - 
pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = 
{'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. 
-has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! $skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = 
result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. - format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - 
parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University 
(author: Daniel Povey)
-# Apache 2.0
-
-# This script operates on a directory, such as in data/train/,
-# that contains some subset of the following files:
-# feats.scp
-# wav.scp
-# vad.scp
-# spk2utt
-# utt2spk
-# text
-#
-# It copies to another directory, possibly adding a specified prefix or a suffix
-# to the utterance and/or speaker names. Note, the recording-ids stay the same.
-#
-
-
-# begin configuration section
-spk_prefix=
-utt_prefix=
-spk_suffix=
-utt_suffix=
-validate_opts= # should rarely be needed.
-# end configuration section
-
-. utils/parse_options.sh
-
-if [ $# != 2 ]; then
- echo "Usage: "
- echo " $0 [options] <srcdir> <destdir>"
- echo "e.g.:"
- echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1"
- echo "Options"
- echo " --spk-prefix=<prefix> # Prefix for speaker ids, default empty"
- echo " --utt-prefix=<prefix> # Prefix for utterance ids, default empty"
- echo " --spk-suffix=<suffix> # Suffix for speaker ids, default empty"
- echo " --utt-suffix=<suffix> # Suffix for utterance ids, default empty"
- exit 1;
-fi
-
-
-export LC_ALL=C
-
-srcdir=$1
-destdir=$2
-
-if [ ! -f $srcdir/utt2spk ]; then
- echo "copy_data_dir.sh: no such file $srcdir/utt2spk"
- exit 1;
-fi
-
-if [ "$destdir" == "$srcdir" ]; then
- echo "$0: this script requires <srcdir> and <destdir> to be different."
- exit 1
-fi
-
-set -e;
-
-mkdir -p $destdir
-
-cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map
-cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map
-
-if [ ! -f $srcdir/utt2uniq ]; then
- if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then
- cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq
- fi
-else
- cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq
-fi
-
-cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
- utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk
-
-utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt
-
-if [ -f $srcdir/feats.scp ]; then
- utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp
-fi
-
-if [ -f $srcdir/vad.scp ]; then
- utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp
-fi
-
-if [ -f $srcdir/segments ]; then
- utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments
- cp $srcdir/wav.scp $destdir
-else # no segments->wav indexed by utt.
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do
- sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp
-done > ${outscp}
-
-rm -f ${logdir}/feats.*.scp 2>/dev/null
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/filter_scp.pl
deleted file mode 100644
index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/filter_scp.pl
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2012 Microsoft Corporation
-# Johns Hopkins University (author: Daniel Povey)
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This script takes a list of utterance-ids or any file whose first field
-# of each line is an utterance-id, and filters an scp
-# file (or any file whose "n-th" field is an utterance id), printing
-# out only those lines whose "n-th" field is in id_list. The index of
-# the "n-th" field is 1, by default, but can be changed by using
-# the -f switch
-
-$exclude = 0;
-$field = 1;
-$shifted = 0;
-
-do {
- $shifted=0;
- if ($ARGV[0] eq "--exclude") {
- $exclude = 1;
- shift @ARGV;
- $shifted=1;
- }
- if ($ARGV[0] eq "-f") {
- $field = $ARGV[1];
- shift @ARGV; shift @ARGV;
- $shifted=1
- }
-} while ($shifted);
-
-if(@ARGV < 1 || @ARGV > 2) {
- die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" .
- "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" .
- "Note: only the first field of each line in id_list matters. With --exclude, prints\n" .
- "only the lines that were *not* in id_list.\n" .
- "Caution: previously, the -f option was interpreted as a zero-based field index.\n" .
- "If your older scripts (written before Oct 2014) stopped working and you used the\n" .
- "-f option, add 1 to the argument.\n" .
- "See also: utils/filter_scp.pl .\n";
-}
-
-
-$idlist = shift @ARGV;
-open(F, "<$idlist") || die "Could not open id-list file $idlist";
-while(<F>) {
- @A = split;
- @A>=1 || die "Invalid id-list file line $_";
- $seen{$A[0]} = 1;
-}
-
-if ($field == 1) { # Treat this as special case, since it is common.
- while(<>) {
- $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field.";
- # $1 is what we filter on.
- if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) {
- print $_;
- }
- }
-} else {
- while(<>) {
- @A = split;
- @A > 0 || die "Invalid scp file line $_";
- @A >= $field || die "Invalid scp file line $_";
- if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) {
- print $_;
- }
- }
-}
-
-# tests:
-# the following should print "foo 1"
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo)
-# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . 
- "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . - " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. 
- } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . $cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. 
-cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', '')) - node 
+= 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n + 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . 
- "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! 
defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. - if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you 
may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin 
if PY2 else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
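The convention implemented by the deleted parse_options.sh above (each `--option-name value` pair overwrites an already-declared variable `option_name`, with boolean values validated) can be summarised with a rough Python equivalent; the defaults and command line in the usage example are illustrative assumptions, not part of the original script.

```python
def parse_options(argv, defaults):
    """Rough analogue of parse_options.sh: --option-name value -> opts['option_name'].

    Only names already present in `defaults` may be set, and options whose
    default is boolean must receive 'true' or 'false', mirroring the shell checks.
    """
    opts, args = dict(defaults), list(argv)
    while args and args[0].startswith("--"):
        if args[0] == "--help":
            raise SystemExit("No help found.")
        name = args[0][2:].replace("-", "_")
        if name not in opts:
            raise SystemExit(f"invalid option {args[0]}")
        if len(args) < 2:
            raise SystemExit(f"option {args[0]} requires a value")
        value = args[1]
        if isinstance(opts[name], bool):
            if value not in ("true", "false"):
                raise SystemExit(f'expected "true" or "false": {args[0]} {value}')
            value = (value == "true")
        opts[name] = value
        args = args[2:]
    return opts, args                                  # remaining positional arguments

# illustrative defaults and command line
opts, rest = parse_options(["--num-threads", "4", "--do-delta", "true", "data/train"],
                           {"num_threads": "1", "do_delta": False})
print(opts, rest)
```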
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - 
stack.enter_context(open(output, "w", encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] <num-utt> <srcdir> <destdir>" - echo " subset_data_dir.sh [--spk-list <speaker-list-file>] <srcdir> <destdir>" - echo " subset_data_dir.sh [--utt-list <utt-list-file>] <srcdir> <destdir>" - echo "By default, randomly selects <num-utt> utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have <num-utt> utterances" - echo "With --per-spk, selects <num-utt> utterances per speaker, if available." - echo "With --first, selects the first <num-utt> utterances" - echo "With --last, selects the last <num-utt> utterances" - echo "With --shortest, selects the shortest <num-utt> utterances." - echo "With --spk-list, reads the speakers to keep from <speaker-list-file>" - echo "With --utt-list, reads the utterances to keep from <utt-list-file>" - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - 
non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = 
feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
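The checkpoint-averaging logic removed above boils down to an element-wise mean over the selected state dicts. A simplified sketch of just that step (hypothetical helper name; the cv_loss-based selection of which checkpoints to average is omitted):

```python
# Hypothetical sketch: sum the parameters of the chosen checkpoints
# element-wise, then divide by the number of checkpoints.
import torch

def average_checkpoints(paths):
    avg = None
    for path in paths:
        states = torch.load(path, map_location="cpu")
        if avg is None:
            avg = {k: v.clone().float() for k, v in states.items()}
        else:
            for k in avg:
                avg[k] += states[k].float()
    return {k: torch.true_divide(v, len(paths)) for k, v in avg.items()}
```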
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
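
The note above is the reason the check loop that follows drops feed entries before calling `run`: once an input has been folded away during export, passing it to onnxruntime raises an error. A minimal, self-contained sketch of that same filtering idea, assuming only `onnxruntime`/`numpy` and using a hypothetical `encoder.onnx` path and feed keys:

```python
import numpy as np
import onnxruntime


def filter_feed_to_session_inputs(session, candidate_feed):
    """Keep only the entries the exported graph still declares as inputs.

    Inputs folded away at export time (e.g. required_cache_size or att_mask
    in 16/-1, -1/-1 or 16/0 mode) must not be passed to run().
    """
    declared = {inp.name for inp in session.get_inputs()}
    return {k: v for k, v in candidate_feed.items() if k in declared}


# Hypothetical usage; the model path, feed names and shapes are placeholders.
if __name__ == "__main__":
    sess = onnxruntime.InferenceSession("encoder.onnx")
    feed = {
        "chunk": np.zeros((1, 67, 80), dtype=np.float32),
        "required_cache_size": np.array(-1, dtype=np.int64),
    }
    outputs = sess.run(None, filter_feed_to_session_inputs(sess, feed))
```
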
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
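
Because the sizes above are only placeholders for axes that are exported as dynamic, it can be worth confirming after export that those axes really ended up symbolic. A small sketch using only `onnx`; the model path and the expected axis names are assumptions for illustration:

```python
import onnx


def axis_spec(model_path, input_name):
    """Return each dim of one graph input: a symbolic name (str) if dynamic,
    otherwise the fixed integer size."""
    model = onnx.load(model_path)
    for inp in model.graph.input:
        if inp.name == input_name:
            return [d.dim_param if d.dim_param else d.dim_value
                    for d in inp.type.tensor_type.shape.dim]
    raise KeyError(f"{input_name} is not a graph input")


# Hypothetical usage: for a decoder exported with dynamic 'NBEST'/'L' axes one
# would expect something like ['NBEST', 'L'] for 'hyps', not the dummy [10, 20].
# print(axis_spec("onnx_model/decoder.onnx", "hyps"))
```
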
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
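
As a worked example of the truncation computed just below (all sizes are made up; only the arithmetic mirrors the code):

```python
# Made-up sizes; only the arithmetic mirrors the att_cache truncation below.
xs_len = 16        # xs.size(1): frames in the current chunk
cache_len = 64     # att_cache.size(3): cached key/value frames per layer
factor = 2         # downsampling factor of this layer
pos_len = 45       # pos_emb.size(1): positional encodings actually available

att_cache_trunc = 0
if xs_len + cache_len / factor > pos_len:
    # 16 + 64 // 2 - 45 + 1 = 4: the 4 oldest cache frames are dropped so the
    # attention span still fits the available positional encodings.
    att_cache_trunc = xs_len + cache_len // factor - pos_len + 1

print(att_cache_trunc)  # -> 4
```
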
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
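Before the ONNX sessions are created below, it may help to see what the `ctc_greedy_search` branch effectively computes per utterance once the encoder's top-1 token ids are available. This is a plain-Python sketch of the collapse that the `map_batch(..., True, 0)` call below is assumed to perform before mapping ids to characters (blank id taken as 0):

```python
# Illustration only: standard CTC best-path decoding of one utterance's
# top-1 token-id sequence, without the swig ctc_decoder extension.
from typing import List

def ctc_greedy_collapse(token_ids: List[int], blank_id: int = 0) -> List[int]:
    """Merge consecutive repeated tokens, then drop blanks."""
    out, prev = [], None
    for t in token_ids:
        if t != prev:          # keep only the first of each repeated run
            out.append(t)
        prev = t
    return [t for t in out if t != blank_id]

# e.g. ctc_greedy_collapse([0, 5, 5, 0, 7, 7, 7, 0]) -> [5, 7]
```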
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
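To make the attention-rescoring input layout built above concrete, here is a toy sketch of the padded hypothesis tensors fed to the decoder ONNX graph (the sos/eos id and token ids are illustrative; IGNORE_ID is -1 in WeNet):

```python
# Each candidate is framed as [sos] + tokens + [eos] (reversed for r_hyps_*),
# right-padded with IGNORE_ID; hyps_lens_sos counts sos + tokens, not eos.
import numpy as np

IGNORE_ID = -1          # padding value used by WeNet
sos = eos = 4232        # last symbol-table entry (illustrative value)
cands = [[10, 11, 12], [10, 11]]     # beam_size = 2 hypotheses, batch = 1
max_len = max(len(c) for c in cands)

hyps_pad_sos_eos = np.full((1, len(cands), max_len + 2), IGNORE_ID, dtype=np.int64)
r_hyps_pad_sos_eos = np.full_like(hyps_pad_sos_eos, IGNORE_ID)
hyps_lens_sos = np.zeros((1, len(cands)), dtype=np.int32)

for j, cand in enumerate(cands):
    hyps_pad_sos_eos[0, j, :len(cand) + 2] = [sos] + cand + [eos]
    r_hyps_pad_sos_eos[0, j, :len(cand) + 2] = [sos] + cand[::-1] + [eos]
    hyps_lens_sos[0, j] = len(cand) + 1

# hyps_pad_sos_eos[0] -> [[4232 10 11 12 4232], [4232 10 11 4232 -1]]
# hyps_lens_sos[0]    -> [4, 3]
```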
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
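For orientation, the two simulation modes handled just below differ only in how the caches are pre-allocated. A minimal, self-contained sketch of the shape arithmetic, using made-up but typical configuration values (12 blocks, 4 heads, 256-dim output, CNN kernel 15):

```python
import torch

# Hypothetical configuration values, chosen only to make the shapes concrete.
num_blocks, heads, d_model, cnn_kernel = 12, 4, 256, 15
decoding_chunk_size, num_left_chunks = 16, 4
required_cache_size = decoding_chunk_size * num_left_chunks  # 64

# ONNX-style simulation: caches pre-allocated to a fixed maximum shape.
att_cache = torch.zeros(num_blocks, heads, required_cache_size, d_model // heads * 2)
cnn_cache = torch.zeros(num_blocks, 1, d_model, cnn_kernel - 1)

# JIT-style simulation: caches start empty and grow chunk by chunk.
att_cache_jit = torch.zeros(0, 0, 0, 0)
cnn_cache_jit = torch.zeros(0, 0, 0, 0)

print(att_cache.shape, cnn_cache.shape, att_cache_jit.shape)
```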
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. 
- Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
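The forward pass that follows applies the macaron-style ordering: half-step feed-forward, self-attention, convolution, half-step feed-forward. A simplified sketch with linear stand-ins for the sub-modules, ignoring masks, caches, and dropout:

```python
import torch
import torch.nn as nn

# Linear layers stand in for the real attention / conv / feed-forward modules.
size = 8
ffn_macaron, attn, conv, ffn = (nn.Linear(size, size) for _ in range(4))
norm_ff_macaron, norm_mha, norm_conv, norm_ff = (nn.LayerNorm(size) for _ in range(4))

x = torch.randn(2, 5, size)                      # (batch, time, size)
x = x + 0.5 * ffn_macaron(norm_ff_macaron(x))    # half-step macaron feed-forward
x = x + attn(norm_mha(x))                        # multi-headed self-attention (stand-in)
x = x + conv(norm_conv(x))                       # convolution module (stand-in)
x = x + 0.5 * ffn(norm_ff(x))                    # second half-step feed-forward
print(x.shape)                                   # torch.Size([2, 5, 8])
```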
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
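The score computation implemented in `forward` below follows Transformer-XL: a content term (`matrix_ac`) plus a position term (`matrix_bd`), scaled by `sqrt(d_k)`. A rough, self-contained sketch with random tensors and zero-initialized biases (shapes are illustrative, not taken from a real config):

```python
import math
import torch

batch, heads, t1, t2, d_k = 2, 4, 6, 6, 16
q = torch.randn(batch, heads, t1, d_k)
k = torch.randn(batch, heads, t2, d_k)
p = torch.randn(batch, heads, t2, d_k)       # projected relative position embeddings
pos_bias_u = torch.zeros(heads, d_k)         # learnable bias for the content term
pos_bias_v = torch.zeros(heads, d_k)         # learnable bias for the position term

matrix_ac = torch.matmul(q + pos_bias_u[None, :, None, :], k.transpose(-2, -1))
matrix_bd = torch.matmul(q + pos_bias_v[None, :, None, :], p.transpose(-2, -1))
scores = (matrix_ac + matrix_bd) / math.sqrt(d_k)   # (batch, heads, t1, t2)
attn = torch.softmax(scores, dim=-1)
print(attn.shape)
```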
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
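The note above relies on concatenation along a zero-sized dimension being a no-op, so the first chunk can reuse the same code path as later chunks. A runnable restatement of that cache round trip (the shapes are illustrative only):

```python
import torch

k = torch.randn(1, 4, 16, 32)                      # current chunk keys
v = torch.randn(1, 4, 16, 32)                      # current chunk values
cache = torch.zeros(1, 4, 0, 64)                   # "empty" cache fed for the first chunk

key_cache, value_cache = torch.split(cache, cache.size(-1) // 2, dim=-1)
k = torch.cat([key_cache, k], dim=2)               # unchanged: cache_t == 0
v = torch.cat([value_cache, v], dim=2)
new_cache = torch.cat((k, v), dim=-1)              # (1, 4, 16, 64), carried to the next chunk
print(new_cache.shape)
```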
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
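Here "VALID" padding simply means no implicit zero padding, so each spatial dimension shrinks by `kernel_size - 1`. A quick check with a plain `torch.nn.Conv2d`, not the class defined below:

```python
import torch
import torch.nn as nn

x = torch.randn(1, 3, 32, 32)
conv = nn.Conv2d(3, 8, kernel_size=5, padding=0)   # "valid": no zero padding
y = conv(x)
print(y.shape)  # torch.Size([1, 8, 28, 28]); 32 - (5 - 1) = 28 per spatial dim
```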
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
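The causal branch above keeps the last `lorder = kernel_size - 1` frames as left context for the next chunk. A standalone sketch of that bookkeeping with an ordinary depthwise `Conv1d` (channel count and chunk length are illustrative):

```python
import torch
import torch.nn as nn

channels, kernel_size = 8, 15
lorder = kernel_size - 1
conv = nn.Conv1d(channels, channels, kernel_size, padding=0, groups=channels)

x = torch.randn(1, channels, 20)                   # (batch, channels, chunk_time)
cache = torch.zeros(1, channels, 0)                # empty cache for the first chunk

if cache.size(2) == 0:
    x = nn.functional.pad(x, (lorder, 0), 'constant', 0.0)
else:
    x = torch.cat((cache, x), dim=2)
new_cache = x[:, :, -lorder:]                      # left context carried to the next chunk
y = conv(x)
print(y.shape, new_cache.shape)                    # (1, 8, 20) and (1, 8, 14)
```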
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = 
False, - static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
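The overlap strategy described above can be checked numerically: consecutive windows advance by `stride` input frames but span `decoding_window` frames, so they overlap by a few frames, which gives the subsampling convolution the context it needs without a cache. A small sketch with assumed front-end parameters (subsampling 4, right context 6):

```python
# Assumed values for a Conv2dSubsampling4-style front end.
subsampling, right_context = 4, 6
decoding_chunk_size = 16
context = right_context + 1                          # add the current frame
stride = subsampling * decoding_chunk_size           # 64 input frames advanced per chunk
decoding_window = (decoding_chunk_size - 1) * subsampling + context  # 67 frames fed per chunk

num_frames = 300
windows = [(cur, min(cur + decoding_window, num_frames))
           for cur in range(0, num_frames - context + 1, stride)]
print(stride, decoding_window, windows)
# Consecutive windows overlap by decoding_window - stride = 3 frames here.
```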
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
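With `normalize_before=False` (the constructor default here), each sub-block below is applied post-norm, i.e. `x = LayerNorm(x + sublayer(x))`. A simplified sketch of that ordering with linear stand-ins for the four sub-modules:

```python
import torch
import torch.nn as nn

# Post-norm ordering: attention, feed-forward, convolution, feed-forward.
size = 8
attn, ffn1, conv, ffn2 = (nn.Linear(size, size) for _ in range(4))
ln1, ln2, ln3, ln4 = (nn.LayerNorm(size) for _ in range(4))

x = torch.randn(2, 5, size)
x = ln1(x + attn(x))    # multi-headed self-attention + residual, then norm
x = ln2(x + ffn1(x))    # first feed-forward
x = ln3(x + conv(x))    # convolution module (stand-in)
x = ln4(x + ffn2(x))    # second feed-forward
print(x.shape)
```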
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, 
max_time_step] - """ - return padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ 
-1,54 +0,0 @@ -from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - 
batch_size = speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
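The CTC helpers above are easiest to follow on a tiny input: `remove_duplicates_and_blank` collapses repeated tokens and drops blanks (id 0), and `log_add` is a numerically stable log-sum-exp. A small usage sketch, restating the two functions verbatim so the snippet runs standalone:

```python
# Standalone demo of the greedy-CTC collapse and stable log-add helpers above.
import math
from typing import List


def remove_duplicates_and_blank(hyp: List[int]) -> List[int]:
    new_hyp, cur = [], 0
    while cur < len(hyp):
        if hyp[cur] != 0:          # 0 is the blank id
            new_hyp.append(hyp[cur])
        prev = cur
        while cur < len(hyp) and hyp[cur] == hyp[prev]:
            cur += 1               # skip the run of repeated symbols
    return new_hyp


def log_add(args: List[float]) -> float:
    if all(a == -float('inf') for a in args):
        return -float('inf')
    a_max = max(args)
    return a_max + math.log(sum(math.exp(a - a_max) for a in args))


assert remove_duplicates_and_blank([0, 3, 3, 0, 0, 5, 5, 5, 0]) == [3, 5]
assert abs(log_add([math.log(0.2), math.log(0.3)]) - math.log(0.5)) < 1e-12
```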
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
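Before the forced-alignment dynamic program above runs, the label sequence is interleaved with blanks via `insert_blank`. A short runnable illustration of that expansion (the helper is restated so the example is self-contained; the label values are arbitrary):

```python
# Illustrative usage of insert_blank() from the removed ctc_util.py above:
# interleave a blank id between labels and append a trailing blank.
import numpy as np


def insert_blank(label, blank_id=0):
    label = np.expand_dims(label, 1)
    blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id
    label = np.concatenate([blanks, label], axis=1)
    label = label.reshape(-1)
    label = np.append(label, label[0])
    return label


print(insert_blank(np.array([7, 7, 9])))   # -> [0 7 0 7 0 9 0]
```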
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
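`read_non_lang_symbols` above accepts only symbols shaped like `{xxx}`, `[xxx]`, or `<xxx>`. A quick standalone check of that regular expression on a few made-up symbols:

```python
# The pattern is copied from the removed file_utils.py above; the test strings
# are invented examples.
import re

non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})")

for sym in ["{NOISE}", "[LAUGHTER]", "<unk>", "NOISE"]:
    ok = non_lang_syms_pattern.fullmatch(sym) is not None
    print(f"{sym!r}: {'valid' if ok else 'rejected'}")
# {NOISE}, [LAUGHTER] and <unk> match; a bare 'NOISE' would trigger the
# BadSymbolFormat error raised in the helper.
```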
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/librispeech/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/README.md deleted file mode 100644 index 344faf23debb56bddbfb097df1db06f3f819e28e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/README.md +++ /dev/null @@ -1,87 +0,0 @@ -# Performance Record - -This is a Chinese speech recognition recipe that trains on all Chinese corpora including: - -| Dataset | Duration (Hours) | -|------------|------------------| -| Aidatatang | 140 | -| Aishell | 151 | -| MagicData | 712 | -| Primewords | 99 | -| ST-CMDS | 110 | -| THCHS-30 | 26 | -| TAL-ASR | 587 | -| AISHELL2 | 1000 | - -## Unified Transformer Result - -### Data info: - -* Dataset: Aidatatang, Aishell, MagicData, Primewords, ST-CMDS, and THCHS-30. -* Feature info: using fbank feature, with cmvn, no speed perturb. -* Training info: lr 0.004, batch size 18, 3 machines, 3*8 = 24 GPUs, acc_grad 1, 220 epochs, dither 0.1 -* Decoding info: ctc_weight 0.5, average_num 30 -* Git hash: 013794572a55c7d0dbea23a66106ccf3e5d3b8d4 - -### WER - -| Dataset | chunk size | attention decoder | ctc greedy search | ctc prefix beam search | attention rescoring | -|------------|------------|-------------------|-------------------|------------------------|---------------------| -| Aidatatang | full | 4.23 | 5.82 | 5.82 | 4.71 | -| | 16 | 4.59 | 6.99 | 6.99 | 5.29 | -| Aishell | full | 4.69 | 5.80 | 5.80 | 4.64 | -| | 16 | 4.97 | 6.75 | 6.75 | 5.37 | -| MagicData | full | 2.86 | 4.01 | 4.00 | 3.07 | -| | 16 | 3.10 | 5.02 | 5.02 | 3.68 | -| THCHS-30 | full | 16.68 | 15.46 | 15.46 | 14.38 | -| | 16 | 17.47 | 16.81 | 16.82 | 15.63 | - -## Unified Conformer Result - -### Data info: - -* Dataset: Aidatatang, Aishell, MagicData, Primewords, ST-CMDS, and THCHS-30. -* Feature info: using fbank feature, with cmvn, speed perturb. 
-* Training info: lr 0.001, batch size 8, 1 machines, 1*8 = 8 GPUs, acc_grad 12, 60 epochs -* Decoding info: ctc_weight 0.5, average_num 10 -* Git hash: 5bdf436e671ef4c696d1b039f29cc33109e072fa - -### WER - -| Dataset | chunk size | attention decoder | ctc greedy search | ctc prefix beam search | attention rescoring | -|------------|------------|-------------------|-------------------|------------------------|---------------------| -| Aidatatang | full | 4.12 | 4.97 | 4.97 | 4.22 | -| | 16 | 4.45 | 5.73 | 5.73 | 4.75 | -| Aishell | full | 4.49 | 5.07 | 5.05 | 4.43 | -| | 16 | 4.77 | 5.77 | 5.77 | 4.85 | -| MagicData | full | 2.55 | 3.07 | 3.05 | 2.59 | -| | 16 | 2.81 | 3.88 | 3.86 | 3.08 | -| THCHS-30 | full | 13.55 | 13.75 | 13.76 | 12.72 | -| | 16 | 13.78 | 15.10 | 15.08 | 13.90 | - -## Unified Conformer Result - -### Data info: - -* Dataset: Aidatatang, Aishell, MagicData, Primewords, ST-CMDS, THCHS-30, TAL-ASR, and AISHELL2. -* Feature info: using fbank feature, dither=0, cmvn, speed perturb -* Training info: lr 0.001, batch size 22, 4 GPUs, acc_grad 4, 120 epochs, dither 0.1 -* Decoding info: ctc_weight 0.5, average_num 10 -* Git hash: 66f30c197d00c59fdeda3bc8ada801f867b73f78 - -### WER - -| Dataset | chunk size | attention decoder | ctc greedy search | ctc prefix beam search | attention rescoring | -|------------|------------|-------------------|-------------------|------------------------|---------------------| -| Aidatatang | full | 3.22 | 4.00 | 4.01 | 3.35 | -| | 16 | 3.50 | 4.63 | 4.63 | 3.79 | -| Aishell | full | 1.23 | 2.12 | 2.13 | 1.42 | -| | 16 | 1.33 | 2.72 | 2.72 | 1.72 | -| MagicData | full | 2.38 | 3.07 | 3.05 | 2.52 | -| | 16 | 2.66 | 3.80 | 3.78 | 2.94 | -| THCHS-30 | full | 9.93 | 11.07 | 11.06 | 10.16 | -| | 16 | 10.28 | 11.85 | 11.85 | 10.81 | -| AISHELL2 | full | 5.25 | 5.81 | 5.79 | 5.22 | -| | 16 | 5.48 | 6.48 | 6.50 | 5.61 | -| TAL-ASR | full | 9.54 | 10.35 | 10.28 | 9.66 | -| | 16 | 10.04 | 11.43 | 11.39 | 10.55 | diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_960_unigram5000.model b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_960_unigram5000.model deleted file mode 100644 index 8419aa7bac81d9b02f9644e9cf8929b73765a3af..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_960_unigram5000.model and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_conformer.yaml deleted file mode 100644 index b8ce511cdaad0f03be4a82708d70290ec9e37c3d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder 
related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 240 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_unified_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_unified_conformer.yaml deleted file mode 100644 index 3155d1b760b676476ba1abc60b64001b242988c4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_unified_conformer.yaml +++ /dev/null @@ -1,81 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - causal: true - use_dynamic_chunk: true - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - use_dynamic_left_chunk: false - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 180 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_unified_transformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_unified_transformer.yaml deleted file mode 100644 index aa6645df9fc6df20e4946bdfe401c961c3bed31b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/conf/train_unified_transformer.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# network architecture -# encoder related -encoder: transformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder architecture type - normalize_before: true - use_dynamic_chunk: true - use_dynamic_left_chunk: false - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - - -grad_clip: 5 -accum_grad: 1 -max_epoch: 220 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.004 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aidatatang_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aidatatang_data_prep.sh deleted file mode 100644 index cb334a49a3472cde963329f134edc93476f83315..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aidatatang_data_prep.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Xingyu Na -# Apache 2.0 - -. ./path.sh || exit 1; - -if [ $# != 2 ]; then - echo "Usage: $0 " - echo " $0 /export/a05/xna/data/data_aidatatang_200zh data/aidatatang" - exit 1; -fi - -aidatatang_audio_dir=$1/corpus -aidatatang_text=$1/transcript/aidatatang_200_zh_transcript.txt -data=$2 - -train_dir=$data/local/train -dev_dir=$data/local/dev -test_dir=$data/local/test -tmp_dir=$data/local/tmp - -mkdir -p $train_dir -mkdir -p $dev_dir -mkdir -p $test_dir -mkdir -p $tmp_dir - -# data directory check -if [ ! -d $aidatatang_audio_dir ] || [ ! -f $aidatatang_text ]; then - echo "Error: $0 requires two directory arguments" - exit 1; -fi - -echo "**** Creating aidatatang data folder ****" - -# find wav audio file for train, dev and test resp. 
-find $aidatatang_audio_dir -iname "*.wav" > $tmp_dir/wav.flist -n=`cat $tmp_dir/wav.flist | wc -l` -[ $n -ne 237265 ] && \ - echo Warning: expected 237265 data files, found $n - -grep -i "corpus/train" $tmp_dir/wav.flist > $train_dir/wav.flist || exit 1; -grep -i "corpus/dev" $tmp_dir/wav.flist > $dev_dir/wav.flist || exit 1; -grep -i "corpus/test" $tmp_dir/wav.flist > $test_dir/wav.flist || exit 1; - -rm -r $tmp_dir - -# Transcriptions preparation -for dir in $train_dir $dev_dir $test_dir; do - echo Preparing $dir transcriptions - sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF}' > $dir/utt.list - sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{i=NF-1;printf("%s %s\n",$NF,$i)}' > $dir/utt2spk_all - paste -d' ' $dir/utt.list $dir/wav.flist > $dir/wav.scp_all - tools/filter_scp.pl -f 1 $dir/utt.list $aidatatang_text | sed 's/A/A/g' > $dir/transcripts.txt - awk '{print $1}' $dir/transcripts.txt > $dir/utt.list - tools/filter_scp.pl -f 1 $dir/utt.list $dir/utt2spk_all | sort -u | awk '{print $1" T0055"$2}' > $dir/utt2spk - tools/filter_scp.pl -f 1 $dir/utt.list $dir/wav.scp_all | sort -u > $dir/wav.scp - sort -u $dir/transcripts.txt > $dir/text - tools/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt -done - -mkdir -p $data/train $data/dev $data/test - -for f in spk2utt utt2spk wav.scp text; do - cp $train_dir/$f $data/train/$f || exit 1; - cp $dev_dir/$f $data/dev/$f || exit 1; - cp $test_dir/$f $data/test/$f || exit 1; -done - -# utils/data/validate_data_dir.sh --no-feats $data/train || exit 1; -# utils/data/validate_data_dir.sh --no-feats $data/dev || exit 1; -# utils/data/validate_data_dir.sh --no-feats $data/test || exit 1; - -echo "$0: aidatatang_200zh data preparation succeeded" -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aidatatang_download_and_untar.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aidatatang_download_and_untar.sh deleted file mode 100644 index a2616ba0e2046e8a009e915bb02d1eb509f62228..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aidatatang_download_and_untar.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Johns Hopkins University (author: Daniel Povey) -# 2017 Xingyu Na -# Apache 2.0 - -remove_archive=false - -if [ "$1" == --remove-archive ]; then - remove_archive=true - shift -fi - -if [ $# -ne 3 ]; then - echo "Usage: $0 [--remove-archive] " - echo "e.g.: $0 /export/a05/xna/data www.openslr.org/resources/62 aidatatang_200zh" - echo "With --remove-archive it will remove the archive after successfully un-tarring it." - echo " can be one of: aidatatang_200zh." -fi - -data=$1 -url=$2 -part=$3 - -if [ ! -d "$data" ]; then - echo "$0: no such directory $data, make it" - mkdir -p $data -fi - -part_ok=false -list="aidatatang_200zh" -for x in $list; do - if [ "$part" == $x ]; then part_ok=true; fi -done -if ! $part_ok; then - echo "$0: expected to be one of $list, but got '$part'" - exit 1; -fi - -if [ -z "$url" ]; then - echo "$0: empty URL base." - exit 1; -fi - -if [ -f $data/$part/.complete ]; then - echo "$0: data part $part was already successfully extracted, nothing to do." - exit 0; -fi - -# sizes of the archive files in bytes. -sizes="18756983399" - -if [ -f $data/$part.tgz ]; then - size=$(/bin/ls -l $data/$part.tgz | awk '{print $5}') - size_ok=false - for s in $sizes; do if [ $s == $size ]; then size_ok=true; fi; done - if ! 
$size_ok; then - echo "$0: removing existing file $data/$part.tgz because its size in bytes $size" - echo "does not equal the size of one of the archives." - rm $data/$part.gz - else - echo "$data/$part.tgz exists and appears to be complete." - fi -fi - -if [ ! -f $data/$part.tgz ]; then - if ! which wget >/dev/null; then - echo "$0: wget is not installed." - exit 1; - fi - full_url=$url/$part.tgz - echo "$0: downloading data from $full_url. This may take some time, please be patient." - - cd $data - if ! wget --no-check-certificate $full_url; then - echo "$0: error executing wget $full_url" - exit 1; - fi -fi - -cd $data - -if ! tar -xvzf $part.tgz; then - echo "$0: error un-tarring archive $data/$part.tgz" - exit 1; -fi - -touch $data/$part/.complete - -dev_dir=$data/$part/corpus/dev -test_dir=$data/$part/corpus/test -train_dir=$data/$part/corpus/train -if [ $part == "aidatatang_200zh" ]; then - for set in $dev_dir $test_dir $train_dir;do - cd $set - for wav in ./*.tar.gz; do - echo "Extracting wav from $wav" - tar -zxf $wav && rm $wav - done - done -fi - -echo "$0: Successfully downloaded and un-tarred $data/$part.tgz" - -if $remove_archive; then - echo "$0: removing $data/$part.tgz file since --remove-archive option was supplied." - rm $data/$part.tgz -fi - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aishell2_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aishell2_data_prep.sh deleted file mode 100644 index 016b7058c3811fa93e8b10edc80cbc13a7e8e4d3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aishell2_data_prep.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG) -# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU) -# Apache 2.0 - -# This script is copied from aishell2/s5/local/prepare_data.sh -# but using difference word segmentation script. - -# transform raw AISHELL-2 data to kaldi format - -. ./path.sh || exit 1; - -tmp= -dir= - -if [ $# != 2 ]; then - echo "Usage: $0 " - echo " $0 /export/AISHELL-2/iOS/train data/train" - exit 1; -fi - -corpus=$1 -dir=$2 -tmp=$dir/tmp - -echo "prepare_data.sh: Preparing data in $corpus" - -mkdir -p $dir -mkdir -p $tmp - - -# corpus check -if [ ! -d $corpus ] || [ ! -f $corpus/wav.scp ] || [ ! -f $corpus/trans.txt ]; then - echo "Error: $0 requires wav.scp and trans.txt under $corpus directory." 
- exit 1; -fi - -# validate utt-key list -awk '{print "AISHELL2_"$1}' $corpus/wav.scp > $tmp/wav_utt.list -awk '{print "AISHELL2_"$1}' $corpus/trans.txt > $tmp/trans_utt.list -tools/filter_scp.pl -f 1 $tmp/wav_utt.list $tmp/trans_utt.list > $tmp/utt.list - -# wav.scp -awk -F'\t' -v path_prefix=$corpus '{printf("AISHELL2_%s %s/%s\n",$1,path_prefix,$2)}' $corpus/wav.scp > $tmp/tmp_wav.scp -tools/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_wav.scp | sort -k 1 | uniq > $tmp/wav.scp - -awk -F'\t' '{printf("AISHELL2_%s %s\n",$1,$2)}' $corpus/trans.txt > $tmp/tmp_trans.txt -tools/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_trans.txt | sort -k 1 | uniq > $tmp/trans.txt - -# text has ' sed "s/'//g" -dos2unix < $tmp/trans.txt | \ - tools/filter_scp.pl -f 1 $tmp/utt.list - | \ - sort -k 1 | uniq | tr '[a-z]' '[A-Z]' | \ - sed 's/A/A/g' | sed 's/T/T/g' | sed 's/M/M/g' | sed 's/𫚉//g' | sed 's/𫖯/頫/g' \ - > $tmp/text - -# utt2spk & spk2utt -awk -F' ' '{print $2}' $tmp/wav.scp > $tmp/wav.list -sed -e 's:\.wav::g' $tmp/wav.list | \ - awk -F'/' '{i=NF-1;printf("AISHELL2_%s AISHELL2_%s\n",$NF,$i)}' > $tmp/tmp_utt2spk -tools/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_utt2spk | sort -k 1 | uniq > $tmp/utt2spk -tools/utt2spk_to_spk2utt.pl $tmp/utt2spk | sort -k 1 | uniq > $tmp/spk2utt - -# copy prepared resources from tmp_dir to target dir -mkdir -p $dir -for f in wav.scp text spk2utt utt2spk; do - cp $tmp/$f $dir/$f || exit 1; -done - -tools/validate_data_dir.sh --no-feats $dir || exit 1; -echo "local/prepare_data.sh succeeded" -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aishell_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aishell_data_prep.sh deleted file mode 100644 index 6c3b6a40bf9e7d5392a0cdc2517b14b38714e332..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aishell_data_prep.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Xingyu Na -# Apache 2.0 - -. ./path.sh || exit 1; - -if [ $# != 2 ]; then - echo "Usage: $0 " - echo " $0 /export/a05/xna/data/data_aishell data/aishell" - exit 1; -fi - -aishell_audio_dir=$1/wav -aishell_text=$1/transcript/aishell_transcript_v0.8.txt -data=data/aishell - -train_dir=$data/local/train -dev_dir=$data/local/dev -test_dir=$data/local/test -tmp_dir=$data/local/tmp - -mkdir -p $train_dir -mkdir -p $dev_dir -mkdir -p $test_dir -mkdir -p $tmp_dir - -# data directory check -if [ ! -d $aishell_audio_dir ] || [ ! -f $aishell_text ]; then - echo "Error: $0 requires two directory arguments" - exit 1; -fi - -echo "**** Creating aishell data folder ****" - -# find wav audio file for train, dev and test resp. 
-find $aishell_audio_dir -iname "*.wav" > $tmp_dir/wav.flist -n=`cat $tmp_dir/wav.flist | wc -l` -[ $n -ne 141925 ] && \ - echo Warning: expected 141925 data data files, found $n - -grep -i "wav/train" $tmp_dir/wav.flist > $train_dir/wav.flist || exit 1; -grep -i "wav/dev" $tmp_dir/wav.flist > $dev_dir/wav.flist || exit 1; -grep -i "wav/test" $tmp_dir/wav.flist > $test_dir/wav.flist || exit 1; - -rm -r $tmp_dir - -# Transcriptions preparation -for dir in $train_dir $dev_dir $test_dir; do - echo Preparing $dir transcriptions - sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF}' > $dir/utt.list - sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{i=NF-1;printf("%s %s\n",$NF,$i)}' > $dir/utt2spk_all - paste -d' ' $dir/utt.list $dir/wav.flist > $dir/wav.scp_all - tools/filter_scp.pl -f 1 $dir/utt.list $aishell_text | \ - sed 's/a/a/g' | sed 's/b/b/g' |\ - sed 's/c/c/g' | sed 's/k/k/g' |\ - sed 's/t/t/g' > $dir/transcripts.txt - awk '{print $1}' $dir/transcripts.txt > $dir/utt.list - tools/filter_scp.pl -f 1 $dir/utt.list $dir/utt2spk_all | sort -u | awk '{print $1" BAC009"$2}' > $dir/utt2spk - tools/filter_scp.pl -f 1 $dir/utt.list $dir/wav.scp_all | sort -u > $dir/wav.scp - sort -u $dir/transcripts.txt > $dir/text - tools/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt -done - -mkdir -p $data/train $data/dev $data/test - -for f in spk2utt utt2spk wav.scp text; do - cp $train_dir/$f $data/train/$f || exit 1; - cp $dev_dir/$f $data/dev/$f || exit 1; - cp $test_dir/$f $data/test/$f || exit 1; -done - -# utils/data/validate_data_dir.sh --no-feats $data/train || exit 1; -# utils/data/validate_data_dir.sh --no-feats $data/dev || exit 1; -# utils/data/validate_data_dir.sh --no-feats $data/test || exit 1; - -echo "$0: AISHELL data preparation succeeded" -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aishell_download_and_untar.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aishell_download_and_untar.sh deleted file mode 100644 index e251a9aae2fefd4d52e98530936cae35e74cf0e1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/aishell_download_and_untar.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Johns Hopkins University (author: Daniel Povey) -# 2017 Xingyu Na -# Apache 2.0 - -remove_archive=false - -if [ "$1" == --remove-archive ]; then - remove_archive=true - shift -fi - -if [ $# -ne 3 ]; then - echo "Usage: $0 [--remove-archive] " - echo "e.g.: $0 /export/a05/xna/data www.openslr.org/resources/33 data_aishell" - echo "With --remove-archive it will remove the archive after successfully un-tarring it." - echo " can be one of: data_aishell, resource_aishell." -fi - -data=$1 -url=$2 -part=$3 - -if [ ! -d "$data" ]; then - echo "$0: no such directory $data" - mkdir -p $data -fi - -part_ok=false -list="data_aishell resource_aishell" -for x in $list; do - if [ "$part" == $x ]; then part_ok=true; fi -done -if ! $part_ok; then - echo "$0: expected to be one of $list, but got '$part'" - exit 1; -fi - -if [ -z "$url" ]; then - echo "$0: empty URL base." - exit 1; -fi - -if [ -f $data/$part/.complete ]; then - echo "$0: data part $part was already successfully extracted, nothing to do." - exit 0; -fi - -# sizes of the archive files in bytes. 
-sizes="15582913665 1246920" - -if [ -f $data/$part.tgz ]; then - size=$(/bin/ls -l $data/$part.tgz | awk '{print $5}') - size_ok=false - for s in $sizes; do if [ $s == $size ]; then size_ok=true; fi; done - if ! $size_ok; then - echo "$0: removing existing file $data/$part.tgz because its size in bytes $size" - echo "does not equal the size of one of the archives." - rm $data/$part.tgz - else - echo "$data/$part.tgz exists and appears to be complete." - fi -fi - -if [ ! -f $data/$part.tgz ]; then - if ! which wget >/dev/null; then - echo "$0: wget is not installed." - exit 1; - fi - full_url=$url/$part.tgz - echo "$0: downloading data from $full_url. This may take some time, please be patient." - - cd $data - if ! wget --no-check-certificate $full_url; then - echo "$0: error executing wget $full_url" - exit 1; - fi -fi - -cd $data - -if ! tar -xvzf $part.tgz; then - echo "$0: error un-tarring archive $data/$part.tgz" - exit 1; -fi - -touch $data/$part/.complete - -if [ $part == "data_aishell" ]; then - cd $data/$part/wav - for wav in ./*.tar.gz; do - echo "Extracting wav from $wav" - tar -zxf $wav && rm $wav - done -fi - -echo "$0: Successfully downloaded and un-tarred $data/$part.tgz" - -if $remove_archive; then - echo "$0: removing $data/$part.tgz file since --remove-archive option was supplied." - rm $data/$part.tgz -fi - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/magicdata_badlist b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/magicdata_badlist deleted file mode 100644 index 67636273d53a9708c4f938d619d7622970d88540..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/magicdata_badlist +++ /dev/null @@ -1,2 +0,0 @@ -16_4013_20170819121429.wav -18_1565_20170712000170.wav diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/magicdata_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/magicdata_data_prep.sh deleted file mode 100644 index a2609c5159da0acd9de18d7af7fe01683c38d433..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/magicdata_data_prep.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Xingyu Na -# Apache 2.0 - -. ./path.sh || exit 1; - -if [ $# != 2 ]; then - echo "Usage: $0 " - echo " $0 /export/a05/xna/data/magicdata data/magicdata" - exit 1; -fi - -corpus=$1 -data=$2 - -if [ ! -d $corpus/train ] || [ ! -d $corpus/dev ] || [ ! -d $corpus/test ]; then - echo "Error: $0 requires complete corpus" - exit 1; -fi - -echo "**** Creating magicdata data folder ****" - -mkdir -p $data/{train,dev,test,tmp} - -# find wav audio file for train, dev and test resp. 
-tmp_dir=$data/tmp -find $corpus -iname "*.wav" > $tmp_dir/wav.flist -n=`cat $tmp_dir/wav.flist | wc -l` -[ $n -ne 609552 ] && \ - echo Warning: expected 609552 data data files, found $n - -for x in train dev test; do - grep -i "/$x/" $tmp_dir/wav.flist > $data/$x/wav.flist || exit 1; - echo "Filtering data using found wav list and provided transcript for $x" - awk -F '.wav' '{print $1}' local/magicdata_badlist | tools/filter_scp.pl --exclude -f 1 - \ - <(cat $data/$x/wav.flist|awk -F '/' '{print gensub(".wav", "", "g", $NF), $0}') \ - > $data/$x/wav.scp - sed '1d' $corpus/$x/TRANS.txt | awk -F '\t' '{print gensub(".wav","","g",$1), $2}' > $data/$x/utt2spk - sed '1d' $corpus/$x/TRANS.txt | awk -F '\t' '{print gensub(".wav","","g",$1), $3}' |\ - sed 's/!//g' | sed 's/?//g' |\ - sed 's/,//g' | sed 's/-//g' |\ - sed 's/://g' | sed 's/;//g' |\ - sed 's/ //g' | sed 's/。//g' |\ - sed 's/`//g' | sed 's/,//g' |\ - sed 's/://g' | sed 's/?//g' |\ - sed 's/\///g' | sed 's/·//g' |\ - sed 's/\"//g' | sed 's/“//g' |\ - sed 's/”//g' | sed 's/\\//g' |\ - sed 's/…//g' | sed "s///g" |\ - sed 's/、//g' | sed "s///g" | sed 's/《//g' | sed 's/》//g' |\ - sed 's/\[//g' | sed 's/\]//g' | sed 's/FIL//g' | sed 's/SPK//' |\ - tr '[a-z]' '[A-Z]' |\ - awk '{if (NF > 1) print $0;}' > $data/$x/text - for file in wav.scp utt2spk text; do - sort $data/$x/$file -o $data/$x/$file - done - tools/utt2spk_to_spk2utt.pl $data/$x/utt2spk > $data/$x/spk2utt -done - -# rm -r $tmp_dir - -tools/fix_data_dir.sh $data/train || exit 1; -tools/fix_data_dir.sh $data/dev || exit 1; -tools/fix_data_dir.sh $data/test || exit 1; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/magicdata_download_and_untar.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/magicdata_download_and_untar.sh deleted file mode 100644 index df8ca8d229634b67f5fb21a3dd0f8fe561026cb6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/magicdata_download_and_untar.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Johns Hopkins University (author: Daniel Povey) -# 2019 Xingyu Na -# Apache 2.0 - -remove_archive=false - -if [ "$1" == --remove-archive ]; then - remove_archive=true - shift -fi - -if [ $# -ne 3 ]; then - echo "Usage: $0 [--remove-archive] " - echo "e.g.: $0 /export/a05/xna/data www.openslr.org/resources/68 train_set" - echo "With --remove-archive it will remove the archive after successfully un-tarring it." - echo " can be one of: train_set, dev_set, test_set." -fi - -data=$1 -url=$2 -part=$3 -part1=`echo $part | sed s/_set//` - -if [ ! -d "$data" ]; then - echo "$0: no such directory $data, make it." - mkdir -p $data -fi - -part_ok=false -list="train_set dev_set test_set" -for x in $list; do - if [ "$part" == $x ]; then part_ok=true; fi -done -if ! $part_ok; then - echo "$0: expected to be one of $list, but got '$part'" - exit 1; -fi - -if [ -z "$url" ]; then - echo "$0: empty URL base." - exit 1; -fi - -if [ -f $data/$part1/.complete ]; then - echo "$0: data part $part was already successfully extracted, nothing to do." - exit 0; -fi - -# sizes of the archive files in bytes. -sizes="52627842921 1035537823 2201936013" - -if [ -f $data/$part.tar.gz ]; then - size=$(/bin/ls -l $data/$part.tar.gz | awk '{print $5}') - size_ok=false - for s in $sizes; do if [ $s == $size ]; then size_ok=true; fi; done - if ! 
$size_ok; then - echo "$0: removing existing file $data/$part.tar.gz because its size in bytes $size" - echo "does not equal the size of one of the archives." - rm $data/$part.tar.gz - else - echo "$data/$part.tar.gz exists and appears to be complete." - fi -fi - -if [ ! -f $data/$part.tar.gz ]; then - if ! which wget >/dev/null; then - echo "$0: wget is not installed." - exit 1; - fi - full_url=$url/$part.tar.gz - echo "$0: downloading data from $full_url. This may take some time, please be patient." - - cd $data - if ! wget --no-check-certificate $full_url; then - echo "$0: error executing wget $full_url" - exit 1; - fi -fi - -cd $data - -if ! tar -xvzf $part.tar.gz; then - echo "$0: error un-tarring archive $data/$part.tar.gz" - exit 1; -fi - -touch $data/$part1/.complete - -echo "$0: Successfully downloaded and un-tarred $data/$part.tar.gz" - -if $remove_archive; then - echo "$0: removing $data/$part.tar.gz file since --remove-archive option was supplied." - rm $data/$part.tar.gz -fi - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/primewords_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/primewords_data_prep.sh deleted file mode 100644 index 96299a295c3a6f5055fb84d6f1d752868ce623ea..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/primewords_data_prep.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Xingyu Na -# Apache 2.0 - -. ./path.sh || exit 1; - -if [ $# != 2 ]; then - echo "Usage: $0 " - echo " $0 /export/a05/xna/data/primewords data/primewords" - exit 1; -fi - -corpus=$1/primewords_md_2018_set1 -data=$2 - -if [ ! -d $corpus/audio_files ] || [ ! 
-f $corpus/set1_transcript.json ]; then - echo "Error: $0 requires complete corpus" - exit 1; -fi - -echo "**** Creating primewords data folder ****" - -mkdir -p $data/train - -# find wav audio file for train - -find $corpus -iname "*.wav" > $data/wav.flist -n=`cat $data/wav.flist | wc -l` -[ $n -ne 50384 ] && \ - echo Warning: expected 50384 data files, found $n - -echo "Filtering data using found wav list and provided transcript" -local/primewords_parse_transcript.py $data/wav.flist $corpus/set1_transcript.json $data/train -cat $data/train/transcripts.txt |\ - awk '{if (NF > 1) print $0;}' > $data/train/text - -for file in wav.scp utt2spk text; do - sort $data/train/$file -o $data/train/$file -done -tools/utt2spk_to_spk2utt.pl $data/train/utt2spk > $data/train/spk2utt - -# rm -r $data/wav.flist - -tools/validate_data_dir.sh --no-feats $data/train || exit 1; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/primewords_download_and_untar.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/primewords_download_and_untar.sh deleted file mode 100644 index 7e716c7a0a6683459ae2c14bcdf7394c157aa1ba..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/primewords_download_and_untar.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Johns Hopkins University (author: Daniel Povey) -# 2017 Xingyu Na -# Apache 2.0 - -remove_archive=false - -if [ "$1" == --remove-archive ]; then - remove_archive=true - shift -fi - -if [ $# -ne 2 ]; then - echo "Usage: $0 [--remove-archive] " - echo "e.g.: $0 /export/a05/xna/data www.openslr.org/resources/38" - echo "With --remove-archive it will remove the archive after successfully un-tarring it." -fi - -data=$1 -url=$2 -part=primewords_md_2018_set1 - -if [ ! -d "$data" ]; then - echo "$0: no such directory $data, make it" - mkdir -p $data -fi - -if [ -z "$url" ]; then - echo "$0: empty URL base." - exit 1; -fi - -if [ -f $data/.complete ]; then - echo "$0: data part $part was already successfully extracted, nothing to do." - exit 0; -fi - -# sizes of the archive files in bytes. -sizes="9057625192" - -if [ -f $data/$part.tar.gz ]; then - size=$(/bin/ls -l $data/$part.tar.gz | awk '{print $5}') - size_ok=false - for s in $sizes; do if [ $s == $size ]; then size_ok=true; fi; done - if ! $size_ok; then - echo "$0: removing existing file $data/$part.tar.gz because its size in bytes $size" - echo "does not equal the size of one of the archives." - rm $data/$part.tar.gz - else - echo "$data/$part.tar.gz exists and appears to be complete." - fi -fi - -if [ ! -f $data/$part.tar.gz ]; then - if ! which wget >/dev/null; then - echo "$0: wget is not installed." - exit 1; - fi - full_url=$url/$part.tar.gz - echo "$0: downloading data from $full_url. This may take some time, please be patient." - - cd $data - if ! wget --no-check-certificate $full_url; then - echo "$0: error executing wget $full_url" - exit 1; - fi -fi - -cd $data - -if ! tar -xvzf $part.tar.gz; then - echo "$0: error un-tarring archive $data/$part.tar.gz" - exit 1; -fi - -touch $data/.complete - -echo "$0: Successfully downloaded and un-tarred $data/$part.tgz" - -if $remove_archive; then - echo "$0: removing $data/$part.tar.gz file since --remove-archive option was supplied." 
- rm $data/$part.tar.gz -fi - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/primewords_parse_transcript.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/primewords_parse_transcript.py deleted file mode 100644 index 772ab7f93810b3094c8f0b7bab22eac528a17817..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/primewords_parse_transcript.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys -import json - - -def main(argv): - fp = open(argv[1], encoding="utf-8") - js = json.load(fp) - fp.close() - metas = {} - for ele in js: - fname = ele['file'] - metas[fname] = ele - - fWavScp = open(os.path.join(argv[2], 'wav.scp'), 'w') - fText = open(os.path.join( - argv[2], 'transcripts.txt'), 'w', encoding="utf-8") - fUtt2Spk = open(os.path.join(argv[2], 'utt2spk'), 'w') - for line in open(argv[0]): - fpath = line.strip('\r\n') - wname = os.path.basename(fpath) - meta = metas[wname] - spkid = 'P' + meta['user_id'] - uttid = spkid + '-' + meta['id'] - fWavScp.write(uttid + ' ' + fpath + '\n') - fText.write(uttid + ' ' + meta['text'] + '\n') - fUtt2Spk.write(uttid + ' ' + spkid + '\n') - fWavScp.close() - fText.close() - fUtt2Spk.close() - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/stcmds_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/stcmds_data_prep.sh deleted file mode 100644 index 36f8e49af08e633c422fad4e771109cdfaa73847..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/stcmds_data_prep.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Xingyu Na -# Apache 2.0 - -. ./path.sh || exit 1; - -if [ $# != 2 ]; then - echo "Usage: $0 " - echo " $0 /export/a05/xna/data/stcmds data/stcmds" - exit 1; -fi - -corpus=$1/ST-CMDS-20170001_1-OS -data=$2 - -if [ ! -d $corpus ]; then - echo "Error: $0 requires complete corpus" - exit 1; -fi - -echo "**** Creating ST-CMDS data folder ****" - -mkdir -p $data/train - -# find wav audio file for train - -find $corpus -iname "*.wav" > $data/wav.list -n=`cat $data/wav.list | wc -l` -[ $n -ne 102600 ] && \ - echo Warning: expected 102600 data files, found $n - -cat $data/wav.list | awk -F'20170001' '{print $NF}' | awk -F'.' 
'{print $1}' > $data/utt.list -cat $data/utt.list | awk '{print substr($1,1,6)}' > $data/spk.list -while read line; do - tn=`dirname $line`/`basename $line .wav`.txt; - cat $tn; echo; -done < $data/wav.list > $data/text.list - -paste -d' ' $data/utt.list $data/wav.list > $data/train/wav.scp -paste -d' ' $data/utt.list $data/spk.list > $data/train/utt2spk -paste -d' ' $data/utt.list $data/text.list |\ - sed 's/,//g' |\ - tr '[a-z]' '[A-Z]' |\ - awk '{if (NF > 1) print $0;}' > $data/train/text - -for file in wav.scp utt2spk text; do - sort $data/train/$file -o $data/train/$file -done - -tools/utt2spk_to_spk2utt.pl $data/train/utt2spk > $data/train/spk2utt - -# rm -r $data/{wav,utt,spk,text}.list - -tools/validate_data_dir.sh --no-feats $data/train || exit 1; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/stcmds_download_and_untar.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/stcmds_download_and_untar.sh deleted file mode 100644 index ca89b5a292ac8246d9d0aabeb5884c75020a0178..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/stcmds_download_and_untar.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Johns Hopkins University (author: Daniel Povey) -# 2017 Xingyu Na -# Apache 2.0 - -remove_archive=false - -if [ "$1" == --remove-archive ]; then - remove_archive=true - shift -fi - -if [ $# -ne 2 ]; then - echo "Usage: $0 [--remove-archive] " - echo "e.g.: $0 /export/a05/xna/data www.openslr.org/resources/38" - echo "With --remove-archive it will remove the archive after successfully un-tarring it." -fi - -data=$1 -url=$2 -part=ST-CMDS-20170001_1-OS - -if [ ! -d "$data" ]; then - echo "$0: no such directory $data, make it" - mkdir -p $data -fi - -if [ -z "$url" ]; then - echo "$0: empty URL base." - exit 1; -fi - -if [ -f $data/.complete ]; then - echo "$0: data part $part was already successfully extracted, nothing to do." - exit 0; -fi - -# sizes of the archive files in bytes. -sizes="8231662593" - -if [ -f $data/$part.tar.gz ]; then - size=$(/bin/ls -l $data/$part.tar.gz | awk '{print $5}') - size_ok=false - for s in $sizes; do if [ $s == $size ]; then size_ok=true; fi; done - if ! $size_ok; then - echo "$0: removing existing file $data/$part.tar.gz because its size in bytes $size" - echo "does not equal the size of one of the archives." - rm $data/$part.tar.gz - else - echo "$data/$part.tar.gz exists and appears to be complete." - fi -fi - -if [ ! -f $data/$part.tar.gz ]; then - if ! which wget >/dev/null; then - echo "$0: wget is not installed." - exit 1; - fi - full_url=$url/$part.tar.gz - echo "$0: downloading data from $full_url. This may take some time, please be patient." - - cd $data - if ! wget --no-check-certificate $full_url; then - echo "$0: error executing wget $full_url" - exit 1; - fi -fi - -cd $data - -if ! tar -xvzf $part.tar.gz; then - echo "$0: error un-tarring archive $data/$part.tar.gz" - exit 1; -fi - -touch $data/.complete - -echo "$0: Successfully downloaded and un-tarred $data/$part.tgz" - -if $remove_archive; then - echo "$0: removing $data/$part.tar.gz file since --remove-archive option was supplied." 
- rm $data/$part.tar.gz -fi - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/tal_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/tal_data_prep.sh deleted file mode 100644 index 1a9c48cb1850465f461ecb63d9ebb8d103cc1682..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/tal_data_prep.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash - -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. (Di Wu) -# Apache 2.0 - -. ./path.sh || exit 1; - -if [ $# != 2 ]; then - echo "Usage: $0 " - echo " $0 /export/a05/xna/data/aisolution_data data/tal_asr" - exit 1; -fi - -tal_audio_dir=$1/wav/ -tal_text=$1/transcript/transcript.txt -data=$2 - -train_dir=$data/local/train -dev_dir=$data/local/dev -test_dir=$data/local/test -tmp_dir=$data/local/tmp - -mkdir -p $train_dir -mkdir -p $dev_dir -mkdir -p $test_dir -mkdir -p $tmp_dir - -# data directory check -if [ ! -d $tal_audio_dir ] || [ ! -f $tal_text ]; then - echo "Error: $0 requires two directory arguments" - exit 1; -fi - -echo "**** Creating tal asr data folder ****" - -# find wav audio file for train, dev and test resp. -find $tal_audio_dir -iname "*.wav" > $tmp_dir/wav.flist -n=`cat $tmp_dir/wav.flist | wc -l` -[ $n -ne 31747 ] && \ - echo Warning: expected 31747 data files, found $n - -grep -i "wav/train" $tmp_dir/wav.flist > $train_dir/wav.flist || exit 1; -grep -i "wav/dev" $tmp_dir/wav.flist > $dev_dir/wav.flist || exit 1; -grep -i "wav/test" $tmp_dir/wav.flist > $test_dir/wav.flist || exit 1; - -rm -r $tmp_dir - -# Transcriptions preparation -for dir in $train_dir $dev_dir $test_dir; do - echo Preparing $dir transcriptions - sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF}' > $dir/utt.list - sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF, "TALASR"$(NF-1)"-"$NF}' > $dir/utt_uttid - sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print "TALASR"$(NF-1)"-"$NF, "TALASR"$(NF-1)}' > $dir/utt2spk - paste -d ' ' <(awk '{print $2}' $dir/utt_uttid) $dir/wav.flist > $dir/wav.scp - tools/filter_scp.pl -f 1 $dir/utt.list $tal_text | \ - sed 's/A/A/g' | sed 's/#//g' | sed 's/=//g' | sed 's/、//g' | \ - sed 's/,//g' | sed 's/?//g' | sed 's/。//g' | sed 's/[ ][ ]*$//g'\ - > $dir/transcripts.txt - awk '{print $1}' $dir/transcripts.txt > $dir/utt.list - paste -d " " <(sort -u -k 1 $dir/utt_uttid | awk '{print $2}') \ - <(sort -u -k 1 $dir/transcripts.txt | awk '{for(i=2;i $dir/text - tools/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt -done - -mkdir -p $data/train $data/dev $data/test - -for f in spk2utt utt2spk wav.scp text; do - cp $train_dir/$f $data/train/$f || exit 1; - cp $dev_dir/$f $data/dev/$f || exit 1; - cp $test_dir/$f $data/test/$f || exit 1; -done - -tools/fix_data_dir.sh $data/train || exit 1; -tools/fix_data_dir.sh $data/dev || exit 1; -tools/fix_data_dir.sh $data/test || exit 1; - -echo "$0: tal asr data preparation succeeded" -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/tal_mix_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/tal_mix_data_prep.sh deleted file mode 100644 index 1bc808d76a83209c68132886da3bd6a9b79a0f46..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/tal_mix_data_prep.sh +++ /dev/null @@ -1,72 
+0,0 @@ -#!/bin/bash - -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. (Di Wu) -# Apache 2.0 - -. ./path.sh || exit 1; - -if [ $# != 2 ]; then - echo "Usage: $0 " - echo " $0 /export/a05/xna/data/TAL_CSASR data/tal_mix" - exit 1; -fi - -tal_mix_audio_dir=$1/cs_wav -tal_mix_text=$1/label -data=$2 - -train_dir=$data/local/train -tmp_dir=$data/local/tmp - -mkdir -p $train_dir -mkdir -p $tmp_dir - -# data directory check -if [ ! -d $tal_mix_audio_dir ] || [ ! -f $tal_mix_text ]; then - echo "Error: $0 requires two directory arguments" - exit 1; -fi - -echo "**** Creating tal mix data folder ****" - -# find wav audio file for train, dev and test resp. -find $tal_mix_audio_dir -iname "*.wav" > $tmp_dir/wav.flist -n=`cat $tmp_dir/wav.flist | wc -l` -[ $n -ne 370000 ] && \ - echo Warning: expected 370000 data files, found $n - -# rm -r $tmp_dir - -# Transcriptions preparation -echo Preparing transcriptions -sed -e 's/\.wav//' $tmp_dir/wav.flist | awk -F '/' '{print $NF}' > $train_dir/utt.list -sed -e 's/\.wav//' $tmp_dir/wav.flist | awk -F '/' '{printf("%s %s\n",$NF,$NF)}' > $train_dir/utt2spk -paste -d' ' $train_dir/utt.list $tmp_dir/wav.flist > $train_dir/wav.scp -cat $tal_mix_text | grep -Ev '^\s*$' | awk '{if(NF>1) print $0}' > $train_dir/transcript.txt -#cp $tal_mix_text $train_dir - -wc -l $train_dir/transcript.txt -echo filtering -tools/filter_scp.pl -f 1 $train_dir/utt.list $train_dir/transcript.txt | \ - sed 's/A/A/g' | sed 's/C/C/g' | sed 's/D/D/g' | sed 's/G/G/g' | \ - sed 's/H/H/g' | sed 's/U/U/g' | sed 's/Y/Y/g' | sed 's/a/a/g' | \ - sed 's/I/I/g' | sed 's/#//g' | sed 's/=//g' | sed 's/;//g' | \ - sed 's/,//g' | sed 's/?//g' | sed 's/。//g' | sed 's/\///g' | \ - sed 's/!//g' | sed 's/!//g' | sed 's/\.//g' | sed 's/\?//g' | \ - sed 's/://g' | sed 's/,//g' | sed 's/\"//g' | sed 's/://g' | \ - sed 's/@//g' | sed 's/-/ /g' | sed 's/、/ /g' | sed 's/~/ /g' | \ - sed "s/‘/\'/g" | sed 's/E/E/g' | sed "s/’/\'/g" | sed 's/《//g' | sed 's/》//g' | \ - sed "s/[ ][ ]*$//g" | sed "s/\[//g" | sed 's/、//g' > $train_dir/text -tools/utt2spk_to_spk2utt.pl $train_dir/utt2spk > $train_dir/spk2utt - -mkdir -p $data/train - -for f in spk2utt utt2spk wav.scp text; do - cp $train_dir/$f $data/train/$f || exit 1; -done - -tools/fix_data_dir.sh $data/train || exit 1; - -echo "$0: tal mix data preparation succeeded" -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/thchs-30_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/thchs-30_data_prep.sh deleted file mode 100644 index a72efcddccf84b5706502234428d50440f02b8e8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/thchs-30_data_prep.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright 2016 Tsinghua University (Author: Dong Wang, Xuewei Zhang). Apache 2.0. -# 2016 LeSpeech (Author: Xingyu Na) - -#This script pepares the data directory for thchs30 recipe. -#It reads the corpus and get wav.scp and transcriptions. 
- -corpus_dir=$1 -data=$2 - -echo "**** Creating THCHS-30 data folder ****" -mkdir -p $data/{train,dev,test} - -#create wav.scp, utt2spk.scp, spk2utt.scp, text -( -for x in train dev test; do - echo "cleaning $data/$x" - part=$data/$x - rm -rf $part/{wav.scp,utt2spk,spk2utt,text} - echo "preparing scps and text in $part" - # updated new "for loop" figured out the compatibility issue with Mac created by Xi Chen, in 03/06/2018 - for nn in `find $corpus_dir/$x -name "*.wav" | sort -u | xargs -I {} basename {} .wav`; do - spkid=`echo $nn | awk -F"_" '{print "" $1}'` - spk_char=`echo $spkid | sed 's/\([A-Z]\).*/\1/'` - spk_num=`echo $spkid | sed 's/[A-Z]\([0-9]\)/\1/'` - spkid=$(printf '%s%.2d' "$spk_char" "$spk_num") - utt_num=`echo $nn | awk -F"_" '{print $2}'` - uttid=$(printf '%s%.2d_%.3d' "$spk_char" "$spk_num" "$utt_num") - echo $uttid $corpus_dir/$x/$nn.wav >> $part/wav.scp - echo $uttid $spkid >> $part/utt2spk - echo $uttid `sed -n 1p $corpus_dir/data/$nn.wav.trn` | sed 's/ l =//' >> $part/text - done - sort $part/wav.scp -o $part/wav.scp - sort $part/utt2spk -o $part/utt2spk - sort $part/text -o $part/text - tools/utt2spk_to_spk2utt.pl $part/utt2spk > $part/spk2utt -done -) || exit 1 - -tools/validate_data_dir.sh --no-feats $data/train || exit 1; -tools/validate_data_dir.sh --no-feats $data/dev || exit 1; -tools/validate_data_dir.sh --no-feats $data/test || exit 1; - - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/thchs_download_and_untar.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/thchs_download_and_untar.sh deleted file mode 100644 index 5cf6769e0b69c4173075b1c1c631e64bba2bf296..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/local/thchs_download_and_untar.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Johns Hopkins University (author: Daniel Povey) -# Copyright 2016 Tsinghua University (author: Dong Wang) -# Apache 2.0 - -# Adapted from librispeech recipe local/download_and_untar.sh - -remove_archive=false - -if [ "$1" == --remove-archive ]; then - remove_archive=true - shift -fi - -if [ $# -ne 3 ]; then - echo "Usage: $0 [--remove-archive] " - echo "e.g.: $0 /nfs/public/materials/data/thchs30-openslr www.openslr.org/resources/18 data_thchs30" - echo "With --remove-archive it will remove the archive after successfully un-tarring it." - echo " can be one of: data_thchs30, test-noise, resource" -fi - -data=$1 -url=$2 -part=$3 - -if [ ! -d "$data" ]; then - echo "$0: no such directory $data, make it" - mkdir -p $data -fi - -part_ok=false -list="data_thchs30 test-noise resource" -for x in $list; do - if [ "$part" == $x ]; then part_ok=true; fi -done -if ! $part_ok; then - echo "$0: expected to be one of $list, but got '$part'" - exit 1; -fi - -if [ -z "$url" ]; then - echo "$0: empty URL base." - exit 1; -fi - -if [ -f $data/$part/.complete ]; then - echo "$0: data part $part was already successfully extracted, nothing to do." - exit 0; -fi - - -sizes="6453425169 1971460210 24813708" - -if [ -f $data/$part.tgz ]; then - size=$(/bin/ls -l $data/$part.tgz | awk '{print $5}') - size_ok=false - for s in $sizes; do if [ $s == $size ]; then size_ok=true; fi; done - if ! $size_ok; then - echo "$0: removing existing file $data/$part.tgz because its size in bytes $size" - echo "does not equal the size of one of the archives." - rm $data/$part.tgz - else - echo "$data/$part.tgz exists and appears to be complete." 
- fi -fi - -if [ ! -f $data/$part.tgz ]; then - if ! which wget >/dev/null; then - echo "$0: wget is not installed." - exit 1; - fi - full_url=$url/$part.tgz - echo "$0: downloading data from $full_url. This may take some time, please be patient." - - cd $data - pwd - echo " wget --no-check-certificate $full_url" - if ! wget --no-check-certificate $full_url; then - echo "$0: error executing wget $full_url" - exit 1; - fi -fi - -cd $data - -if ! tar -xvzf $part.tgz; then - echo "$0: error un-tarring archive $data/$part.tgz" - exit 1; -fi - -touch $data/$part/.complete - -echo "$0: Successfully downloaded and un-tarred $data/$part.tgz" - -if $remove_archive; then - echo "$0: removing $data/$part.tgz file since --remove-archive option was supplied." - rm $data/$part.tgz -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/run.sh deleted file mode 100644 index 9b1813ceceaf75d5c8776c05c1843b17276b09ce..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/run.sh +++ /dev/null @@ -1,357 +0,0 @@ -#!/bin/bash - -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. (Di Wu) -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3" -stage=0 # start from 0 if you need to start from data preparation -stop_stage=6 - -# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl -# communication. More details can be found in -# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html -# export NCCL_SOCKET_IFNAME=ens4f1 -# The num of nodes or machines used for multi-machine training -# Default 1 for single machine/node -# NFS will be needed if you want run multi-machine training -num_nodes=1 -# The rank of each node or machine, range from 0 to num_nodes -1 -# The first node/machine sets node_rank 0, the second one sets node_rank 1 -# the third one set node_rank 2, and so on. 
Default 0 -node_rank=0 - -# data -dbase=/ssd/nfs06/di.wu/open_source -aidatatang_url=www.openslr.org/resources/62 -aishell_url=www.openslr.org/resources/33 -magicdata_url=www.openslr.org/resources/68 -primewords_url=www.openslr.org/resources/47 -stcmds_url=www.openslr.org/resources/38 -thchs_url=www.openslr.org/resources/18 - -nj=16 - -train_set=train -dev_set=dev - -has_aishell2=false # AISHELL2 train set is not publically downloadable - # With this option true, the script assumes you have it in - # $dbase -has_tal=false # TAL data need download from Baidu SkyDrive - # With this option true, the script assumes you have - # TAL/TAL_ASR and TAL/TAL_ASR_mix in $dbase -data_type=raw # raw or shard -num_utts_per_shard=1000 -shards_dir= # specify if you prefer to store to somewhere else -# Optional train_config -# 1. conf/train_transformer.yaml: Standard transformer -# 2. conf/train_conformer.yaml: Standard conformer -# 3. conf/train_unified_conformer.yaml: Unified dynamic chunk causal conformer -# 4. conf/train_unified_transformer.yaml: Unified dynamic chunk transformer -train_config=conf/train_conformer.yaml -# English modeling unit -# Optional 1. bpe 2. char -en_modeling_unit=bpe -dict=data/dict_$en_modeling_unit/lang_char.txt -cmvn=true -dir=exp/conformer -checkpoint= - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=30 -decode_modes="ctc_greedy_search ctc_prefix_beam_search" -decode_modes="$decode_modes attention attention_rescoring" - -. tools/parse_options.sh || exit 1; - -test_sets="aishell aidatatang magicdata thchs" -if $has_aishell2; then - test_sets="$test_sets aishell2" -fi -if $has_tal; then - test_sets="$test_sets tal_asr" -fi - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - echo "stage -1: Data Download" - # download all training data - local/aidatatang_download_and_untar.sh $dbase/aidatatang $aidatatang_url \ - aidatatang_200zh || exit 1; - local/aishell_download_and_untar.sh $dbase/aishell $aishell_url \ - data_aishell || exit 1; - local/magicdata_download_and_untar.sh $dbase/magicdata $magicdata_url \ - train_set || exit 1; - local/primewords_download_and_untar.sh $dbase/primewords $primewords_url \ - || exit 1; - local/stcmds_download_and_untar.sh $dbase/stcmds $stcmds_url || exit 1; - local/thchs_download_and_untar.sh $dbase/thchs $thchs_url data_thchs30 || \ - exit 1; - - # download all test data - local/thchs_download_and_untar.sh $dbase/thchs $thchs_url test-noise \ - || exit 1; - local/magicdata_download_and_untar.sh $dbase/magicdata $magicdata_url \ - dev_set || exit 1; - local/magicdata_download_and_untar.sh $dbase/magicdata $magicdata_url \ - test_set || exit 1; - # tal data need download from Baidu SkyDrive - # AISHELL-2 database is free for academic research, not in the commerce, - # if without permission. - # You need to request the data from AISHELL company. 
-fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Data preparation - local/aidatatang_data_prep.sh $dbase/aidatatang/aidatatang_200zh \ - data/aidatatang || exit 1; - local/aishell_data_prep.sh $dbase/aishell/data_aishell data/aishell \ - || exit 1; - local/thchs-30_data_prep.sh $dbase/thchs/data_thchs30 data/thchs || exit 1; - local/magicdata_data_prep.sh $dbase/magicdata/ data/magicdata || exit 1; - local/primewords_data_prep.sh $dbase/primewords data/primewords || exit 1; - local/stcmds_data_prep.sh $dbase/stcmds data/stcmds || exit 1; - if $has_tal; then - local/tal_data_prep.sh $dbase/TAL/TAL_ASR data/tal_asr || exit 1; - local/tal_mix_data_prep.sh $dbase/TAL/TAL_ASR_mix data/tal_mix || exit 1; - fi - if $has_aishell2; then - local/aishell2_data_prep.sh $dbase/aishell2/IOS data/aishell2/train \ - || exit 1; - local/aishell2_data_prep.sh $dbase/aishell2/IOS/dev data/aishell2/dev \ - || exit 1; - local/aishell2_data_prep.sh $dbase/aishell2/IOS/test data/aishell2/test \ - || exit 1; - fi - # Merge all data sets. - train_sets=aidatatang,aishell,magicdata,primewords,stcmds,thchs - dev_sets=aidatatang,aishell,magicdata,thchs - if $has_aishell2; then - train_sets=$train_sets,aishell2 - dev_sets=$dev_sets,aishell2 - fi - if $has_tal; then - train_sets=$train_sets,tal_asr,tal_mix - dev_sets=$dev_sets,tal_asr - fi - unrolled_train_sets=$(eval echo data/{$train_sets}/train) - unrolled_dev_sets=$(eval echo data/{$dev_sets}/dev) - tools/combine_data.sh data/train $unrolled_train_sets || exit 1; - tools/combine_data.sh data/dev $unrolled_dev_sets || exit 1; -fi - - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - # For wav feature, just copy the data. Fbank extraction is done in training - mkdir -p data_${en_modeling_unit} - for x in ${train_set} ${dev_set}; do - cp -r data/$x data_${en_modeling_unit} - done - - for x in ${test_sets}; do - cp -r data/$x/test data_${en_modeling_unit}/test_${x} - done - - # Unified data format for char and bpe modelding - # Here we use ▁ for blank among english words - # Warning : it is "▁" symbol, not "_" symbol - for x in train dev; do - cp data_${en_modeling_unit}/${x}/text data_${en_modeling_unit}/${x}/text.org - paste -d " " <(cut -f 1 -d" " data_${en_modeling_unit}/${x}/text.org) \ - <(cut -f 2- -d" " data_${en_modeling_unit}/${x}/text.org \ - | tr 'a-z' 'A-Z' | sed 's/\([A-Z]\) \([A-Z]\)/\1▁\2/g' \ - | sed 's/\([A-Z]\) \([A-Z]\)/\1▁\2/g' | tr -d " ") \ - > data_${en_modeling_unit}/${x}/text - sed -i 's/\xEF\xBB\xBF//' data_${en_modeling_unit}/${x}/text - - done - - for x in ${test_sets}; do - cp data_${en_modeling_unit}/test_${x}/text \ - data_${en_modeling_unit}/test_${x}/text.org - paste -d " " <(cut -f 1 -d" " data_${en_modeling_unit}/test_${x}/text.org) \ - <(cut -f 2- -d" " data_${en_modeling_unit}/test_${x}/text.org \ - | tr 'a-z' 'A-Z' | sed 's/\([A-Z]\) \([A-Z]\)/\1▁\2/g' \ - | sed 's/\([A-Z]\) \([A-Z]\)/\1▁\2/g' | tr -d " ") \ - > data_${en_modeling_unit}/test_${x}/text - sed -i 's/\xEF\xBB\xBF//' data_${en_modeling_unit}/test_${x}/text - done -fi - -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - echo "Compute cmvn" - # Here we use all the training data, you can sample some data to save time - if $cmvn; then - tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \ - --in_scp data/${train_set}/wav.scp \ - --out_cmvn data_${en_modeling_unit}/$train_set/global_cmvn - fi -fi - -# This bpe model is trained on librispeech training data set. 
-bpecode=conf/train_960_unigram5000.model -trans_type_ops= -enable_bpe= -if [ $en_modeling_unit = "bpe" ]; then - trans_type_ops="--trans_type cn_char_en_bpe" - enable_bpe=true -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - # Make train dict - echo "Make a dictionary" - mkdir -p $(dirname $dict) - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - tools/text2token.py -s 1 -n 1 -m ${bpecode} \ - data_${en_modeling_unit}/${train_set}/text ${trans_type_ops} \ - | cut -f 2- -d" " | tr " " "\n" | sort | uniq | grep -a -v -e '^\s*$' \ - | grep -v '·' | grep -v '“' | grep -v "”" | grep -v "\[" | grep -v "\]" \ - | grep -v "…" | awk '{print $0 " " NR+1}' >> ${dict} - - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - echo "Prepare data, prepare required format" - feat_test_sets="" - for x in ${test_sets}; do - feat_test_sets=${feat_test_sets}" "test_${x} - done - for x in ${dev_set} ${train_set} ${feat_test_sets}; do - if [ $data_type == "shard" ]; then - sdir=${shards_dir:+$shards_dir/}shards_${en_modeling_unit} - mkdir -p $sdir - tools/make_shard_list.py --num_utts_per_shard $num_utts_per_shard \ - --num_threads 16 data_${en_modeling_unit}/$x/wav.scp \ - data_${en_modeling_unit}/$x/text $(realpath $sdir/$x) \ - data_${en_modeling_unit}/$x/data.list - else - tools/make_raw_list.py data_${en_modeling_unit}/$x/wav.scp \ - data_${en_modeling_unit}/$x/text data_${en_modeling_unit}/$x/data.list - fi - done -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Training - mkdir -p $dir - INIT_FILE=$dir/ddp_init - # You had better rm it manually before you start run.sh on first node. - # rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="nccl" - # The total number of processes/gpus, so that the master knows - # how many workers to wait for. - # More details about ddp can be found in - # https://pytorch.org/tutorials/intermediate/dist_tuto.html - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp data_${en_modeling_unit}/$train_set/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. 
- rank=`expr $node_rank \* $num_gpus + $i` - - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --train_data data_${en_modeling_unit}/$train_set/data.list \ - --cv_data data_${en_modeling_unit}/$dev_set/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 4 \ - ${enable_bpe:+--bpe_model $bpecode} \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Test model, please specify the model you want to test by --checkpoint - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size=16 - ctc_weight=0.5 - idx=0 - for mode in ${decode_modes}; do - { - for x in ${test_sets}; do - { - test_name=test_${mode}${decoding_chunk_size:+_chunk$decoding_chunk_size} - test_dir=$dir/$test_name/${x} - mkdir -p $test_dir - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$idx+1]) - python wenet/bin/recognize.py --gpu $gpu_id \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data data_${en_modeling_unit}/test_${x}/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $ctc_weight \ - ${enable_bpe:+--bpe_model $bpecode} \ - --result_file $test_dir/text_${en_modeling_unit} \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - - cat $test_dir/text_${en_modeling_unit} | sed -e "s/▁/ /g" \ - > $test_dir/text - cat data_${en_modeling_unit}/test_${x}/text | sed -e "s/▁/ /g" \ - > data_${en_modeling_unit}/test_${x}/text.tmp - python tools/compute-wer.py --char=1 --v=1 \ - data_${en_modeling_unit}/test_${x}/text.tmp $test_dir/text \ - > $test_dir/wer - rm data_${en_modeling_unit}/test_${x}/text.tmp - } - done - } & - ((idx+=1)) - done - wait - -fi - -if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip \ - --output_quant_file $dir/final_quant.zip -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. 
./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. 
./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. - elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in 
range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, 
ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! 
cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. -has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! 
$skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = 
dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. 
- format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - 
default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: 
Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
Will be used in lexicon and language model FST compiling.
-cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk '
-  BEGIN {
-    print "<eps> 0";
-  }
-  {
-    printf("%s %d\n", $1, NR);
-  }
-  END {
-    printf("#0 %d\n", NR+1);
-    printf("<s> %d\n", NR+2);
-    printf("</s> %d\n", NR+3);
-  }' > $dir/words.txt || exit 1;
-
-# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time.
-token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'`
-word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'`
-
-tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \
-  fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \
-  --keep_isymbols=false --keep_osymbols=false | \
-  fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \
-  fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;
-
-echo "Lexicon and token FSTs compiling succeeded"
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/ctc_token_fst.py
deleted file mode 100644
index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/ctc_token_fst.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 1 <eps> <eps>')
-print('1 1 <blank> <eps>')
-print('2 2 <blank> <eps>')
-print('2 0 <eps> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    node = 3
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(1, node, phone, phone))
-            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
-            print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>'))
-            node += 1
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/ctc_token_fst_compact.py
deleted file mode 100644
index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/ctc_token_fst_compact.py
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-print('0 0 <blank> <eps>')
-
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    node = 1
-    for entry in fin:
-        fields = entry.strip().split(' ')
-        phone = fields[0]
-        if phone == '<eps>' or phone == '<blank>':
-            continue
-        elif '#' in phone:  # disambiguous phone
-            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
-        else:
-            print('{} {} {} {}'.format(0, node, phone, phone))
-            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
-            print('{} {} {} {}'.format(node, 0, '<eps>', '<eps>'))
-            node += 1
-print('0')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/ctc_token_fst_corrected.py
deleted file mode 100644
index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/ctc_token_fst_corrected.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-
-def il(n):
-    return n
+ 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
-
-rm -rf $tgt_lang
-cp -r $src_lang $tgt_lang
-
-# Compose the language model to FST
-cat $arpa_lm | \
-  grep -v '<s> <s>' | \
-  grep -v '</s> <s>' | \
-  grep -v '</s> </s>' | \
-  grep -v -i '<unk>' | \
-  grep -v -i '<spoken_noise>' | \
-  arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \
-  tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \
-    --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-  fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst
-
-
-echo "Checking how stochastic G is (the first of these numbers should be small):"
-fstisstochastic $tgt_lang/G.fst
-
-# Compose the token, lexicon and language-model FST into the final decoding graph
-fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \
-  fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1;
-fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1;
-
-echo "Composing decoding graph TLG.fst succeeded"
-#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/prepare_dict.py
deleted file mode 100644
index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/prepare_dict.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-# encoding: utf-8
-
-import sys
-
-# sys.argv[1]: e2e model unit file(lang_char.txt)
-# sys.argv[2]: raw lexicon file
-# sys.argv[3]: output lexicon file
-# sys.argv[4]: bpemodel
-
-unit_table = set()
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    for line in fin:
-        unit = line.split()[0]
-        unit_table.add(unit)
-
-
-def contain_oov(units):
-    for unit in units:
-        if unit not in unit_table:
-            return True
-    return False
-
-
-bpemode = len(sys.argv) > 4
-if bpemode:
-    import sentencepiece as spm
-    sp = spm.SentencePieceProcessor()
-    sp.Load(sys.argv[4])
-lexicon_table = set()
-with open(sys.argv[2], 'r', encoding='utf8') as fin, \
-        open(sys.argv[3], 'w', encoding='utf8') as fout:
-    for line in fin:
-        word = line.split()[0]
-        if word == 'SIL' and not bpemode:  # `sil` might be a valid piece in bpemodel
-            continue
-        elif word == '<SPOKEN_NOISE>':
-            continue
-        else:
-            # each word only has one pronunciation for e2e system
-            if word in lexicon_table:
-                continue
-            if bpemode:
-                # We assume that the lexicon does not contain code-switch,
-                # i.e. the word contains both English and Chinese.
-                # see PR https://github.com/wenet-e2e/wenet/pull/1693
-                # and Issue https://github.com/wenet-e2e/wenet/issues/1653
-                if word.encode('utf8').isalpha():
-                    pieces = sp.EncodeAsPieces(word)
-                else:
-                    pieces = word
-                if contain_oov(pieces):
-                    print(
-                        'Ignoring words {}, which contains oov unit'.format(
-                            ''.join(word).strip('▁'))
-                    )
-                    continue
-                chars = ' '.join(
-                    [p if p in unit_table else '<unk>' for p in pieces])
-            else:
-                # ignore words with OOV
-                if contain_oov(word):
-                    print('Ignoring words {}, which contains oov unit'.format(word))
-                    continue
-                # Optional, append ▁ in front of english word
-                # we assume the model unit of our e2e system is char now.
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file 
except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 
else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
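Speed perturbation in this recipe is done offline by rewriting each `wav.scp` entry to pipe the audio through sox's `speed` effect at a fixed factor (commonly 0.9, 1.0 and 1.1). A minimal in-memory sketch of the same idea using torchaudio's sox bindings (illustrative only, assuming torchaudio is installed; the shell script itself drives sox directly):

```python
import torchaudio

def speed_perturb(wav_path: str, factor: float = 0.9):
    """Apply sox 'speed' (plus 'rate' to restore the sample rate) to one utterance."""
    waveform, sample_rate = torchaudio.load(wav_path)
    effects = [["speed", str(factor)], ["rate", str(sample_rate)]]
    perturbed, sr = torchaudio.sox_effects.apply_effects_tensor(
        waveform, sample_rate, effects)
    return perturbed, sr

# e.g. build 0.9x / 1.0x / 1.1x variants of one utterance (path is hypothetical)
for f in (0.9, 1.0, 1.1):
    wav, sr = speed_perturb("/PATH/to/aishell/wav/BAC009S0764W0121.wav", f)
```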
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, "w", 
encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
-    my $p = $col[$_];
-    if($silence{$p}) {
-      set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n";
-    } else {
-      $silence{$p} = 1;
-    }
-    # disambiguation symbols; phones ending in _B, _E, _S or _I will cause
-    # problems with word-position-dependent systems, and is obviously
-    # confusable with epsilon.
-    if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq "<eps>"){
-      set_to_fail();
-      print "--> ERROR: phone \"$p\" has disallowed written form\n";
-    }
-  }
-  $idx ++;
-}
-close(S);
-$success == 0 || print "--> $dict/silence_phones.txt is OK\n";
-print "\n";
-
-# Checking optional_silence.txt -------------------------------
-print "Checking $dict/optional_silence.txt ...\n";
-if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;}
-if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;}
-$idx = 1;
-$success = 1;
-$crlf = 1;
-print "--> reading $dict/optional_silence.txt\n";
-check_allowed_whitespace(\*OS) or exit 1;
-while(<OS>) {
-  chomp;
-  my @col = split(" ", $_);
-  if ($idx > 1 or @col > 1) {
-    set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n";
-  } elsif (!$silence{$col[0]}) {
-    set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n";
-  }
-  if ($crlf == 1 && m/\r/) {
-    print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n";
-    set_to_fail();
-    $crlf = 0;
-  }
-  $idx ++;
-}
-close(OS);
-$success == 0 || print "--> $dict/optional_silence.txt is OK\n";
-print "\n";
-
-# Checking nonsilence_phones.txt -------------------------------
-print "Checking $dict/nonsilence_phones.txt ...\n";
-if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;}
-if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;}
-$idx = 1;
-%nonsilence = ();
-$success = 1;
-$crlf = 1;
-print "--> reading $dict/nonsilence_phones.txt\n";
-check_allowed_whitespace(\*NS) or set_to_fail();
-while(<NS>) {
-  if ($crlf == 1 && m/\r/) {
-    print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n";
-    set_to_fail();
-    $crlf = 0;
-  }
-  if (! s/\n$//) {
-    print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n";
-    set_to_fail();
-  }
-  my @col = split(" ", $_);
-  if (@col == 0) {
-    set_to_fail();
-    print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n";
-  }
-  foreach(0 .. @col-1) {
-    my $p = $col[$_];
-    if($nonsilence{$p}) {
-      set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n";
-    } else {
-      $nonsilence{$p} = 1;
-    }
-    # phones that start with the pound sign/hash may be mistaken for
-    # disambiguation symbols; phones ending in _B, _E, _S or _I will cause
-    # problems with word-position-dependent systems, and is obviously
-    # confusable with epsilon.
-    if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq "<eps>"){
-      set_to_fail();
-      print "--> ERROR: phone \"$p\" has disallowed written form\n";
-    }
-  }
-  $idx ++;
-}
-close(NS);
-$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n";
-print "\n";
-
-# Checking disjoint -------------------------------
-sub intersect {
-  my ($a, $b) = @_;
-  @itset = ();
-  %itset = ();
-  foreach(keys %$a) {
-    if(exists $b->{$_} and !$itset{$_}) {
-      push(@itset, $_);
-      $itset{$_} = 1;
-    }
-  }
-  return @itset;
-}
-
-print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n";
-@itset = intersect(\%silence, \%nonsilence);
-if(@itset == 0) {print "--> disjoint property is OK.\n";}
-else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";}
-print "\n";
-
-
-sub check_lexicon {
-  my ($lex, $num_prob_cols, $num_skipped_cols) = @_;
-  print "Checking $lex\n";
-  !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail();
-  my %seen_line = {};
-  $idx = 1; $success = 1; $crlf = 1;
-  print "--> reading $lex\n";
-  check_allowed_whitespace(\*L) or set_to_fail();
-  while (<L>) {
-    if ($crlf == 1 && m/\r/) {
-      print "--> ERROR: $lex contains Carriage Return (^M) characters.\n";
-      set_to_fail();
-      $crlf = 0;
-    }
-    if (defined $seen_line{$_}) {
-      print "--> ERROR: line '$_' of $lex is repeated\n";
-      set_to_fail();
-    }
-    $seen_line{$_} = 1;
-    if (! s/\n$//) {
-      print "--> ERROR: last line '$_' of $lex does not end in newline.\n";
-      set_to_fail();
-    }
-    my @col = split(" ", $_);
-    $word = shift @col;
-    if (!defined $word) {
-      print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail();
-    }
-    if ($word eq "<s>" || $word eq "</s>" || $word eq "<eps>" || $word eq "#0") {
-      print "--> ERROR: lexicon.txt contains forbidden word $word\n";
-      set_to_fail();
-    }
-    for ($n = 0; $n < $num_prob_cols; $n++) {
-      $prob = shift @col;
-      if (!($prob > 0.0 && $prob <= 1.0)) {
-        print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n";
-        set_to_fail();
-      }
-    }
-    for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; }
-    if (@col == 0) {
-      print "--> ERROR: lexicon.txt contains word $word with empty ";
-      print "pronunciation.\n";
-      set_to_fail();
-    }
-    foreach (0 .. @col-1) {
-      if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) {
-        print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt ";
-        print "(line $idx)\n";
-        set_to_fail();
-      }
-    }
-    $idx ++;
-  }
-  close(L);
-  $success == 0 || print "--> $lex is OK\n";
-  print "\n";
-}
-
-if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); }
-if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); }
-if (-f "$dict/lexiconp_silprob.txt") {
-  # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also
-  # exist.
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
-                     # phones appear on the same line in nonsilence_phones.txt,
-                     # they share a tree root, and since the automatic
-                     # question-building treats all phones that appear on the
-                     # same line of nonsilence_phones.txt as being in the same
-                     # group, we can never distinguish them without resorting to
-                     # questions in extra_questions.txt.
-print "Checking $dict/extra_questions.txt ...\n";
-if (-s "$dict/extra_questions.txt") {
-  if (!open(EX, "<$dict/extra_questions.txt")) {
-    set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n";
-  }
-  $idx = 1;
-  $success = 1;
-  $crlf = 1;
-  print "--> reading $dict/extra_questions.txt\n";
-  check_allowed_whitespace(\*EX) or set_to_fail();
-  while(<EX>) {
-    if ($crlf == 1 && m/\r/) {
-      print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n";
-      set_to_fail();
-      $crlf = 0;
-    }
-    if (! s/\n$//) {
-      print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n";
-      set_to_fail();
-    }
-    my @col = split(" ", $_);
-    if (@col == 0) {
-      set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n";
-    }
-    foreach (0 .. @col-1) {
-      if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) {
-        set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n";
-      }
-      $idx ++;
-    }
-    %col_hash = ();
-    foreach $p (@col) { $col_hash{$p} = 1; }
-    foreach $p1 (@col) {
-      # Update %distinguished hash.
-      foreach $p2 (keys %nonsilence) {
-        if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not
-                                       # in this question (and in nonsilence
-                                       # phones)... mark p1,p2 as being split apart
-          $distinguished{$p1,$p2} = 1;
-          $distinguished{$p2,$p1} = 1;
-        }
-      }
-    }
-  }
-  close(EX);
-  $success == 0 || print "--> $dict/extra_questions.txt is OK\n";
-} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";}
-
-if (-f "$dict/nonterminals.txt") {
-  open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt";
-  my %nonterminals = ();
-  my $line_number = 1;
-  while (<NT>) {
-    chop;
-    my @line = split(" ", $_);
-    if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) {
-      print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1;
-    }
-    $nonterminals{$line[0]} = 1;
-    $line_number++;
-  }
-  print "--> $dict/nonterminals.txt is OK\n";
-}
-
-
-# check nonsilence_phones.txt again for phone-pairs that are never
-# distnguishable. (note: this situation is normal and expected for silence
-# phones, so we don't check it.)
-if(!open(NS, "<$dict/nonsilence_phones.txt")) {
-  print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1;
-}
-
-$num_warn_nosplit = 0;
-$num_warn_nosplit_limit = 10;
-while(<NS>) {
-  my @col = split(" ", $_);
-  foreach $p1 (@col) {
-    foreach $p2 (@col) {
-      if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) {
-        set_to_fail();
-        if ($num_warn_nosplit <= $num_warn_nosplit_limit) {
-          print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n";
-        }
-        if ($num_warn_nosplit == $num_warn_nosplit_limit) {
-          print "... Not warning any more times about this issue.\n";
-        }
-        if ($num_warn_nosplit == 0) {
-          print " (note: we started checking for this only recently.
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 
0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - 
target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
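
The check above drives the exported encoder chunk by chunk, feeding each returned `att_cache`/`cnn_cache` back into the next ONNX Runtime call before comparing against the PyTorch outputs (the loop just below additionally drops any inputs the exporter folded away). A minimal, self-contained sketch of that feedback pattern, using a toy stateful module rather than the WeNet encoder; every name, path, and shape here is an illustrative placeholder:

```python
# Toy illustration (not this repo's model): export a cached module to ONNX,
# then drive it chunk by chunk from onnxruntime, feeding the returned cache
# back in and comparing against PyTorch each step.
import numpy as np
import torch
import onnxruntime


class ToyStreamingEncoder(torch.nn.Module):
    """Keeps a fixed-size cache of the last `cache_len` frames."""
    def __init__(self, dim=8, cache_len=4):
        super().__init__()
        self.proj = torch.nn.Linear(dim, dim)
        self.cache_len = cache_len

    def forward(self, chunk, cache):
        # chunk: (1, T, dim), cache: (1, cache_len, dim)
        ctx = torch.cat((cache, chunk), dim=1)
        out = self.proj(ctx)[:, -chunk.size(1):, :]
        new_cache = ctx[:, -self.cache_len:, :]
        return out, new_cache


model = ToyStreamingEncoder().eval()
chunk = torch.randn(1, 6, 8)
cache = torch.zeros(1, 4, 8)
torch.onnx.export(model, (chunk, cache), "toy_encoder.onnx", opset_version=13,
                  input_names=["chunk", "cache"], output_names=["out", "new_cache"])

sess = onnxruntime.InferenceSession("toy_encoder.onnx")
onnx_cache = np.zeros((1, 4, 8), dtype=np.float32)
torch_cache = torch.zeros(1, 4, 8)
for i in range(5):
    x = torch.randn(1, 6, 8)
    torch_out, torch_cache = model(x, torch_cache)
    onnx_out, onnx_cache = sess.run(None, {"chunk": x.numpy(), "cache": onnx_cache})
    np.testing.assert_allclose(torch_out.detach().numpy(), onnx_out,
                               rtol=1e-3, atol=1e-5)
print("chunk-by-chunk check passed")
```

A cache-handling mistake typically only shows up after the first chunk, which is why several iterations are compared rather than a single forward pass.
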
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
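
Both the CTC and decoder exports above rely on `dynamic_axes` so that sequence length (and, for the decoder, the n-best size) can change at inference time even though the dummy tensors traced here have fixed shapes. A small stand-alone sketch of that mechanism with a throwaway linear layer; the file name and dimensions are placeholders, not values from this script:

```python
# Sketch: export with a symbolic time axis, then run the same graph at two lengths.
import torch
import onnxruntime

layer = torch.nn.Linear(80, 32).eval()
dummy = torch.randn(1, 10, 80)  # traced at length 10, but axis 1 is declared dynamic
torch.onnx.export(layer, dummy, "toy_ctc.onnx", opset_version=13,
                  input_names=["hidden"], output_names=["probs"],
                  dynamic_axes={"hidden": {1: "T"}, "probs": {1: "T"}})

sess = onnxruntime.InferenceSession("toy_ctc.onnx")
for t in (10, 37):  # both lengths are accepted because axis 1 is symbolic
    out = sess.run(None, {"hidden": torch.randn(1, t, 80).numpy()})[0]
    print(out.shape)  # (1, 10, 32) then (1, 37, 32)
```
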
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-
-Please install it by following:
-https://github.com/Slyne/ctc_decoder.git
-"""
-from __future__ import print_function
-
-import argparse
-import copy
-import logging
-import os
-import sys
-
-import torch
-import yaml
-from torch.utils.data import DataLoader
-
-from wenet.dataset.dataset import Dataset
-from wenet.utils.common import IGNORE_ID
-from wenet.utils.file_utils import read_symbol_table
-from wenet.utils.config import override_config
-
-import onnxruntime as rt
-import multiprocessing
-import numpy as np
-
-try:
-    from swig_decoders import map_batch, \
-        ctc_beam_search_decoder_batch, \
-        TrieVector, PathTrie
-except ImportError:
-    print('Please install ctc decoders first by refering to\n' +
-          'https://github.com/Slyne/ctc_decoder.git')
-    sys.exit(1)
-
-
-def get_args():
-    parser = argparse.ArgumentParser(description='recognize with your model')
-    parser.add_argument('--config', required=True, help='config file')
-    parser.add_argument('--test_data', required=True, help='test data file')
-    parser.add_argument('--data_type',
-                        default='raw',
-                        choices=['raw', 'shard'],
-                        help='train and cv data type')
-    parser.add_argument('--gpu',
-                        type=int,
-                        default=-1,
-                        help='gpu id for this rank, -1 for cpu')
-    parser.add_argument('--dict', required=True, help='dict file')
-    parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file')
-    parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file')
-    parser.add_argument('--result_file', required=True, help='asr result file')
-    parser.add_argument('--batch_size',
-                        type=int,
-                        default=32,
-                        help='asr result file')
-    parser.add_argument('--mode',
-                        choices=[
-                            'ctc_greedy_search', 'ctc_prefix_beam_search',
-                            'attention_rescoring'],
-                        default='attention_rescoring',
-                        help='decoding mode')
-    parser.add_argument('--bpe_model',
-                        default=None,
-                        type=str,
-                        help='bpe model for english part')
-    parser.add_argument('--override_config',
-                        action='append',
-                        default=[],
-                        help="override yaml config")
-    parser.add_argument('--fp16',
-                        action='store_true',
-                        help='whether to export fp16 model, default false')
-    args = parser.parse_args()
-    print(args)
-    return args
-
-
-def main():
-    args = get_args()
-    logging.basicConfig(level=logging.DEBUG,
-                        format='%(asctime)s %(levelname)s %(message)s')
-    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
-
-    with open(args.config, 'r') as fin:
-        configs = yaml.load(fin, Loader=yaml.FullLoader)
-    if len(args.override_config) > 0:
-        configs = override_config(configs, args.override_config)
-
-    reverse_weight = configs["model_conf"].get("reverse_weight", 0.0)
-    symbol_table = read_symbol_table(args.dict)
-    test_conf = copy.deepcopy(configs['dataset_conf'])
-    test_conf['filter_conf']['max_length'] = 102400
-    test_conf['filter_conf']['min_length'] = 0
-    test_conf['filter_conf']['token_max_length'] = 102400
-    test_conf['filter_conf']['token_min_length'] = 0
-    test_conf['filter_conf']['max_output_input_ratio'] = 102400
-    test_conf['filter_conf']['min_output_input_ratio'] = 0
-    test_conf['speed_perturb'] = False
-    test_conf['spec_aug'] = False
-    test_conf['spec_trim'] = False
-    test_conf['shuffle'] = False
-    test_conf['sort'] = False
-    test_conf['fbank_conf']['dither'] = 0.0
-    test_conf['batch_conf']['batch_type'] = "static"
-    test_conf['batch_conf']['batch_size'] = args.batch_size
-
-    test_dataset = Dataset(args.data_type,
-                           args.test_data,
-                           symbol_table,
-                           test_conf,
-                           args.bpe_model,
-                           partition=False)
-
-    test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0)
-
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
-        os.remove(final_model_path) if os.path.exists(final_model_path) else None
-        os.symlink('{}.pt'.format(final_epoch), final_model_path)
-        writer.close()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/dataset.py
deleted file mode 100644
index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/dataset.py
+++ /dev/null
@@ -1,193 +0,0 @@
-# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import random
-
-import torch
-import torch.distributed as dist
-from torch.utils.data import IterableDataset
-
-import wenet.dataset.processor as processor
-from wenet.utils.file_utils import read_lists
-
-
-class Processor(IterableDataset):
-    def __init__(self, source, f, *args, **kw):
-        assert callable(f)
-        self.source = source
-        self.f = f
-        self.args = args
-        self.kw = kw
-
-    def set_epoch(self, epoch):
-        self.source.set_epoch(epoch)
-
-    def __iter__(self):
-        """ Return an iterator over the source dataset processed by the
-            given processor.
-        """
-        assert self.source is not None
-        assert callable(self.f)
-        return self.f(iter(self.source), *self.args, **self.kw)
-
-    def apply(self, f):
-        assert callable(f)
-        return Processor(self, f, *self.args, **self.kw)
-
-
-class DistributedSampler:
-    def __init__(self, shuffle=True, partition=True):
-        self.epoch = -1
-        self.update()
-        self.shuffle = shuffle
-        self.partition = partition
-
-    def update(self):
-        assert dist.is_available()
-        if dist.is_initialized():
-            self.rank = dist.get_rank()
-            self.world_size = dist.get_world_size()
-        else:
-            self.rank = 0
-            self.world_size = 1
-        worker_info = torch.utils.data.get_worker_info()
-        if worker_info is None:
-            self.worker_id = 0
-            self.num_workers = 1
-        else:
-            self.worker_id = worker_info.id
-            self.num_workers = worker_info.num_workers
-        return dict(rank=self.rank,
-                    world_size=self.world_size,
-                    worker_id=self.worker_id,
-                    num_workers=self.num_workers)
-
-    def set_epoch(self, epoch):
-        self.epoch = epoch
-
-    def sample(self, data):
-        """ Sample data according to rank/world_size/num_workers
-
-            Args:
-                data(List): input data list
-
-            Returns:
-                List: data list after sample
-        """
-        data = list(range(len(data)))
-        # TODO(Binbin Zhang): fix this
-        # We can not handle uneven data for CV on DDP, so we don't
-        # sample data by rank, that means every GPU gets the same
-        # and all the CV data
-        if self.partition:
-            if self.shuffle:
-                random.Random(self.epoch).shuffle(data)
-            data = data[self.rank::self.world_size]
-        data = data[self.worker_id::self.num_workers]
-        return data
-
-
-class DataList(IterableDataset):
-    def __init__(self, lists, shuffle=True, partition=True):
-        self.lists = lists
-        self.sampler = DistributedSampler(shuffle, partition)
-
-    def set_epoch(self, epoch):
-        self.sampler.set_epoch(epoch)
-
-    def __iter__(self):
-        sampler_info = self.sampler.update()
-        indexes = self.sampler.sample(self.lists)
-        for index in indexes:
-            # yield dict(src=src)
-            data = dict(src=self.lists[index])
-            data.update(sampler_info)
-            yield data
-
-
-def Dataset(data_type,
-            data_list_file,
-            symbol_table,
-            conf,
-            bpe_model=None,
-            non_lang_syms=None,
-            partition=True):
-    """ Construct dataset from arguments
-
-        We have two shuffle stage in the Dataset. The first is global
-        shuffle at shards tar/raw file level. The second is global shuffle
-        at training samples level.
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
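# ---------------------------------------------------------------------------
# [Editor's illustrative sketch -- not part of the original diff.] How the
# ConvolutionModule from the convolution.py deleted above is typically
# invoked, assuming it is importable via the path this encoder uses
# (`wenet.efficient_conformer.convolution`); batch, time and channel sizes
# below are arbitrary example values.
import torch
from wenet.efficient_conformer.convolution import ConvolutionModule

conv = ConvolutionModule(channels=256, kernel_size=15, causal=False)
x = torch.randn(2, 100, 256)                         # (#batch, time, channels)
mask_pad = torch.ones(2, 1, 100, dtype=torch.bool)   # no padded frames
y, new_cache = conv(x, mask_pad=mask_pad)
assert y.shape == (2, 100, 256)                      # same layout as the input
# ---------------------------------------------------------------------------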
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. 
- self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - 
static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
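A quick worked example of the windowing arithmetic in `forward_chunk_by_chunk` above: the numbers below are illustrative only and assume the 1/4-rate front end used by this encoder (`subsampling_rate = 4`, `right_context = 6`).

```python
# Illustrative sketch only: how forward_chunk_by_chunk slices the input frames.
# The subsampling/right_context values are assumptions matching the depthwise
# 2D subsampling module defined later in this diff.
subsampling = 4
context = 6 + 1                                    # right_context + 1 (current frame)
decoding_chunk_size = 16
stride = subsampling * decoding_chunk_size                            # 64
decoding_window = (decoding_chunk_size - 1) * subsampling + context   # 67

num_frames = 200
windows = [(cur, min(cur + decoding_window, num_frames))
           for cur in range(0, num_frames - context + 1, stride)]
print(windows)  # [(0, 67), (64, 131), (128, 195), (192, 200)] -> overlapping chunks
```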
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
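The position-wise FFN deleted above preserves the model dimension and, when `adaptive_scale` is enabled, rescales the input with a learned per-feature scale and bias before the two linear layers. A minimal stand-alone sketch of that computation (not the wenet class itself; sizes are arbitrary and dropout is omitted):

```python
import torch

# Minimal sketch of the squeezeformer position-wise FFN computation.
idim, hidden_units = 8, 32
w_1 = torch.nn.Linear(idim, hidden_units)
w_2 = torch.nn.Linear(hidden_units, idim)
ada_scale = torch.nn.Parameter(torch.ones(1, 1, idim))   # adaptive_scale=True path
ada_bias = torch.nn.Parameter(torch.zeros(1, 1, idim))

xs = torch.randn(2, 5, idim)                 # (B, L, D)
xs = ada_scale * xs + ada_bias               # learned input rescaling
out = w_2(torch.relu(w_1(xs)))               # output keeps shape (B, L, D)
print(out.shape)                             # torch.Size([2, 5, 8])
```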
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
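The shapes implied by `DepthwiseConv2dSubsampling4` above can be checked quickly; the sketch below is illustrative and simply replays the module's index arithmetic (two stride-2 convolutions, so roughly a quarter of the frames and of the 80-dim filterbank axis survive).

```python
import torch

# Illustrative check of the 1/4 subsampling arithmetic (not part of the diff).
input_size = 80
freq_after = ((input_size - 1) // 2 - 1) // 2          # 19 features per channel
x_mask = torch.ones(1, 1, 100, dtype=torch.bool)       # (B, 1, T) with T=100
t_after = x_mask[:, :, :-2:2][:, :, :-2:2].size(-1)    # 24 frames remain
print(freq_after, t_after)   # the input_proj Linear therefore sees odim * 19 features
```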
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return 
padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 
@@ -from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = 
speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
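To make the 1/4 time reduction of `Conv2dSubsampling4` above concrete, a small standalone sketch (illustrative shapes, plain PyTorch, not part of this patch) of the two stride-2 convolutions:

```python
import torch

T, idim, odim = 100, 80, 256                     # illustrative sizes
conv = torch.nn.Sequential(
    torch.nn.Conv2d(1, odim, 3, 2), torch.nn.ReLU(),
    torch.nn.Conv2d(odim, odim, 3, 2), torch.nn.ReLU(),
)
x = torch.randn(1, 1, T, idim)                   # (batch, channel=1, time, feat)
y = conv(x)
print(y.shape)                                   # torch.Size([1, 256, 24, 19])
# time' = ((T - 1) // 2 - 1) // 2 = 24, i.e. roughly T // 4, which is why the
# class flattens (channel, feat) into odim * (((idim - 1) // 2 - 1) // 2).
```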
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
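A minimal standalone sketch (hypothetical paths and metadata, plain PyTorch and PyYAML, not part of this patch) of the convention `save_checkpoint`/`load_checkpoint` above rely on: weights in `<name>.pt`, training info in a sibling `<name>.yaml`:

```python
import re
import torch
import yaml

model = torch.nn.Linear(4, 2)                     # stand-in model
path = "demo.pt"                                  # hypothetical checkpoint path

torch.save(model.state_dict(), path)              # weights  -> demo.pt
with open(re.sub(r"\.pt$", ".yaml", path), "w") as fout:
    yaml.dump({"epoch": 1, "step": 100}, fout)    # metadata -> demo.yaml

restored = torch.nn.Linear(4, 2)
restored.load_state_dict(torch.load(path, map_location="cpu"), strict=False)
with open("demo.yaml") as fin:
    print(yaml.load(fin, Loader=yaml.FullLoader))  # {'epoch': 1, 'step': 100}
```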
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
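A short standalone sketch (illustrative numbers, NumPy only, not part of this patch) of what the CMVN helpers above compute from accumulated statistics: a per-dimension mean and inverse standard deviation that are later applied as `(feat - mean) * istd`:

```python
import numpy as np

# Accumulated stats for two feature dimensions over frame_num frames
# (illustrative values, not taken from any real cmvn file).
mean_stat = np.array([10.0, 20.0])
var_stat = np.array([60.0, 210.0])
frame_num = 5

mean = mean_stat / frame_num                      # [2.0, 4.0]
var = var_stat / frame_num - mean * mean          # [8.0, 26.0]
istd = 1.0 / np.sqrt(np.maximum(var, 1.0e-20))    # floor tiny variances

feat = np.array([[4.0, 9.0]])                     # one fake frame
print((feat - mean) * istd)                       # normalized frame
```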
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
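The greedy CTC post-processing performed by `remove_duplicates_and_blank` above boils down to the following small standalone sketch (illustrative token ids, blank assumed to be 0, hypothetical helper name):

```python
from typing import List

def collapse_ctc(hyp: List[int], blank: int = 0) -> List[int]:
    """Collapse repeated tokens and drop the blank symbol."""
    out, prev = [], None
    for tok in hyp:
        if tok != prev and tok != blank:
            out.append(tok)
        prev = tok
    return out

print(collapse_ctc([0, 3, 3, 0, 0, 4, 4, 4, 0, 5]))   # [3, 4, 5]
```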
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
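For the forced-alignment helpers above, a tiny standalone sketch (illustrative label ids, not part of this patch) of the blank-interleaved sequence that `insert_blank` produces before the dynamic program runs:

```python
import numpy as np

label = np.array([7, 8, 9])                       # illustrative token ids
blank_id = 0

# [7, 8, 9] -> [blank, 7, blank, 8, blank, 9, blank]
pairs = np.stack([np.full_like(label, blank_id), label], axis=1)
interleaved = np.append(pairs.reshape(-1), blank_id)
print(interleaved)                                # [0 7 0 8 0 9 0]
```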
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
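The core loop in `Executor.train` above (loss scaled by `accum_grad`, gradient clipping, optimizer stepped only once per accumulation window) reduces to roughly the following standalone sketch, with the DDP and AMP handling omitted and illustrative shapes:

```python
import torch

model = torch.nn.Linear(8, 2)                     # stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
accum_grad, clip = 4, 50.0

for batch_idx in range(8):
    feats, target = torch.randn(16, 8), torch.randn(16, 2)   # fake batch
    loss = torch.nn.functional.mse_loss(model(feats), target) / accum_grad
    loss.backward()                               # gradients accumulate
    if (batch_idx + 1) % accum_grad == 0:
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        if torch.isfinite(grad_norm):
            optimizer.step()
        optimizer.zero_grad()
```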
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
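A small standalone sketch (made-up entries) of the `"<token> <id>"` dictionary format that `read_symbol_table` above expects, one pair per line:

```python
# Each line of the dict file maps a modelling unit to its integer id
# (entries here are made up for illustration).
lines = ["<blank> 0", "<unk> 1", "你 2", "好 3", "<sos/eos> 4"]

symbol_table = {}
for line in lines:
    token, idx = line.strip().split()
    symbol_table[token] = int(idx)

print(symbol_table["<blank>"], symbol_table["<sos/eos>"])     # 0 4
```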
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
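The padding mask built by `make_pad_mask` above can be reproduced with a couple of broadcast comparisons; a standalone sketch using the lengths from its docstring (not part of this patch):

```python
import torch

lengths = torch.tensor([5, 3, 2])
max_len = int(lengths.max())

seq_range = torch.arange(max_len).unsqueeze(0)    # (1, max_len)
pad_mask = seq_range >= lengths.unsqueeze(1)      # (B, max_len), True on padding
print(pad_mask.int())
# tensor([[0, 0, 0, 0, 0],
#         [0, 0, 0, 1, 1],
#         [0, 0, 1, 1, 1]], dtype=torch.int32)
# make_non_pad_mask is simply the logical negation: ~pad_mask
```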
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/multi_cn/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/README.md deleted file mode 100644 index 7b904dbfb7af31ac08fccc27095631ece8173762..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/README.md +++ /dev/null @@ -1,44 +0,0 @@ -# w2v-conformer based end-to-end model for Openasr2021 challenge - -This is a example to use unsupervised pretrained w2v-conformer model to fintune [OpenASR2021](https://www.nist.gov/itl/iad/mig/openasr-challenge) constrained-plus tasks. - -We pretrain conformer encoders using wav2vec 2.0 pre-training method , which we called ch-w2v-conformer. The original pre-training works take raw waveforms -as input. Unlike these works, we use MFCC features as inputs. - -The ch-w2v-conformer model uses following datasets to pretrain: - -ISML datasets (6 languages,70k hours): internal dataset contains 40k hours Chinese, Cantonese, Tibetan, Inner Mongolian, Inner Kazakh, Uighur. - -Babel datasets (17 languages, 2k hours): Assamese, Bengali, Cantonese, Cebuano, Georgian, Haitian, Kazakh, Kurmanji, Lao, Pashto, Swahili, Tagalog, Tamil, Tok, Turkish, Vietnamese, Zulu - -After pretraining, we build ASR system based on CTC-Attention structure. In very low resource task, we find that if too many initialization network structures are constructed in the upper layer of pre-training conformer encoder, the migration performance of the pre-training model will be destroyed, so we only build a single-layer transformer decoder for joint training. 
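
For reference, the three-phase behaviour of the `NoamHoldAnnealing` scheduler defined above (linear warmup, peak-LR hold, then polynomial decay) can be traced with a small standalone sketch. The function below re-derives the schedule from the deleted class; the hyperparameter values in the demo loop are arbitrary and chosen only for illustration.

```python
def noam_hold_lr(step, base_lr, warmup_steps, hold_steps, max_steps,
                 decay_rate=0.5, min_lr=0.0):
    """Illustrative re-derivation of the warmup -> hold -> decay schedule
    implemented by NoamHoldAnnealing/WarmupHoldPolicy above.
    `hold_steps` counts only the hold phase, as in the class constructor."""
    total_hold = warmup_steps + hold_steps
    if warmup_steps > 0 and step <= warmup_steps:
        # Linear warmup, matching WarmupPolicy._get_warmup_lr
        return base_lr * (step + 1) / (warmup_steps + 1)
    if step < total_hold:
        # Hold the peak learning rate
        return base_lr
    if step > max_steps:
        return min_lr
    # Polynomial decay, matching _noam_hold_annealing
    t_warm = max(1, warmup_steps ** decay_rate)
    t_hold = max(1, (step - hold_steps) ** decay_rate)
    return max(base_lr * t_warm / t_hold, min_lr)

if __name__ == "__main__":
    # Arbitrary demo values: peak LR 1e-3, 1k warmup, 4k hold, 100k total steps
    for s in (0, 500, 1000, 5000, 20000, 80000):
        print(s, round(noam_hold_lr(s, base_lr=1e-3, warmup_steps=1000,
                                    hold_steps=4000, max_steps=100000), 6))
```

At the warmup/hold boundary the two branches meet at the peak LR, which is why the decay term is offset by the hold length alone rather than by warmup plus hold.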
- -pretrained model link: https://huggingface.co/emiyasstar/ch-w2v-conformer - - -## constrained-plus Task Performance - -* Languages: Cantonese,mongolian,kazakh -* config: conf/train_conformer_large_10h.yaml -* Feature info: using mfcc feature, with dither 1.0, without cmvn -* Training info: lr 0.001, batch size 10, 4 gpus on V100, acc_grad 1, 80 epochs -* Decoding info: ctc_weight 0.5, average_num 35 - -dev set results trained only with 10 hours training set - -## w2v-Conformer - -| decoding_method | Cantonese(CER) | mongolian(WER) | -|:-------------------:|:----:|:----:| -| ctc_greedy_search | 31.46 | 53.64 | -| ctc_prefix_search | 31.47 | 53.50 | -| attention_rescoring | 31.45 | 52.96 | - -## Conformer (train from scratch) - - -| decoding_method | Cantonese(CER) | mongolian(WER) | -|:-------------------:|----:|:----:| -| ctc_greedy_search | 61.43 | 89.38 | -| ctc_prefix_search | 61.37 | 89.53| -| attention_rescoring | 60.61 | 89.60| diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/conf/lang.conf b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/conf/lang.conf deleted file mode 100644 index 5177c06e6940b9ba059bd316d65dd73da617f789..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/conf/lang.conf +++ /dev/null @@ -1,38 +0,0 @@ -# A giant configurations file for all the BABEL languages -# as well as some training configurations for training HMM-GMM systems -# for obtaining phoneme level alignments if you really want to do that -# All paths starting with /export/* are set for the JHU/CLSP grid and shoudl -# be changed appropriately for other users - -# Cantonese -train_data_dir_101=/train/asr/feat/mfcchires/openasr/cantonese/openasr21_cantonese/build -train_data_list_101=/train/asr/feat/mfcchires/openasr/cantonese/openasr21_cantonese/build/ct_train_openasr21_uniq -train_data_dir_101_FLP=/export/babel/data/101-cantonese/release-current/conversational/training -train_data_list_101_FLP=./conf/lists/101-cantonese/train.FullLP.list -dev10h_data_dir_101=/train/asr/feat/mfcchires/openasr/cantonese/openasr21_cantonese/dev -dev10h_data_list_101=/train/asr/feat/mfcchires/openasr/cantonese/openasr21_cantonese/dev/ct_dev_openasr21_uniq -lexicon_file_101=/train/asr/feat/mfcchires/openasr/cantonese/openasr21_cantonese/build/reference_materials/lexicon.txt -lexiconFlags_101="--romanized --oov " - - -# Kazakh -train_data_dir_302=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/training -train_data_list_302=./conf/lists/302-kazakh/sub-train.list -train_data_dir_302_FLP=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/training -train_data_list_302_FLP=./conf/lists/302-kazakh/training.list -dev10h_data_dir_302=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev -dev10h_data_list_302=./conf/lists/302-kazakh/dev.list -lexicon_file_302=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/reference_materials/lexicon.sub-train.txt -lexiconFlags_302="--romanized --oov " - -#mongolian -train_data_dir_401=/train/asr/feat/mfcchires/openasr/mongolian/openasr21_mongolian/build -train_data_list_401=/train/asr/feat/mfcchires/openasr/mongolian/openasr21_mongolian/build/mn_train_openasr21 -dev10h_data_dir_401=/train/asr/feat/mfcchires/openasr/mongolian/openasr21_mongolian/dev 
-dev10h_data_list_401=/train/asr/feat/mfcchires/openasr/mongolian/openasr21_mongolian/dev/mn_dev_openasr21 -lexicon_file_401=/train/asr/feat/mfcchires/openasr/mongolian/openasr21_mongolian/build/reference_materials/lexicon.txt -lexiconFlags_401="--romanized --oov " - - -oovSymbol="" -lexiconFlags="--oov " diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/conf/train_conformer_large_10h.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/conf/train_conformer_large_10h.yaml deleted file mode 100644 index cb3312c13cd2bc9d294506f2a1429e74c9f0c93c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/conf/train_conformer_large_10h.yaml +++ /dev/null @@ -1,86 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 512 # dimension of attention - attention_heads: 8 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 24 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.0 - attention_dropout_rate: 0.0 - input_layer: conv2d6 # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - macaron_style: True - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - cnn_module_norm: 'layer_norm' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 1024 - num_blocks: 1 - dropout_rate: 0.1 - positional_dropout_rate: 0.0 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.7 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -raw_wav: True - -# dataset related -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - feats_type: mfcc - mfcc_conf: - num_mel_bins: 40 - frame_shift: 10 - frame_length: 25 - num_ceps: 40 - low_freq: 20 - high_freq: -400 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 10 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 100 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.0004 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 15000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/dump_wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/dump_wav.sh deleted file mode 100644 index 0f9df4c7a2b2bd59542dadcf07131fb17f52effc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/dump_wav.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -# dumps such pipe-style-wav to real audio file -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -paste -d " " <(cut -f 1 -d " " $inscp) <(cut -f 2- -d " " $inscp | tr -t " " "#") \ - > $data/wav_ori.scp - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp - -rm -f $data/{segments,wav_segments.scp,reco2file_and_channel,reco2dur} -tools/fix_data_dir.sh $data diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/make_absolute.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/make_absolute.sh deleted file mode 100644 index 8936bdaea78972dea9261a041c67dfa5ee41eca6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/make_absolute.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# This script replaces the command readlink -f (which is not portable). -# It turns a pathname into an absolute pathname, including following soft links. -target_file=$1 - -cd $(dirname $target_file) -target_file=$(basename $target_file) - -# Iterate down a (possible) chain of symlinks -while [ -L "$target_file" ]; do - target_file=$(readlink $target_file) - cd $(dirname $target_file) - target_file=$(basename $target_file) -done - -# Compute the canonicalized name by finding the physical path -# for the directory we're in and appending the target file. -phys_dir=$(pwd -P) -result=$phys_dir/$target_file -echo $result diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/make_corpus_subset.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/make_corpus_subset.sh deleted file mode 100644 index d5bfbb30ae071dcb0ecdb02ec7a654d738eeef5f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/make_corpus_subset.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) -# Apache 2.0. - -#Begin configuration -ignore_missing_txt=false #If the reference transcript txt is missing, \ - #shall we ignore it or treat it as a fatal error? -#End configuration -echo "$0 $@" # Print the command line for logging - -help_message="$0: create subset of the input directory (specified as the first directory). - The subset is specified by the second parameter. - The directory in which the subset should be created is the third parameter - Example: - $0 " - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - -if [[ "$#" -ne "3" ]] ; then - echo -e "FATAL: wrong number of script parameters!\n\n" - printf "$help_message\n\n" - exit 1; -fi - -input_data_dir=$1 -input_data_list=$2 -output_data_dir=$3 - -if [[ ! 
-d "$input_data_dir" ]] ; then - echo "FATAL: input data directory does not exist"; - exit 1; -fi -if [[ ! -f "$input_data_list" ]] ; then - echo "FATAL: input data list file does not exist!"; - exit 1; -fi - -mkdir -p $output_data_dir/transcription -mkdir -p $output_data_dir/audio - -abs_src_dir=`local/make_absolute.sh $input_data_dir` -abs_tgt_dir=`local/make_absolute.sh $output_data_dir` - -echo "Making subset..." -for file_basename in `cat $input_data_list`; do - echo $file_basename - if [[ -e $abs_src_dir/audio/$file_basename.sph ]] ; then - ln -sf $abs_src_dir/audio/$file_basename.sph $abs_tgt_dir/audio || exit 1 - else - if [[ -e $abs_src_dir/audio/$file_basename.wav ]] ; then - ln -sf $abs_src_dir/audio/$file_basename.wav $abs_tgt_dir/audio || exit 1 - else - echo "File $abs_src_dir/audio/$file_basename.sph|wav does not exist!" - exit 1 - fi - fi - - if [[ -e $abs_src_dir/transcription/$file_basename.txt ]] ; then - ln -sf $abs_src_dir/transcription/$file_basename.txt $abs_tgt_dir/transcription || exit 1 - else - echo "File $abs_src_dir/transcription/$file_basename.txt does not exist!" - - if ! $ignore_missing_txt ; then - exit 1; - fi - fi -done - - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/prepare_acoustic_training_data.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/prepare_acoustic_training_data.pl deleted file mode 100644 index aeb57c4901eb23c21b0cec68e4afd0cf9a75b1b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/prepare_acoustic_training_data.pl +++ /dev/null @@ -1,477 +0,0 @@ -#!/usr/bin/env perl -use Getopt::Long; - -######################################################################## -# -# Script to prepare the Babel acoustic training data for Kaldi. -# -# - Place transcripts in a file named "text" -# Each line contains: utteranceID word1 word2 ... -# -# - Place the utterance-to-speaker map in a file named "utt2spk" -# Each line contains: utteranceID speakerID -# speakerID MUST BE be a prefix of the utteranceID -# Kaldi code does not require it, but some training scripts do. -# -# - Place the utterance-to-segment map in a file named "segments" -# Each line contains: utteranceID recordingID startTime endTime -# -# - Place the recordingID-to-waveformFile map in "wav.scp" -# Each line contains: recordingIB Input_pipe_for_reading_waveform| -# -# - Place the speaker-utterance map in a file named "spk2utt" -# Each line contains: speakerID utteranceID_1 utteranceID_2 ... -# This is the inverse of the utt2spk mapping -# -# Note 1: the utteranceIDs in the first 3 files must match exactly, and -# the recordingIDSs in the last 2 files must match exactly. -# -# Note 2: Babel data formats and file-naming conventions are assumed. -# -# - The transcriptions and waveforms are in subdirectories named -# audio/.sph -# transcription/.txt -# There is 1 pair of files per recording, with extensions as above -# -# - The audio is in NIST sphere format, so shp2pipe may be used, e.g. -# BABEL_BP_101_11694_20111204_205320_inLine \ -# /export/babel/sanjeev/kaldi-trunk/tools/sph2pipe_v2.5/sph2pipe \ -# -f wav -p -c 1 \ -# BABEL_BP_101_11694_20111204_205320_inLine.sph| -# -# - The filename contains speaker information, e.g. 
-# BABEL_BP_101_37210_20111102_170037_O1_scripted.sph -> 37210_A -# BABEL_BP_101_37210_20111102_172955_inLine.sph -> 37210_A -# BABEL_BP_101_37210_20111102_172955_outLine.sph -> 37210_B -# Specifically, the inLine speaker is the same as scripted -# -# - The transcription file has time marks in square brackets, e.g. -# [0.0] -# -# [7.05] -# 啊 听 听唔听到 啊 你 而家 仲未 上课 系 嘛 -# [14.07] -# -# - If a vocabulary is provided, map all OOV tokens to an OOV symbol, -# and write out an OOV list with counts to a file named "oovCounts" -# -# If one or more word-fragment markers are provided, this script -# checks if an OOV token can be made in-vocabulary by stripping off -# the markers one by one from either end of the token. -# -# The default settings are -# - $vocabFile = ""; # No vocab file; nothing is mapped to OOV - $OOV_symbol = ""; # Default OOV symbol - $fragMarkers = ""; # No characters are word-fragment markers -# -# - Babel transcriptions contain 4 kinds of untranscribed words -# -# (()) designates unintelligible words -# designates a word in another language -# designates a sequence of pre-recorded words -# designates two simultaneous foreground speakers -# -# This script maps them to OOV. They are not included in oovCounts -# -# - Babel transcriptions also contain a few non-linguistics tokens -# -# map to a vocal noise symbol -# map to a vocal noise symbol -# map to a vocal noise symbol -# map to a vocal noise symbol -# -# map to a nonvocal noise symbol -# map to a nonvocal noise symbol -# map to a nonvocal noise symbol -# map to a nonvocal noise symbol -# -# designates silence > 1 sec. -# - $vocalNoise = ""; - $nVoclNoise = ""; - $silence = ""; - $icu_transform=""; -# -######################################################################## - -GetOptions("fragmentMarkers=s" => \$fragMarkers, - "oov=s" => \$OOV_symbol, - "vocab=s" => \$vocabFile, - "icu-transform=s" => \$icu_transform - ); - -if ($#ARGV == 1) { - $inDir = $ARGV[0]; - $outDir = $ARGV[1]; - print STDERR ("$0: $inDir $outDir\n"); - if($vocabFile) { - print STDERR ("\tLimiting transcriptions to words in $vocabFile\n"); - print STDERR ("\tMapping OOV tokens to \"$OOV_symbol\"\n"); - print STDERR ("\tif they remain OOV even after removing [$fragMarkers] from either end\n") if ($fragMarkers); - } - print STDERR ("$0 ADVICE: Use full path for the Input Directory\n") unless ($inDir=~m:^/:); -} else { - print STDERR ("Usage: $0 [--options] InputDir OutputDir\n"); - print STDERR ("\t--vocab File containing the permitted vocabulary\n"); - print STDERR ("\t--oov Use this symbol for OOV words (default )\n"); - print STDERR ("\t--fragmentMarkers Remove these from ends of words to minimize OOVs (default none)\n"); - exit(1); -} - -######################################################################## -# Read and save the vocabulary and map anything not in the vocab -######################################################################## - -if ($vocabFile) { - open (VOCAB, $vocabFile) - || die "Unable to open vocabulary file $vocabFile"; - $numWords = 0; - while () { - next unless (m:^([^\s]+):); - $numWords++ unless (exists $inVocab{$1}); # Don't count word repetitions - $inVocab{$1} = 1; # commonly found in lexicons - } - close(VOCAB); - print STDERR ("Read $numWords unique words from $vocabFile\n"); -} - -######################################################################## -# First read segmentation information from all the transcription files -######################################################################## - -$TranscriptionDir 
= "$inDir/transcription"; -if (-d $TranscriptionDir) { - @TranscriptionFiles = `ls ${TranscriptionDir}/*.txt`; - if ($#TranscriptionFiles >= 0) { - printf STDERR ("$0: Found %d .txt files in $TranscriptionDir\n", ($#TranscriptionFiles +1)); - $numFiles = $numUtterances = $numWords = $numOOV = $numSilence = 0; - while ($filename = shift @TranscriptionFiles) { - $fileID = $filename; # To capture the base file name - $fileID =~ s:.+/::; # remove path prefix - $fileID =~ s:\.txt\s*$::; # remove file extension - # For each transcription file, extract and save segmentation data - $numUtterancesThisFile = 0; - $prevTimeMark = -1.0; - $text = ""; - if ( $icu_transform ) { - $inputspec="uconv -f utf8 -t utf8 -x \"$icu_transform\" $filename |"; - } else { - $inputspec=$filename; - } - open (TRANSCRIPT, $inputspec) || die "Unable to open $filename"; - while ($line=) { - chomp $line; - if ($line =~ m:^\[([0-9]+\.*[0-9]*)\]$:) { - $thisTimeMark = $1; - if ($thisTimeMark < $prevTimeMark) { - print STDERR ("$0 ERROR: Found segment with negative duration in $filename\n"); - print STDERR ("\tStart time = $prevTimeMark, End time = $thisTimeMark\n"); - print STDERR ("\tThis could be a sign of something seriously wrong!\n"); - print STDERR ("\tFix the file by hand or remove it from the directory, and retry.\n"); - exit(1); - } - if ($prevTimeMark<0) { - # Record the first timemark and continue - $prevTimeMark = $thisTimeMark; - next; - } - ################################################## - # Create an utteranceID using fileID & start time - # - Assume Babel file naming conventions - # - Remove prefix: program_phase_language - # - inLine = scripted = spkr A, outLine = B - # - Move A/B so that utteranceIDs sort by spkr - # - Assume utterance start time < 10000 sec. - ################################################## - $utteranceID = $fileID; - $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::; - $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:; - $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:; - $utteranceID .= sprintf ("_%06i", (100*$prevTimeMark)); - ################################################## - # Then save segmentation, transcription, spkeaerID - ################################################## - if (exists $transcription{$utteranceID}) { - # utteranceIDs should be unique, but this one is not! 
- # Either time marks in the transcription file are bad, - # or something went wrong in generating the utteranceID - print STDERR ("$0 WARNING: Skipping duplicate utterance $utteranceID\n"); - } - elsif ($text eq "") { - # Could be due to text filtering done below - # Output information to STDOUT to enable > /dev/null - print STDOUT ("$0: Skipping empty transcription $utteranceID\n"); - } else { - $transcription{$utteranceID} = $text; - $startTime{$utteranceID} = $prevTimeMark; - $endTime{$utteranceID} = $thisTimeMark; - if ($utteranceID =~ m:([^_]+_[AB]).*:) { - $speakerID{$utteranceID} = $1; - } else { - # default: one speaker per audio file - $speakerID{$utteranceID} = $fileID; - } - $baseFileID{$utteranceID} = $fileID; - $numUtterancesThisFile++; - $numUtterances++; - $text = ""; - } - $prevTimeMark = $thisTimeMark; - } else { - @tokens = split(/\s+/, $line); - $text = ""; - while ($w = shift(@tokens)) { - # First, some Babel-specific transcription filtering - if (($w eq "")||($w eq "")||($w eq "")||($w eq "~")) { - next; - } elsif (($w eq "")||($w eq "")||($w eq "")||($w eq "")) { - $text .= " $vocalNoise"; - $numWords++; - } elsif (($w eq "")||($w eq "")||($w eq "")||($w eq "")){ - $text .= " $nVoclNoise"; - $numWords++; - } elsif (($w eq "(())")||($w eq "")||($w eq "")||($w eq "")) { - $text .= " $OOV_symbol"; - $oovCount{$w}++; - $numOOV++; - $numWords++; - } elsif ($w eq "") { - $text .= " $silence"; - $numSilence++; - } else { - # This is a just regular spoken word - if ($vocabFile && (! $inVocab{$w}) && $fragMarkers) { - # $w is a potential OOV token - # Remove fragMarkers to see if $w becomes in-vocabulary - while ($w =~ m:^(\S+[$fragMarkers]|[$fragMarkers]\S+)$:) { - if ($w =~ m:^(\S+)[$fragMarkers]$:) { - $w = $1; - last if ($inVocab{$w}); - } elsif ($w =~m:^[$fragMarkers](\S+)$:) { - $w = $1; - last if ($inVocab{$w}); - } else { - die "Logically, the program should never reach here!"; - } - } - } - # If still an OOV, replace $w by $OOV_symbol - if ($vocabFile && (! 
$inVocab{$w})) { - # $w is definitely an OOV token - if (exists $oovCount{$w}) { - $oovCount{$w}++; - } else { - $oovCount{$w} = 1; - } - $w = $OOV_symbol; - $numOOV++; - } - $text .= " $w"; - $numWords++; - } - } - $text =~ s:^\s+::; # Remove leading white space, if any - # Transcriptions must contain real words to be useful in training - $text =~ s:^(($OOV_symbol|$vocalNoise|$nVoclNoise|$silence)[ ]{0,1})+$::; - } - } - close(TRANSCRIPTION); - if ($numUtterancesThisFile>0) { - $lastTimeMarkInFile{$fileID} = $prevTimeMark; - $numUtterancesInFile{$fileID} = $numUtterancesThisFile; - $numUtterancesThisFile = 0; - } - $numFiles++; - } - print STDERR ("$0: Recorded $numUtterances non-empty utterances from $numFiles files\n"); - } else { - print STDERR ("$0 ERROR: No .txt files found $TranscriptionDir\n"); - exit(1); - } -} else { - print STDERR ("$0 ERROR: No directory named $TranscriptionDir\n"); - exit(1); -} - -######################################################################## -# Then verify existence of corresponding audio files and their durations -######################################################################## - -$AudioDir = "$inDir/audio"; -if (-d $AudioDir) { - @AudioFiles = `ls ${AudioDir}/*.sph`; - if ($#AudioFiles >= 0) { - printf STDERR ("$0: Found %d .sph files in $AudioDir\n", ($#AudioFiles +1)); - $numFiles = 0; - while ($filename = shift @AudioFiles) { - $fileID = $filename; - $fileID =~ s:.+/::; # remove path prefix - $fileID =~ s:\.sph\s*::; # remove file extension - if (exists $numUtterancesInFile{$fileID}) { - # Some portion of this file has training transcriptions - @Info = `head $filename`; - $SampleCount = -1; - $SampleRate = 8000; #default - while ($#Info>=0) { - $line = shift @Info; - $SampleCount = $1 if ($line =~ m:sample_count -i (\d+):); - $SampleRate = $1 if ($line =~ m:sample_rate -i (\d+):); - } - if ($SampleCount<0) { - # Unable to extract a valid duration from the sphere header - print STDERR ("Unable to extract duration: skipping file $filename"); - } else { - $waveformName{$fileID} = $filename; chomp $waveformName{$fileID}; - $duration{$fileID} = $SampleCount/$SampleRate; - $numFiles++; - } - } else { - # Could be due to text filtering resulting in an empty transcription - # Output information to STDOUT to enable > /dev/null - print STDOUT ("$0: No transcriptions for audio file ${fileID}.sph\n"); - } - } - print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n"); - } else { - print STDERR ("$0 NOTICE: No .sph files in $AudioDir\n"); - } - - @AudioFiles = `ls ${AudioDir}/*.wav`; - if ($#AudioFiles >= 0) { - $soxi=`which soxi` or die "Could not find soxi binary -- do you have sox installed?\n"; - chomp $soxi; - printf STDERR ("$0: Found %d .wav files in $AudioDir\n", ($#AudioFiles +1)); - print STDERR "Soxi found: $soxi\n"; - $numFiles = 0; - while ($filename = shift @AudioFiles) { - $fileID = $filename; - $fileID =~ s:.+/::; # remove path prefix - $fileID =~ s:\.wav\s*::; # remove file extension - if (exists $numUtterancesInFile{$fileID}) { - # Some portion of this file has training transcriptions - $duration = `$soxi -D $filename`; - if ($duration <=0) { - # Unable to extract a valid duration from the sphere header - print STDERR ("Unable to extract duration: skipping file $filename"); - } else { - if (exists $waveformName{$fileID} ) { - print STDERR ("$0 ERROR: duplicate fileID \"$fileID\" for files \"$filename\" and \"" . 
$waveformName{$fileID} ."\"\n"); - exit(1); - } - $waveformName{$fileID} = $filename; chomp $waveformName{$fileID}; - $duration{$fileID} = $duration; - $numFiles++; - } - } else { - # Could be due to text filtering resulting in an empty transcription - # Output information to STDOUT to enable > /dev/null - print STDOUT ("$0: No transcriptions for audio file ${fileID}.sph\n"); - } - } - print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n"); - } else { - print STDERR ("$0 NOTICE: No .wav files in $AudioDir\n"); - } - if ( $#waveformName == 0 ) { - print STDERR ("$0 ERROR: No audio files found!"); - } -} else { - print STDERR ("$0 ERROR: No directory named $AudioDir\n"); - exit(1); -} - -######################################################################## -# Now all the needed information is available. Write out the 4 files. -######################################################################## - -unless (-d $outDir) { - print STDERR ("$0: Creating output directory $outDir\n"); - die "Failed to create output directory" if (`mkdir -p $outDir`); # i.e. if the exit status is not zero. -} -print STDERR ("$0: Writing 5 output files to $outDir\n"); - -$textFileName = "$outDir/text"; -open (TEXT, "> $textFileName") || die "$0 ERROR: Unable to write text file $textFileName\n"; - -$utt2spkFileName = "$outDir/utt2spk"; -open (UTT2SPK, "> $utt2spkFileName") || die "$0 ERROR: Unable to write utt2spk file $utt2spkFileName\n"; - -$segmentsFileName = "$outDir/segments"; -open (SEGMENTS, "> $segmentsFileName") || die "$0 ERROR: Unable to write segments file $segmentsFileName\n"; - -$scpFileName = "$outDir/wav.scp"; -open (SCP, "| sort -u > $scpFileName") || die "$0 ERROR: Unable to write wav.scp file $scpFileName\n"; -my $binary=$ENV{SPH2PIPE} -$SPHBINARY ="$binary -f wav -p -c 1"; -my $SOXBINARY =`which sox` or die "Could not find the sph2pipe command"; chomp $SOXBINARY; -$SOXFLAGS ="-r 8000 -c 1 -b 16 -t wav - downsample"; - -$spk2uttFileName = "$outDir/spk2utt"; -open (SPK2UTT, "> $spk2uttFileName") || die "$0 ERROR: Unable to write spk2utt file $spk2uttFileName\n"; - -$oovFileName = "$outDir/oovCounts"; -open (OOV, "| sort -nrk2 > $oovFileName") || die "$0 ERROR: Unable to write oov file $oovFileName\n"; - -$numUtterances = $numSpeakers = $numWaveforms = 0; -$totalSpeech = $totalSpeechSq = 0.0; -foreach $utteranceID (sort keys %transcription) { - $fileID = $baseFileID{$utteranceID}; - if (exists $waveformName{$fileID}) { - # There are matching transcriptions and audio - $numUtterances++; - $totalSpeech += ($endTime{$utteranceID} - $startTime{$utteranceID}); - $totalSpeechSq += (($endTime{$utteranceID} - $startTime{$utteranceID}) - *($endTime{$utteranceID} - $startTime{$utteranceID})); - print TEXT ("$utteranceID $transcription{$utteranceID}\n"); - print UTT2SPK ("$utteranceID $speakerID{$utteranceID}\n"); - print SEGMENTS ("$utteranceID $fileID $startTime{$utteranceID} $endTime{$utteranceID}\n"); - if (exists $uttList{$speakerID{$utteranceID}}) { - $uttList{$speakerID{$utteranceID}} .= " $utteranceID"; - } else { - $numSpeakers++; - $uttList{$speakerID{$utteranceID}} = "$utteranceID"; - } - next if (exists $scpEntry{$fileID}); - $numWaveforms++; - if ($waveformName{$fileID} =~ /.*\.sph/ ) { - $scpEntry{$fileID} = "$SPHBINARY $waveformName{$fileID} |"; - } else { - $scpEntry{$fileID} = "$SOXBINARY $waveformName{$fileID} $SOXFLAGS |"; - } - } else { - print STDERR ("$0 WARNING: No audio file for transcription $utteranceID\n"); - } -} -foreach $fileID (sort keys %scpEntry) { 
- print SCP ("$fileID $scpEntry{$fileID}\n"); -} -foreach $speakerID (sort keys %uttList) { - print SPK2UTT ("$speakerID $uttList{$speakerID}\n"); -} -foreach $w (sort keys %oovCount) { - print OOV ("$w\t$oovCount{$w}\n"); -} -exit(1) unless (close(TEXT) && close(UTT2SPK) && close(SEGMENTS) && close(SCP) && close(SPK2UTT) && close(OOV)); - -print STDERR ("$0: Summary\n"); -print STDERR ("\tWrote $numUtterances lines each to text, utt2spk and segments\n"); -print STDERR ("\tWrote $numWaveforms lines to wav.scp\n"); -print STDERR ("\tWrote $numSpeakers lines to spk2utt\n"); -print STDERR ("\tHmmm ... $numSpeakers distinct speakers in this corpus? Unusual!\n") - if (($numSpeakers<($numUtterances/500.0)) || ($numSpeakers>($numUtterances/2.0))); -print STDERR ("\tTotal # words = $numWords (including $numOOV OOVs) + $numSilence $silence\n") - if ($vocabFile); -printf STDERR ("\tAmount of speech = %.2f hours (including some due to $silence)\n", $totalSpeech/3600.0); -if ($numUtterances>0) { - printf STDERR ("\tAverage utterance length = %.2f sec +/- %.2f sec, and %.2f words\n", - $totalSpeech /= $numUtterances, - sqrt(($totalSpeechSq/$numUtterances)-($totalSpeech*$totalSpeech)), - $numWords/$numUtterances); -} - -exit(0); - -######################################################################## -# Done! -######################################################################## diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/prepare_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/prepare_data.sh deleted file mode 100644 index b53c805e291851f0e82182f6a5b22f406c8311bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/prepare_data.sh +++ /dev/null @@ -1,124 +0,0 @@ -#!/bin/bash - -# Copyright 2018 Johns Hopkins University (Matthew Wiesner) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first. - -. ./conf/lang.conf -. ./path.sh -. ./cmd.sh - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -export SPH2PIPE=$sph2pipe -sox=`which sox` -[ ! -x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -FLP=false - -. ./utils/parse_options.sh -if [ $# -ne 1 ]; then - echo >&2 "Usage: ./local/prepare_data.sh [opts] " - echo >&2 " --FLP : Use FLP training data (instead of LLP ~10h)" - exit 1 -fi - -l=$1 - -l_suffix=${l} -if $FLP; then - l_suffix=${l_suffix}_FLP -fi - -#Preparing train directories -if [ ! 
-f data/raw_train_data/.done ]; then - echo --------------------------------------------------------------------- - echo "Subsetting the TRAIN set" - echo --------------------------------------------------------------------- - train_data_dir=train_data_dir_${l_suffix} - train_data_list=train_data_list_${l_suffix} - local/make_corpus_subset.sh "${!train_data_dir}" "${!train_data_list}" ./data/raw_train_data - train_data_dir=`utils/make_absolute.sh ./data/raw_train_data` - touch data/raw_train_data/.done -fi - -#exit 0 - -#Preparing dev10 directories -if [ ! -f data/raw_dev10h_data/.done ]; then - echo --------------------------------------------------------------------- - echo "Subsetting the Dev set" - echo --------------------------------------------------------------------- - dev10h_data_dir=dev10h_data_dir_${l} - dev10h_data_list=dev10h_data_list_${l} - local/make_corpus_subset.sh "${!dev10h_data_dir}" "${!dev10h_data_list}" ./data/raw_dev10h_data - dev10h_data_dir=`utils/make_absolute.sh ./data/raw_dev10h_data` - touch data/raw_dev10h_data/.done -fi - -dev10h_data_dir=`utils/make_absolute.sh ./data/raw_dev10h_data` -train_data_dir=`utils/make_absolute.sh ./data/raw_train_data` -lexicon_file=lexicon_file_${l_suffix} - -if [[ ! -f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then - echo --------------------------------------------------------------------- - echo "Preparing acoustic training lists in data/train on" `date` - echo --------------------------------------------------------------------- - mkdir -p data/train.tmp - local/prepare_acoustic_training_data.pl \ - --fragmentMarkers \-\*\~ \ - $train_data_dir data/train.tmp > data/train.tmp/skipped_utts.log -fi - -if [[ ! -f data/dev10h.pem/wav.scp || data/dev10h.pem/wav.scp -ot "$dev10h_data_dir" ]]; then - echo --------------------------------------------------------------------- - echo "Preparing acoustic training lists in data/train on" `date` - echo --------------------------------------------------------------------- - mkdir -p data/dev10h.pem - local/prepare_acoustic_training_data.pl \ - --fragmentMarkers \-\*\~ \ - $dev10h_data_dir data/dev10h.pem > data/dev10h.pem/skipped_utts.log -fi - - -########################################################################### -# Prepend language ID to all utterances to disambiguate between speakers -# of different languages sharing the same speaker id. -# -# The individual lang directories can be used for alignments, while a -# combined directory will be used for training. This probably has minimal -# impact on performance as only words repeated across languages will pose -# problems and even amongst these, the main concern is the marker. 
-########################################################################### - -num_utts=$(cat data/train.tmp/segments | wc -l) -dev_utts=$((num_utts / 10)) - -./utils/subset_data_dir.sh data/train.tmp ${dev_utts} data/train_dev - -awk '{print $1}' data/train_dev/utt2spk > data/train_dev.list -awk '{print $1}' data/train.tmp/utt2spk | grep -vf data/train_dev.list > data/train.list - -./utils/subset_data_dir.sh --utt-list data/train.list data/train.tmp data/train - -echo "Prepend ${l} to data dir" -./utils/copy_data_dir.sh --spk-prefix "${l}_" --utt-prefix "${l}_" \ - data/train data/train_${l} - -./utils/copy_data_dir.sh --spk-prefix "${l}_" --utt-prefix "${l}_" \ - data/train_dev data/dev_${l} - -./utils/copy_data_dir.sh --spk-prefix "${l}_" --utt-prefix "${l}_" \ - data/dev10h.pem data/eval_${l} - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/setup_languages.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/setup_languages.sh deleted file mode 100644 index 8c6eb48a4b83cc7421f11a92def1aa7cf14f4ba6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/local/setup_languages.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash - -# Copyright 2018 Johns Hopkins University (Matthew Wiesner) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -. ./path.sh -. ./cmd.sh -. ./conf/lang.conf - -#langs="101 102 103 104 105 106 202 203 204 205 206 207 301 302 303 304 305 306 401 402 403" -langs="101" -recog="101" -FLP=false -garbage_utterance_tags=" " - -. ./tools/parse_options.sh - -set -e -set -o pipefail - -all_langs="" -for l in `cat <(echo ${langs}) <(echo ${recog}) | tr " " "\n" | sort -u`; do - all_langs="${l} ${all_langs}" -done -all_langs=${all_langs%% } - -# Save top-level directory -cwd=$(local/make_absolute.sh `pwd`) -echo "Stage 0: Setup Language Specific Directories" -echo "cwd" -echo $cwd - -echo " --------------------------------------------" -echo "Languagues: ${all_langs}" - -# Basic directory prep -for l in ${all_langs}; do - [ -d data/${l} ] || mkdir -p data/${l} - cd data/${l} - - ln -sf ${cwd}/local . - for f in ${cwd}/{tools,conf}; do - link=`make_absolute.sh $f` - ln -sf $link . - done - - cp ${cwd}/cmd.sh . - cp ${cwd}/path.sh . 
- sed -i 's/\.\.\/\.\.\/\.\./\.\.\/\.\.\/\.\.\/\.\.\/\.\./g' path.sh - cd ${cwd} -done - -# Prepare language specific data -for l in ${all_langs}; do - ( - cd data/${l} - ./local/prepare_data.sh --FLP ${FLP} ${l} - cd ${cwd} - ) & -done -wait - -# Combine all language specific training directories and generate a single -# lang directory by combining all language specific dictionaries -train_dirs="" -dev_dirs="" -eval_dirs="" -for l in ${langs}; do - train_dirs="data/${l}/data/train_${l} ${train_dirs}" -done - -for l in ${recog}; do - dev_dirs="data/${l}/data/dev_${l} ${dev_dirs}" -done - -./tools/combine_data.sh data/train ${train_dirs} -./tools/combine_data.sh data/dev ${dev_dirs} - -for l in ${recog}; do - ln -s ${cwd}/data/${l}/data/eval_${l} ${cwd}/data/eval_${l} -done - - -# Delete utterances with garbage meta tags -for tag in $garbage_utterance_tags; do - sed -i "s/${tag}//g" data/train/text - sed -i "s/${tag}//g" data/dev/text - sed -i "s/${tag}//g" data/eval_${l}/text -done - -sed -i "/_.*[0-9][ ]*$/d" data/train/text -sed -i "/_.*[0-9][ ]*$/d" data/dev/text -sed -i "/_.*[0-9][ ]*$/d" data/eval_${l}/text -sed -i 's/[ ][ ]*/ /g' data/train/text -sed -i 's/[ ][ ]*/ /g' data/dev/text -sed -i 's/[ ][ ]*/ /g' data/eval_${l}/text - -./tools/fix_data_dir.sh data/train -./tools/fix_data_dir.sh data/dev -./tools/fix_data_dir.sh data/eval_${l} - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/run.sh deleted file mode 100644 index 7c338f13e9c8ca7c9613993e9ac5117ec3ba6dec..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/run.sh +++ /dev/null @@ -1,232 +0,0 @@ -#!/bin/bash -# Copyright 2021 Tencent Inc. (Author: Kai Tang). -# Apach 2.0 - -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3" -stage=0 # start from 0 if you need to start from data preparation -stop_stage=5 -# data -data=data -data_url=www.openslr.org/resources/33 -nj=4 - -#langid: 101 Cantonese , 302 Kazakh , 401 mongolian -langs="101" -recog="101" - -token_type=char -# bpemode (unigram or bpe) -nbpe=4500 -bpemode=unigram - -# data_type can be `raw` or `shard`. Typically, raw is used for small dataset, -# `shard` is used for large dataset which is over 1k hours, and `shard` is -# faster on reading data and training. 
-data_type=raw -num_utts_per_shard=1000 - -if [ "${token_type}" = bpe ]; then - dict=data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt - bpemodel=data/lang_char/${train_set}_${bpemode}${nbpe} -elif [ "${token_type}" = char ]; then - dict=data/lang_char/lang_char.txt - bpe_model= -else - echo "Error: not supported token_type" - exit 0 -fi - -train_set=train_sp -train_dev=dev -recog_set=eval_$recog - -# pretrained w2v-conformer encoder -enc_init=pretrain/conformer.pt -#reinit last pretrained encoder layer: https://arxiv.org/pdf/2107.04734.pdf -enc_init_mods='encoder.encoders.0,encoder.encoders.1,encoder.encoders.2,encoder.encoders.3,encoder.encoders.4,encoder.encoders.5,encoder.encoders.6,encoder.encoders.7,encoder.encoders.8,encoder.encoders.9,encoder.encoders.10,encoder.encoders.11,encoder.encoders.12,encoder.encoders.13,encoder.encoders.14,encoder.encoders.15,encoder.encoders.16,encoder.encoders.17,encoder.encoders.18,encoder.encoders.19,encoder.encoders.20,encoder.encoders.21,encoder.encoders.22,encoder.embed' - -train_config=conf/train_conformer_large_10h.yaml -checkpoint= -cmvn=false -dir=exp/${langs}_finetune_10h - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=35 - -. utils/parse_options.sh || exit 1; - -#Babel style data preparation -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - echo "stage 0: Setting up individual languages" - ./local/setup_languages.sh --langs "${langs}" --recog "${recog}" -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - # Data preparation - for x in ${train_set} ${train_dev} ${recog_set}; do - # Remove the space in text - if [ "${token_type}" = char ]; then - cp data/${x}/text data/${x}/text.org - paste -d " " <(cut -f 1 -d" " data/${x}/text.org) <(cut -f 2- -d" " data/${x}/text.org | tr -d " ") \ - > data/${x}/text - rm data/${x}/text.org - fi - done -fi - -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - # Make train dict - echo "Make a dictionary" - mkdir -p $(dirname $dict) - - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - if [ "${token_type}" = bpe ]; then - # we borrowed these code and scripts which are related bpe from ESPnet. - cut -f 2- -d" " data/${train_set}/text | sort > data/lang_char/input.txt - tools/spm_train --input=data/lang_char/input.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000 - tools/spm_encode --model=${bpemodel}.model --output_format=piece < data/lang_char/input.txt | tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict} - elif [ "${token_type}" = char ]; then - tools/text2token.py -s 1 -n 1 data/${train_set}/text | cut -f 2- -d" " | tr " " "\n" \ - | sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0 " " NR+1}' >> ${dict} - fi - - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "stage 1: format scp " - #dumps such pipe-style-wav to real audio file - for x in ${train_set} ${train_dev} ${recog_set}; do - cp data/${x}/wav.scp data/${x}/wav.scp.org - bash local/dump_wav.sh --nj 26 data/$x/wav.scp.org data/$x/segments data/$x/wav.scp - rm data/$x/wav.scp.org - done -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - echo "Prepare data, prepare required format" - # For wav feature, just copy the data. 
mfcc/fbank extraction is done in training
-  for x in ${train_set} ${train_dev} ${recog_set}; do
-    if [ $data_type == "shard" ]; then
-      tools/make_shard_list.py --num_utts_per_shard $num_utts_per_shard \
-        --num_threads 16 data/$x/wav.scp data/$x/text \
-        $(realpath data/$x/shards) data/$x/data.list
-    else
-      tools/make_raw_list.py data/$x/wav.scp data/$x/text \
-        data/$x/data.list
-    fi
-  done
-fi
-
-if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
-  # Training
-  mkdir -p $dir
-  INIT_FILE=$dir/ddp_init
-  rm -f $INIT_FILE # delete old one before starting
-  init_method=file://$(readlink -f $INIT_FILE)
-  echo "$0: init method is $init_method"
-  num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-  # Use "nccl" if it works, otherwise use "gloo"
-  dist_backend="nccl"
-  cmvn_opts=
-  $cmvn && cmvn_opts="--cmvn data/${train_set}/global_cmvn"
-  # train.py will write $train_config to $dir/train.yaml with model input
-  # and output dimension, train.yaml will be used for inference or model
-  # export later
-  for ((i = 0; i < $num_gpus; ++i)); do
-  {
-    gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1])
-    python wenet/bin/train.py --gpu $gpu_id \
-      --config $train_config \
-      --data_type $data_type \
-      --symbol_table $dict \
-      ${bpemodel:+--bpe_model ${bpemodel}.model} \
-      --train_data data/$train_set/data.list \
-      --cv_data data/$train_dev/data.list \
-      ${checkpoint:+--checkpoint $checkpoint} \
-      ${enc_init:+--enc_init $enc_init} \
-      --enc_init_mods $enc_init_mods \
-      --model_dir $dir \
-      --ddp.init_method $init_method \
-      --ddp.world_size $num_gpus \
-      --ddp.rank $i \
-      --ddp.dist_backend $dist_backend \
-      --num_workers 6 \
-      $cmvn_opts
-  } &
-  done
-  wait
-fi
-
-
-if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
-  # Test model, please specify the model you want to test by --checkpoint
-  cmvn_opts=
-  $cmvn && cmvn_opts="--cmvn data/${train_set}/global_cmvn"
-  # TODO, Add model average here
-  mkdir -p $dir/test
-  if [ ${average_checkpoint} == true ]; then
-    decode_checkpoint=$dir/avg_${average_num}.pt
-    echo "do model average and final checkpoint is $decode_checkpoint"
-    python wenet/bin/average_model.py \
-      --dst_model $decode_checkpoint \
-      --src_path $dir \
-      --num ${average_num} \
-      --val_best
-  fi
-  # Specify decoding_chunk_size if it's a unified dynamic chunk trained model
-  # -1 for full chunk
-  decoding_chunk_size=
-  ctc_weight=0.5
-  for mode in ctc_greedy_search ctc_prefix_beam_search attention attention_rescoring; do
-  for rtask in ${recog_set}; do
-  {
-    test_dir=$dir/test_${rtask}_${mode}
-    mkdir -p $test_dir
-    python wenet/bin/recognize.py --gpu 0 \
-      --mode $mode \
-      --config $dir/train.yaml \
-      --data_type $data_type \
-      --test_data data/$rtask/data.list \
-      --checkpoint $decode_checkpoint \
-      --beam_size 5 \
-      --batch_size 1 \
-      --penalty 0.0 \
-      --dict $dict \
-      ${bpemodel:+--bpe_model ${bpemodel}.model} \
-      --ctc_weight $ctc_weight \
-      --result_file $test_dir/text_ori \
-      $cmvn_opts \
-      ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size}
-    if [ "${token_type}" = bpe ]; then
-      tools/spm_decode --model=${bpemodel}.model --input_format=piece < $test_dir/text_ori | sed -e "s/▁/ /g" > $test_dir/text
-      python tools/compute-wer.py --char=0 --v=1 \
-        data/$rtask/text $test_dir/text > $test_dir/wer
-    elif [ "${token_type}" = char ]; then
-      python tools/compute-wer.py --char=1 --v=1 \
-        data/$rtask/text $test_dir/text_ori > $test_dir/wer
-    fi
-  } &
-  done
-  done
-  wait
-
-fi
-
-if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then
-  # Export the best model you want
-  python wenet/bin/export_jit.py \
-    --config $dir/train.yaml \
-    --checkpoint $dir/avg_${average_num}.pt \
-    --output_file $dir/final.zip
-fi
-
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/alignment.sh
deleted file mode 100644
index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/alignment.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-# Copyright 2019 Mobvoi Inc. All Rights Reserved.
-. ./path.sh || exit 1;
-
-stage=0 # start from 0 if you need to start from data preparation
-stop_stage=0
-
-nj=16
-feat_dir=raw_wav
-dict=data/dict/lang_char.txt
-
-dir=exp/
-config=$dir/train.yaml
-checkpoint=
-checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt
-config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml
-set=
-ali_format=$feat_dir/$set/format.data
-ali_format=format.data
-ali_result=$dir/ali
-
-. tools/parse_options.sh || exit 1;
-
-if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
-  nj=32
-  # Prepare required data for ctc alignment
-  echo "Prepare data, prepare required format"
-  for x in $set; do
-    tools/format_data.sh --nj ${nj} \
-      --feat-type wav --feat $feat_dir/$x/wav.scp \
-      $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp
-
-  done
-fi
-
-if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
-  # Test model, please specify the model you want to use by --checkpoint
-  python wenet/bin/alignment_deprecated.py --gpu -1 \
-    --config $config \
-    --input_file $ali_format \
-    --checkpoint $checkpoint \
-    --batch_size 1 \
-    --dict $dict \
-    --result_file $ali_result \
-
-fi
-
-
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/analyze_dataset.py
deleted file mode 100644
index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/analyze_dataset.py
+++ /dev/null
@@ -1,248 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Analyze Dataset, Duration/TextLength/Speed etc.
-
-Usage:
-. 
./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. - elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in 
range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, 
ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! 
cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. -has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! 
$skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - 
min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. 
- format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - 
parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University 
(author: Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
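The utterance/speaker renaming above is a plain first-field substitution driven by the generated `utt_map`/`spk_map`. A rough Python equivalent of `apply_map.pl -f 1` (file paths are placeholders; it assumes every key appears in the map, as the original expects):

```python
def apply_map_first_field(map_path, in_path, out_path):
    """Mimic `apply_map.pl -f 1`: rewrite the first field using old->new pairs from map_path."""
    with open(map_path, encoding="utf8") as f:
        mapping = dict(line.strip().split(maxsplit=1) for line in f if line.strip())
    with open(in_path, encoding="utf8") as fin, open(out_path, "w", encoding="utf8") as fout:
        for line in fin:
            parts = line.strip().split(maxsplit=1)
            if not parts:
                continue
            rest = f" {parts[1]}" if len(parts) > 1 else ""
            fout.write(f"{mapping[parts[0]]}{rest}\n")
```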
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
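The shards produced by `feat-to-len` hold per-utterance frame counts, and the concatenation step simply appends the feature dimension so each shape.scp line reads like `utt0001 1032,80`. A Python sketch of that merge (shard paths and dimension are placeholders):

```python
def merge_shape_shards(shard_paths, feat_dim, out_path="shape.scp"):
    """Concatenate per-job 'utt num_frames' shards, appending ',feat_dim' to every line."""
    with open(out_path, "w", encoding="utf8") as fout:
        for path in shard_paths:
            with open(path, encoding="utf8") as fin:
                for line in fin:
                    utt, num_frames = line.split()
                    fout.write(f"{utt} {num_frames},{feat_dim}\n")

# merge_shape_shards(["log/shape.1.scp", "log/shape.2.scp"], feat_dim=80)
```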
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
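The Perl filter above keeps (or, with `--exclude`, drops) lines whose n-th field appears in an id list. An equivalent Python sketch, using the same 1-based field index (paths and the helper name are illustrative):

```python
def filter_scp(id_list_path, scp_path, field=1, exclude=False):
    """Yield scp lines whose `field`-th (1-based) token is / is not in the id list."""
    with open(id_list_path, encoding="utf8") as f:
        seen = {line.split()[0] for line in f if line.strip()}
    with open(scp_path, encoding="utf8") as f:
        for line in f:
            tokens = line.split()
            if len(tokens) < field:
                continue
            hit = tokens[field - 1] in seen
            if hit != exclude:   # keep matches, or keep non-matches when excluding
                yield line

# Keep only utterances listed in keep.list:
# with open("wav_filtered.scp", "w") as out:
#     out.writelines(filter_scp("keep.list", "wav.scp"))
```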
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . 
- "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . - " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. 
- } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . $cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. 
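The token list assembled here pairs every symbol with a consecutive integer id: the special symbols (conventionally `<eps>` and the CTC blank `<blank>`, an assumption here since the tag names did not survive extraction), then the acoustic model units, then the `#N` disambiguation symbols. A small sketch of that numbering with toy inputs:

```python
def write_tokens_txt(units, disambig, specials=("<eps>", "<blank>"), out_path="tokens.txt"):
    """Write one 'symbol id' pair per line: special symbols, model units, then disambig symbols."""
    symbols = list(specials) + list(units) + list(disambig)
    with open(out_path, "w", encoding="utf8") as fout:
        for idx, sym in enumerate(symbols):
            fout.write(f"{sym} {idx}\n")

# Toy example; real inputs come from units.txt and add_lex_disambig.pl.
write_tokens_txt(units=["ni", "hao"], disambig=["#0", "#1"])
```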
-cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', '')) - node 
+= 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n + 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . 
- "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! 
defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. - if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
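In the char-unit branch, each in-vocabulary word becomes a space-separated character sequence, with English words optionally prefixed by `▁` when that marker is part of the unit set. A compact sketch of that entry construction (the helper name and toy unit tables are illustrative):

```python
def char_lexicon_entry(word, unit_table):
    """Build a 'word char1 char2 ...' lexicon line, or None if the word has OOV characters."""
    if any(ch not in unit_table for ch in word):
        return None                                   # the script skips OOV words entirely
    if word.encode("utf8").isalpha() and "▁" in unit_table:
        word = "▁" + word                             # optional ▁ marker for English words
    return "{} {}".format(word, " ".join(word))

print(char_lexicon_entry("你好", {"你", "好"}))          # '你好 你 好'
print(char_lexicon_entry("abc", {"▁", "a", "b", "c"}))   # '▁abc ▁ a b c'
```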
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you 
may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol <blank> is defined in local/train_bpe_model.py - assert token2id["<blank>"] == 0 - assert word2id["<eps>"] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id["<unk>"] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is <UNK> - lexicon.append(("<UNK>", ["<unk>"])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id['<blank>'] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '<eps>' '<unk>' at begining of word2id - # '#0', '<s>', '</s>' at end of word2id - words = [word for word in words - if word not in ['<eps>', '<unk>', '#0', '<s>', '</s>']] - words.insert(0, '<eps>') - words.insert(1, '<unk>') - words.append('#0') - words.append('<s>') - words.append('</s>') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["<eps>", "!SIL", "<SPOKEN_NOISE>", "<UNK>", "#0", "<s>", "</s>"]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin 
if PY2 else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh <warping-factor> <srcdir> <destdir>" - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires <srcdir> and <destdir> to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - 
stack.enter_context(open(output, "w", encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n";
-          print " phones $p1 and $p2 will be acoustically indistinguishable).\n";
-        }
-        $num_warn_nosplit++;
-      }
-    }
-  }
-}
-
-
-if ($exit == 1) {
-  print "--> ERROR validating dictionary directory $dict (see detailed error ";
-  print "messages above)\n\n";
-  exit 1;
-} else {
-  print "--> SUCCESS [validating dictionary directory $dict]\n\n";
-}
-
-exit 0;
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/validate_text.pl
deleted file mode 100644
index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/validate_text.pl
+++ /dev/null
@@ -1,136 +0,0 @@
-#!/usr/bin/env perl
-#
-#===============================================================================
-# Copyright 2017 Johns Hopkins University (author: Yenda Trmal )
-#                Johns Hopkins University (author: Daniel Povey)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-#===============================================================================
-
-# validation script for data//text
-# to be called (preferably) from utils/validate_data_dir.sh
-use strict;
-use warnings;
-use utf8;
-use Fcntl qw< SEEK_SET >;
-
-# this function reads the opened file (supplied as a first
-# parameter) into an array of lines. For each
-# line, it tests whether it's a valid utf-8 compatible
-# line. If all lines are valid utf-8, it returns the lines
-# decoded as utf-8, otherwise it assumes the file's encoding
-# is one of those 1-byte encodings, such as ISO-8859-x
-# or Windows CP-X.
-# Please recall we do not really care about
-# the actually encoding, we just need to
-# make sure the length of the (decoded) string
-# is correct (to make the output formatting looking right).
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - 
non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = 
feats_length.to(device)
-            target_length = target_length.to(device)
-            # Let's assume B = batch_size and N = beam_size
-            # 1. Encoder
-            encoder_out, encoder_mask = model._forward_encoder(
-                feat, feats_length)  # (B, maxlen, encoder_dim)
-            maxlen = encoder_out.size(1)
-            ctc_probs = model.ctc.log_softmax(
-                encoder_out)  # (1, maxlen, vocab_size)
-            # print(ctc_probs.size(1))
-            ctc_probs = ctc_probs.squeeze(0)
-            target = target.squeeze(0)
-            alignment = forced_align(ctc_probs, target)
-            print(alignment)
-            fout.write('{} {}\n'.format(key[0], alignment))
-
-            if args.gen_praat:
-                timestamp = get_frames_timestamp(alignment)
-                print(timestamp)
-                subsample = get_subsample(configs)
-                labformat = get_labformat(timestamp, subsample)
-
-                lab_path = os.path.join(os.path.dirname(args.result_file),
-                                        key[0] + ".lab")
-                with open(lab_path, 'w', encoding='utf-8') as f:
-                    f.writelines(labformat)
-
-                textgrid_path = os.path.join(os.path.dirname(args.result_file),
-                                             key[0] + ".TextGrid")
-                generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 *
-                                   subsample,
-                                   lines=labformat,
-                                   output=textgrid_path)
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/average_model.py
deleted file mode 100644
index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/average_model.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright (c) 2020 Mobvoi Inc (Di Wu)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
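The check stages above all follow the same export-then-verify pattern: export with `torch.onnx.export`, validate with `onnx.checker`, then compare ONNX Runtime output against the eager module via `np.testing.assert_allclose`. Below is a minimal, self-contained sketch of that pattern; the toy projection head, shapes, and file name are illustrative assumptions, not code from these deleted scripts.

```python
# Sketch only: export-then-verify on a toy CTC-style projection head.
import numpy as np
import torch
import onnx
import onnxruntime


class TinyCTC(torch.nn.Module):
    """Stand-in for a CTC projection: (B, C, 1, T) -> (B, V, 1, T)."""
    def __init__(self, idim=256, vocab=128):
        super().__init__()
        self.proj = torch.nn.Conv2d(idim, vocab, 1, 1)

    def forward(self, x):
        return self.proj(x)


model = TinyCTC().eval()
hidden = torch.randn(1, 256, 1, 16)

torch.onnx.export(model, hidden, "tiny_ctc.onnx", opset_version=11,
                  input_names=["hidden"], output_names=["probs"])
onnx.checker.check_model(onnx.load("tiny_ctc.onnx"))

# Verify ONNX Runtime agrees with the eager PyTorch module.
sess = onnxruntime.InferenceSession("tiny_ctc.onnx",
                                    providers=["CPUExecutionProvider"])
onnx_out = sess.run(None, {"hidden": hidden.numpy()})[0]
with torch.no_grad():
    torch_out = model(hidden).numpy()
np.testing.assert_allclose(torch_out, onnx_out, rtol=1e-03, atol=1e-04)
print("torch vs onnxruntime check passed")
```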
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
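The metadata written through `metadata_props` during export is what the check stages later read back via `get_modelmeta().custom_metadata_map`. A small illustrative sketch of that round trip follows; the toy model, file name, and keys are assumptions, not values used by these scripts.

```python
# Sketch only: metadata_props written at export time round-trips through
# onnxruntime's model metadata.
import onnx
import onnxruntime
import torch

model = torch.nn.Linear(4, 2).eval()
torch.onnx.export(model, torch.randn(1, 4), "meta_demo.onnx",
                  opset_version=13, input_names=["x"], output_names=["y"])

onnx_model = onnx.load("meta_demo.onnx")
for key, value in {"chunk_size": "16", "left_chunks": "4"}.items():
    meta = onnx_model.metadata_props.add()
    meta.key, meta.value = key, value
# Re-save so the metadata ends up in the file onnxruntime loads.
onnx.save(onnx_model, "meta_demo.onnx")

sess = onnxruntime.InferenceSession("meta_demo.onnx",
                                    providers=["CPUExecutionProvider"])
print(sess.get_modelmeta().custom_metadata_map)  # {'chunk_size': '16', ...}
```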
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
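The `dynamic_axes` mappings used in these exports are what allow a model traced at one sequence length to run at another. A brief sketch under assumed toy shapes (the module and file name are not from this repo):

```python
# Sketch only: a dynamic time axis lets the exported graph accept lengths
# other than the one seen at trace time.
import torch
import onnxruntime

proj = torch.nn.Linear(80, 32).eval()   # stands in for a ctc-style head
hidden = torch.randn(1, 16, 80)         # traced with T == 16

torch.onnx.export(
    proj, hidden, "dyn_axes_demo.onnx", opset_version=13,
    input_names=["hidden"], output_names=["probs"],
    dynamic_axes={"hidden": {1: "T"}, "probs": {1: "T"}})

sess = onnxruntime.InferenceSession("dyn_axes_demo.onnx",
                                    providers=["CPUExecutionProvider"])
# T == 50 at run time, even though the export saw T == 16.
out = sess.run(None, {"hidden": torch.randn(1, 50, 80).numpy()})[0]
print(out.shape)  # (1, 50, 32)
```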
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
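The `calculate_downsampling_factor` used above simply accumulates the stride of every reduction layer that sits before the queried layer index. A standalone sketch with made-up stride settings (not taken from any config in this repo):

```python
# Sketch only: cumulative-stride rule behind calculate_downsampling_factor.
from typing import List

def downsampling_factor(layer: int, stride_layer_idx: List[int],
                        stride: List[int]) -> int:
    """Product of all strides applied strictly before `layer`."""
    factor = 1
    for idx, stride_idx in enumerate(stride_layer_idx):
        if layer > stride_idx:
            factor *= stride[idx]
    return factor

# With stride-2 reductions after layers 3 and 7:
print([downsampling_factor(i, [3, 7], [2, 2]) for i in range(10)])
# -> [1, 1, 1, 1, 2, 2, 2, 2, 4, 4]
```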
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
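# NOTE: minimal sketch of the FP16 conversion step performed at the end of the export
# script above, shown as a self-contained helper. It only uses the onnxmltools calls
# already present in that script; the file paths here are placeholders, not paths
# mandated by this repo.
import onnxmltools
from onnxmltools.utils.float16_converter import convert_float_to_float16


def convert_onnx_to_fp16(src_path, dst_path):
    """Load an FP32 ONNX graph, cast weights/activations to FP16, and save it."""
    model = onnxmltools.utils.load_model(src_path)
    model_fp16 = convert_float_to_float16(model)
    onnxmltools.utils.save_model(model_fp16, dst_path)


if __name__ == "__main__":
    # Hypothetical file names; substitute the encoder/decoder ONNX files produced by the exporter.
    convert_onnx_to_fp16("encoder.onnx", "encoder_fp16.onnx")
    convert_onnx_to_fp16("decoder.onnx", "decoder_fp16.onnx")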
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
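# NOTE: minimal sketch (toy data) of how the attention-rescoring decoder inputs above
# are assembled: each beam hypothesis is framed with sos/eos, its reverse is framed the
# same way, and both are right-padded with IGNORE_ID. The sos/eos value is assumed
# (the script above uses len(char_dict) - 1); IGNORE_ID = -1 follows wenet's convention.
import numpy as np

IGNORE_ID = -1
sos = eos = 99                      # assumed: last index of a toy vocabulary

hyps = [[10, 11, 12], [10, 11]]     # toy beam of 2 hypotheses for one utterance
beam_size = len(hyps)
max_len = max(len(h) for h in hyps)

hyps_pad_sos_eos = np.full((1, beam_size, max_len + 2), IGNORE_ID, dtype=np.int64)
r_hyps_pad_sos_eos = np.full((1, beam_size, max_len + 2), IGNORE_ID, dtype=np.int64)
hyps_lens_sos = np.ones((1, beam_size), dtype=np.int32)

for j, cand in enumerate(hyps):
    l = len(cand) + 2
    hyps_pad_sos_eos[0, j, :l] = [sos] + cand + [eos]
    r_hyps_pad_sos_eos[0, j, :l] = [sos] + cand[::-1] + [eos]
    hyps_lens_sos[0, j] = len(cand) + 1   # sos + tokens, eos excluded

print(hyps_pad_sos_eos[0])
print(hyps_lens_sos)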
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
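For reference, the binary int-vector layout parsed by `read_vec_int` above can be exercised in isolation. Below is a minimal, self-contained sketch (the names `pack_vec_int`/`unpack_vec_int` are illustrative, not part of kaldi_io) that round-trips a small vector through the same byte layout: the '\0B' binary flag, an int32 dimension, then one `(int8 size, int32 value)` pair per element.

```python
import io
import numpy as np

def pack_vec_int(values):
    # '\0B' binary flag, '\4' int-size byte, int32 dimension,
    # then each element stored as an (int8 size == 4, int32 value) pair.
    buf = b'\0B\x04' + np.int32(len(values)).tobytes()
    for v in values:
        buf += b'\x04' + np.int32(v).tobytes()
    return buf

def unpack_vec_int(buf):
    fd = io.BytesIO(buf)
    assert fd.read(2) == b'\0B' and fd.read(1) == b'\x04'
    dim = int(np.frombuffer(fd.read(4), dtype='int32', count=1)[0])
    vec = np.frombuffer(fd.read(dim * 5), count=dim,
                        dtype=[('size', 'int8'), ('value', 'int32')])
    return vec['value']

print(unpack_vec_int(pack_vec_int([3, 1, 4, 1, 5])))  # [3 1 4 1 5]
```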
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
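A hedged usage sketch of the writer/reader pair defined in this file: write a couple of float32 feature matrices to an ark file with `write_mat`, then stream them back with `read_mat_ark`. It assumes the module is importable as `kaldi_io` and that writing a local `feats.ark` is acceptable.

```python
import numpy as np
import kaldi_io  # assumes this module is importable on PYTHONPATH

feats = {
    'utt001': np.random.rand(98, 80).astype('float32'),
    'utt002': np.random.rand(123, 80).astype('float32'),
}

# write_mat() asserts the file is opened in binary mode ('wb').
with open('feats.ark', 'wb') as f:
    for key, mat in feats.items():
        kaldi_io.write_mat(f, mat, key=key)

# Stream the ark back into a dict, as suggested by the docstrings above.
loaded = {key: mat for key, mat in kaldi_io.read_mat_ark('feats.ark')}
assert all(np.allclose(feats[k], loaded[k]) for k in feats)
```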
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
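As a worked example of the frame-mask construction in `read_segments_as_bool_vec` above, with made-up segment times (0.10 s to 0.50 s and 1.00 s to 1.20 s at the assumed 100 frames/second):

```python
import numpy as np

# Two segments of one wav, expressed in frames at 100 frames/second.
start = np.array([10, 100])   # segment starts
end = np.array([50, 120])     # segment ends

# Interleave gap/speech run lengths and expand them into a bool vector.
frms = np.repeat(np.r_[np.tile([False, True], len(end)), False],
                 np.r_[np.c_[start - np.r_[0, end[:-1]], end - start].flat, 0])
print(len(frms), int(frms.sum()))  # 120 60 -> 60 speech frames out of 120
```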
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
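The CJK-versus-BPE split used here can be checked in isolation. The snippet below reproduces the behaviour described in the comment above (the input string is illustrative); the non-CJK chunks are what would then be passed to the BPE model via `sp.encode_as_pieces`.

```python
import re

# Split on CJK characters (kept as single tokens); everything else is
# left as a chunk for the BPE model to segment.
pattern = re.compile(r'([\u4e00-\u9fff])')
txt = "你好 it's okay 的"
chars = [w for w in pattern.split(txt.upper()) if len(w.strip()) > 0]
print(chars)  # ['你', '好', " IT'S OKAY ", '的']
```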
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
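For intuition, a standalone shape sketch of the grouping step performed by `pad4group` above; the modulo used here replaces the ONNX-friendly division trick in the original, and all sizes are made up for illustration.

```python
import torch
import torch.nn.functional as F

batch, head, time, d_k, group_size = 2, 4, 10, 16, 3

# Pad the time axis to a multiple of group_size, then fold group_size
# frames into the feature dimension, so attention scores are computed
# over time/group_size positions with d_k*group_size features per head.
x = torch.randn(batch, head, time, d_k)
padding = (group_size - time % group_size) % group_size      # 2 frames here
x = F.pad(x, (0, 0, 0, padding))                             # (2, 4, 12, 16)
x = x.transpose(1, 2).contiguous().view(
    batch, -1, head, d_k * group_size).transpose(1, 2)
print(x.shape)                                               # (2, 4, 4, 48)
```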
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
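For the causal branch, the module either left-pads the input by `lorder` frames or prepends a cached left context, and then keeps the last `lorder` frames as the cache for the next chunk. A standalone sketch of that bookkeeping with assumed sizes (plain tensors, not the module itself):

```python
# Minimal sketch (assumed sizes): left-context handling for a causal conv module.
import torch
import torch.nn as nn

kernel_size = 15
lorder = kernel_size - 1            # frames of left context a causal conv needs
x = torch.randn(1, 256, 9)          # (batch, channels, time) for the current chunk
cache = torch.zeros(1, 256, 0)      # first chunk: no cache yet

if cache.size(2) == 0:
    # no history: pad lorder zeros on the left
    x = nn.functional.pad(x, (lorder, 0), 'constant', 0.0)
else:
    # later chunks: prepend the cached left context instead of zeros
    x = torch.cat((cache[:, :, -lorder:], x), dim=2)

new_cache = x[:, :, -lorder:]       # keep the last lorder frames for the next chunk
print(x.shape, new_cache.shape)     # torch.Size([1, 256, 23]) torch.Size([1, 256, 14])
```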
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
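The convolution module removed above is a pointwise-GLU, depthwise, norm-plus-activation, pointwise stack. A standalone shape sketch with assumed sizes (plain `nn` layers, not the WeNet class) makes the channel and time bookkeeping explicit:

```python
# Minimal sketch (assumed sizes): pointwise -> GLU -> depthwise -> norm -> pointwise.
import torch
import torch.nn as nn

channels, kernel_size, time = 256, 15, 100
x = torch.randn(2, time, channels).transpose(1, 2)    # (batch, channels, time)

pointwise_conv1 = nn.Conv1d(channels, 2 * channels, kernel_size=1)
depthwise_conv = nn.Conv1d(channels, channels, kernel_size,
                           padding=(kernel_size - 1) // 2, groups=channels)
norm = nn.BatchNorm1d(channels)
activation = nn.SiLU()                                 # "swish"
pointwise_conv2 = nn.Conv1d(channels, channels, kernel_size=1)

x = pointwise_conv1(x)                  # (batch, 2*channels, time)
x = nn.functional.glu(x, dim=1)         # gating halves the channels back to `channels`
x = depthwise_conv(x)                   # per-channel conv, time length preserved
x = activation(norm(x))
x = pointwise_conv2(x)
print(x.transpose(1, 2).shape)          # back to (batch, time, channels) = (2, 100, 256)
```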
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
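With `stride_kernel` enabled, each stride layer shrinks the convolution kernel for the following blocks, and every block after a stride layer runs at a coarser frame rate. A minimal sketch with an assumed config reproduces that bookkeeping as it is done in the encoder defined here:

```python
# Minimal sketch (assumed config): kernel shrinking and cumulative downsampling.
cnn_module_kernel = 15
stride_layer_idx = [3]        # blocks that downsample
stride = [2]
num_blocks = 12
stride_kernel = True

cnn_module_kernels = [cnn_module_kernel]
for s in stride:
    cnn_module_kernels.append(cnn_module_kernels[-1] // s if stride_kernel
                              else cnn_module_kernels[-1])
print(cnn_module_kernels)     # [15, 7]

def downsampling_factor(i):
    # cumulative stride seen by block i (blocks after a stride layer run coarser)
    factor = 1
    for idx, layer_id in enumerate(stride_layer_idx):
        if i > layer_id:
            factor *= stride[idx]
    return factor

print([downsampling_factor(i) for i in range(num_blocks)])  # [1, 1, 1, 1, 2, 2, ...]
```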
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
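The chunk-by-chunk simulation steps over the raw frames with a window and stride derived from the subsampling rate and its right context. A small sketch with assumed values (chosen to mimic a Conv2dSubsampling4-style front end) shows the windowing arithmetic:

```python
# Minimal sketch (assumed values): how chunk-by-chunk decoding walks the input frames.
subsampling = 4            # frame-rate reduction of the subsampling module (assumed)
right_context = 6          # lookahead frames of the subsampling module (assumed)
context = right_context + 1
decoding_chunk_size = 16   # encoder frames produced per chunk

stride = subsampling * decoding_chunk_size                           # input frames consumed per step
decoding_window = (decoding_chunk_size - 1) * subsampling + context  # input frames read per step

num_frames = 200
for cur in range(0, num_frames - context + 1, stride):
    end = min(cur + decoding_window, num_frames)
    print(f"feed frames [{cur}, {end}) -> ~{(end - cur) // subsampling} encoder frames")
```

Adjacent windows overlap by `context - subsampling` frames, which is how the subsampling module gets its right context without an extra cache.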
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. 
- Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
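The macaron-style feed-forward branches above contribute only half of their output to the residual stream (`ff_scale = 0.5`). A minimal pre-norm sketch with generic modules and assumed sizes, not the WeNet classes:

```python
# Minimal sketch (assumed sizes): macaron half-step feed-forward residual.
import torch
import torch.nn as nn

size, ff_dim, dropout_rate = 256, 2048, 0.1
ff_scale = 0.5                                # half-step residual for macaron FFN

norm_ff_macaron = nn.LayerNorm(size, eps=1e-5)
feed_forward_macaron = nn.Sequential(
    nn.Linear(size, ff_dim), nn.SiLU(), nn.Dropout(dropout_rate), nn.Linear(ff_dim, size))
dropout = nn.Dropout(dropout_rate)

x = torch.randn(2, 50, size)                  # (batch, time, size)
residual = x
x = norm_ff_macaron(x)                        # pre-norm
x = residual + ff_scale * dropout(feed_forward_macaron(x))
print(x.shape)                                # torch.Size([2, 50, 256])
```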
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
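The Conv2dSubsampling2 module above roughly halves the time axis with a single stride-2 Conv2d and then projects the flattened channel-frequency axes back to the model dimension. A shape sketch with assumed sizes (80 mel bins in, 256-d output):

```python
# Minimal sketch (assumed sizes): shape flow through a single 2x conv subsampling.
import torch
import torch.nn as nn

idim, odim = 80, 256                       # mel bins in, model dim out
conv = nn.Sequential(nn.Conv2d(1, odim, 3, 2), nn.ReLU())
out = nn.Linear(odim * ((idim - 1) // 2), odim)

x = torch.randn(4, 100, idim)              # (batch, time, idim)
x = x.unsqueeze(1)                         # (batch, 1, time, idim)
x = conv(x)                                # (batch, odim, (time-1)//2, (idim-1)//2)
b, c, t, f = x.size()
x = out(x.transpose(1, 2).contiguous().view(b, t, c * f))
print(x.shape)                             # torch.Size([4, 49, 256]) -- time roughly halved
```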
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
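The note above relies on `torch.cat` and `torch.split` behaving as no-ops on zero-sized cache tensors, which is what lets the ONNX path always concatenate and split the cache. The doctest-style lines in the comment run as-is:

```python
# Runnable version of the zero-shaped cache check from the note above.
import torch

a = torch.ones((1, 2, 0, 4))        # empty cache: cache_t == 0
b = torch.ones((1, 2, 3, 4))        # current keys/values
c = torch.cat((a, b), dim=2)        # concatenating an empty cache is a no-op
print(torch.equal(b, c))            # True

d = torch.split(a, 2, dim=-1)       # splitting the empty cache yields equal empty halves
print(torch.equal(d[0], d[1]))      # True
```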
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
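The Conv2dValid module above derives its padding from the input size, stride, and kernel at call time. A numeric sketch with assumed sizes (just the arithmetic and an equivalent `F.conv2d` call, not the module) shows the computed padding and resulting output shape; with these particular values the padding keeps the stride-2 output at the input resolution:

```python
# Minimal numeric sketch (assumed sizes): the padding Conv2dValid derives before F.conv2d.
import torch
import torch.nn.functional as F

x = torch.randn(1, 256, 20, 10)          # (batch, channels, H, W)
weight = torch.randn(256, 256, 5, 5)     # (out_ch, in_ch, kH, kW)
stride = (2, 2)
kernel_size = (5, 5)

# valid_trigx / valid_trigy enabled: padding grows with input size and stride
validx = (x.size(-2) * (stride[-2] - 1) - 1 + kernel_size[-2]) // 2
validy = (x.size(-1) * (stride[-1] - 1) - 1 + kernel_size[-1]) // 2
print(validx, validy)                    # 12 7

y = F.conv2d(x, weight, None, stride, (validx, validy))
print(y.shape)                           # torch.Size([1, 256, 20, 10])
```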
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = 
False, - static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, 
max_time_step] - """ - return padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ 
-1,54 +0,0 @@ -from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - 
batch_size = speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/openasr2021/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
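The docstring above describes the warmup, hold, and decay phases in prose; the decay itself is the `_noam_hold_annealing` formula defined earlier in this file. A minimal standalone sketch of the same schedule, with made-up hyperparameters rather than values from any recipe here, shows how the learning rate moves through the three phases:

```python
def noam_hold_lr(step, peak_lr=1e-3, warmup_steps=1000, hold_steps=3000,
                 decay_rate=0.5, min_lr=1e-5):
    """Illustrative only: peak/warmup/hold values are hypothetical."""
    if step <= warmup_steps:                      # linear warmup up to the peak LR
        return peak_lr * (step + 1) / (warmup_steps + 1)
    if step <= warmup_steps + hold_steps:         # hold the peak LR
        return peak_lr
    # polynomial decay, same form as _noam_hold_annealing above
    t_warmup = max(1, warmup_steps ** decay_rate)
    t_decay = max(1, (step - hold_steps) ** decay_rate)
    return max(peak_lr * t_warmup / t_decay, min_lr)

for s in (0, 500, 1000, 2500, 4000, 20000, 100000):
    print(f"step {s:>6d}  lr {noam_hold_lr(s):.6f}")
```

With `decay_rate=0.5` the tail matches ordinary Noam decay, while `decay_rate=1.0` gives the faster decay recommended for Squeezeformer, as noted above.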
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/README.md deleted file mode 100644 index 44c7164241b5e5ddcee4ef8bbc2f485d00daa321..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# Performance Record - -## Conformer Result - -* Feature info: dither + specaug + speed perturb -* Training info: lr 0.001, warmup_steps 25000, batch size 16, 1 gpu, acc_grad 4, 240 epochs -* Decoding info: average_num 10 - -| decoding mode | eval2000 (wer) | -|:----------------------:|:----------------:| -| ctc_greedy_search | 32.39% | -| ctc_prefix_beam_search | 32.39% | -| attention | 31.28% | -| attention_rescoring | 31.36% | \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/conf/train_conformer.yaml deleted file mode 100644 index a871cb11b7af4bffe2e8693e890d1bb11d0e8780..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,78 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 31 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - 
src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 2000 - min_length: 10 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 3 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - # batch_size: 32 - batch_size: 16 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 240 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/MSU_single_letter.txt b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/MSU_single_letter.txt deleted file mode 100644 index 1f7b419cca7421b37cfa86507b19c1a23d793a6b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/MSU_single_letter.txt +++ /dev/null @@ -1,26 +0,0 @@ -A ey -B b iy -C s iy -D d iy -E iy -F eh f -G jh iy -H ey ch -I ay -J jh ey -K k ey -L eh l -M eh m -N eh n -O ow -P p iy -Q k y uw -R aa r -S eh s -T t iy -U y uw -V v iy -W d ah b ax l y uw -X eh k s -Y w ay -Z z iy diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/dict.patch b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/dict.patch deleted file mode 100644 index 12c63d6127cca5acebf70c90599ad8bb3258c150..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/dict.patch +++ /dev/null @@ -1,380 +0,0 @@ -1d0 -< file: $SWB/data/dictionary/sw-ms98-dict.text -8645a8646 -> uh-hum ah m hh ah m -9006c9007 -< April ey p r ih l ---- -> April ey p r ax l -9144d9144 -< B ay zh aa n iy z -9261c9261 -< Battle b ae t el ---- -> Battle b ae t ax l -10014a10015 -> Chevy sh eh v iy -10211a10213 -> Colorado k ao l ax r aa d ow -10212a10215 -> Colorado' k ao l ax r aa d ow z -10370c10373 -< Creek k r ih k ---- -> Creek k r iy k -10889a10893 -> Eleven ax l eh v ih n -10951c10955 -< Erie ih r iy ---- -> Erie iy r iy -11183c11187 -< Forever f ax r eh v er ---- -> Forever f er eh v er -11231a11236 -> Friday f r ay d iy -11744a11750 -> History hh ih s t r iy -12004a12011,12012 -> Israel ih z r ih l -> Israel's ih z r ih l z -12573a12582 -> Lincoln l ih ng k ih n -12574a12584 -> Lincolns l ih ng k ih n z -13268c13278 -< NAACP eh ey ey s iy p iy ---- -> NAACP eh n ey ey s iy p iy -13286c13296 -< NIT eh ay t iy ---- -> NIT eh n ay t iy -13292c13302 -< NTSC eh t iy eh s s iy ---- -> NTSC eh n t iy eh s s iy -14058a14069 -> Quarter k ow r t er -14059a14071 -> Quarterback k ow r t er b ae k -14060a14073 -> Quarters k ow r t er z -14569a14583 -> Science s ay n s -15087a15102 -> Sunday s ah n d iy -15088a15104 -> Sunday's s ah n d iy z -15089a15106 -> Sundays s ah n d iy z -15290,15291c15307,15308 -< Texan t eh k sh ih n -< Texan's t eh k sh ih n s ---- -> Texan t eh k s ih n -> Texan's t eh k s ih n s -15335a15353 -> Thousands th aw z ih n z -15739c15757 -< Waco w ae k ow ---- -> Waco w ey k ow -15841a15860 -> 
Weekends w iy k eh n z -16782a16802 -> acceptable eh k s eh p ax b ax l -16833a16854 -> accounting ax k aw n ih ng -16948a16970 -> address ax d r eh s -17281a17304 -> already aa r d iy -17315a17339 -> am m -17709a17734 -> asked ae s t -17847a17873 -> attorney ih t er n iy -17919a17946 -> autopilot ao t ow p ay l ih t -17960a17988 -> awfully ao f l iy -18221a18250 -> basketball b ae s k ax b ao l -18222a18252 -> basketball's b ae s k ax b ao l z -18302a18333 -> become b ah k ah m -18303a18335 -> becomes b iy k ah m z -18344a18377 -> began b ax g en n -18817c18850 -< bottle b aa t el ---- -> bottle b aa t ax l -19332,19333c19365,19367 -< camera's k ae m ax r ax z -< cameras k ae m ax r ax z ---- -> camera k ae m r ax -> camera's k ae m r ax z -> cameras k ae m r ax z -19411a19446 -> capital k ae p ax l -19505a19541 -> carrying k ae r ih ng -20316a20353,20354 -> combination k aa m ih n ey sh ih n -> combinations k aa m ih n ey sh ih n z -20831a20870 -> contracts k aa n t r ae k s -21010a21050 -> costs k ao s -21062a21103 -> county k aw n iy -21371a21413 -> cultural k ao l ch ax r ax l -21372a21415 -> culturally k ao l ch ax r ax l iy -21373a21417 -> culture k ao l ch er -21375a21420 -> cultures k ao l ch er z -21543a21589 -> data d ey t ax -22097a22144 -> differently d ih f ax r ih n t l iy -22972a23020 -> effects ax f eh k t s -23016a23065 -> election ax l eh k sh ih n -23018a23068 -> elections ax l eh k sh ih n z -23052a23103 -> eleven ax l eh v ih n -23242a23294 -> enjoyable ae n jh oy ax b ax l -23248a23301 -> enjoys ae n jh oy z -23293a23347 -> entire ih n t ay r -23295a23350,23351 -> entirely ih n t ay r l iy -> entirety ih n t ay r t iy -23745a23802 -> extra eh k s t er -23818a23876 -> facts f ae k s -24508c24566 -< forever f ax r eh v er ---- -> forever f er eh v er -24514c24572 -< forget f ow r g eh t ---- -> forget f er r g eh t -24521a24580 -> forgot f er r g aa t -24522a24582 -> forgotten f er r g aa t ax n -24563a24624 -> forward f ow er d -24680a24742 -> frightening f r ay t n ih ng -24742a24805 -> full-time f ax l t ay m -24862a24926 -> garage g r aa jh -25218a25283 -> grandmother g r ae m ah dh er -25790a25856 -> heavily hh eh v ax l iy -25949a26016 -> history hh ih s t r iy -26038a26106 -> honestly aa n ax s t l iy -26039a26108 -> honesty aa n ax s t iy -26099a26169 -> horror hh ow r -26155a26226 -> houses hh aw z ih z -26184c26255 -< huh-uh hh ah hh ah ---- -> huh-uh ah hh ah -26189c26260 -< hum-um hh m hh m ---- -> hum-um ah m hh ah m -26236a26308 -> hunting hh ah n ih ng -26307a26380,26381 -> ideal ay d iy l -> idealist ay d iy l ih s t -26369a26444 -> imagine m ae jh ih n -26628a26704 -> individuals ih n d ih v ih jh ax l z -26968a27045 -> interest ih n t r ih s t -27184a27262 -> it'd ih d -27702a27781 -> lead l iy d -28378a28458 -> mandatory m ae n d ih t ow r iy -28885a28966 -> minute m ih n ih t -29167a29249 -> mountains m aw t n z -29317a29400 -> mysteries m ih s t r iy z -29318a29402 -> mystery m ih s t r iy -29470a29555 -> nervous n er v ih s -29578,29580c29663,29665 -< nobody n ow b aa d iy -< nobody'll n ow b aa d iy l -< nobody's n ow b aa d iy z ---- -> nobody n ow b ah d iy -> nobody'll n ow b ah d iy l -> nobody's n ow b ah d iy z -29712a29798 -> nuclear n uw k l iy r -29938a30025 -> onto aa n t ax -30051a30139 -> originally ax r ih jh ax l iy -30507a30596 -> particularly p er t ih k y ax l iy -30755a30845 -> perfectly p er f ih k l iy -30820a30911 -> personally p er s n ax l iy -30915a31007 -> physically f ih z ih k l iy -30986a31079 -> pilot p ay l ih t 
-30987a31081 -> pilot's p ay l ih t s -31227a31322 -> police p l iy s -31513a31609 -> prefer p er f er -31553a31650 -> prepare p r ax p ey r -31578a31676 -> prescription p er s k r ih p sh ih n -31579a31678 -> prescriptions p er s k r ih p sh ih n z -31770a31870 -> products p r aa d ax k s -31821a31922 -> projects p r aa jh eh k s -31908a32010 -> protect p er t eh k t -31909a32012 -> protected p er t eh k t ih d -31911a32015 -> protection p er t eh k sh ih n -31914a32019 -> protection p er t eh k t ih v -32149a32255 -> quarter k ow r t er -32414a32521 -> read r iy d -32785a32893 -> rehabilitation r iy ax b ih l ih t ey sh ih n -33150a33259 -> resource r ih s ow r s -33151a33261 -> resources r iy s ow r s ih z -33539c33649 -< roots r uh t s ---- -> roots r uw t s -33929a34040 -> science s ay n s -34315a34427 -> seventy s eh v ih n iy -34319,34320c34431,34432 -< severe s ax v iy r -< severely s ax v iy r l iy ---- -> severe s ih v iy r -> severely s ih v iy r l iy -35060a35173 -> software s ao f w ey r -35083a35197 -> solid s ao l ih d -35084a35199 -> solidly s ao l ih d l iy -35750a35866 -> stood s t ih d -35854a35971 -> strictly s t r ih k l iy -35889c36006 -< stronger s t r ao ng er ---- -> stronger s t r ao ng g er -36192a36310,36311 -> supposed s p ow z -> supposed s p ow s -36510a36630 -> tastes t ey s -36856a36977 -> thoroughly th er r l iy -36866a36988 -> thousands th aw z ih n z -37081c37203 -< toots t uh t s ---- -> toots t uw t s -37157a37280 -> toward t w ow r d -37158a37282 -> towards t w ow r d z -37564a37689 -> twenties t w eh n iy z -37565a37691 -> twentieth t w eh n iy ih th -37637a37764 -> unacceptable ah n ae k s eh p ax b ax l -37728a37856 -> understand ah n d er s t ae n -37860a37989 -> unless ih n l eh s -38040a38170 -> use y uw z -38049a38180 -> uses y uw z ih z -38125a38257 -> various v ah r iy ih s -38202a38335 -> versus v er s ih z -38381c38514 -< wacko w ae k ow ---- -> wacko w ey k ow -38455c38588 -< wanna w aa n ax ---- -> wanna w ah n ax -38675c38808 -< whatnot w ah t n aa t ---- -> whatnot w aa t n aa t -38676a38810 -> whatsoever w aa t s ow eh v er -38890c39024 -< wok w aa k ---- -> wok w ao k -38910a39045 -> wondering w ah n d r ih ng diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/eval2000_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/eval2000_data_prep.sh deleted file mode 100644 index 0c08a92d1b3c764c30412f69f6626d5483a75e0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/eval2000_data_prep.sh +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env bash - -# Hub-5 Eval 2000 data preparation -# Author: Arnab Ghoshal (Jan 2013) - -# To be run from one directory above this script. - -# The input is two directory names (possibly the same) containing the -# 2000 Hub5 english evaluation test set and transcripts, which are -# respectively: LDC2002S09 LDC2002T43 -# e.g. see -# http://www.ldc.upenn.edu/Catalog/catalogEntry.jsp?catalogId=LDC2002S09 -# http://www.ldc.upenn.edu/Catalog/CatalogEntry.jsp?catalogId=LDC2002T43 -# -# Example usage: -# local/eval2000_data_prep_edin.sh /exports/work/inf_hcrc_cstr_general/corpora/hub5/2000 /exports/work/inf_hcrc_cstr_general/corpora/hub5/2000/transcr -# The first directory ($sdir) contains the speech data, and the directory -# $sdir/english/ must exist. 
-# The second directory ($tdir) contains the transcripts, and the directory -# $tdir/reference must exist; in particular we need the file -# $tdir/reference/hub5e00.english.000405.stm - -if [ $# -ne 2 ]; then - echo "Usage: "`basename $0`" " - echo "See comments in the script for more details" - exit 1 -fi - -sdir=$1 -tdir=$2 -[ ! -d $sdir/english ] \ - && echo Expecting directory $sdir/english to be present && exit 1; -[ -d $tdir/2000_hub5_eng_eval_tr ] \ - && tdir=$tdir/2000_hub5_eng_eval_tr -[ ! -d $tdir/reference ] \ - && echo Expecting directory $tdir/reference to be present && exit 1; - -. ./path.sh - -dir=data/local/eval2000 -mkdir -p $dir - -find -L $sdir/english -iname '*.sph' | sort > $dir/sph.flist -sed -e 's?.*/??' -e 's?.sph??' $dir/sph.flist | paste - $dir/sph.flist \ - > $dir/sph.scp - -# Get segments file... -# segments file format is: utt-id side-id start-time end-time, e.g.: -# sw02001-A_000098-001156 sw02001-A 0.98 11.56 -pem=$sdir/english/hub5e_00.pem -[ ! -f $pem ] && echo "$0: No such file $pem" && exit 1; -# pem file has lines like: -# en_4156 A unknown_speaker 301.85 302.48 - -# we ignore the warnings below for now, although they seem to indicate some problems -# with the data. -grep -v ';;' $pem \ - | awk '{ - spk=$1"-"$2; - utt=sprintf("%s_%06d-%06d",spk,$4*100,$5*100); - print utt,spk,$4,$5;}' \ - | sort -u | local/extend_segments.pl 0.1 > $dir/segments - -# stm file has lines like: -# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER -# TODO(arnab): We should really be lowercasing this since the Edinburgh -# recipe uses lowercase. This is not used in the actual scoring. -grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \ - | awk '{ - spk=$1"-"$2; - utt=sprintf("%s_%06d-%06d",spk,$4*100,$5*100); - printf utt; for(n=7;n<=NF;n++) printf(" %s", $n); print ""; }' \ - | sort > $dir/text.all - -# We'll use the stm file for sclite scoring. There seem to be various errors -# in the stm file that upset hubscr.pl, and we fix them here. -sed -e 's:((:(:' -e 's:::g' -e 's:::g' \ - $tdir/reference/hub5e00.english.000405.stm > $dir/stm -cp $tdir/reference/en20000405_hub5.glm $dir/glm - -# next line uses command substitution -# Just checking that the segments are the same in pem vs. stm. -! cmp <(awk '{print $1}' $dir/text.all) <(awk '{print $1}' $dir/segments) && \ - echo "Segments from pem file and stm file do not match." && exit 1; - -grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text - -# side A - channel 1, side B - channel 2 -bash tools/sph2wav.sh --nj 16 $dir/sph.scp $dir/segments $dir/wav.scp - -# create an utt2spk file that assumes each conversation side is -# a separate speaker. 
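The comments above spell out the `segments` convention (`utt-id side-id start-time end-time`, with the two 6-digit fields inside the utterance id holding times in hundredths of a second) and note that each conversation side is treated as its own speaker, which is what the `awk` one-liner below writes into `utt2spk`. A tiny illustrative parser, not part of the recipe, shows how those pieces fit together:

```python
def parse_utt_id(utt_id: str):
    """Split an id like 'sw02001-A_000098-001156' into side and times.

    The layout follows the comments above: the prefix is the conversation
    side (recording id plus channel) and the suffix holds start/end times
    in hundredths of a second.
    """
    side, times = utt_id.rsplit("_", 1)
    start, end = times.split("-")
    return side, int(start) / 100.0, int(end) / 100.0

utt = "sw02001-A_000098-001156"
side, start, end = parse_utt_id(utt)
print(side, start, end)   # sw02001-A 0.98 11.56
print(utt, side)          # the utt2spk entry: the side doubles as the speaker id
```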
-awk '{print $1,$2;}' $dir/segments > $dir/utt2spk -tools/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt - -# cp $dir/segments $dir/segments.tmp -# awk '{x=$3-0.05; if (x<0.0) x=0.0; y=$4+0.05; print $1, $2, x, y; }' \ -# $dir/segments.tmp > $dir/segments - -awk '{print $1}' $dir/wav_ori.scp \ - | perl -ane '$_ =~ m:^(\S+)-([AB])$: || die "bad label $_"; - print "$1-$2 $1 $2\n"; ' \ - > $dir/reco2file_and_channel || exit 1; - -dest=data/eval2000 -mkdir -p $dest -for x in wav.scp text utt2spk spk2utt; do - cp $dir/$x $dest/$x -done - -echo Data preparation and formatting completed for Eval 2000 -echo "(but not MFCC extraction)" - -tools/fix_data_dir.sh $dest -if [ $(wc -l < $dest/wav.scp) -ne 80 ]; then - echo "$0: error: expected 80 lines in wav.scp, got $(wc -l < $dest/wav.scp)" - exit 1; -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/extend_segments.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/extend_segments.pl deleted file mode 100644 index e8b4894d5f6cc4595cd0cf352200d045da8d87ff..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/extend_segments.pl +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter - -if (@ARGV != 1 || !($ARGV[0] =~ m/^-?\d+\.?\d*$/ && $ARGV[0] >= 0)) { - print STDERR "Usage: extend_segments.pl time-in-seconds segments.extended \n" . - "e.g. extend_segments.pl 0.25 segments.2\n" . - "This command modifies a segments file, with lines like\n" . - " \n" . - "by extending the beginning and end of each segment by a certain\n" . - "length of time. This script makes sure the output segments do not\n" . - "overlap as a result of this time-extension, and that there are no\n" . - "negative times in the output.\n"; - exit 1; -} - -$extend = $ARGV[0]; - -@all_lines = (); - -while () { - chop; - @A = split(" ", $_); - if (@A != 4) { - die "invalid line in segments file: $_"; - } - $line = @all_lines; # current number of lines. - ($utt_id, $reco_id, $start_time, $end_time) = @A; - - push @all_lines, [ $utt_id, $reco_id, $start_time, $end_time ]; # anonymous array. - if (! defined $lines_for_reco{$reco_id}) { - $lines_for_reco{$reco_id} = [ ]; # push new anonymous array. - } - push @{$lines_for_reco{$reco_id}}, $line; -} - -foreach $reco_id (keys %lines_for_reco) { - $ref = $lines_for_reco{$reco_id}; - @line_numbers = sort { ${$all_lines[$a]}[2] <=> ${$all_lines[$b]}[2] } @$ref; - - - { - # handle start of earliest segment as a special case. - $l0 = $line_numbers[0]; - $tstart = ${$all_lines[$l0]}[2] - $extend; - if ($tstart < 0.0) { $tstart = 0.0; } - ${$all_lines[$l0]}[2] = $tstart; - } - { - # handle end of latest segment as a special case. - $lN = $line_numbers[$#line_numbers]; - $tend = ${$all_lines[$lN]}[3] + $extend; - ${$all_lines[$lN]}[3] = $tend; - } - for ($i = 0; $i < $#line_numbers; $i++) { - $ln = $line_numbers[$i]; - $ln1 = $line_numbers[$i+1]; - $tend = ${$all_lines[$ln]}[3]; # end of earlier segment. - $tstart = ${$all_lines[$ln1]}[2]; # start of later segment. - if ($tend > $tstart) { - $utt1 = ${$all_lines[$ln]}[0]; - $utt2 = ${$all_lines[$ln1]}[0]; - print STDERR "Warning: for utterances $utt1 and $utt2, segments " . 
- "already overlap; leaving these times unchanged.\n"; - } else { - $my_extend = $extend; - $max_extend = 0.5 * ($tstart - $tend); - if ($my_extend > $max_extend) { $my_extend = $max_extend; } - $tend += $my_extend; - $tstart -= $my_extend; - ${$all_lines[$ln]}[3] = $tend; - ${$all_lines[$ln1]}[2] = $tstart; - } - } -} - -# leave the numbering of the lines unchanged. -for ($l = 0; $l < @all_lines; $l++) { - $ref = $all_lines[$l]; - ($utt_id, $reco_id, $start_time, $end_time) = @$ref; - printf("%s %s %.2f %.2f\n", $utt_id, $reco_id, $start_time, $end_time); -} - -__END__ - -# testing below. - -# ( echo a1 A 0 1; echo a2 A 3 4; echo b1 B 0 1; echo b2 B 2 3 ) | local/extend_segments.pl 1.0 -a1 A 0.00 2.00 -a2 A 2.00 5.00 -b1 B 0.00 1.50 -b2 B 1.50 4.00 -# ( echo a1 A 0 2; echo a2 A 1 3 ) | local/extend_segments.pl 1.0 -Warning: for utterances a1 and a2, segments already overlap; leaving these times unchanged. -a1 A 0.00 2.00 -a2 A 1.00 4.00 -# ( echo a1 A 0 2; echo a2 A 5 6; echo a3 A 3 4 ) | local/extend_segments.pl 1.0 -a1 A 0.00 2.50 -a2 A 4.50 7.00 -a3 A 2.50 4.50 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/format_acronyms_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/format_acronyms_dict.py deleted file mode 100644 index fa598dd03c33540c46d1ec324199a30c15c184f1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/format_acronyms_dict.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2015 Minhua Wu -# Apache 2.0 - -# convert acronyms in swbd dict to fisher convention -# IBM to i._b._m. -# BBC to b._b._c. -# BBCs to b._b._c.s -# BBC's to b._b._c.'s - -import argparse -import re - -__author__ = "Minhua Wu" - -parser = argparse.ArgumentParser(description="format acronyms to a._b._c.") -parser.add_argument("-i", "--input", help="Input lexicon", required=True) -parser.add_argument("-o", "--output", help="Output lexicon", required=True) -parser.add_argument( - "-L", "--Letter", help="Input single letter pronunciation", required=True -) -parser.add_argument("-M", "--Map", help="Output acronyms mapping", required=True) -args = parser.parse_args() - - -fin_lex = open(args.input, "r") -fin_Letter = open(args.Letter, "r") -fout_lex = open(args.output, "w") -fout_map = open(args.Map, "w") - -# Initialise single letter dictionary -dict_letter = {} -for single_letter_lex in fin_Letter: - items = single_letter_lex.split() - dict_letter[items[0]] = single_letter_lex[len(items[0]) + 1 :].strip() -fin_Letter.close() -# print dict_letter - -for lex in fin_lex: - items = lex.split() - word = items[0] - lexicon = lex[len(items[0]) + 1 :].strip() - # find acronyms from words with only letters and ' - pre_match = re.match(r"^[A-Za-z]+$|^[A-Za-z]+\'s$|^[A-Za-z]+s$", word) - if pre_match: - # find if words in the form of xxx's is acronym - if word[-2:] == "'s" and (lexicon[-1] == "s" or lexicon[-1] == "z"): - actual_word = word[:-2] - actual_lexicon = lexicon[:-2] - acronym_lexicon = "" - for w in actual_word: - acronym_lexicon = acronym_lexicon + dict_letter[w.upper()] + " " - if acronym_lexicon.strip() == actual_lexicon: - acronym_mapped = "" - acronym_mapped_back = "" - for w in actual_word[:-1]: - acronym_mapped = acronym_mapped + w.lower() + "._" - acronym_mapped_back = acronym_mapped_back + w.lower() + " " - acronym_mapped = acronym_mapped + actual_word[-1].lower() + ".'s" - acronym_mapped_back = ( - acronym_mapped_back + 
actual_word[-1].lower() + "'s" - ) - fout_map.write( - word + "\t" + acronym_mapped + "\t" + acronym_mapped_back + "\n" - ) - fout_lex.write(acronym_mapped + " " + lexicon + "\n") - else: - fout_lex.write(lex) - - # find if words in the form of xxxs is acronym - elif word[-1] == "s" and (lexicon[-1] == "s" or lexicon[-1] == "z"): - actual_word = word[:-1] - actual_lexicon = lexicon[:-2] - acronym_lexicon = "" - for w in actual_word: - acronym_lexicon = acronym_lexicon + dict_letter[w.upper()] + " " - if acronym_lexicon.strip() == actual_lexicon: - acronym_mapped = "" - acronym_mapped_back = "" - for w in actual_word[:-1]: - acronym_mapped = acronym_mapped + w.lower() + "._" - acronym_mapped_back = acronym_mapped_back + w.lower() + " " - acronym_mapped = acronym_mapped + actual_word[-1].lower() + ".s" - acronym_mapped_back = ( - acronym_mapped_back + actual_word[-1].lower() + "'s" - ) - fout_map.write( - word + "\t" + acronym_mapped + "\t" + acronym_mapped_back + "\n" - ) - fout_lex.write(acronym_mapped + " " + lexicon + "\n") - else: - fout_lex.write(lex) - - # find if words in the form of xxx (not ended with 's or s) is acronym - elif word.find("'") == -1 and word[-1] != "s": - acronym_lexicon = "" - for w in word: - acronym_lexicon = acronym_lexicon + dict_letter[w.upper()] + " " - if acronym_lexicon.strip() == lexicon: - acronym_mapped = "" - acronym_mapped_back = "" - for w in word[:-1]: - acronym_mapped = acronym_mapped + w.lower() + "._" - acronym_mapped_back = acronym_mapped_back + w.lower() + " " - acronym_mapped = acronym_mapped + word[-1].lower() + "." - acronym_mapped_back = acronym_mapped_back + word[-1].lower() - fout_map.write( - word + "\t" + acronym_mapped + "\t" + acronym_mapped_back + "\n" - ) - fout_lex.write(acronym_mapped + " " + lexicon + "\n") - else: - fout_lex.write(lex) - else: - fout_lex.write(lex) - - else: - fout_lex.write(lex) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/map_acronyms_transcripts.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/map_acronyms_transcripts.py deleted file mode 100644 index ba02aaec34b5b3be7a8fb51dd31abed23c1bacf5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/map_acronyms_transcripts.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2015 Minhua Wu -# Apache 2.0 - -# convert acronyms in swbd transcript to fisher convention -# according to first two columns in the input acronyms mapping - -import argparse -import re - -__author__ = "Minhua Wu" - -parser = argparse.ArgumentParser(description="format acronyms to a._b._c.") -parser.add_argument("-i", "--input", help="Input transcripts", required=True) -parser.add_argument("-o", "--output", help="Output transcripts", required=True) -parser.add_argument("-M", "--Map", help="Input acronyms mapping", required=True) -args = parser.parse_args() - -fin_map = open(args.Map, "r") -dict_acronym = {} -dict_acronym_noi = {} # Mapping of acronyms without I, i -for pair in fin_map: - items = pair.split("\t") - dict_acronym[items[0]] = items[1] - dict_acronym_noi[items[0]] = items[1] -fin_map.close() -del dict_acronym_noi["I"] -del dict_acronym_noi["i"] - - -fin_trans = open(args.input, "r") -fout_trans = open(args.output, "w") -for line in fin_trans: - items = line.split() - L = len(items) - # First pass mapping to map I as part of acronym - for i in range(L): - if items[i] == "I": - x = 0 - while i - 1 - x >= 0 and 
re.match(r"^[A-Z]$", items[i - 1 - x]): - x += 1 - - y = 0 - while i + 1 + y < L and re.match(r"^[A-Z]$", items[i + 1 + y]): - y += 1 - - if x + y > 0: - for bias in range(-x, y + 1): - items[i + bias] = dict_acronym[items[i + bias]] - - # Second pass mapping (not mapping 'i' and 'I') - for i in range(len(items)): - if items[i] in dict_acronym_noi.keys(): - items[i] = dict_acronym_noi[items[i]] - sentence = " ".join(items[1:]) - fout_trans.write(items[0] + " " + sentence.lower() + "\n") - -fin_trans.close() -fout_trans.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_data_download.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_data_download.sh deleted file mode 100644 index ee27048bf2f4295f330b7d9dec3597f317e0cda1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_data_download.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash - -# Switchboard-1 training data preparation customized for Edinburgh -# Author: Arnab Ghoshal (Jan 2013) - -# To be run from one directory above this script. - -## The input is some directory containing the switchboard-1 release 2 -## corpus (LDC97S62). Note: we don't make many assumptions about how -## you unpacked this. We are just doing a "find" command to locate -## the .sph files. - -. ./path.sh - -#check existing directories -if [ $# != 1 ]; then - echo "Usage: swbd1_data_download.sh /path/to/SWBD" - exit 1; -fi - -SWBD_DIR=$1 - -dir=data/local/train -mkdir -p $dir - -# Audio data directory check -if [ ! -d $SWBD_DIR ]; then - echo "Error: run.sh requires a directory argument" - exit 1; -fi - -# Trans directory check -if [ ! -d $SWBD_DIR/transcriptions/swb_ms98_transcriptions ]; then - ( - cd $dir; - if [ ! -d swb_ms98_transcriptions ]; then - echo " *** Downloading trascriptions and dictionary ***" - wget http://www.openslr.org/resources/5/switchboard_word_alignments.tar.gz || - wget http://www.isip.piconepress.com/projects/switchboard/releases/switchboard_word_alignments.tar.gz - tar -xf switchboard_word_alignments.tar.gz - fi - ) -else - echo "Directory with transcriptions exists, skipping downloading" - [ -f $dir/swb_ms98_transcriptions ] \ - || ln -sf $SWBD_DIR/transcriptions/swb_ms98_transcriptions $dir/ -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_data_prep.sh deleted file mode 100644 index 6dc8630863bb03e291ec50f7d7b497d8cdca4183..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_data_prep.sh +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/env bash - -# Switchboard-1 training data preparation customized for Edinburgh -# Author: Arnab Ghoshal (Jan 2013) - -# To be run from one directory above this script. - -## The input is some directory containing the switchboard-1 release 2 -## corpus (LDC97S62). Note: we don't make many assumptions about how -## you unpacked this. We are just doing a "find" command to locate -## the .sph files. - -## The second input is optional, which should point to a directory containing -## Switchboard transcriptions/documentations (specifically, the conv.tab file). -## If specified, the script will try to use the actual speaker PINs provided -## with the corpus instead of the conversation side ID (Kaldi default). 
We -## will be using "find" to locate this file so we don't make any assumptions -## on the directory structure. (Peng Qi, Aug 2014) - -. ./path.sh - -#check existing directories -if [ $# != 1 -a $# != 2 ]; then - echo "Usage: swbd1_data_prep.sh /path/to/SWBD [/path/to/SWBD_DOC]" - exit 1; -fi - -SWBD_DIR=$1 - -dir=data/local/train -mkdir -p $dir - - -# Audio data directory check -if [ ! -d $SWBD_DIR ]; then - echo "Error: run.sh requires a directory argument" - exit 1; -fi - -# Option A: SWBD dictionary file check -[ ! -f $dir/swb_ms98_transcriptions/sw-ms98-dict.text ] && \ - echo "SWBD dictionary file does not exist" && exit 1; - -# find sph audio files -find -L $SWBD_DIR -iname '*.sph' | sort > $dir/sph.flist - -n=`cat $dir/sph.flist | wc -l` -[ $n -ne 2435 ] && [ $n -ne 2438 ] && \ - echo Warning: expected 2435 or 2438 data data files, found $n - - -# (1a) Transcriptions preparation -# make basic transcription file (add segments info) -awk '{ - name=substr($1,1,6); gsub("^sw","sw0",name); side=substr($1,7,1); - stime=$2; etime=$3; - printf("%s-%s_%06.0f-%06.0f", - name, side, int(100*stime+0.5), int(100*etime+0.5)); - for(i=4;i<=NF;i++) printf(" %s", $i); printf "\n" -}' $dir/swb_ms98_transcriptions/*/*/*-trans.text > $dir/transcripts1.txt - -# test if trans. file is sorted -export LC_ALL=C; -sort -c $dir/transcripts1.txt || exit 1; # check it's sorted. - -# Remove SILENCE, and . - -# Note: we have [NOISE], [VOCALIZED-NOISE], [LAUGHTER], [SILENCE]. -# removing [SILENCE], and the and markers that mark -# speech to somone; we will give phones to the other three (NSN, SPN, LAU). -# There will also be a silence phone, SIL. -# **NOTE: modified the pattern matches to make them case insensitive -cat $dir/transcripts1.txt \ - | perl -ane 's:\s\[SILENCE\](\s|$):$1:gi; - s///gi; - s///gi; - print;' \ - | awk '{if(NF > 1) { print; } } ' > $dir/transcripts2.txt - -# **NOTE: swbd1_map_words.pl has been modified to make the pattern matches -# case insensitive -local/swbd1_map_words.pl -f 2- $dir/transcripts2.txt > $dir/text - -# format acronyms in text -python3 local/map_acronyms_transcripts.py -i $dir/text -o $dir/text_map \ - -M data/local/dict_nosp/acronyms.map -mv $dir/text_map $dir/text - -# (1c) Make segment files from transcript -#segments file format is: utt-id side-id start-time end-time, e.g.: -#sw02001-A_000098-001156 sw02001-A 0.98 11.56 -awk '{ - segment=$1; - split(segment,S,"[_-]"); - side=S[2]; audioname=S[1]; startf=S[3]; endf=S[4]; - print segment " " audioname "-" side " " startf/100 " " endf/100 -}' < $dir/text > $dir/segments - -sed -e 's?.*/??' -e 's?.sph??' $dir/sph.flist | paste - $dir/sph.flist \ - > $dir/sph.scp - -# side A - channel 1, side B - channel 2 -bash tools/sph2wav.sh --nj 16 $dir/sph.scp $dir/segments $dir/wav.scp - -# this file reco2file_and_channel maps recording-id (e.g. sw02001-A) -# to the file name sw02001 and the A, e.g. -# sw02001-A sw02001 A -# In this case it's trivial, but in other corpora the information might -# be less obvious. Later it will be needed for ctm scoring. -awk '{print $1}' $dir/wav_ori.scp \ - | perl -ane '$_ =~ m:^(\S+)-([AB])$: || die "bad label $_"; - print "$1-$2 $1 $2\n"; ' \ - > $dir/reco2file_and_channel || exit 1; - -awk '{spk=substr($1,1,9); print $1 " " spk}' $dir/segments > $dir/utt2spk \ - || exit 1; -sort -k 2 $dir/utt2spk | tools/utt2spk_to_spk2utt.pl > $dir/spk2utt || exit 1; - -# We assume each conversation side is a separate speaker. This is a very -# reasonable assumption for Switchboard. 
The actual speaker info file is at: -# http://www.ldc.upenn.edu/Catalog/desc/addenda/swb-multi-annot.summary - -# Copy stuff into its final locations [this has been moved from the format_data -# script] -mkdir -p data/train -for f in spk2utt utt2spk wav.scp text; do - cp data/local/train/$f data/train/$f || exit 1; -done - -if [ $# == 2 ]; then # fix speaker IDs - find $2 -name conv.tab > $dir/conv.tab - local/swbd1_fix_speakerid.pl `cat $dir/conv.tab` data/train - tools/utt2spk_to_spk2utt.pl data/train/utt2spk.new > data/train/spk2utt.new - # patch files - for f in spk2utt utt2spk text segments; do - cp data/train/$f data/train/$f.old || exit 1; - cp data/train/$f.new data/train/$f || exit 1; - done - rm $dir/conv.tab -fi - -echo Switchboard-1 data preparation succeeded. - -utils/fix_data_dir.sh data/train diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_fix_speakerid.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_fix_speakerid.pl deleted file mode 100644 index 785493928949377787d816af954ab34f31e73edb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_fix_speakerid.pl +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter - -# Author: Peng Qi (pengqi@cs.stanford.edu) -# This script maps Switchboard speaker IDs to the true physical speakers -# and fixes the utterances IDs accordingly. Expected to be run one level of -# directory above. - -sub trim { - (my $s = $_[0]) =~ s/^\s+|\s+$//g; - return $s; -} - -if ($#ARGV != 1) { - print "Usage: swbd1_fix_speakerid.pl \n"; - print "E.g.: swbd1_fix_speakerid.pl /datasets/SWBD1Transcripts/tables/conv.tab data/train\n"; -} - -$tab_file = $ARGV[0]; -$dir = $ARGV[1]; - -%conv_to_spk = (); - -open(my $conv_tab, '<', $tab_file) or die "Could not open '$tab_file' $!\n"; - -while (my $line = <$conv_tab>) { - chomp $line; - - my @fields = split "," , $line; - #$fields[0] = trim($fields[0]); - $fields[2] = trim($fields[2]); - $fields[3] = trim($fields[3]); - $conv_to_spk{'sw0' . $fields[0] . '-A'} = $fields[2]; - $conv_to_spk{'sw0' . $fields[0] . '-B'} = $fields[3]; -} - -close($conv_tab); - -# fix utt2spk - -%missingconv = (); - -open(my $utt2spk, '<', $dir . '/utt2spk') or die "Could not open '$dir/utt2spk' $!\n"; -open(my $utt2spk_new, '>', $dir . '/utt2spk.new'); - -while (my $line = <$utt2spk>) { - chomp $line; - - my @fields = split " " , $line; - my $convid = substr $fields[0], 0, 9; - - if (exists $conv_to_spk{ $convid }) { - my $spkid = $conv_to_spk{ $convid }; - $spkid = "sw" . $spkid; - my $newuttid = $spkid . '-' . (substr $fields[0], 2); - - print $utt2spk_new "$newuttid $spkid\n"; - } else { - my $convid = substr $convid, 3, 4; - $missingconv{$convid} = 1; - - print $utt2spk_new $fields[0]." ".$fields[1]."\n"; - } -} - -close($utt2spk); -close($utt2spk_new); - -foreach my $conv (keys %missingconv) { - print "Warning: Conversation ID '$conv' not found in conv.tab, retaining old speaker IDs\n" -} - -# fix segments and text - -foreach my $file ('segments','text') { - open(my $oldfile, '<', "$dir/$file") or die "Could not open '$dir/$file' $!\n"; - open(my $newfile, '>', "$dir/$file.new"); - - while (my $line = <$oldfile>) { - chomp $line; - - my $convid = substr $line, 0, 9; - if (exists $conv_to_spk{$convid}) { - my $spkid = $conv_to_spk{$convid}; - print $newfile "sw$spkid-" . (substr $line, 2) . 
"\n"; - } else { - print $newfile "$line\n"; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_map_words.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_map_words.pl deleted file mode 100644 index 4fb8d4ffe7292121d3e25dd11c2afb0c9386ea0e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_map_words.pl +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env perl - -# Modified from swbd_map_words.pl in Kaldi s5 recipe to make pattern -# matches case-insensitive --Arnab (Jan 2013) - -if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } -} - - -while (<>) { - @A = split(" ", $_); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - # e.g. [LAUGHTER-STORY] -> STORY; - $a =~ s:(|\-)^\[LAUGHTER-(.+)\](|\-)$:$1$2$3:i; - # $1 and $3 relate to preserving trailing "-" - $a =~ s:^\[(.+)/.+\](|\-)$:$1$2:; # e.g. [IT'N/ISN'T] -> IT'N ... note, - # 1st part may include partial-word stuff, which we process further below, - # e.g. [LEM[GUINI]-/LINGUINI] - # the (|\_) at the end is to accept and preserve trailing -'s. - $a =~ s:^(|\-)\[[^][]+\](.+)$:-$2:; # e.g. -[AN]Y , note \047 is quote; - # let the leading - be optional on input, as sometimes omitted. - $a =~ s:^(.+)\[[^][]+\](|\-)$:$1-:; # e.g. AB[SOLUTE]- -> AB-; - # let the trailing - be optional on input, as sometimes omitted. - $a =~ s:([^][]+)\[.+\]$:$1:; # e.g. EX[SPECIALLY]-/ESPECIALLY] -> EX- - # which is a mistake in the input. - $a =~ s:^\{(.+)\}$:$1:; # e.g. {YUPPIEDOM} -> YUPPIEDOM - $a =~ s:[A-Z]\[([^][])+\][A-Z]:$1-$3:i; # e.g. AMMU[N]IT- -> AMMU-IT- - $a =~ s:_\d$::; # e.g. THEM_1 -> THEM - } - $A[$n] = $a; - } - print join(" ", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_prepare_dict.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_prepare_dict.sh deleted file mode 100644 index 8b5962d1f3698d2e77cbdf3140badc4a66e867fd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/local/swbd1_prepare_dict.sh +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env bash - -# Formatting the Mississippi State dictionary for use in Edinburgh. Differs -# from the one in Kaldi s5 recipe in that it uses lower-case --Arnab (Jan 2013) - -# To be run from one directory above this script. - -. ./path.sh - -#check existing directories -[ $# != 0 ] && echo "Usage: local/swbd1_data_prep.sh" && exit 1; - -srcdir=data/local/train # This is where we downloaded some stuff.. -dir=data/local/dict_nosp -mkdir -p $dir -srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text - -# assume swbd_p1_data_prep.sh was done already. -[ ! 
-f "$srcdict" ] && echo "$0: No such file $srcdict" && exit 1; - -cp $srcdict $dir/lexicon0.txt || exit 1; -patch 0' | sort > $dir/lexicon1.txt || exit 1; - -cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ - grep -v sil > $dir/nonsilence_phones.txt || exit 1; - -( echo sil; echo spn; echo nsn; echo lau ) > $dir/silence_phones.txt - -echo sil > $dir/optional_silence.txt - -# No "extra questions" in the input to this setup, as we don't -# have stress or tone. -echo -n >$dir/extra_questions.txt - -cp local/MSU_single_letter.txt $dir/ -# Add to the lexicon the silences, noises etc. -# Add single letter lexicon -# The original swbd lexicon does not have precise single letter lexicion -# e.g. it does not have entry of W -( echo '!sil sil'; echo '[vocalized-noise] spn'; echo '[noise] nsn'; \ - echo '[laughter] lau'; echo ' spn' ) \ - | cat - $dir/lexicon1.txt $dir/MSU_single_letter.txt > $dir/lexicon2.txt || exit 1; - -# Map the words in the lexicon. That is-- for each word in the lexicon, we map it -# to a new written form. The transformations we do are: -# remove laughter markings, e.g. -# [LAUGHTER-STORY] -> STORY -# Remove partial-words, e.g. -# -[40]1K W AH N K EY -# becomes -1K -# and -# -[AN]Y IY -# becomes -# -Y -# -[A]B[OUT]- B -# becomes -# -B- -# Also, curly braces, which appear to be used for "nonstandard" -# words or non-words, are removed, e.g. -# {WOLMANIZED} W OW L M AX N AY Z D -# -> WOLMANIZED -# Also, mispronounced words, e.g. -# [YEAM/YEAH] Y AE M -# are changed to just e.g. YEAM, i.e. the orthography -# of the mispronounced version. -# Note-- this is only really to be used in training. The main practical -# reason is to avoid having tons of disambiguation symbols, which -# we otherwise would get because there are many partial words with -# the same phone sequences (most problematic: S). -# Also, map -# THEM_1 EH M -> THEM -# so that multiple pronunciations just have alternate entries -# in the lexicon. - -local/swbd1_map_words.pl -f 1 $dir/lexicon2.txt | sort -u \ - > $dir/lexicon3.txt || exit 1; - -python3 local/format_acronyms_dict.py -i $dir/lexicon3.txt -o $dir/lexicon4.txt \ - -L $dir/MSU_single_letter.txt -M $dir/acronyms_raw.map -cat $dir/acronyms_raw.map | sort -u > $dir/acronyms.map - -( echo 'i ay' )| cat - $dir/lexicon4.txt | tr '[A-Z]' '[a-z]' | sort -u > $dir/lexicon5.txt - -pushd $dir >&/dev/null -ln -sf lexicon5.txt lexicon.txt # This is the final lexicon. -popd >&/dev/null -rm $dir/lexiconp.txt 2>/dev/null -echo Prepared input dictionary and phone-sets for Switchboard phase 1. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. 
-export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/run.sh deleted file mode 100644 index 9b94c8cc1d54bfb71d164e8fca8d4d5b421029b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/run.sh +++ /dev/null @@ -1,250 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1" -# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl -# communication. More details can be found in -# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html -# export NCCL_SOCKET_IFNAME=ens4f1 -export NCCL_DEBUG=INFO -stage=0 # start from 0 if you need to start from data preparation -stop_stage=5 -# The num of nodes or machines used for multi-machine training -# Default 1 for single machine/node -# NFS will be needed if you want run multi-machine training -num_nodes=1 -# The rank of each node or machine, range from 0 to num_nodes -1 -# The first node/machine sets node_rank 0, the second one sets node_rank 1 -# the third one set node_rank 2, and so on. Default 0 -node_rank=0 - -nj=16 -feat_dir=raw_wav -data_type=shard # raw or shard -num_utts_per_shard=1000 -prefetch=100 -# bpemode (unigram or bpe) -nbpe=2000 -bpemode=bpe - -# data directory -swbd1_dir=/home/backup_nfs2/hlyu/swbd/LDC97S62 -eval2000_dir="/home/backup_nfs2/hlyu/swbd/LDC2002S09/hub5e_00 /home/backup_nfs2/hlyu/swbd/LDC2002T43" - -train_set=train_nodup -train_config=conf/train_conformer.yaml -cmvn=true -dir=exp/conformer -checkpoint= - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=10 -decode_modes="ctc_greedy_search ctc_prefix_beam_search attention attention_rescoring" - -. 
tools/parse_options.sh || exit 1; - - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Data preparation - local/swbd1_data_download.sh ${swbd1_dir} - local/swbd1_prepare_dict.sh - local/swbd1_data_prep.sh ${swbd1_dir} - local/eval2000_data_prep.sh ${eval2000_dir} - # process the train set by - # 1) convert lower to upper - # 2) remove ._._ -1 symbols from text - # 3) subset training set and dev set - # 4) remove duplicated utterances - cp data/train/text data/train/text.org - paste -d" " <(cut -f 1 -d" " data/train/text.org) \ - <(cut -f 2- -d" " data/train/text.org | tr "[:lower:]" "[:upper:]") > data/train/text - sed -i 's/\._/ /g; s/\.//g; s/THEM_1/THEM/g' data/train/text - tools/subset_data_dir.sh --first data/train 4000 data/train_dev # 5hr 6min - n=$(($(wc -l < data/train/text) - 4000)) - tools/subset_data_dir.sh --last data/train ${n} data/train_nodev - tools/data/remove_dup_utts.sh 300 data/train_nodev data/train_nodup - # process eval2000 set by - # 1) remove tags (%AH) (%HESITATION) (%UH) - # 2) remove - # 3) remove "(" or ")" - # 4) remove file with empty text - cp data/eval2000/text data/eval2000/text.org - paste -d "" \ - <(cut -f 1 -d" " data/eval2000/text.org) \ - <(awk '{$1=""; print toupper($0)}' data/eval2000/text.org \ - | perl -pe 's| \(\%.*\)||g' | perl -pe 's| \<.*\>||g' \ - | sed -e "s/(//g" -e "s/)//g") \ - | sed -e 's/\s\+/ /g' > data/eval2000/text.org2 - awk -F ' ' '{if(length($2) != 0) print $0}' data/eval2000/text.org2 > data/eval2000/text - tools/fix_data_dir.sh data/eval2000 -fi - - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - # For wav feature, just copy the data. Fbank extraction is done in training - mkdir -p ${feat_dir} - for x in ${train_set} train_dev eval2000; do - cp -r data/${x} ${feat_dir} - done - tools/compute_cmvn_stats.py --num_workers 16 --train_config ${train_config} \ - --in_scp data/${train_set}/wav.scp \ - --out_cmvn ${feat_dir}/${train_set}/global_cmvn - -fi - -dict=data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt -bpemodel=data/lang_char/${train_set}_${bpemode}${nbpe} -echo "dictionary: ${dict}" -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - ### Task dependent. You have to check non-linguistic symbols used in the corpus. - echo "stage 2: Dictionary and Json Data Preparation" - mkdir -p data/lang_char/ - - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - # we borrowed these code and scripts which are related bpe from ESPnet. 
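The two `echo` lines above reserve ids 0 and 1 (the comment marks id 0 as the CTC blank; id 1 is, by the usual WeNet convention, the unknown token), the pipeline below appends the sorted BPE pieces with `awk '{print $0 " " NR+1}'` so their ids start at 2, and the final token is added with an id equal to the dictionary's line count at that point. A minimal sketch of the resulting layout; the special-token spellings are an assumption based on the common WeNet convention, only the numbering comes from the commands here:

```python
# Hypothetical pieces standing in for the sorted sentencepiece output.
pieces = ["▁THE", "▁A", "ING", "▁TO"]

dict_lines = ["<blank> 0",   # assumed spelling; id 0 is the CTC blank (see echo above)
              "<unk> 1"]     # assumed spelling; id 1 is the reserved unknown token
dict_lines += [f"{p} {i + 2}" for i, p in enumerate(pieces)]   # pieces get ids 2..N+1
dict_lines.append(f"<sos/eos> {len(dict_lines)}")              # assumed spelling; id = line count so far

print("\n".join(dict_lines))
```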
- cut -f 2- -d" " data/${train_set}/text > data/lang_char/input.txt - - tools/spm_train --input=data/lang_char/input.txt \ - --vocab_size=${nbpe} \ - --character_coverage=1.0 \ - --model_type=${bpemode} \ - --model_prefix=${bpemodel} \ - --input_sentence_size=100000000 \ - --user_defined_symbols="[LAUGHTER],[NOISE],[VOCALIZED-NOISE]" - tools/spm_encode --model=${bpemodel}.model \ - --output_format=piece < data/lang_char/input.txt | \ - tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict} - - num_token=$(cat ${dict} | wc -l) - echo " ${num_token}" >> ${dict} # - wc -l ${dict} -fi - - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "Prepare data, prepare required format" - for x in ${train_set} train_dev eval2000; do - if [ ${data_type} == "shard" ]; then - tools/make_shard_list.py --num_utts_per_shard ${num_utts_per_shard} \ - --num_threads ${nj} ${feat_dir}/${x}/wav.scp ${feat_dir}/${x}/text \ - $(realpath ${feat_dir}/${x}/shards) ${feat_dir}/${x}/data.list - else - tools/make_raw_list.py ${feat_dir}/${x}/wav.scp ${feat_dir}/${x}/text \ - ${feat_dir}/${x}/data.list - fi - done -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - # Training - mkdir -p ${dir} - INIT_FILE=${dir}/ddp_init - # You had better rm it manually before you start run.sh on first node. - # rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f ${INIT_FILE}) - echo "$0: init method is $init_method" - # The number of gpus runing on each node/machine - num_gpus=$(echo ${CUDA_VISIBLE_DEVICES} | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="nccl" - # The total number of processes/gpus, so that the master knows - # how many workers to wait for. - # More details about ddp can be found in - # https://pytorch.org/tutorials/intermediate/dist_tuto.html - world_size=`expr ${num_gpus} \* ${num_nodes}` - echo "total gpus is: ${world_size}" - cmvn_opts= - ${cmvn} && cp ${feat_dir}/${train_set}/global_cmvn ${dir} - ${cmvn} && cmvn_opts="--cmvn ${dir}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < ${num_gpus}; ++i)); do - { - gpu_id=$(echo ${CUDA_VISIBLE_DEVICES} | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. 
- rank=`expr ${node_rank} \* ${num_gpus} + ${i}` - python wenet/bin/train.py --gpu ${gpu_id} \ - --config ${train_config} \ - --data_type ${data_type} \ - --symbol_table ${dict} \ - --prefetch ${prefetch} \ - --bpe_model ${bpemodel}.model \ - --train_data ${feat_dir}/${train_set}/data.list \ - --cv_data ${feat_dir}/train_dev/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir ${dir} \ - --ddp.init_method ${init_method} \ - --ddp.world_size ${world_size} \ - --ddp.rank ${rank} \ - --ddp.dist_backend ${dist_backend} \ - --num_workers 4 \ - ${cmvn_opts} \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=${dir}/avg_${average_num}.pt - echo "do model average and final checkpoint is ${decode_checkpoint}" - python wenet/bin/average_model.py \ - --dst_model ${decode_checkpoint} \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size= - ctc_weight=0.5 - reverse_weight=0.0 - for mode in ${decode_modes}; do - { - test_dir=${dir}/test_${mode} - mkdir -p ${test_dir} - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data $feat_dir/eval2000/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --bpe_model $bpemodel.model \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --result_file $test_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - sed -i.bak -r 's/ //g' ${test_dir}/text - mv ${test_dir}/text ${test_dir}/text.bak2 - tools/spm_decode --model=${bpemodel}.model --input_format=piece \ - < ${test_dir}/text.bak2 | sed -e "s/▁/ /g" > ${test_dir}/text - python tools/compute-wer.py --char=1 --v=1 \ - $feat_dir/eval2000/text $test_dir/text > $test_dir/wer - } - done - wait -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. 
tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. ./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. 
- elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - 
pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' : 
means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. 
-has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! $skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = 
result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. - format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} 
- for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return 
sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . / - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if 
sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - 
print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... 
- else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." 
- parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) 
-# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n + 1 - - -def ol(n): - return n + 1 - 
- -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces <s> and </s> with <eps> (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "<s>" || $A[2] eq "</s>") { $A[2] = "<eps>"; } - if ($A[3] eq "<s>" || $A[3] eq "</s>") { $A[3] = "<eps>"; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ !
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 else 
sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. 
$(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! -x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, "w", encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) - - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." 
- exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. -[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. 
- awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . 
- "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . 
- "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. (Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. 
e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. 
- -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! -d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! 
cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." - exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! 
cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! 
cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. 
-# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. 
- -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! 
s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python 
../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - 
char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
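get_frames_timestamp and get_labformat above turn a frame-level CTC alignment into start/end times by assuming a 10 ms frame hop scaled by the encoder subsampling factor. The core arithmetic, as a small sketch (token grouping simplified, names illustrative):

```python
def frames_to_segments(timestamp, char_dict, subsample, hop_s=0.01):
    """timestamp: list of frame groups, one group per emitted token
    (as produced by get_frames_timestamp); each frame covers
    hop_s * subsample seconds of audio."""
    segments, begin = [], 0.0
    for frames in timestamp:
        duration = len(frames) * hop_s * subsample
        # the last non-blank id in the group is the emitted token
        token = next((i for i in reversed(frames) if i != 0), 0)
        segments.append((begin, begin + duration, char_dict[token]))
        begin += duration
    return segments
```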
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
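average_model.py above selects the num best checkpoints by cv_loss (or the most recent ones) and averages their parameters elementwise, using torch.true_divide as the script's comment notes for PyTorch >= 1.6. The averaging step itself reduces to the following minimal sketch (the checkpoint paths in the usage line are assumptions):

```python
import torch

def average_checkpoints(paths):
    """Elementwise average of the state dicts stored at `paths`."""
    avg = None
    for path in paths:
        states = torch.load(path, map_location="cpu")
        if avg is None:
            avg = states
        else:
            for k in avg:
                avg[k] += states[k]
    for k in avg:
        # true_divide mirrors the original script's averaging behaviour
        avg[k] = torch.true_divide(avg[k], len(paths))
    return avg

# e.g. torch.save(average_checkpoints(["exp/30.pt", "exp/31.pt", "exp/32.pt"]), "exp/avg_3.pt")
```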
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath))
-
-    logger.info("Stage-2.3: check onnx_ctc and torch_ctc")
-    torch_output = ctc(hidden)
-    ort_session = onnxruntime.InferenceSession(ctc_outpath)
-    onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)})
-
-    np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0],
-                               rtol=1e-03, atol=1e-04)
-    meta = ort_session.get_modelmeta()
-    logger.info("custom_metadata_map={}".format(meta.custom_metadata_map))
-    logger.info("Check onnx_ctc, pass!")
-    return ctc, ort_session
-
-
-def export_decoder(asr_model, args):
-    logger.info("Currently, Decoder is not supported.")
-
-
-if __name__ == '__main__':
-    torch.manual_seed(777)
-    args = get_args()
-    args.ln_run_on_bpu = False
-    # NOTE(xcsong): XJ3 BPU only support static shapes
-    assert args.chunk_size > 0
-    assert args.num_decoding_left_chunks > 0
-    os.system("mkdir -p " + args.output_dir)
-    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
-
-    with open(args.config, 'r') as fin:
-        configs = yaml.load(fin, Loader=yaml.FullLoader)
-
-    model = init_model(configs)
-    load_checkpoint(model, args.checkpoint)
-    model.eval()
-    print(model)
-
-    args.feature_size = configs['input_dim']
-    args.output_size = model.encoder.output_size()
-    args.decoding_window = (args.chunk_size - 1) * \
-        model.encoder.embed.subsampling_rate + \
-        model.encoder.embed.right_context + 1
-
-    export_encoder(model, args)
-    export_ctc(model, args)
-    export_decoder(model, args)
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_onnx_cpu.py
deleted file mode 100644
index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_onnx_cpu.py
+++ /dev/null
@@ -1,411 +0,0 @@
-# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder']
-    arguments['subsampling_rate'] = model.subsampling_rate()
-    arguments['right_context'] = model.right_context()
-    arguments['sos_symbol'] = model.sos_symbol()
-    arguments['eos_symbol'] = model.eos_symbol()
-    arguments['is_bidirectional_decoder'] = 1 \
-        if model.is_bidirectional_decoder() else 0
-
-    # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is
-    # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in
-    # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you
-    # want to use 16/-1 or any other streaming mode in `decoder_main`,
-    # please export onnx in the same config.
-    if arguments['left_chunks'] > 0:
-        assert arguments['chunk_size'] > 0  # -1/4 not supported
-
-    export_encoder(model, arguments)
-    export_ctc(model, arguments)
-    export_decoder(model, arguments)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_onnx_gpu.py
deleted file mode 100644
index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/export_onnx_gpu.py
+++ /dev/null
@@ -1,1056 +0,0 @@
-# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-import argparse
-import os
-import sys
-
-import torch
-import yaml
-import logging
-
-import torch.nn.functional as F
-from wenet.utils.checkpoint import load_checkpoint
-from wenet.transformer.ctc import CTC
-from wenet.transformer.decoder import TransformerDecoder
-from wenet.transformer.encoder import BaseEncoder
-from wenet.utils.init_model import init_model
-from wenet.utils.mask import make_pad_mask
-
-try:
-    import onnxruntime
-except ImportError:
-    print('Please install onnxruntime-gpu!')
-    sys.exit(1)
-
-logger = logging.getLogger(__file__)
-logger.setLevel(logging.INFO)
-
-
-class Encoder(torch.nn.Module):
-    def __init__(self,
-                 encoder: BaseEncoder,
-                 ctc: CTC,
-                 beam_size: int = 10):
-        super().__init__()
-        self.encoder = encoder
-        self.ctc = ctc
-        self.beam_size = beam_size
-
-    def forward(self, speech: torch.Tensor,
-                speech_lengths: torch.Tensor,):
-        """Encoder
-        Args:
-            speech: (Batch, Length, ...)
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
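The export flow above wraps the WeNet checkpoint in the `Encoder`/`Decoder` modules, emits a static-shape encoder plus a rescoring decoder with `torch.onnx.export`, and optionally converts both to FP16 via onnxmltools. As a minimal sketch (not part of this repository), the exported static encoder can be exercised with onnxruntime; the file name and the batch/sequence/feature sizes below are illustrative assumptions, with the feature dimension standing in for `configs["input_dim"]`:

```python
import numpy as np
import onnxruntime as ort

ENCODER_ONNX = "encoder_bs24_seq384_static.onnx"   # assumed artifact name from the static export
BATCH, SEQ_LEN, FEAT_DIM = 24, 384, 80             # assumed static shapes used at export time

session = ort.InferenceSession(ENCODER_ONNX, providers=["CPUExecutionProvider"])

# Dummy fbank features and per-utterance lengths, mirroring the exporter's dummy inputs.
speech = np.random.randn(BATCH, SEQ_LEN, FEAT_DIM).astype(np.float32)
speech_lengths = np.full((BATCH,), SEQ_LEN, dtype=np.int32)

# Output order follows the export's declared output_names.
encoder_out, encoder_out_lens, ctc_log_probs, beam_log_probs, beam_log_probs_idx = session.run(
    None, {"speech": speech, "speech_lengths": speech_lengths})
print(encoder_out.shape, beam_log_probs_idx.shape)
```

Because the graph is exported with fixed shapes, the dummy batch and sequence length here have to match whatever values were passed to the export script.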
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
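The attention-rescoring branch above packs every beam hypothesis with start/end-of-sentence tokens, builds a reversed copy for the right-to-left decoder, and pads all candidates to the longest one before running the decoder session. A small self-contained sketch of that packing; the `IGNORE_ID` value and the shared sos/eos index are assumptions for illustration:

```python
import numpy as np

IGNORE_ID = -1            # assumed padding id (WeNet uses -1)
sos = eos = 4232          # assumed: last index of the vocabulary

hyps = [[10, 11, 12], [10, 11]]            # beam_size = 2 hypotheses for one utterance
beam_size = len(hyps)
max_len = max(len(h) for h in hyps)

hyps_pad_sos_eos = np.full((1, beam_size, max_len + 2), IGNORE_ID, dtype=np.int64)
r_hyps_pad_sos_eos = np.full((1, beam_size, max_len + 2), IGNORE_ID, dtype=np.int64)
hyps_lens_sos = np.ones((1, beam_size), dtype=np.int32)

for j, cand in enumerate(hyps):
    length = len(cand) + 2                          # sos + tokens + eos
    hyps_pad_sos_eos[0, j, :length] = [sos] + cand + [eos]
    r_hyps_pad_sos_eos[0, j, :length] = [sos] + cand[::-1] + [eos]
    hyps_lens_sos[0, j] = len(cand) + 1             # length counting sos only

print(hyps_pad_sos_eos[0])
print(hyps_lens_sos)
```

The padded tensors have shape (batch, beam, max_len + 2), so the rescoring decoder can score every candidate of every utterance in a single batched call.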
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. 
- self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - 
static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
-    """
-
-    def __init__(
-            self,
-            size: int,
-            self_attn: torch.nn.Module,
-            feed_forward1: Optional[nn.Module] = None,
-            conv_module: Optional[nn.Module] = None,
-            feed_forward2: Optional[nn.Module] = None,
-            normalize_before: bool = False,
-            dropout_rate: float = 0.1,
-            concat_after: bool = False,
-    ):
-        super(SqueezeformerEncoderLayer, self).__init__()
-        self.size = size
-        self.self_attn = self_attn
-        self.layer_norm1 = nn.LayerNorm(size)
-        self.ffn1 = feed_forward1
-        self.layer_norm2 = nn.LayerNorm(size)
-        self.conv_module = conv_module
-        self.layer_norm3 = nn.LayerNorm(size)
-        self.ffn2 = feed_forward2
-        self.layer_norm4 = nn.LayerNorm(size)
-        self.normalize_before = normalize_before
-        self.dropout = nn.Dropout(dropout_rate)
-        self.concat_after = concat_after
-        if concat_after:
-            self.concat_linear = nn.Linear(size + size, size)
-        else:
-            self.concat_linear = nn.Identity()
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        mask: torch.Tensor,
-        pos_emb: torch.Tensor,
-        mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool),
-        att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)),
-        cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)),
-    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        # self attention module
-        residual = x
-        if self.normalize_before:
-            x = self.layer_norm1(x)
-        x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache)
-        if self.concat_after:
-            x_concat = torch.cat((x, x_att), dim=-1)
-            x = residual + self.concat_linear(x_concat)
-        else:
-            x = residual + self.dropout(x_att)
-        if not self.normalize_before:
-            x = self.layer_norm1(x)
-
-        # ffn module
-        residual = x
-        if self.normalize_before:
-            x = self.layer_norm2(x)
-        x = self.ffn1(x)
-        x = residual + self.dropout(x)
-        if not self.normalize_before:
-            x = self.layer_norm2(x)
-
-        # conv module
-        new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device)
-        residual = x
-        if self.normalize_before:
-            x = self.layer_norm3(x)
-        x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache)
-        x = residual + self.dropout(x)
-        if not self.normalize_before:
-            x = self.layer_norm3(x)
-
-        # ffn module
-        residual = x
-        if self.normalize_before:
-            x = self.layer_norm4(x)
-        x = self.ffn2(x)
-        # we do not use dropout here since it is inside feed forward function
-        x = residual + self.dropout(x)
-        if not self.normalize_before:
-            x = self.layer_norm4(x)
-
-        return x, mask, new_att_cache, new_cnn_cache
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/positionwise_feed_forward.py
deleted file mode 100644
index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/positionwise_feed_forward.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (c) 2019 Shigeki Karita
-#               2020 Mobvoi Inc (Binbin Zhang)
-#               2022 Ximalaya Inc (Yuguang Yang)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return padding * pad_value + 
input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 @@ -from 
typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - assert 
batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
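The deleted CTC module is a thin wrapper around `torch.nn.CTCLoss`, which expects log-probabilities shaped `(T, B, C)` and accepts padded targets together with their true lengths. A self-contained sketch with made-up shapes (index 0 is assumed to be the blank, as in the PyTorch default):

```python
# How torch.nn.CTCLoss consumes tensors prepared like those above.
import torch

T, B, C, Lmax = 50, 3, 30, 12                      # frames, batch, vocab size, max label length
log_probs = torch.randn(T, B, C).log_softmax(2)    # (T, B, C), already log-softmaxed
targets = torch.randint(1, C, (B, Lmax))           # padded labels; 0 is reserved for blank
input_lengths = torch.full((B,), T, dtype=torch.long)
target_lengths = torch.tensor([12, 9, 7])

ctc = torch.nn.CTCLoss(reduction="sum")
loss = ctc(log_probs, targets, input_lengths, target_lengths) / B   # batch-size average
print(loss.item())
```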
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
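The decoder forward removed above builds its target mask by AND-ing a padding mask with a lower-triangular causal mask. The helpers below are simplified stand-ins for `wenet.utils.mask.make_pad_mask` and `subsequent_mask`, just to show how the two masks broadcast into a `(B, L, L)` mask:

```python
# Simplified stand-ins for the wenet mask helpers, combined as in the decoder.
import torch

def make_pad_mask(lengths: torch.Tensor, max_len: int) -> torch.Tensor:
    # True where the position is padding
    return torch.arange(max_len).unsqueeze(0) >= lengths.unsqueeze(1)

def subsequent_mask(size: int) -> torch.Tensor:
    # True on and below the diagonal (positions a step may attend to)
    return torch.tril(torch.ones(size, size, dtype=torch.bool))

ys_in_lens = torch.tensor([4, 2])
maxlen = 4
tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1)   # (B, 1, L)
m = subsequent_mask(maxlen).unsqueeze(0)                     # (1, L, L)
tgt_mask = tgt_mask & m                                      # (B, L, L)
print(tgt_mask[1])   # causal mask clipped to the length-2 sequence
```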
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
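The positional-encoding buffer deleted above is the standard sinusoidal table given by the docstring formula, scaled-and-added to the input in `forward`. A direct numeric sketch with a small `d_model` and `max_len`:

```python
# Sinusoidal positional encoding table, built as in PositionalEncoding.__init__.
import math
import torch

d_model, max_len = 8, 16
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float32)
                     * -(math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0)                             # (1, max_len, d_model)

x = torch.randn(1, 10, d_model)
x = x * math.sqrt(d_model) + pe[:, :x.size(1)]   # scale-then-add, as in forward()
print(x.shape)  # torch.Size([1, 10, 8])
```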
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
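The chunk-by-chunk loop removed above feeds overlapping windows of `decoding_window` input frames every `stride` frames. Worked numbers, assuming the Conv2dSubsampling4 front end (`subsampling_rate = 4`, `right_context = 6`) and a decoding chunk size of 16:

```python
# Window arithmetic for streaming decoding with a 4x subsampling front end.
subsampling = 4
context = 6 + 1                                    # right_context + current frame
decoding_chunk_size = 16
stride = subsampling * decoding_chunk_size         # 64 input frames advanced per step
decoding_window = (decoding_chunk_size - 1) * subsampling + context   # 67 frames fed per step

num_frames = 200
starts = list(range(0, num_frames - context + 1, stride))
print(stride, decoding_window, starts)             # 64 67 [0, 64, 128, 192]
```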
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
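The deleted loss builds its smoothed target distribution exactly as the docstring example describes: every class receives `smoothing / (size - 1)` and the true label keeps the remaining confidence. A quick numeric check of that construction:

```python
# Smoothed target distribution, built with zeros_like-style fill + scatter_.
import torch

size, smoothing = 3, 0.1
confidence = 1.0 - smoothing
target = torch.tensor([0, 1, 2])

true_dist = torch.zeros(len(target), size)
true_dist.fill_(smoothing / (size - 1))
true_dist.scatter_(1, target.unsqueeze(1), confidence)
print(true_dist)
# tensor([[0.9000, 0.0500, 0.0500],
#         [0.0500, 0.9000, 0.0500],
#         [0.0500, 0.0500, 0.9000]])
```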
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
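The checkpoint helpers above pair every saved `.pt` state dict with a `.yaml` sidecar carrying training metadata, and reload weights non-strictly. A minimal sketch of that pattern, assuming a generic `torch.nn.Module`; the helper names and file paths here are illustrative and not part of WeNet:

```python
import datetime
import re

import torch
import yaml


def save_with_info(model: torch.nn.Module, path: str, infos: dict = None) -> None:
    """Save the state dict plus a YAML sidecar, mirroring save_checkpoint above."""
    torch.save(model.state_dict(), path)
    infos = dict(infos or {})
    infos["save_time"] = datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    with open(re.sub(r"\.pt$", ".yaml", path), "w") as fout:
        yaml.safe_dump(infos, fout)


def load_with_info(model: torch.nn.Module, path: str) -> dict:
    """Load weights non-strictly and return the sidecar metadata if present."""
    model.load_state_dict(torch.load(path, map_location="cpu"), strict=False)
    try:
        with open(re.sub(r"\.pt$", ".yaml", path)) as fin:
            return yaml.safe_load(fin) or {}
    except FileNotFoundError:
        return {}


# Illustrative usage with a toy module and a hypothetical path.
net = torch.nn.Linear(4, 2)
save_with_info(net, "toy_model.pt", {"epoch": 0})
print(load_with_info(net, "toy_model.pt"))
```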
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
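`remove_duplicates_and_blank` above implements the collapse step of greedy CTC decoding: runs of repeated frame labels are merged and the blank id (0) is dropped. A self-contained sketch of the same idea; the token ids in the usage line are made up for illustration:

```python
from typing import List


def collapse_ctc(hyp: List[int], blank_id: int = 0) -> List[int]:
    """Collapse repeated frame labels, then drop blanks (greedy CTC decoding)."""
    out: List[int] = []
    prev = None
    for tok in hyp:
        if tok != prev and tok != blank_id:
            out.append(tok)
        prev = tok
    return out


# Frame-level argmax ids -> label sequence.
print(collapse_ctc([3, 3, 0, 3, 5, 5, 0]))  # [3, 3, 5]
```

Because repeats are merged per frame run, it is the blank between the two `3`s that lets the label appear twice in the output.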
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
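The forced-alignment routine above first interleaves blanks with the label sequence via `insert_blank`, producing the `2L + 1` CTC state topology that the dynamic program then walks. A short sketch of that preprocessing step, assuming blank id 0 and a toy label sequence:

```python
import numpy as np


def interleave_blanks(label: np.ndarray, blank_id: int = 0) -> np.ndarray:
    """Return [blank, l1, blank, l2, ..., blank, lL, blank] (length 2L + 1)."""
    out = np.full(2 * len(label) + 1, blank_id, dtype=np.int64)
    out[1::2] = label
    return out


# Toy label ids, chosen only for illustration.
print(interleave_blanks(np.array([7, 7, 2])))  # [0 7 0 7 0 2 0]
```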
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
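`read_symbol_table` above expects one `token id` pair per line, i.e. the `units.txt`-style dictionary used by the data pipeline. A minimal sketch of the same parsing, fed from an in-memory string so the tokens and ids shown are purely illustrative:

```python
import io
from typing import Dict, TextIO


def parse_symbol_table(fin: TextIO) -> Dict[str, int]:
    """Parse 'token id' pairs into a dict, as the helper above does for a file."""
    table: Dict[str, int] = {}
    for line in fin:
        token, idx = line.strip().split()
        table[token] = int(idx)
    return table


# Illustrative dictionary content.
units = io.StringIO("<blank> 0\n<unk> 1\n你 2\n好 3\n<sos/eos> 4\n")
print(parse_symbol_table(units))
```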
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/swbd/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/README.md deleted file mode 100644 index 1555e465ef036790ae2067bf2a54a6a3e0f8b7a9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Performance Record - -## Conformer Result - -* Feature info: using fbank feature, dither, cmvn, without speed perturb (not supported segments yet) -* Training info: lr 0.001, batch size 20, 8 gpu, acc_grad 1, 240 epochs, dither 0.1 -* Decoding info: ctc_weight 0.5, average_num 10 - - -| decoding mode | Dev WER | Test WER | -|---------------------|---------|----------| -| attention rescoring | 9.54% | 8.66% | \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/conf/train_conformer.yaml deleted file mode 100644 index a610ea5fb7829e44a201f45845d415a14f16bd38..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 31 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - 
src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 2000 - min_length: 10 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 3 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 20 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 240 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/local/download_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/local/download_data.sh deleted file mode 100644 index 0ac85fcbd14a257ae73b6a2b81dbf6cef021bfea..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/local/download_data.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2014 Nickolay V. Shmyrev -# 2014 Brno University of Technology (Author: Karel Vesely) -# 2016 John Hopkins University (author: Daniel Povey) -# Apache 2.0 - -mkdir -p db - -cd db ### Note: the rest of this script is executed from the directory 'db'. - -# TED-LIUM database: -if [[ $(hostname -f) == *.clsp.jhu.edu ]] ; then - if [ ! -e TEDLIUM_release-3 ]; then - ln -sf /export/corpora5/TEDLIUM_release-3 - fi - echo "$0: linking the TEDLIUM data from /export/corpora5/TEDLIUM_release-3" -else - if [ ! -e TEDLIUM_release-3 ]; then - echo "$0: downloading TEDLIUM_release2 data (it won't re-download if it was already downloaded.)" - # the following command won't re-get it if it's already there - # because of the --continue switch. - wget --continue http://www.openslr.org/resources/51/TEDLIUM_release-3.tgz || exit 1 - tar xf "TEDLIUM_release-3.tgz" - else - echo "$0: not downloading or un-tarring TEDLIUM_release2 because it already exists." - fi -fi - - -num_sph=$(find -L TEDLIUM_release-3/legacy -name '*.sph' | wc -l) -# We mainly use TED-LIUM 3 "legacy" distribution, on which the dev and test datasets are the same as in TED-LIUM 2 (and TED-LIUM1). -# It contains 2351 sph files for training and 19 sph files for dev/test (total 2370). -# Because the "legacy" contains symbolic links to "data", we use `find -L`. -if [ "$num_sph" != 2370 ]; then - echo "$0: expected to find 2370 .sph files in the directory db/TEDLIUM_release3/legacy, found $num_sph" - exit 1 -fi - -exit 0 - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/local/join_suffix.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/local/join_suffix.py deleted file mode 100644 index e496c4d074144d6d99a2affc5ee72286d35542ef..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/local/join_suffix.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright 2014 Nickolay V. 
Shmyrev -# 2016 Johns Hopkins University (author: Daniel Povey) -# Apache 2.0 - - -import sys - -# This script joins together pairs of split-up words like "you 're" -> "you're". -# The TEDLIUM transcripts are normalized in a way that's not traditional for -# speech recognition. - -prev_line = "" -for line in sys.stdin: - if line == prev_line: - continue - items = line.split() - new_items = [] - i = 0 - while i < len(items): - if i < len(items) - 1 and items[i + 1][0] == "'": - new_items.append(items[i] + items[i + 1]) - i = i + 1 - else: - new_items.append(items[i]) - i = i + 1 - print(" ".join(new_items)) - prev_line = line diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/local/prepare_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/local/prepare_data.sh deleted file mode 100644 index f5a22c2c2913a9eb916c2347aad9b2282ec3cf3c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/local/prepare_data.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright 2014 Nickolay V. Shmyrev -# 2014 Brno University of Technology (Author: Karel Vesely) -# 2016 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - -# To be run from one directory above this script. - -. ./path.sh - -export LC_ALL=C - -sph2pipe=sph2pipe - -data_type=$1 - -# Prepare: test, train, -for set in dev test train; do - dir=data/$set.orig - mkdir -p $dir - - # Merge transcripts into a single 'stm' file, do some mappings: - # - -> : map dev stm labels to be coherent with train + test, - # - -> : --||-- - # - (2) -> null : remove pronunciation variants in transcripts, keep in dictionary - # - -> null : remove marked , it is modelled implicitly (in kaldi) - # - (...) 
-> null : remove utterance names from end-lines of train - # - it 's -> it's : merge words that contain apostrophe (if compound in dictionary, local/join_suffix.py) - { # Add STM header, so sclite can prepare the '.lur' file - echo ';; -;; LABEL "o" "Overall" "Overall results" -;; LABEL "f0" "f0" "Wideband channel" -;; LABEL "f2" "f2" "Telephone channel" -;; LABEL "male" "Male" "Male Talkers" -;; LABEL "female" "Female" "Female Talkers" -;;' - # Process the STMs - cat db/TEDLIUM_release-3/${data_type}/$set/stm/*.stm | sort -k1,1 -k2,2 -k4,4n | \ - sed -e 's:::' \ - -e 's:::' \ - -e 's:([0-9])::g' \ - -e 's:::g' \ - -e 's:([^ ]*)$::' | \ - awk '{ $2 = "A"; print $0; }' - } | local/join_suffix.py > data/$set.orig/stm - - # Prepare 'text' file - # - {NOISE} -> [NOISE] : map the tags to match symbols in dictionary - cat $dir/stm | grep -v -e 'ignore_time_segment_in_scoring' -e ';;' | \ - awk '{ printf ("%s-%07d-%07d", $1, $4*100, $5*100); - for (i=7;i<=NF;i++) { printf(" %s", $i); } - printf("\n"); - }' | tr '{}' '[]' | sort -k1,1 > $dir/text || exit 1 - - # Prepare 'segments', 'utt2spk', 'spk2utt' - cat $dir/text | cut -d" " -f 1 | awk -F"-" '{printf("%s %s %07.2f %07.2f\n", $0, $1, $2/100.0, $3/100.0)}' > $dir/segments - cat $dir/segments | awk '{print $1, $2}' > $dir/utt2spk - cat $dir/utt2spk | utils/utt2spk_to_spk2utt.pl > $dir/spk2utt - - # Prepare 'wav.scp', 'reco2file_and_channel' - cat $dir/spk2utt | awk -v data_type=$data_type -v set=$set -v pwd=$PWD '{ printf("%s %s/db/TEDLIUM_release-3/%s/%s/sph/%s.sph\n", $1, pwd, data_type, set, $1); }' > $dir/wav.scp - cat $dir/wav.scp | awk '{ print $1, $1, "A"; }' > $dir/reco2file_and_channel - - # Create empty 'glm' file - echo ';; empty.glm - [FAKE] => %HESITATION / [ ] __ [ ] ;; hesitation token - ' > data/$set.orig/glm - - # The training set seems to not have enough silence padding in the segmentations, - # especially at the beginning of segments. Extend the times. - if [ $set == "train" ]; then - mv data/$set.orig/segments data/$set.orig/segments.temp - utils/data/extend_segment_times.py --start-padding=0.15 \ - --end-padding=0.1 data/$set.orig/segments || exit 1 - rm data/$set.orig/segments.temp - fi - - # Check that data dirs are okay! - utils/validate_data_dir.sh --no-feats $dir || exit 1 -done - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. 
-export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/run.sh deleted file mode 100644 index 9697e02f1af3249d2de70fcbb42aa13e3dcef483..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/run.sh +++ /dev/null @@ -1,216 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl -# communication. More details can be found in -# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html -# export NCCL_SOCKET_IFNAME=ens4f1 -export NCCL_DEBUG=INFO -stage=0 # start from 0 if you need to start from data preparation -stop_stage=5 -# The num of nodes or machines used for multi-machine training -# Default 1 for single machine/node -# NFS will be needed if you want run multi-machine training -num_nodes=1 -# The rank of each node or machine, range from 0 to num_nodes -1 -# The first node/machine sets node_rank 0, the second one sets node_rank 1 -# the third one set node_rank 2, and so on. Default 0 -node_rank=0 - - -nj=16 -feat_dir=raw_wav - -data_type=raw # raw or shard -num_utts_per_shard=1000 - -data_cat=legacy - -train_set=train -train_config=conf/train_conformer.yaml -cmvn=true -dir=exp/conformer -checkpoint= - -# bpemode (unigram or bpe) -nbpe=500 -bpemode=unigram - - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=10 -decode_modes="ctc_greedy_search ctc_prefix_beam_search attention attention_rescoring" - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - echo "stage -1: Data Download" - local/download_data.sh # make soft link by yourself if you already have the dataset -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Data preparation - local/prepare_data.sh $data_cat - for dset in dev test train; do - utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 \ - data/${dset}.orig data/${dset} - done -fi - - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - # For wav feature, just copy the data. Fbank extraction is done in training - mkdir -p $feat_dir - for x in ${train_set} dev test; do - cp -r data/$x $feat_dir - done - tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \ - --in_scp data/${train_set}/wav.scp \ - --out_cmvn $feat_dir/$train_set/global_cmvn - -fi - -dict=data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt -bpemodel=data/lang_char/${train_set}_${bpemode}${nbpe} -echo "dictionary: ${dict}" -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - ### Task dependent. You have to check non-linguistic symbols used in the corpus. 
- echo "stage 2: Dictionary and Json Data Preparation" - mkdir -p data/lang_char/ - - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - # we borrowed these code and scripts which are related bpe from ESPnet. - cut -f 2- -d" " data/${train_set}/text > data/lang_char/input.txt - tools/spm_train --input=data/lang_char/input.txt --vocab_size=${nbpe} \ - --model_type=${bpemode} --model_prefix=${bpemodel} \ - --input_sentence_size=100000000 - tools/spm_encode --model=${bpemodel}.model \ - --output_format=piece < data/lang_char/input.txt | \ - tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict} - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # - wc -l ${dict} -fi - - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "Prepare data, prepare required format" - if [ ! -f $feat_dir/$train_set/segments ]; then - echo "$0: No such file segments" && exit 1; - else - for x in dev test ${train_set}; do - tools/make_raw_list.py --segments $feat_dir/$x/segments \ - $feat_dir/$x/wav.scp $feat_dir/$x/text $feat_dir/$x/data.list - done - fi -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - # Training - mkdir -p $dir - INIT_FILE=$dir/ddp_init - # You had better rm it manually before you start run.sh on first node. - # rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - # The number of gpus runing on each node/machine - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="nccl" - # The total number of processes/gpus, so that the master knows - # how many workers to wait for. - # More details about ddp can be found in - # https://pytorch.org/tutorials/intermediate/dist_tuto.html - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp ${feat_dir}/${train_set}/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. 
- rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --bpe_model $bpemodel.model \ - --train_data $feat_dir/$train_set/data.list \ - --cv_data $feat_dir/dev/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 8 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size= - ctc_weight=0.5 - reverse_weight=0.0 - for mode in ${decode_modes}; do - { - test_dir=$dir/test_${mode} - mkdir -p $test_dir - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data $feat_dir/test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --bpe_model $bpemodel.model \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --result_file $test_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - python tools/compute-wer.py --char=1 --v=1 \ - $feat_dir/test/text $test_dir/text > $test_dir/wer - } & - done - wait -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # Export the best model you want - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. 
tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. ./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. 
- elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - 
pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = 
{'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. 
-has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! $skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = 
result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. - format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - 
default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: 
Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print "<eps> 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf("<s> %d\n", NR+2); - printf("</s> %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 <eps> <eps>') -print('1 1 <blank> <eps>') -print('2 2 <blank> <eps>') -print('2 0 <eps> <eps>') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '<eps>' or phone == '<blank>': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '<eps>', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '<eps>')) - print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 <blank> <eps>') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '<eps>' or phone == '<blank>': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '<eps>', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '<eps>')) - print('{} {} {} {}'.format(node, 0, '<eps>', '<eps>')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n
+ 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v '<s> <s>' | \ - grep -v '</s> <s>' | \ - grep -v '</s> </s>' | \ - grep -v -i '<unk>' | \ - grep -v -i '<spoken_noise>' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '<SPOKEN_NOISE>': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '<unk>' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now.
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file 
except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 
else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, "w", 
encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-$ignore_oov = 0;
-
-for($x = 0; $x < 2; $x++) {
-  if ($ARGV[0] eq "--map-oov") {
-    shift @ARGV;
-    $map_oov = shift @ARGV;
-    if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") {
-      # disallow '-f', the empty string and anything ending in words.txt as the
-      # OOV symbol because these are likely command-line errors.
-      die "the --map-oov option requires an argument";
-    }
-  }
-  if ($ARGV[0] eq "-f") {
-    shift @ARGV;
-    $field_spec = shift @ARGV;
-    if ($field_spec =~ m/^\d+$/) {
-      $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
-    }
-    if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10)
-      if ($1 ne "") {
-        $field_begin = $1 - 1;  # Change to zero-based indexing.
-      }
-      if ($2 ne "") {
-        $field_end = $2 - 1;  # Change to zero-based indexing.
-      }
-    }
-    if (!defined $field_begin && !defined $field_end) {
-      die "Bad argument to -f option: $field_spec";
-    }
-  }
-}
-
-$symtab = shift @ARGV;
-if (!defined $symtab) {
-  print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" .
-    "options: [--map-oov <oov-symbol> ] [-f <field-range> ]\n" .
-    "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n";
-}
-open(F, "<$symtab") || die "Error opening symbol table file $symtab";
-while(<F>) {
-  @A = split(" ", $_);
-  @A == 2 || die "bad line in symbol table file: $_";
-  $sym2int{$A[0]} = $A[1] + 0;
-}
-
-if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up
-  if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; }
-  $map_oov = $sym2int{$map_oov};
-}
-
-$num_warning = 0;
-$max_warning = 20;
-
-while (<>) {
-  @A = split(" ", $_);
-  @B = ();
-  for ($n = 0; $n < @A; $n++) {
-    $a = $A[$n];
-    if ( (!defined $field_begin || $n >= $field_begin)
-         && (!defined $field_end || $n <= $field_end)) {
-      $i = $sym2int{$a};
-      if (!defined ($i)) {
-        if (defined $map_oov) {
-          if ($num_warning++ < $max_warning) {
-            print STDERR "sym2int.pl: replacing $a with $map_oov\n";
-            if ($num_warning == $max_warning) {
-              print STDERR "sym2int.pl: not warning for OOVs any more times\n";
-            }
-          }
-          $i = $map_oov;
-        } else {
-          $pos = $n+1;
-          die "sym2int.pl: undefined symbol $a (in position $pos)\n";
-        }
-      }
-      $a = $i;
-    }
-    push @B, $a;
-  }
-  print join(" ", @B);
-  print "\n";
-}
-if ($num_warning > 0) {
-  print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n";
-}
-
-exit(0);
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/text2token.py
deleted file mode 100644
index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/text2token.py
+++ /dev/null
@@ -1,171 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2017 Johns Hopkins University (Shinji Watanabe)
-# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan)
-# Copyright 2021 Mobvoi Inc. All Rights Reserved.
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
-    if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq "<eps>"){
-      set_to_fail();
-      print "--> ERROR: phone \"$p\" has disallowed written form\n";
-    }
-  }
-  $idx ++;
-}
-close(NS);
-$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n";
-print "\n";
-
-# Checking disjoint -------------------------------
-sub intersect {
-  my ($a, $b) = @_;
-  @itset = ();
-  %itset = ();
-  foreach(keys %$a) {
-    if(exists $b->{$_} and !$itset{$_}) {
-      push(@itset, $_);
-      $itset{$_} = 1;
-    }
-  }
-  return @itset;
-}
-
-print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n";
-@itset = intersect(\%silence, \%nonsilence);
-if(@itset == 0) {print "--> disjoint property is OK.\n";}
-else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";}
-print "\n";
-
-
-sub check_lexicon {
-  my ($lex, $num_prob_cols, $num_skipped_cols) = @_;
-  print "Checking $lex\n";
-  !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail();
-  my %seen_line = {};
-  $idx = 1; $success = 1; $crlf = 1;
-  print "--> reading $lex\n";
-  check_allowed_whitespace(\*L) or set_to_fail();
-  while (<L>) {
-    if ($crlf == 1 && m/\r/) {
-      print "--> ERROR: $lex contains Carriage Return (^M) characters.\n";
-      set_to_fail();
-      $crlf = 0;
-    }
-    if (defined $seen_line{$_}) {
-      print "--> ERROR: line '$_' of $lex is repeated\n";
-      set_to_fail();
-    }
-    $seen_line{$_} = 1;
-    if (! s/\n$//) {
-      print "--> ERROR: last line '$_' of $lex does not end in newline.\n";
-      set_to_fail();
-    }
-    my @col = split(" ", $_);
-    $word = shift @col;
-    if (!defined $word) {
-      print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail();
-    }
-    if ($word eq "<s>" || $word eq "</s>" || $word eq "<eps>" || $word eq "#0") {
-      print "--> ERROR: lexicon.txt contains forbidden word $word\n";
-      set_to_fail();
-    }
-    for ($n = 0; $n < $num_prob_cols; $n++) {
-      $prob = shift @col;
-      if (!($prob > 0.0 && $prob <= 1.0)) {
-        print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n";
-        set_to_fail();
-      }
-    }
-    for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; }
-    if (@col == 0) {
-      print "--> ERROR: lexicon.txt contains word $word with empty ";
-      print "pronunciation.\n";
-      set_to_fail();
-    }
-    foreach (0 .. @col-1) {
-      if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) {
-        print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt ";
-        print "(line $idx)\n";
-        set_to_fail();
-      }
-    }
-    $idx ++;
-  }
-  close(L);
-  $success == 0 || print "--> $lex is OK\n";
-  print "\n";
-}
-
-if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); }
-if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); }
-if (-f "$dict/lexiconp_silprob.txt") {
-  # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also
-  # exist.
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 
0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - 
target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. 
- self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - 
static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return 
padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 
@@ -from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = 
speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
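# ASRModel below is the hybrid CTC / attention encoder-decoder: forward()
# runs the shared encoder once, then combines the two branch losses as
#     loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
# (a branch is skipped entirely when its weight makes it unused); the
# attention loss is label-smoothed and, when reverse_weight > 0, blended
# with a right-to-left decoder loss in the same proportional way.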
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
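        # Rescoring (below): for each CTC n-best hypothesis i, the attention
        # score is the sum of decoder_out log-probs over its tokens plus the
        # <eos> term; when reverse_weight > 0 it is blended with the
        # right-to-left decoder score as
        #     score * (1 - reverse_weight) + r_score * reverse_weight,
        # then the CTC prefix-beam score hyp[1] is added with ctc_weight and
        # the best-scoring hypothesis is returned.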
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
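# [Editor's note, not part of the deleted file] The remainder of hlg_rescore
# ranks each n-best lattice path by a weighted sum of four scores; a minimal
# sketch of that rule (illustrative names, not from the original code):
def _combine_hlg_scores(am: float, ngram_lm: float, att: float, r_att: float,
                        lm_scale: float, decoder_scale: float,
                        r_decoder_scale: float) -> float:
    # am and ngram_lm come from the k2 lattice (acoustic and HLG LM scores);
    # att / r_att are the left-to-right and right-to-left attention-decoder
    # log-prob sums computed below. The best path is then mapped back to text
    # via word_table and symbol_table.
    return am + lm_scale * ngram_lm + decoder_scale * att + r_decoder_scale * r_att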
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
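# [Editor's note, not part of the deleted file] The score computation that
# follows is Transformer-XL style relative-position attention
# (https://arxiv.org/abs/1901.02860, Section 3.3); note that the original code
# deliberately skips rel_shift() because it complicates streaming. A minimal
# sketch, assuming q, k, p are already shaped (batch, head, time, d_k) and the
# biases are (head, d_k):
import math
import torch
def _rel_pos_scores(q, k, p, pos_bias_u, pos_bias_v, d_k: int) -> torch.Tensor:
    # content term: query plus content bias against the keys
    matrix_ac = torch.matmul(q + pos_bias_u.unsqueeze(1), k.transpose(-2, -1))
    # position term: query plus position bias against the projected pos. embeddings
    matrix_bd = torch.matmul(q + pos_bias_v.unsqueeze(1), p.transpose(-2, -1))
    return (matrix_ac + matrix_bd) / math.sqrt(d_k)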
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
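BaseEncoder.forward above derives its frame mask as `~make_pad_mask(xs_lens, T)`. For readers without the rest of wenet at hand, here is a self-contained sketch of that padding-mask semantics (my own reimplementation, assuming wenet's `make_pad_mask` returns True at padded positions):

```python
import torch

def make_pad_mask_sketch(lengths: torch.Tensor, max_len: int) -> torch.Tensor:
    """True at padded frames, False at real frames (assumed wenet semantics)."""
    steps = torch.arange(max_len, device=lengths.device).unsqueeze(0)  # (1, T)
    return steps >= lengths.unsqueeze(1)                               # (B, T)

xs_lens = torch.tensor([4, 2])
masks = ~make_pad_mask_sketch(xs_lens, 5).unsqueeze(1)  # (B, 1, T), True on valid frames
print(masks.int())
# tensor([[[1, 1, 1, 1, 0]],
#         [[1, 1, 0, 0, 0]]], dtype=torch.int32)
```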
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
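The streaming loop in `forward_chunk_by_chunk` above slides a window of `decoding_window` input frames forward with a stride of `subsampling * decoding_chunk_size`. A small worked example of that arithmetic (values assumed for a Conv2dSubsampling4 front end: subsampling rate 4, right context 6):

```python
subsampling = 4
context = 6 + 1                      # right_context + 1 current frame
decoding_chunk_size = 16

stride = subsampling * decoding_chunk_size                            # input frames advanced per step
decoding_window = (decoding_chunk_size - 1) * subsampling + context   # input frames fed per step

num_frames = 200
windows = [(cur, min(cur + decoding_window, num_frames))
           for cur in range(0, num_frames - context + 1, stride)]
print(stride, decoding_window)   # 64 67
print(windows)                   # [(0, 67), (64, 131), (128, 195), (192, 200)]
```

Each window overlaps its neighbour by `context - subsampling` frames; that overlap is what lets the subsampling convolutions run without a cache of their own, as the deleted comments explain.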
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
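The LabelSmoothingLoss deleted above replaces each one-hot target with confidence `1 - smoothing` on the true class and `smoothing / (size - 1)` spread over the remaining classes, then takes a masked KL divergence against the log-softmax outputs. A toy sketch of just the smoothed target distribution it builds (the helper name is mine):

```python
import torch

def smoothed_targets(labels: torch.Tensor, size: int, smoothing: float) -> torch.Tensor:
    """One smoothed row per label: 1 - smoothing on the label, the rest shared equally."""
    dist = torch.full((labels.size(0), size), smoothing / (size - 1))
    dist.scatter_(1, labels.unsqueeze(1), 1.0 - smoothing)
    return dist

print(smoothed_targets(torch.tensor([0, 1, 2]), size=3, smoothing=0.1))
# tensor([[0.9000, 0.0500, 0.0500],
#         [0.0500, 0.9000, 0.0500],
#         [0.0500, 0.0500, 0.9000]])
```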
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
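The Conv2dSubsampling modules deleted above shrink the time axis and the feature axis with kernel-3, stride-2 convolutions (plus a kernel-5, stride-3 one in the 1/6 variant), and slice the frame mask to match. A quick sanity check of the 1/4 case, with 80-dim fbank features and 100 input frames assumed:

```python
idim, time = 80, 100                     # fbank dim, input frames

# two Conv2d(kernel=3, stride=2) layers: L_out = (L_in - 3) // 2 + 1, applied twice,
# which matches the ((L - 1) // 2 - 1) // 2 expression used by the deleted code
t_out = ((time - 1) // 2 - 1) // 2       # frames left after 1/4 time subsampling
f_out = ((idim - 1) // 2 - 1) // 2       # frequency bins feeding Linear(odim * f_out, odim)

# the mask is sliced the same way: x_mask[:, :, 2::2][:, :, 2::2] keeps t_out positions
mask_len = len(range(time)[2::2][2::2])
print(t_out, f_out, mask_len)            # 24 19 24
```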
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
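The deleted `load_cmvn` helpers above turn accumulated feature statistics into a per-dimension mean and inverse standard deviation, so normalization reduces to `(x - mean) * istd`. A tiny numeric sketch of that conversion (toy statistics, json-style fields assumed):

```python
import numpy as np

mean_stat = np.array([10.0, 20.0])    # sum of features per dimension
var_stat = np.array([60.0, 250.0])    # sum of squared features per dimension
frame_num = 5                         # number of accumulated frames

means = mean_stat / frame_num                      # [2., 4.]
variance = var_stat / frame_num - means ** 2       # [8., 34.]
variance = np.maximum(variance, 1.0e-20)           # floor, as in the deleted code
istd = 1.0 / np.sqrt(variance)

print(means)   # [2. 4.]
print(istd)    # [0.35355339 0.17149859]
```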
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
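`log_add` above is a numerically stable log-sum-exp, typically used when merging CTC prefix scores in log space: log Σ exp(aᵢ) = a_max + log Σ exp(aᵢ - a_max). A standalone check of that identity, reusing the same formula outside the deleted module:

```python
import math
from typing import List

def log_add(args: List[float]) -> float:
    """Stable log(sum(exp(args))), same formula as the deleted helper."""
    if all(a == -float('inf') for a in args):
        return -float('inf')
    a_max = max(args)
    return a_max + math.log(sum(math.exp(a - a_max) for a in args))

probs = [0.2, 0.3]
print(log_add([math.log(p) for p in probs]))    # -0.693147... == log(0.5)
print(log_add([-float('inf'), -float('inf')]))  # -inf, handled explicitly instead of propagating NaN
```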
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/tedlium3/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/README.md deleted file mode 100644 index 811581f4881478ac430e59bf15d4c7c4f9f6eb83..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Performance Record - -## Conformer Result - -* Feature info: dither + specaug + speed perturb -* Training info: lr 0.002, warmup_steps 5000 batch size 16, 1 gpu, acc_grad 4, 120 epochs -* Decoding info: average_num 20 -* trans_type: phn - - -| decoding mode | test (wer) | -| :--------------------: | :---------: | -| ctc_greedy_search | 16.70% | -| ctc_prefix_beam_search | 16.60% | -| attention | 22.37% | -| attention_rescoring | 16.60% | - -## transformer Result - -* Feature info: dither + specaug + speed perturb -* Training info: lr 0.002, warmup_steps 5000 batch size 16, 1 gpu, acc_grad 4, 120 epochs -* Decoding info: average_num 20 -* trans_type: phn - - -| decoding mode | test (wer) | -| :--------------------: | :---------: | -| ctc_greedy_search | 17.78% | -| ctc_prefix_beam_search | 17.46% | -| attention | 21.77% | -| attention_rescoring | 17.06% | \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/conf/train_conformer.yaml deleted file mode 100644 index 92c8bfbedf1acf7525370b67d4c138bece38b523..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/conf/train_conformer.yaml +++ /dev/null @@ -1,78 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, 
conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - split_with_space: true - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 4 # -max_epoch: 120 -log_interval: 10 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 5000 # 20000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/conf/train_transformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/conf/train_transformer.yaml deleted file mode 100644 index 13d7a5887c9b0dff3f47822bec360be12b758742..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/conf/train_transformer.yaml +++ /dev/null @@ -1,73 +0,0 @@ -# network architecture -# encoder related -encoder: transformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.2 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder architecture type - normalize_before: true - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.2 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - split_with_space: true - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 4 -max_epoch: 120 -log_interval: 10 - -optim: adam -optim_conf: - lr: 0.002 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 5000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/dev_spk.list 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/dev_spk.list deleted file mode 100644 index 564da1f1ec672839fcd1531766800c774f0d1398..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/dev_spk.list +++ /dev/null @@ -1,50 +0,0 @@ -faks0 -fdac1 -fjem0 -mgwt0 -mjar0 -mmdb1 -mmdm2 -mpdf0 -fcmh0 -fkms0 -mbdg0 -mbwm0 -mcsh0 -fadg0 -fdms0 -fedw0 -mgjf0 -mglb0 -mrtk0 -mtaa0 -mtdt0 -mthc0 -mwjg0 -fnmr0 -frew0 -fsem0 -mbns0 -mmjr0 -mdls0 -mdlf0 -mdvc0 -mers0 -fmah0 -fdrw0 -mrcs0 -mrjm4 -fcal1 -mmwh0 -fjsj0 -majc0 -mjsw0 -mreb0 -fgjd0 -fjmg0 -mroa0 -mteb0 -mjfc0 -mrjr0 -fmml0 -mrws1 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/phones.60-48-39.map b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/phones.60-48-39.map deleted file mode 100644 index 6d24f094d198df6cc33190a835d5c19a2122827b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/phones.60-48-39.map +++ /dev/null @@ -1,61 +0,0 @@ -aa aa aa -ae ae ae -ah ah ah -ao ao aa -aw aw aw -ax ax ah -ax-h ax ah -axr er er -ay ay ay -b b b -bcl vcl sil -ch ch ch -d d d -dcl vcl sil -dh dh dh -dx dx dx -eh eh eh -el el l -em m m -en en n -eng ng ng -epi epi sil -er er er -ey ey ey -f f f -g g g -gcl vcl sil -h# sil sil -hh hh hh -hv hh hh -ih ih ih -ix ix ih -iy iy iy -jh jh jh -k k k -kcl cl sil -l l l -m m m -n n n -ng ng ng -nx n n -ow ow ow -oy oy oy -p p p -pau sil sil -pcl cl sil -q -r r r -s s s -sh sh sh -t t t -tcl cl sil -th th th -uh uh uh -uw uw uw -ux uw uw -v v v -w w w -y y y -z z z -zh zh sh diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/sph2pipe_process.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/sph2pipe_process.py deleted file mode 100644 index 25195407f54f2693f79d21f2febb533b0e6196ef..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/sph2pipe_process.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import sys -import os - - -def sph2pipe_wav(in_wav, tmp_out_wav, out_wav): - with open(in_wav, 'r', encoding='utf-8') as in_f: - with open(tmp_out_wav, 'w', encoding='utf-8') as tmp_out_f: - with open(out_wav, 'w', encoding='utf-8') as out_f: - for line in in_f: - _tmp = line.strip().split(' ') - wav_out_path = _tmp[4] - wav_out_path = wav_out_path.split('/') - wav_out_path[-4] = wav_out_path[-4] + '_pipe' - if not os.path.exists('/'.join(wav_out_path[:-1])): - os.makedirs('/'.join(wav_out_path[:-1])) - wav_out_path = '/'.join(wav_out_path) - tmp_out_f.write(' '.join(_tmp[1:5]) + ' ' + wav_out_path + - '\n') - out_f.write(_tmp[0] + ' ' + wav_out_path + '\n') - - -if __name__ == '__main__': - if len(sys.argv) != 4: - print('wrong input parameter') - raise NotImplementedError(len(sys.argv)) - in_wav = sys.argv[1] - tmp_out_wav = sys.argv[2] - out_wav = sys.argv[3] - sph2pipe_wav(in_wav, tmp_out_wav, out_wav) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/test_spk.list b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/test_spk.list deleted file mode 100644 index 47f6653d64d412e61bdd2e7a10646f581fdbf96d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/test_spk.list +++ /dev/null @@ -1,24 +0,0 @@ 
-mdab0 -mwbt0 -felc0 -mtas1 -mwew0 -fpas0 -mjmp0 -mlnt0 -fpkt0 -mlll0 -mtls0 -fjlm0 -mbpm0 -mklt0 -fnlp0 -mcmj0 -mjdh0 -fmgd0 -mgrt0 -mnjm0 -fdhc0 -mjln0 -mpam0 -fmld0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/timit_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/timit_data_prep.sh deleted file mode 100644 index 080fa1b9426962df22e019f436baf803568075db..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/timit_data_prep.sh +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2013 (Authors: Bagher BabaAli, Daniel Povey, Arnab Ghoshal) -# 2014 Brno University of Technology (Author: Karel Vesely) -# 2019 IIIT-Bangalore (Shreekantha Nadig) -# Apache 2.0. - - -create_glm_stm=false - -if [ $# -le 0 ]; then - echo "Argument should be the Timit directory, see ../run.sh for example." - exit 1; -fi - -dir=`pwd`/data/local/data -lmdir=`pwd`/data/local/nist_lm -mkdir -p $dir $lmdir -local=`pwd`/local -utils=`pwd`/utils -conf=`pwd`/conf - -if [ $2 ]; then - if [[ $2 = "char" || $2 = "phn" ]]; then - trans_type=$2 - else - echo "Transcript type must be one of [phn, char]" - echo $2 - fi -else - trans_type=phn -fi - -. ./path.sh - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe - -if ! command -v "${sph2pipe}" &> /dev/null; then - echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; - exit 1; -fi - -[ -f $local/test_spk.list ] || error_exit "$PROG: Eval-set speaker list not found."; -[ -f $local/dev_spk.list ] || error_exit "$PROG: dev-set speaker list not found."; - -# First check if the train & test directories exist (these can either be upper- -# or lower-cased -if [ ! -d $1/TRAIN -o ! -d $1/TEST ] && [ ! -d $1/train -o ! -d $1/test ]; then - echo "timit_data_prep.sh: Spot check of command line argument failed" - echo "Command line argument must be absolute pathname to TIMIT directory" - echo "with name like /export/corpora5/LDC/LDC93S1/timit/TIMIT" - exit 1; -fi - -# Now check what case the directory structure is -uppercased=false -train_dir=train -test_dir=test -if [ -d $1/TRAIN ]; then - uppercased=true - train_dir=TRAIN - test_dir=TEST -fi - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT - -# Get the list of speakers. The list of speakers in the 24-speaker core test -# set and the 50-speaker development set must be supplied to the script. All -# speakers in the 'train' directory are used for training. 
-if $uppercased; then - tr '[:lower:]' '[:upper:]' < $local/dev_spk.list > $tmpdir/dev_spk - tr '[:lower:]' '[:upper:]' < $local/test_spk.list > $tmpdir/test_spk - ls -d "$1"/TRAIN/DR*/* | sed -e "s:^.*/::" > $tmpdir/train_spk -else - tr '[:upper:]' '[:lower:]' < $local/dev_spk.list > $tmpdir/dev_spk - tr '[:upper:]' '[:lower:]' < $local/test_spk.list > $tmpdir/test_spk - ls -d "$1"/train/dr*/* | sed -e "s:^.*/::" > $tmpdir/train_spk -fi - -cd $dir -for x in train dev test; do - # First, find the list of audio files (use only si & sx utterances). - # Note: train & test sets are under different directories, but doing find on - # both and grepping for the speakers will work correctly. - find $1/{$train_dir,$test_dir} -not \( -iname 'SA*' \) -iname '*.WAV' \ - | grep -f $tmpdir/${x}_spk > ${x}_sph.flist - - sed -e 's:.*/\(.*\)/\(.*\).WAV$:\1_\2:i' ${x}_sph.flist \ - > $tmpdir/${x}_sph.uttids - paste $tmpdir/${x}_sph.uttids ${x}_sph.flist \ - | sort -k1,1 > ${x}_sph.scp - - cat ${x}_sph.scp | awk '{print $1}' > ${x}.uttids - - # Now, Convert the transcripts into our format (no normalization yet) - # Get the transcripts: each line of the output contains an utterance - # ID followed by the transcript. - - if [ $trans_type = "phn" ] - then - echo "phone transcript!" - find $1/{$train_dir,$test_dir} -not \( -iname 'SA*' \) -iname '*.PHN' \ - | grep -f $tmpdir/${x}_spk > $tmpdir/${x}_phn.flist - sed -e 's:.*/\(.*\)/\(.*\).PHN$:\1_\2:i' $tmpdir/${x}_phn.flist \ - > $tmpdir/${x}_phn.uttids - while read line; do - [ -f $line ] || error_exit "Cannot find transcription file '$line'"; - cut -f3 -d' ' "$line" | tr '\n' ' ' | perl -ape 's: *$:\n:;' - done < $tmpdir/${x}_phn.flist > $tmpdir/${x}_phn.trans - paste $tmpdir/${x}_phn.uttids $tmpdir/${x}_phn.trans \ - | sort -k1,1 > ${x}.trans - - elif [ $trans_type = "char" ] - then - echo "char transcript!" - find $1/{$train_dir,$test_dir} -not \( -iname 'SA*' \) -iname '*.WRD' \ - | grep -f $tmpdir/${x}_spk > $tmpdir/${x}_wrd.flist - sed -e 's:.*/\(.*\)/\(.*\).WRD$:\1_\2:i' $tmpdir/${x}_wrd.flist \ - > $tmpdir/${x}_wrd.uttids - while read line; do - [ -f $line ] || error_exit "Cannot find transcription file '$line'"; - cut -f3 -d' ' "$line" | tr '\n' ' ' | perl -ape 's: *$:\n:;' | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z A-Z]//g' - done < $tmpdir/${x}_wrd.flist > $tmpdir/${x}_wrd.trans - paste $tmpdir/${x}_wrd.uttids $tmpdir/${x}_wrd.trans \ - | sort -k1,1 > ${x}.trans - else - echo "WRONG!" - echo $trans_type - exit 0; - fi - - # Do normalization steps. - cat ${x}.trans | $local/timit_norm_trans.pl -i - -m $local/phones.60-48-39.map -to 39 | sort > $x.text || exit 1; - # cat ${x}.trans | sort > $x.text || exit 1; - - # Create wav.scp - awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp > ${x}_wav.scp - - # Make the utt2spk and spk2utt files. 
- cut -f1 -d'_' $x.uttids | paste -d' ' $x.uttids - > $x.utt2spk - cat $x.utt2spk | $local/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1; - - # Prepare gender mapping - cat $x.spk2utt | awk '{print $1}' | perl -ane 'chop; m:^.:; $g = lc($&); print "$_ $g\n";' > $x.spk2gender - - - if "${create_glm_stm}"; then - # Prepare STM file for sclite: - wav-to-duration --read-entire-file=true scp:${x}_wav.scp ark,t:${x}_dur.ark || exit 1 - awk -v dur=${x}_dur.ark \ - 'BEGIN{ - while(getline < dur) { durH[$1]=$2; } - print ";; LABEL \"O\" \"Overall\" \"Overall\""; - print ";; LABEL \"F\" \"Female\" \"Female speakers\""; - print ";; LABEL \"M\" \"Male\" \"Male speakers\""; - } - { wav=$1; spk=wav; sub(/_.*/,"",spk); $1=""; ref=$0; - gender=(substr(spk,0,1) == "f" ? "F" : "M"); - printf("%s 1 %s 0.0 %f %s\n", wav, spk, durH[wav], gender, ref); - } - ' ${x}.text >${x}.stm || exit 1 - - # Create dummy GLM file for sclite: - echo ';; empty.glm - [FAKE] => %HESITATION / [ ] __ [ ] ;; hesitation token - ' > ${x}.glm - fi -done - -echo "Data preparation succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/timit_format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/timit_format_data.sh deleted file mode 100644 index 8cb531f36fdcff697f99cdc4cbddec552d7a5013..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/timit_format_data.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2013 (Author: Daniel Povey) -# Apache 2.0 - -# This script takes data prepared in a corpus-dependent way -# in data/local/, and converts it into the "canonical" form, -# in various subdirectories of data/, e.g. data/lang, data/train, etc. - -. ./path.sh || exit 1; - -echo "Preparing train, dev and test data" -srcdir=data/local/data - - -for x in train dev test; do - mkdir -p data/$x - # cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1; - local/sph2pipe_process.py $srcdir/${x}_wav.scp data/${x}/tmp_wav.scp data/${x}/wav.scp || exit 1; - while read line - do - echo $line - $line - done < data/${x}/tmp_wav.scp - rm data/${x}/tmp_wav.scp - - cp $srcdir/$x.text data/$x/text || exit 1; - cp $srcdir/$x.spk2utt data/$x/spk2utt || exit 1; - cp $srcdir/$x.utt2spk data/$x/utt2spk || exit 1; - tools/filter_scp.pl data/$x/spk2utt $srcdir/$x.spk2gender > data/$x/spk2gender || exit 1; - [ -e $srcdir/${x}.stm ] && cp $srcdir/${x}.stm data/$x/stm - [ -e $srcdir/${x}.glm ] && cp $srcdir/${x}.glm data/$x/glm - # tools/validate_data_dir.sh --no-feats data/$x || exit 1 -done \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/timit_norm_trans.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/timit_norm_trans.pl deleted file mode 100644 index 566eb7693ac8901e3eeacbd795eddd2b0502a002..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/timit_norm_trans.pl +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter - -# Copyright 2012 Arnab Ghoshal - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script normalizes the TIMIT phonetic transcripts that have been -# extracted in a format where each line contains an utterance ID followed by -# the transcript, e.g.: -# fcke0_si1111 h# hh ah dx ux w iy dcl d ix f ay n ih q h# - -my $usage = "Usage: timit_norm_trans.pl -i transcript -m phone_map -from [60|48] -to [48|39] > normalized\n -Normalizes phonetic transcriptions for TIMIT, by mapping the phones to a -smaller set defined by the -m option. This script assumes that the mapping is -done in the \"standard\" fashion, i.e. to 48 or 39 phones. The input is -assumed to have 60 phones (+1 for glottal stop, which is deleted), but that can -be changed using the -from option. The input format is assumed to be utterance -ID followed by transcript on the same line.\n"; - -use strict; -use Getopt::Long; -die "$usage" unless(@ARGV >= 1); -my ($in_trans, $phone_map, $num_phones_out); -my $num_phones_in = 60; -GetOptions ("i=s" => \$in_trans, # Input transcription - "m=s" => \$phone_map, # File containing phone mappings - "from=i" => \$num_phones_in, # Input #phones: must be 60 or 48 - "to=i" => \$num_phones_out ); # Output #phones: must be 48 or 39 - -die $usage unless(defined($in_trans) && defined($phone_map) && - defined($num_phones_out)); -if ($num_phones_in != 60 && $num_phones_in != 48) { - die "Can only used 60 or 48 for -from (used $num_phones_in)." -} -if ($num_phones_out != 48 && $num_phones_out != 39) { - die "Can only used 48 or 39 for -to (used $num_phones_out)." -} -unless ($num_phones_out < $num_phones_in) { - die "Argument to -from ($num_phones_in) must be greater than that to -to ($num_phones_out)." -} - - -open(M, "<$phone_map") or die "Cannot open mappings file '$phone_map': $!"; -my (%phonemap, %seen_phones); -my $num_seen_phones = 0; -while () { - chomp; - next if ($_ =~ /^q\s*.*$/); # Ignore glottal stops. - m:^(\S+)\s+(\S+)\s+(\S+)$: or die "Bad line: $_"; - my $mapped_from = ($num_phones_in == 60)? $1 : $2; - my $mapped_to = ($num_phones_out == 48)? $2 : $3; - if (!defined($seen_phones{$mapped_to})) { - $seen_phones{$mapped_to} = 1; - $num_seen_phones += 1; - } - $phonemap{$mapped_from} = $mapped_to; -} -if ($num_seen_phones != $num_phones_out) { - die "Trying to map to $num_phones_out phones, but seen only $num_seen_phones"; -} - -open(T, "<$in_trans") or die "Cannot open transcription file '$in_trans': $!"; -while () { - chomp; - $_ =~ m:^(\S+)\s+(.+): or die "Bad line: $_"; - my $utt_id = $1; - my $trans = $2; - - $trans =~ s/q//g; # Remove glottal stops. 
- $trans =~ s/^\s*//; $trans =~ s/\s*$//; # Normalize spaces - - print $utt_id; - for my $phone (split(/\s+/, $trans)) { - if(exists $phonemap{$phone}) { print " $phonemap{$phone}"; } - if(not exists $phonemap{$phone}) { print " $phone"; } - } - print "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/validate_data_dir.sh deleted file mode 100644 index 9c0e350eeef428dd29501dc3368d373dd749b437..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/local/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/usr/bin/env bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(utils/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - utils/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/run.sh deleted file mode 100644 index 01a08a8ffb9f04a49b6dde37552006909f178042..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/run.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0" -# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl -# communication. More details can be found in -# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html -# export NCCL_SOCKET_IFNAME=ens4f1 -export NCCL_DEBUG=INFO -stage=0 # start from 0 if you need to start from data preparation -stop_stage=4 -# The num of nodes or machines used for multi-machine training -# Default 1 for single machine/node -# NFS will be needed if you want run multi-machine training -num_nodes=1 -# The rank of each node or machine, range from 0 to num_nodes -1 -# The first node/machine sets node_rank 0, the second one sets node_rank 1 -# the third one set node_rank 2, and so on. 
Default 0 -node_rank=0 -# data -timit_data=/home/Liangcd/data/timit -# path to save preproecssed data -# export data=data - - -nj=16 - -# data_type can be `raw` or `shard`. Typically, raw is used for small dataset, -# `shard` is used for large dataset which is over 1k hours, and `shard` is -# faster on reading data and training. -data_type=raw -num_utts_per_shard=1000 - -train_set=train -# Optional train_config -# 1. conf/train_transformer.yaml: Standard transformer -# 2. conf/train_conformer.yaml: Standard conformer -# 3. conf/train_unified_conformer.yaml: Unified dynamic chunk causal conformer -# 4. conf/train_unified_transformer.yaml: Unified dynamic chunk transformer -# 5. conf/train_conformer_no_pos.yaml: Conformer without relative positional encoding -# 6. conf/train_u2++_conformer.yaml: U2++ conformer -# 7. conf/train_u2++_transformer.yaml: U2++ transformer -train_config=conf/train_transformer.yaml -cmvn=true -dir=exp/transformer_phn_5k_acc4_bs16 -checkpoint= - - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=20 -decode_modes="ctc_greedy_search ctc_prefix_beam_search attention attention_rescoring" -# choose in [phn] -trans_type=phn - -dict=data/dict/${trans_type}_units.txt - - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - - echo "stage 0: Data preparation" - echo "preparing data for TIMIT for ${trans_type} level transcripts" - local/timit_data_prep.sh ${timit_data} ${trans_type} || exit 1; - local/timit_format_data.sh - echo "Finish stage 0" -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - echo "stage 1: compute global cmvn" - # compute cmvn - tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \ - --in_scp data/${train_set}/wav.scp \ - --out_cmvn data/${train_set}/global_cmvn - echo "Finish stage 1" -fi - - - -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - echo "stage 2: make train dict" - # Make train dict - echo "Make a dictionary" - mkdir -p $(dirname $dict) - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - tools/text2token.py -s 1 -n 1 --space sil --trans_type ${trans_type} data/${train_set}/text \ - | cut -f 2- -d" " | tr " " "\n" | sort | uniq | grep -v -e '^\s*$' | \ - awk '{print $0 " " NR+1}' >> ${dict} - wc -l ${dict} - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # - echo "Finish stage 2" -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "stage 3: Prepare data, prepare required format" - for x in dev test ${train_set}; do - if [ $data_type == "shard" ]; then - tools/make_shard_list.py --num_utts_per_shard $num_utts_per_shard \ - --num_threads 16 data/$x/wav.scp data/$x/text \ - $(realpath data/$x/shards) data/$x/data.list - else - tools/make_raw_list.py data/$x/wav.scp data/$x/text \ - data/$x/data.list - fi - done - echo "Finish stage 3" -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - mkdir -p $dir - # You have to rm `INIT_FILE` manually when you resume or restart a - # multi-machine training. 
- INIT_FILE=$dir/ddp_init - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp data/${train_set}/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - - # train.py rewrite $train_config to $dir/train.yaml with model input - # and output dimension, and $dir/train.yaml will be used for inference - # and export. - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. - rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --train_data data/$train_set/data.list \ - --cv_data data/dev/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 1 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Please specify decoding_chunk_size for unified streaming and - # non-streaming model. The default value is -1, which is full chunk - # for non-streaming inference. 
- decoding_chunk_size= - ctc_weight=0.5 - reverse_weight=0.0 - for mode in ${decode_modes}; do - { - test_dir=$dir/test_${mode} - mkdir -p $test_dir - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data data/test/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --result_file $test_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} \ - --connect_symbol ▁ - python tools/compute-wer.py --char=1 --v=1 \ - data/test/text $test_dir/text > $test_dir/wer - } & - done - wait -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # compute wer - for mode in ${decode_modes}; do - for test_set in test; do - test_dir=$dir/test_${mode} - sed 's:▁: :g' $test_dir/text > $test_dir/text.norm - python tools/compute-wer.py --char=1 --v=1 \ - data/$test_set/text $test_dir/text.norm > $test_dir/wer - done - done -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/tools b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/tools deleted file mode 100644 index 570c2efd663fd0125c0f115baf89e9f0c3c4433f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/tools +++ /dev/null @@ -1 +0,0 @@ -../../../tools/ \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/wenet b/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/wenet deleted file mode 100644 index 5f46eee4df1252a868b5524c689353f264df6921..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/timit/wenet +++ /dev/null @@ -1 +0,0 @@ -../../../wenet/ \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/README.md deleted file mode 100644 index eccf85f6497da528ed3c6516a2424e7e86ad6a05..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# conformer based end-to-end model for VKW challenge - -## Standard E2E Results - -Conformer without speed perpurb and lm -* config: conf/train_train_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char.yaml -* beam: 10 -* num of gpu: 8 -* num of averaged model: 5 -* ctc weight (used for attention rescoring): 0.5 - -dev set results trained only with training set (785 keywords, 1505 hour train set) - -| scenario | Precision | Recall | F1 | ATWV | -|----------|-----------|----------|--------|--------| -| lgv | 0.9281 | 0.6420 | 0.7590 | 0.5183 | -| liv | 0.8886 | 0.6515 | 0.7518 | 0.6050 | -| stv | 0.9120 | 0.7471 | 0.8213 | 0.6256 | - -dev set results trained with training set and finetune set (785 keywords, 1505 hour train set + 15 hour finetune set) - -| scenario | Precision | Recall | F1 | ATWV | -|----------|-----------|----------|--------|--------| -| lgv | 0.9478 | 0.7311 | 0.8255 | 0.6352 | -| liv | 0.9177 | 0.8398 | 0.8770 | 0.7412 | -| stv | 0.9320 | 0.8207 | 0.8729 | 0.7120 | - -test set results trained only with training set (384 keywords, 1505 hour train set) - -| scenario | Precision | Recall | F1 | ATWV | -|----------|-----------|----------|--------|--------| -| lgv | 0.6262 | 
0.5648 | 0.5939 | 0.5825 | -| liv | 0.8797 | 0.6282 | 0.7330 | 0.6061 | -| stv | 0.9102 | 0.7221 | 0.8053 | 0.6682 | - -test set results trained with training set and finetune set (384 keywords, 1505 hour train set + 15 hour finetune set) - -| scenario | Precision | Recall | F1 | ATWV | -|----------|-----------|----------|--------|--------| -| lgv | 0.6469 | 0.6276 | 0.6371 | 0.6116 | -| liv | 0.9278 | 0.7560 | 0.8331 | 0.6927 | -| stv | 0.9434 | 0.8061 | 0.8693 | 0.7275 | diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/conf/combine_finetune_5h_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/conf/combine_finetune_5h_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char.yaml deleted file mode 100644 index dc1d25a27ae4c161e77088356be9dc9a7549b586..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/conf/combine_finetune_5h_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char.yaml +++ /dev/null @@ -1,82 +0,0 @@ -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 8 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - causal: true - use_dynamic_chunk: false - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - use_dynamic_left_chunk: false - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 100 -log_interval: 400 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/conf/train_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/conf/train_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char.yaml deleted file mode 100644 index d2e6c4b6fb1af08da5cfae1a8dd3831d47801d0e..0000000000000000000000000000000000000000 --- 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/conf/train_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 8 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - causal: true - use_dynamic_chunk: false - cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster - use_dynamic_left_chunk: false - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.1 - src_attention_dropout_rate: 0.1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -# use raw_wav or kaldi feature -raw_wav: false - -dataset_conf: - filter_conf: - max_length: 40960 - min_length: 0 - token_max_length: 200 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: true - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 0.1 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 10 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 500 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 16 - -grad_clip: 5 -accum_grad: 1 -max_epoch: 100 -log_interval: 400 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 25000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/local/run_finetune_5h.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/local/run_finetune_5h.sh deleted file mode 100644 index 4d6a8a1eda3e26ea797cfed9cb85be25bf5f1a9b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/local/run_finetune_5h.sh +++ /dev/null @@ -1,196 +0,0 @@ -#!/bin/bash -# Copyright 2021 Tencent Inc. (Author: Yougen Yuan). -# Apach 2.0 - -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -stage=-1 -stop_stage=0 - -# The num of nodes -num_nodes=1 -# The rank of current node -node_rank=0 - -# data -data=data -dict=data/dict/lang_char.txt -data_type=raw # raw or shard - -train_set=train -dev_set=combine_dev -finetune2_set=combine_finetune_5h -# Optional train_config -name=vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char -train_config=conf/${finetune2_set}_${name}.yaml -cmvn=true -dir=exp/${finetune2_set}_${name}_new -checkpoint= #$dir/0.pt - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=10 - -. 
tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - # Data preparation - local/vkw_data_prep.sh -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - x=finetune_5h - for z in lgv liv stv; do - [ ! -f data/vkw/label/lab_${z}/${x}/wav_ori.scp ] && \ - mv data/vkw/label/lab_${z}/${x}/wav.scp \ - data/vkw/label/lab_${z}/${x}/wav_ori.scp && \ - cut -d " " -f 1,4 data/vkw/label/lab_${z}/${x}/wav_ori.scp \ - > data/vkw/label/lab_${z}/${x}/wav.scp - done - y=`echo $x | cut -d "_" -f 1` - mkdir -p combine_${y} - for f in text wav.scp segments; do - for z in lgv liv stv; do - cat data/vkw/label/lab_${z}/${x}/$f - done > combine_${y}/$f - done - # remove the space between the text labels for Mandarin dataset - # download and transfer to wav.scp - cp data/${finetune2_set}/text data/${finetune2_set}/text.org - paste -d " " <(cut -f 1 -d" " data/${finetune2_set}/text.org) \ - <(cut -f 2- -d" " data/${finetune2_set}/text.org | tr -d " ") \ - > data/${finetune2_set}/text - rm data/${finetune2_set}/text.org -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - echo "stage 1: generate segmented wav.scp and compute cmvn" - ## For wav feature, just copy the data. Fbank extraction is done in training - [ ! -f $data/$finetune2_set/segmentd_wav.scp ] && \ - python tools/segment.py --segments $data/$finetune2_set/segments \ - --input $data/$finetune2_set/wav.scp \ - --output $data/$finetune2_set/segmented_wav.scp -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "Prepare data, prepare required format" - tools/make_raw_list.py --segments $data/$finetune2_set/segments \ - $data/$finetune2_set/wav.scp $data/$finetune2_set/text $data/$finetune2_set/data.list -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - # Training - mkdir -p $dir - INIT_FILE=$dir/ddp_init - # You had better rm it manually before you start run.sh on first node. - # rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - # The number of gpus runing on each node/machine - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - # The total number of processes/gpus, so that the master knows - # how many workers to wait for. - # More details about ddp can be found in - # https://pytorch.org/tutorials/intermediate/dist_tuto.html - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp ${data}/${train_set}/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. - rank=$i ###`expr $node_rank \* $num_gpus + $i` - echo "start training" - [ ! -f exp/train_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char_new/avg_5.pt ] && \ - echo "Please use a pretrained model for finetuning" && exit 0 - [ ! 
-f $checkpoint ] && \ - cp exp/train_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char_new/avg_5.pt $checkpoint && \ - cp exp/train_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char_new/0.yaml $dir/0.yaml - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --train_data $data/${finetune2_set}/data.list \ - --cv_data $data/${dev_set}/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 4 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - [ ! -f $decode_checkpoint ] && \ - python3 wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Test model, please specify the model you want to use by --checkpoint - sets=${dev_set} - keywords_list=$data/vkw/keyword/kwlist - input_data=$feat_dir/${sets}/data.list - checkpoint=$dir/avg_${average_num}.pt - keyword_results=$dir/keyword_results_${sets} - ctc_results=$dir/ctc_results_${sets} - python3 local/vkw_kws_results.py --gpu 0 \ - --config $dir/train.yaml \ - --data_type $data_type \ - --symbol_table $dict \ - --num_workers 4 \ - --prefetch 32 \ - --input_data $input_data \ - --checkpoint $checkpoint \ - --keyword_unit_dict $keywords_list \ - --keyword_results $keyword_results \ - --ctc_results $ctc_results - - [ ! -f scripts/bin/results_to_score.sh ] && \ - ln -sf data/vkw/scripts scripts && chmod -R 755 scripts - ### attention: install the F4DE tool before testing - for y in "stv" "lgv" "liv"; do - mkdir -p $dir/dev_${y} - #[ ! -f data/vkw/score/dev_${y}/utter_map ] && \ - if [ $y == "lgv" ]; then - grep "TV1" $keyword_results > $dir/dev_${y}/kws_results - elif [ $y == "liv" ]; then - grep "sph_live" $keyword_results > $dir/dev_${y}/kws_results - elif [ $y == "stv" ]; then - grep "sph_video" $keyword_results > $dir/dev_${y}/kws_results - else - "invalid $y" - fi - ./data/vkw/scripts/bin/results_to_score.sh \ - data/vkw/score/dev_${y}/ecf \ - data/vkw/label/lab_${y}/dev_5h/segments \ - data/vkw/score/dev_${y}/utter_map \ - $dir/dev_${y}/kws_results \ - data/vkw/keyword/kwlist.xml \ - data/vkw/score/dev_${y}/rttm - ./data/vkw/scripts/bin/F1.sh \ - $dir/dev_${y}/kws_outputs/f4de_scores_unnormalized/alignment.csv - done -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/local/vkw_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/local/vkw_data_prep.sh deleted file mode 100644 index 58eb552e5ba51d6651b12e8505b5f53bd3518fc8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/local/vkw_data_prep.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Copyright 2021 Tencent Inc. (Author: Yougen Yuan). -# Apach 2.0 - -current_dir=$(pwd) -stage=0 -stop_stage=0 -. ./path.sh || exit 1; - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - cd $current_dir/data/ - [ ! -z vkw_v1.1.zip ] && echo "wget vkw challenge data to this directory" && exit 0 - [ ! 
-z vkw ] && unzip vkw_v1.1.zip - cd $current_dir -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - x=train - [ ! -f data/${x}/text ] && echo "vkw trainset is missing, wget to this directory" && exit 0 -fi - -echo "$0: vkw data preparation succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/local/vkw_kws_results.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/local/vkw_kws_results.py deleted file mode 100644 index 45ac39e7d0e95e69fc7fd30011c9b191b3a5fef0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/local/vkw_kws_results.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# Tencent (Yougen Yuan) -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.transformer.asr_model import init_asr_model -from wenet.utils.checkpoint import load_checkpoint - -from wenet.utils.common import get_subsample -from wenet.utils.common import remove_duplicates_and_blank -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask - - -def map_words2char(word_list_file): - word_unit_dict = {} - word_id_dict = {} - for line in open(word_list_file, mode="r", encoding="utf8"): - ids, keyword = line.split("\n")[0].split() - keyword_char = [] - for i in keyword: - keyword_char.append(i) - word_unit_dict[keyword] = keyword_char - word_id_dict[keyword] = ids - return word_id_dict, word_unit_dict - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - - if end == len(alignment) and start < end: - if start == 0: - timestamp.append(alignment[start:]) - else: - timestamp[-1] += alignment[start:] - break - - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat_frames(timestamp, subsample, char_dict): - begin = 0 - duration = 0 - word_seq = [] - word_time = [] - for idx, t in enumerate(timestamp): - duration = len(t) * subsample - if idx < len(timestamp) - 1: - word_seq.append(char_dict[t[-1]]) - word_time.append([begin, begin + duration]) - else: - non_blank = 0 - token = 0 - for i in t: - if i != 0: - token = i - break - word_seq.append(char_dict[token]) - word_time.append([begin, begin + duration]) - begin = begin + duration - return word_seq, word_time - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='training your network') - 
parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--input_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument('--keyword_unit_dict', - required=True, - help='keyword id') - parser.add_argument('--keyword_results', - required=True, - help='keyword results') - parser.add_argument('--ctc_results', required=True, help='ctc results') - - args = parser.parse_args() - - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - # Load dict - char_dict = {} - with open(args.symbol_table, mode='r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - - cv_dataset = Dataset(args.data_type, - args.input_data, - symbol_table, - cv_conf, - None, - partition=False) - - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - print("Reading: ", args.keyword_unit_dict) - word_id_dict, word_unit_dict = map_words2char(args.keyword_unit_dict) - word_unit_list = list(word_unit_dict.keys()) - print("word_unit_list has the size of %d" % (len(word_unit_list))) - - # Init asr model from configs - model = init_asr_model(configs) - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - f_keyword_results = open(args.keyword_results, 'w', encoding='utf-8') - f_ctc_results = open(args.ctc_results, 'w', encoding='utf-8') - with 
torch.no_grad(): - for batch_idx, batch in enumerate(cv_data_loader): - key, feat, target, feats_length, target_length = batch - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - batch_size = encoder_out.size(0) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - alignment = [hyp.tolist() for hyp in topk_index] - hyps = [remove_duplicates_and_blank(hyp) for hyp in alignment] - for index, i in enumerate(key): - content = [] - if len(hyps[index]) > 0: - for w in hyps[index]: - if w == eos: - break - content.append(char_dict[w]) - f_ctc_results.write('{} {}\n'.format(i, " ".join(content))) - f_ctc_results.flush() - for index, i in enumerate(key): - timestamp = get_frames_timestamp(alignment[index]) - subsample = get_subsample(configs) - word_seq, word_time = get_labformat_frames( - timestamp, subsample, char_dict) - for index_j in range(len(word_seq)): - for keyword in word_unit_list: - keyword_len = len(word_unit_dict[keyword]) - if index_j + keyword_len > len(word_seq): - continue - if (word_seq[index_j:index_j + - keyword_len] == word_unit_dict[keyword]): - f_keyword_results.write("{} {} {} {} {}\n".format( - word_id_dict[keyword], i, - word_time[index_j][0], - word_time[index_j + keyword_len - 1][1], 0.0)) - f_keyword_results.flush() - f_keyword_results.close() - f_ctc_results.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/run.sh deleted file mode 100644 index 4c3cf1bcfd42e909f090b0d159cbb1c4fb10a385..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/run.sh +++ /dev/null @@ -1,220 +0,0 @@ -#!/bin/bash -# Copyright 2021 Tencent Inc. (Author: Yougen Yuan). -# Apach 2.0 - -. 
./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -stage=-1 -stop_stage=0 - -# The num of nodes -num_nodes=1 -# The rank of current node -node_rank=0 - -# data -data=data -dict=data/dict/lang_char.txt -data_type=raw # raw or shard - -train_set=train -dev_set=combine_dev -# Optional train_config -name=vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char -train_config=conf/train_${name}.yaml -cmvn=true -dir=exp/train_${name}_new -checkpoint= #$dir/0.pt - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=10 - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - # Data preparation - local/vkw_data_prep.sh -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - x=dev_5h - for z in lgv liv stv; do - [ ! -f data/vkw/label/lab_${z}/${x}/wav_ori.scp ] && \ - mv data/vkw/label/lab_${z}/${x}/wav.scp \ - data/vkw/label/lab_${z}/${x}/wav_ori.scp && \ - cut -d " " -f 1,4 data/vkw/label/lab_${z}/${x}/wav_ori.scp \ - > data/vkw/label/lab_${z}/${x}/wav.scp - done - y=`echo $x | cut -d "_" -f 1` - mkdir -p combine_${y} - for f in text wav.scp segments; do - for z in lgv liv stv; do - cat data/vkw/label/lab_${z}/${x}/$f - done > combine_${y}/$f - done - # remove the space between the text labels for Mandarin dataset - # download and transfer to wav.scp - for x in ${dev_set} ${train_set}; do - cp data/${x}/text data/${x}/text.org - paste -d " " <(cut -f 1 -d" " data/${x}/text.org) <(cut -f 2- -d" " \ - data/${x}/text.org | tr -d " ") > data/${x}/text - rm data/${x}/text.org - done -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - echo "stage 1: generate segmented wav.scp and compute cmvn" - ## For wav feature, just copy the data. Fbank extraction is done in training - for x in ${dev_set} ${train_set}; do - [ ! -f $data/$x/segmentd_wav.scp ] && \ - python tools/segment.py --segments $data/$x/segments \ - --input $data/$x/wav.scp \ - --output $data/$x/segmented_wav.scp - done - - ### generate global_cmvn using training set - tools/compute_cmvn_stats.py --num_workers 12 --train_config $train_config \ - --in_scp $data/${train_set}/segmented_wav.scp \ - --out_cmvn $data/$train_set/global_cmvn -fi - -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - # Make train dict - echo "Make a dictionary" - mkdir -p $(dirname $dict) - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - tools/text2token.py -s 1 -n 1 $data/${train_set}/text | cut -f 2- -d" " | \ - tr " " "\n" | sort | uniq | grep -a -v -e '^\s*$' | grep -P '[\p{Han}]'\ - | awk '{print $0 " " NR+1}' >> ${dict} - - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "Prepare data, prepare required format" - for x in ${dev_set} ${train_set}; do - tools/make_raw_list.py --segments $data/$x/segments \ - $data/$x/wav.scp $data/$x/text $data/$x/data.list - done -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - # Training - mkdir -p $dir - INIT_FILE=$dir/ddp_init - # You had better rm it manually before you start run.sh on first node. 
- # rm -f $INIT_FILE # delete old one before starting - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - # The number of gpus runing on each node/machine - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - # The total number of processes/gpus, so that the master knows - # how many workers to wait for. - # More details about ddp can be found in - # https://pytorch.org/tutorials/intermediate/dist_tuto.html - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp ${data}/${train_set}/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. - rank=$i ###`expr $node_rank \* $num_gpus + $i` - echo "start training" - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --train_data $data/$train_set/data.list \ - --cv_data $data/${dev_set}/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 4 \ - $cmvn_opts \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - [ ! -f $decode_checkpoint ] && \ - python3 wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Test model, please specify the model you want to use by --checkpoint - sets=${dev_set} - keywords_list=$data/vkw/keyword/kwlist - input_data=$feat_dir/${sets}/data.list - checkpoint=$dir/avg_${average_num}.pt - keyword_results=$dir/keyword_results_${sets} - ctc_results=$dir/ctc_results_${sets} - python3 local/vkw_kws_results.py --gpu 0 \ - --config $dir/train.yaml \ - --data_type $data_type \ - --symbol_table $dict \ - --num_workers 4 \ - --prefetch 32 \ - --input_data $input_data \ - --checkpoint $checkpoint \ - --keyword_unit_dict $keywords_list \ - --keyword_results $keyword_results \ - --ctc_results $ctc_results - - [ ! -f scripts/bin/results_to_score.sh ] && \ - ln -sf data/vkw/scripts scripts && chmod -R 755 scripts - ### attention: install the F4DE tool before testing - for y in "stv" "lgv" "liv"; do - mkdir -p $dir/dev_${y} - #[ ! 
-f data/vkw/score/dev_${y}/utter_map ] && \ - if [ $y == "lgv" ]; then - grep "TV1" $keyword_results > $dir/dev_${y}/kws_results - elif [ $y == "liv" ]; then - grep "sph_live" $keyword_results > $dir/dev_${y}/kws_results - elif [ $y == "stv" ]; then - grep "sph_video" $keyword_results > $dir/dev_${y}/kws_results - else - "invalid $y" - fi - ./data/vkw/scripts/bin/results_to_score.sh \ - data/vkw/score/dev_${y}/ecf \ - data/vkw/label/lab_${y}/dev_5h/segments \ - data/vkw/score/dev_${y}/utter_map \ - $dir/dev_${y}/kws_results \ - data/vkw/keyword/kwlist.xml \ - data/vkw/score/dev_${y}/rttm - ./data/vkw/scripts/bin/F1.sh \ - $dir/dev_${y}/kws_outputs/f4de_scores_unnormalized/alignment.csv - done -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - echo "adding 5h finetune data for each scenario to obtain better results" - local/run_finetune_5h.sh -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. ./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. 
- elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - 
pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = 
{'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. 
-has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! $skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = 
result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. - format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - 
default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: 
Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. 
Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print "<eps> 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf("<s> %d\n", NR+2); - printf("</s> %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 <eps> <eps>') -print('1 1 <blank> <eps>') -print('2 2 <blank> <eps>') -print('2 0 <eps> <eps>') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '<eps>' or phone == '<blank>': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '<eps>', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '<eps>')) - print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 <blank> <eps>') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '<eps>' or phone == '<blank>': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '<eps>', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '<eps>')) - print('{} {} {} {}'.format(node, 0, '<eps>', '<eps>')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n + 1 - -
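The `il`/`ol` helpers being defined here convert a 0-based position in the token list into the numeric label id used in the emitted text FST: id 0 stays reserved for `<eps>`, so every real symbol is shifted up by one. A minimal sketch of that convention (the toy token list is illustrative, not taken from this repository):

```python
# Illustrative only: a tiny stand-in for tokens.txt, which lists <eps> at id 0,
# <blank> at id 1 and the real units afterwards.
toy_tokens = ["<eps>", "<blank>", "a", "b", "#0"]


def il(n):
    # Same shift as in ctc_token_fst_corrected.py: entry n of the unit list
    # (counting <blank> as entry 0) gets symbol id n + 1.
    return n + 1


real_units = [t for t in toy_tokens
              if t not in ("<eps>", "<blank>") and not t.startswith("#")]
print("<blank> -> input label", il(0))        # the blank self-loop uses label 1
for i, unit in enumerate(real_units, start=1):
    print(unit, "-> input label", il(i))      # 'a' -> 2, 'b' -> 3, matching tokens.txt
```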
-def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v '<s> <s>' | \ - grep -v '</s> <s>' | \ - grep -v '</s> </s>' | \ - grep -v -i '<unk>' | \ - grep -v -i '<spoken_noise>' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '<SPOKEN_NOISE>': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '<unk>' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now.
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# This script replaces <s> and </s> with <eps> (on both input and output sides),
-# for the G.fst acceptor.
-
-while(<>){
-  @A = split(" ", $_);
-  if ( @A >= 4 ) {
-    if ($A[2] eq "<s>" || $A[2] eq "</s>") { $A[2] = "<eps>"; }
-    if ($A[3] eq "<s>" || $A[3] eq "</s>") { $A[3] = "<eps>"; }
-  }
-  print join("\t", @A) . "\n";
-}
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/git-pre-commit
deleted file mode 100644
index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/git-pre-commit
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/bash
-set -e
-
-echo "Running pre-commit flake8"
-python tools/flake8_hook.py
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/install_srilm.sh
deleted file mode 100644
index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/install_srilm.sh
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
-# 2022 Binbin Zhang(binbzha@qq.com)
-
-current_path=`pwd`
-current_dir=`basename "$current_path"`
-
-if [ "tools" != "$current_dir" ]; then
-  echo "You should run this script in tools/ directory!!"
-  exit 1
-fi
-
-! command -v gawk > /dev/null && \
-  echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1;
-
-srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz"
-
-if [ ! -f ./srilm.tar.gz ]; then
-  if ! wget -O ./srilm.tar.gz "$srilm_url"; then
-    echo 'There was a problem downloading the file.'
-    echo 'Check you internet connection and try again.'
-    exit 1
-  fi
-fi
-
-tar -zxvf srilm.tar.gz
-mv SRILM-1.7.3 srilm
-
-# set the SRILM variable in the top-level Makefile to this directory.
-cd srilm
-cp Makefile tmpf
-
-cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \
-  > Makefile || exit 1
-rm tmpf
-
-make || exit
-cd ..
-
-(
-  [ ! -z "${SRILM}" ] && \
-    echo >&2 "SRILM variable is aleady defined. Undefining..." && \
-    unset SRILM
-
-  [ -f ./env.sh ] && . ./env.sh
-
-  [ !
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 else 
sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, "w", 
encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
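The comment above (the same helper also appears in validate_dict_dir.pl earlier in this diff) describes a "decode as UTF-8 if possible, otherwise fall back to raw bytes" reading strategy. A minimal Python sketch of that idea follows; the helper name and the path are illustrative only and are not part of the scripts being removed here:

```python
# Hypothetical helper mirroring the UTF-8-or-bytestream fallback described
# above; an illustration only, not part of the wenet/Kaldi tooling.
from typing import List, Tuple


def utf8_or_bytestream(path: str) -> Tuple[bool, List]:
    """Return (True, decoded_lines) if every line is valid UTF-8,
    otherwise (False, raw_byte_lines)."""
    with open(path, "rb") as f:
        raw_lines = f.readlines()
    try:
        return True, [line.decode("utf-8") for line in raw_lines]
    except UnicodeDecodeError:
        # Keep the raw bytes: the caller only needs the line contents and
        # lengths for formatting, not a specific single-byte encoding.
        return False, raw_lines


if __name__ == "__main__":
    ok, lines = utf8_or_bytestream("data/train/text")  # illustrative path
    print("utf8" if ok else "bytes", len(lines), "lines")
```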
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python 
../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - 
char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
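The alignment script above derives its time stamps from the fact that each encoder output frame covers the 10 ms hop length times the subsampling factor, i.e. duration = num_frames * 0.01 * subsample. A small worked example with assumed values (the tokens, frame counts, and subsample factor are made up for illustration):

```python
# Worked example of the time-stamp arithmetic used by the alignment script
# above. All values below are assumptions for illustration, not from the diff.
frame_shift_s = 0.01   # 10 ms hop length
subsample = 4          # e.g. a conv2d front end that subsamples by 4

# Frame spans per token, as get_frames_timestamp() would produce them:
token_spans = [("ni", 3), ("hao", 5)]   # (token, number of subsampled frames)

begin = 0.0
for token, n_frames in token_spans:
    duration = n_frames * frame_shift_s * subsample
    print(f"{begin:.2f} {begin + duration:.2f} {token}")   # begin end token
    begin += duration
```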
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
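The checkpoint averaging performed by average_model.py above reduces to summing the selected state dicts and dividing by their count. A condensed sketch with hypothetical checkpoint paths (the real script selects checkpoints by cv_loss from yaml metadata or by modification time):

```python
# Condensed sketch of the state-dict averaging done by average_model.py above.
# The checkpoint paths are hypothetical.
import torch

paths = ["exp/30.pt", "exp/31.pt", "exp/32.pt"]   # assumed checkpoint files

avg = None
for path in paths:
    states = torch.load(path, map_location="cpu")
    if avg is None:
        avg = states
    else:
        for k in avg:
            avg[k] += states[k]

for k in avg:
    # true_divide avoids integer floor division for integer-typed entries
    avg[k] = torch.true_divide(avg[k], len(paths))

torch.save(avg, "exp/avg3.pt")
```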
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
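Both export scripts in this diff copy every export argument into the ONNX model's `metadata_props`, so downstream tooling (e.g. `onnx2horizonbin.py::generate_config()` or the runtime) can recover the chunk and cache configuration without re-reading the training yaml. A minimal sketch of that pattern; the `model.onnx` path and the argument dict below are placeholders, not artifacts from this repo:

```python
import onnx
import onnxruntime

# Attach export arguments to the model as metadata, then read them back.
# "model.onnx" and the args dict are placeholders for an already exported model.
args = {"chunk_size": 16, "num_decoding_left_chunks": 4, "output_size": 256}

m = onnx.load("model.onnx")
for k, v in args.items():
    meta = m.metadata_props.add()
    meta.key, meta.value = str(k), str(v)
onnx.checker.check_model(m)
onnx.save(m, "model.onnx")  # re-save so the metadata is persisted

# Any consumer can recover the values later without the training yaml:
sess = onnxruntime.InferenceSession("model.onnx")
print(sess.get_modelmeta().custom_metadata_map)
```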
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
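The NOTE above keeps the `chunk`/`output` time axis dynamic even in 16/4 mode so the final, shorter chunk never has to be padded to `decoding_window`. A self-contained sketch of that `dynamic_axes` export pattern, using a stand-in `TinyEncoder` module and output path rather than the real wenet encoder:

```python
import torch

class TinyEncoder(torch.nn.Module):
    """Stand-in module, only here to demonstrate the dynamic_axes pattern."""
    def __init__(self, idim: int = 80, odim: int = 256):
        super().__init__()
        self.proj = torch.nn.Linear(idim, odim)

    def forward(self, chunk: torch.Tensor) -> torch.Tensor:
        return self.proj(chunk)

model = TinyEncoder().eval()
chunk = torch.randn(1, 67, 80)  # (batch, time, mel-dim)
torch.onnx.export(
    model, (chunk,), "tiny_encoder.onnx",
    opset_version=13, do_constant_folding=True,
    input_names=["chunk"], output_names=["output"],
    # Keep the time axis dynamic so a shorter last chunk needs no padding.
    dynamic_axes={"chunk": {1: "T"}, "output": {1: "T"}},
)
```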
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
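The Stage-x.3 checks above all follow the same recipe: run identical inputs through the torch module and the exported graph, then compare with `np.testing.assert_allclose`. A condensed sketch of that check for a single-input model; `torch_module`, `example`, and the model path are placeholders:

```python
import numpy as np
import onnxruntime
import torch

def check_parity(torch_module: torch.nn.Module, onnx_path: str,
                 example: torch.Tensor, rtol: float = 1e-3, atol: float = 1e-5):
    """Compare a torch module against its exported ONNX graph on one input."""
    torch_module.eval()
    with torch.no_grad():
        torch_out = torch_module(example)
    sess = onnxruntime.InferenceSession(onnx_path)
    feed = {sess.get_inputs()[0].name: example.numpy()}
    onnx_out = sess.run(None, feed)[0]
    np.testing.assert_allclose(torch_out.numpy(), onnx_out, rtol=rtol, atol=atol)
```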
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
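For the efficient-conformer encoder, the cache slicing above hinges on `calculate_downsampling_factor`: every stride layer the block index has already passed multiplies the effective downsampling factor. A standalone sketch of that rule, with illustrative stride positions rather than values from a shipped config:

```python
from typing import Sequence

def downsampling_factor(layer_idx: int,
                        stride_layer_idx: Sequence[int] = (3, 7),
                        stride: Sequence[int] = (2, 2)) -> int:
    # Multiply the strides of all stride layers that sit before layer_idx.
    factor = 1
    for pos, s in zip(stride_layer_idx, stride):
        if layer_idx > pos:
            factor *= s
    return factor

assert downsampling_factor(0) == 1
assert downsampling_factor(5) == 2   # past the first stride layer
assert downsampling_factor(10) == 4  # past both stride layers
```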
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
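# A minimal sketch of the FP16 conversion step used by the export script above:
# onnxmltools loads the exported ONNX graph, casts its float32 tensors to float16,
# and the converted model is saved next to the FP32 one. The path handling here is
# illustrative only; actual file names come from --output_onnx_dir.
import os
import onnxmltools
from onnxmltools.utils.float16_converter import convert_float_to_float16

def convert_to_fp16(onnx_path: str) -> str:
    model = onnxmltools.utils.load_model(onnx_path)
    model = convert_float_to_float16(model)
    fp16_path = os.path.splitext(onnx_path)[0] + "_fp16.onnx"
    onnxmltools.utils.save_model(model, fp16_path)
    return fp16_path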
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
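# A minimal sketch of the ONNX Runtime setup used below: prefer the CUDA execution
# provider when it is available, otherwise fall back to CPU, then feed the exported
# encoder a padded feature batch. The model path and tensor shapes are placeholders.
import numpy as np
import onnxruntime as rt

def run_encoder(encoder_onnx: str, feats: np.ndarray, feats_lengths: np.ndarray):
    available = rt.get_available_providers()
    providers = (["CUDAExecutionProvider", "CPUExecutionProvider"]
                 if "CUDAExecutionProvider" in available
                 else ["CPUExecutionProvider"])
    session = rt.InferenceSession(encoder_onnx, providers=providers)
    feed = {
        session.get_inputs()[0].name: feats,          # (B, T, feat_dim), float32 or float16
        session.get_inputs()[1].name: feats_lengths,  # (B,), int32
    }
    # For an encoder exported with the output names used in this script, run()
    # returns encoder_out, encoder_out_lens, ctc_log_probs, beam_log_probs
    # and beam_log_probs_idx.
    return session.run(None, feed)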
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
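# A minimal sketch of the hypothesis padding performed in the attention_rescoring
# branch above: each CTC beam candidate is wrapped with <sos>/<eos>, a reversed
# copy is built for the right-to-left decoder, and all candidates are padded to a
# common length with an ignore id (assumed -1, matching wenet's IGNORE_ID).
import numpy as np

def pad_hyps(all_hyps, batch_size, beam_size, sos, eos, ignore_id=-1):
    max_len = max(len(h) for h in all_hyps)
    hyps = np.full((batch_size, beam_size, max_len + 2), ignore_id, dtype=np.int64)
    r_hyps = np.full((batch_size, beam_size, max_len + 2), ignore_id, dtype=np.int64)
    hyps_lens = np.ones((batch_size, beam_size), dtype=np.int32)
    k = 0
    for i in range(batch_size):
        for j in range(beam_size):
            cand = list(all_hyps[k])
            hyps[i, j, :len(cand) + 2] = [sos] + cand + [eos]
            r_hyps[i, j, :len(cand) + 2] = [sos] + cand[::-1] + [eos]
            hyps_lens[i, j] = len(cand) + 1
            k += 1
    return hyps, r_hyps, hyps_lens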
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
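For the causal branch above, the left context of `lorder = kernel_size - 1` frames comes either from zero left-padding (first chunk) or from the cached frames of the previous chunk, and the last `lorder` frames of the padded input become the next cache. A small standalone sketch of just that bookkeeping (the function name and toy shapes are mine, for illustration only):

```python
import torch
import torch.nn.functional as F

def causal_left_context(x: torch.Tensor, cache: torch.Tensor, lorder: int):
    """x: (batch, channels, time); cache: (batch, channels, cache_t), possibly zero-length.
    Returns the input with left context attached and the cache for the next chunk."""
    if cache.size(2) == 0:
        # First chunk: fake the left context with zeros.
        x = F.pad(x, (lorder, 0), 'constant', 0.0)
    else:
        # Later chunks: reuse the last `lorder` cached frames as real left context.
        x = torch.cat((cache[:, :, -lorder:], x), dim=2)
    new_cache = x[:, :, -lorder:]
    return x, new_cache

# Toy usage with kernel_size=15 -> lorder=14
x = torch.randn(1, 256, 16)
empty_cache = torch.zeros(1, 256, 0)
padded, cache = causal_left_context(x, empty_cache, lorder=14)
print(padded.shape, cache.shape)  # torch.Size([1, 256, 30]) torch.Size([1, 256, 14])
```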
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
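`calculate_downsampling_factor` above simply multiplies the strides of every StrideConv layer sitting before layer `i`, and `forward()` thins the masks and positional embeddings with the same strides. A brief re-statement as a free function, using the constructor defaults (`stride_layer_idx=3`, `stride=2`) as example values:

```python
from typing import List

import torch

def downsampling_factor(i: int, stride_layer_idx: List[int], stride: List[int]) -> int:
    """Cumulative time-axis downsampling seen by encoder layer i."""
    factor = 1
    for idx, stride_idx in enumerate(stride_layer_idx):
        if i > stride_idx:
            factor *= stride[idx]
    return factor

# One StrideConv at layer index 3 with stride 2:
print([downsampling_factor(i, [3], [2]) for i in range(6)])   # [1, 1, 1, 1, 2, 2]

# The forward pass applies the same stride to the padding mask (and pos_emb):
masks = torch.ones(1, 1, 96, dtype=torch.bool)
print(masks[:, :, ::2].shape)                                  # torch.Size([1, 1, 48])
```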
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
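The chunk-by-chunk simulation below slides a window of `(decoding_chunk_size - 1) * subsampling + context` input frames forward by `subsampling * decoding_chunk_size` frames per step, so every chunk carries the right context the subsampling front end needs. A hedged sketch of just that window arithmetic (subsampling rate 4 and right context 6 are typical Conv2dSubsampling4 values, assumed here for illustration):

```python
def chunk_windows(num_frames: int, decoding_chunk_size: int,
                  subsampling: int = 4, right_context: int = 6):
    """Yield the (start, end) input-frame ranges fed to forward_chunk, one per chunk."""
    context = right_context + 1                      # add the current frame
    stride = subsampling * decoding_chunk_size       # frames consumed per chunk
    decoding_window = (decoding_chunk_size - 1) * subsampling + context
    for cur in range(0, num_frames - context + 1, stride):
        yield cur, min(cur + decoding_window, num_frames)

# e.g. 16-frame decoding chunks over a 200-frame utterance
print(list(chunk_windows(200, 16)))
# [(0, 67), (64, 131), (128, 195), (192, 200)]
```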
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. 
- self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
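Condensing the docstring above: the layer runs a half-weighted macaron feed-forward, self-attention, the (possibly strided) convolution module, a second half-weighted feed-forward, and a closing LayerNorm, each with a pre-norm residual. A toy sketch of that ordering only (Linear layers stand in for the real attention/convolution sub-modules, and the pooled residual used when the conv module strides is omitted):

```python
import torch
from torch import nn

class MacaronBlockSketch(nn.Module):
    """Illustrative residual ordering of the conformer-style layer; not the real sub-modules."""
    def __init__(self, size: int = 256):
        super().__init__()
        self.ffn_macaron = nn.Linear(size, size)
        self.attn = nn.Linear(size, size)    # stand-in for multi-head self-attention
        self.conv = nn.Linear(size, size)    # stand-in for the convolution module
        self.ffn = nn.Linear(size, size)
        self.norms = nn.ModuleList(nn.LayerNorm(size) for _ in range(5))
        self.ff_scale = 0.5                  # macaron half-step weighting

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x + self.ff_scale * self.ffn_macaron(self.norms[0](x))
        x = x + self.attn(self.norms[1](x))
        x = x + self.conv(self.norms[2](x))
        x = x + self.ff_scale * self.ffn(self.norms[3](x))
        return self.norms[4](x)              # final LayerNorm of the block

print(MacaronBlockSketch()(torch.randn(2, 10, 256)).shape)  # torch.Size([2, 10, 256])
```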
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
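The cache described in the comment above packs keys and values into a single tensor along the last dimension; every step splits it in half, prepends the halves to the fresh k/v, and re-concatenates the result for the next step. Because concatenating a zero-length cache is a no-op, the same split-and-concat path also works for the very first ONNX chunk. A tiny demonstration of that bookkeeping:

```python
import torch

head, d_k = 4, 8
k_new = torch.randn(1, head, 5, d_k)      # keys for the current chunk
v_new = torch.randn(1, head, 5, d_k)      # values for the current chunk

# First chunk: a zero-length cache, so splitting/concatenating it changes nothing.
cache = torch.zeros(1, head, 0, 2 * d_k)

for step in range(3):
    key_cache, value_cache = torch.split(cache, cache.size(-1) // 2, dim=-1)
    k = torch.cat([key_cache, k_new], dim=2)
    v = torch.cat([value_cache, v_new], dim=2)
    cache = torch.cat((k, v), dim=-1)      # new_cache, packed as (K | V) on the last dim
    print(step, cache.shape)
# 0 torch.Size([1, 4, 5, 16])
# 1 torch.Size([1, 4, 10, 16])
# 2 torch.Size([1, 4, 15, 16])
```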
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
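The Squeezeformer variant first rescales its input with the learnable `ada_scale`/`ada_bias` pair and then runs the usual conformer conv pipeline, which the code just below completes: pointwise conv to 2x channels, GLU gating back down, depthwise conv, norm plus activation, and a final pointwise conv. A minimal functional sketch of that data path with random weights (no masking, cache handling, or weight init):

```python
import torch
from torch import nn
import torch.nn.functional as F

channels, kernel_size = 256, 31
ada_scale = torch.ones(1, 1, channels)        # learnable parameters in the real module
ada_bias = torch.zeros(1, 1, channels)
pointwise_conv1 = nn.Conv1d(channels, 2 * channels, kernel_size=1)
depthwise_conv = nn.Conv1d(channels, channels, kernel_size,
                           padding=(kernel_size - 1) // 2, groups=channels)
norm = nn.BatchNorm1d(channels)
pointwise_conv2 = nn.Conv1d(channels, channels, kernel_size=1)

x = torch.randn(2, 50, channels)              # (batch, time, channels)
x = ada_scale * x + ada_bias                  # adaptive input scaling
x = x.transpose(1, 2)                         # (batch, channels, time) for Conv1d
x = F.glu(pointwise_conv1(x), dim=1)          # 2*channels -> channels via gating
x = F.silu(norm(depthwise_conv(x)))           # depthwise conv + norm + swish activation
x = pointwise_conv2(x)
print(x.transpose(1, 2).shape)                # torch.Size([2, 50, 256])
```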
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - 
static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
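The reduce/recover bookkeeping in the forward pass above comes down to a couple of tensor operations. A shape-level sketch follows; batch, length, and feature sizes are assumed, and the real reduction is the strided TimeReductionLayer, approximated here by plain slicing for clarity.

```python
import torch

# Illustrative shapes only: batch=2, 12 frames, dim=4.
xs = torch.randn(2, 12, 4)
pos_emb = torch.randn(1, 12, 4)

# Reduce: keep every second frame (the deleted code uses a strided depthwise conv).
xs_red = xs[:, ::2, :]              # (2, 6, 4), roughly 40 ms -> 80 ms per frame
pos_emb_red = pos_emb[:, ::2, :]    # positional encoding is thinned with the same stride

# Recover: repeat each frame twice, trim to the saved length, add the saved activation.
recovered = xs_red.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2)   # (2, 12, 4)
recovered = xs + recovered[:, :xs.size(1), :]

# Net downsampling factor at layer i is 2 ** (reduce_exp - recover_exp).
print(xs_red.shape, recovered.shape)
```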
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
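The overlapping-window strategy described in the list above can be checked with simple arithmetic. The subsampling rate 4 and right context 6 match the deleted front-end; the chunk size and total frame count are assumed for illustration.

```python
# How the overlapping input windows are laid out.
subsampling = 4            # Conv2d subsampling rate of the encoder front-end
context = 6 + 1            # right_context + 1 current frame
decoding_chunk_size = 16   # encoder output frames produced per chunk (assumed)

stride = subsampling * decoding_chunk_size                            # 64 input frames advanced per step
decoding_window = (decoding_chunk_size - 1) * subsampling + context   # 67 input frames read per step

num_frames = 200
starts = list(range(0, num_frames - context + 1, stride))
windows = [(cur, min(cur + decoding_window, num_frames)) for cur in starts]
print(stride, decoding_window, windows)
# 64 67 [(0, 67), (64, 131), (128, 195), (192, 200)]
```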
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
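The feed-forward block above differs from the vanilla Transformer FFN only by the learned pre-scale and pre-bias. A compact sketch follows; sizes are assumed and SiLU stands in for the swish activation.

```python
import torch
import torch.nn as nn

class AdaScaleFFN(nn.Module):
    """Sketch of the adaptive-scale feed-forward block (assumed sizes)."""
    def __init__(self, idim=8, hidden=32, dropout=0.1, adaptive_scale=True):
        super().__init__()
        self.ada_scale = nn.Parameter(torch.ones(1, 1, idim), requires_grad=adaptive_scale)
        self.ada_bias = nn.Parameter(torch.zeros(1, 1, idim), requires_grad=adaptive_scale)
        self.adaptive_scale = adaptive_scale
        self.net = nn.Sequential(nn.Linear(idim, hidden), nn.SiLU(),
                                 nn.Dropout(dropout), nn.Linear(hidden, idim))

    def forward(self, xs):          # xs: (B, L, D) -> (B, L, D)
        if self.adaptive_scale:     # learned per-feature scale/bias before the FFN stack
            xs = self.ada_scale * xs + self.ada_bias
        return self.net(xs)

print(AdaScaleFFN()(torch.randn(2, 5, 8)).shape)   # torch.Size([2, 5, 8])
```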
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
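The 1/4 subsampling above is two valid stride-2 convolutions in a row, and the same arithmetic fixes the input width of the projection layer. A quick check with assumed frame and feature counts:

```python
# Frame/feature bookkeeping for the 1/4 subsampling module
# (input_size=80 mel bins, T=100 frames, odim=256 are assumed for illustration).
input_size, T, odim = 80, 100, 256

def conv_out(n, kernel=3, stride=2):            # valid convolution, no padding
    return (n - kernel) // stride + 1

t_after = conv_out(conv_out(T))                 # two k=3, s=2 convs: 100 -> 49 -> 24
f_after = ((input_size - 1) // 2 - 1) // 2      # same reduction along the mel axis: 19
proj_in = odim * f_after                        # flattened (channels * mel) fed to the Linear
print(t_after, f_after, proj_in)                # 24 19 4864
```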
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
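All of the time-reduction variants above share the same core: a stride-2 depthwise convolution followed by a pointwise convolution, with masks thinned by the same stride and lengths re-aligned afterwards. A minimal sketch with assumed sizes:

```python
import torch
import torch.nn as nn

# Assumed sizes; padding follows kernel_size - stride as in the 1D variant.
channel, kernel_size, stride = 8, 5, 2
dw = nn.Conv1d(channel, channel, kernel_size, stride=stride,
               padding=kernel_size - stride, groups=channel)
pw = nn.Conv1d(channel, channel, kernel_size=1)

xs = torch.randn(2, 11, channel)                  # (B, T, C)
mask_pad = torch.ones(2, 1, 11, dtype=torch.bool)

ys = pw(dw(xs.transpose(1, 2))).transpose(1, 2)   # (B, T', C)
mask_pad = mask_pad[:, :, ::stride]               # masks thinned with the same stride
L, T2 = mask_pad.size(-1), ys.size(1)
ys = ys[:, :L, :] if T2 >= L else torch.cat(      # trim or pad so lengths stay aligned
    [ys, torch.zeros(2, L - T2, channel)], dim=1)
print(ys.shape, mask_pad.shape)                   # torch.Size([2, 6, 8]) torch.Size([2, 1, 6])
```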
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return 
padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
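The `TransducerJoint` deleted above combines encoder frames and predictor states by a broadcast add over a (T, U) lattice. A shape-level sketch with assumed dimensions:

```python
import torch
import torch.nn as nn

# Assumed sizes: E = P = join_dim = 8, vocab = 10.
B, T, U, D, V = 2, 6, 4, 8, 10
enc_out = torch.randn(B, T, D)                 # encoder frames
pred_out = torch.randn(B, U, D)                # predictor states, one per emitted label

enc_ffn, pred_ffn, ffn_out = nn.Linear(D, D), nn.Linear(D, D), nn.Linear(D, V)
x = enc_ffn(enc_out).unsqueeze(2) + pred_ffn(pred_out).unsqueeze(1)   # broadcast: (B, T, U, D)
logits = ffn_out(torch.tanh(x))                # (B, T, U, V) lattice scored by the RNNT loss
print(logits.shape)                            # torch.Size([2, 6, 4, 10])
```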
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 @@ 
-from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - 
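The greedy search above interleaves frame steps and predictor steps, advancing the predictor only on non-blank emissions. Below is a runnable shape-level sketch; the stub predictor and joint modules, the blank id, and all sizes are assumptions, and it emits at most one symbol per frame for brevity.

```python
import torch

torch.manual_seed(0)
blank, V, E, P = 0, 6, 4, 4
enc_out = torch.randn(1, 8, E)                    # (1, T, E) encoder output
predictor = torch.nn.GRU(1, P, batch_first=True)  # stand-in for the label predictor
joint = torch.nn.Linear(E + P, V)                 # stand-in for the joint network

hyp, state = [], None
pred_in = torch.zeros(1, 1, 1)                    # blank / sos input to the predictor
pred_out, state = predictor(pred_in, state)
for t in range(enc_out.size(1)):                  # walk the encoder frames left to right
    logits = joint(torch.cat([enc_out[:, t:t + 1, :], pred_out], dim=-1))
    k = logits.log_softmax(-1).argmax(-1).item()
    if k != blank:                                # emit and advance the predictor state
        hyp.append(k)
        pred_out, state = predictor(torch.full((1, 1, 1), float(k)), state)
print(hyp)
```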
assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
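Two ingredients of the prefix beam search above are worth isolating: the shallow fusion of CTC and transducer posteriors, and the log-domain merge of beams that share a prefix. A small sketch; the weights and scores are made up, and `log_add` only mirrors the role of the wenet helper.

```python
import math

def log_add(args):
    """Stable log-sum-exp over a list of log-scores."""
    if all(a == -float('inf') for a in args):
        return -float('inf')
    a_max = max(args)
    return a_max + math.log(sum(math.exp(a - a_max) for a in args))

# Shallow fusion of the per-frame posteriors (transducer_weight=0.7, ctc_weight=0.3 assumed).
ctc_logp, td_logp = -1.2, -0.7
fused = math.log(0.7 * math.exp(td_logp) + 0.3 * math.exp(ctc_logp))

# Prefix fusion: two beam entries with identical hypotheses merge their scores.
scores_for_same_prefix = [-2.3, -3.1]
print(round(fused, 4), round(log_add(scores_for_same_prefix), 4))
```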
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
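The `rnnt_loss` call above fixes a shape contract between the joint output and the label tensors. A stand-alone check with assumed sizes; note the int32 lengths and the `U + 1` blank row in the lattice.

```python
import torch
import torchaudio

B, T, U, V, blank = 2, 6, 3, 5, 0
joint_out = torch.randn(B, T, U + 1, V, requires_grad=True)   # joint lattice, U + 1 includes the blank row
targets = torch.randint(1, V, (B, U), dtype=torch.int32)      # labels with ignore_id already mapped to 0
logit_lengths = torch.tensor([T, T - 2], dtype=torch.int32)   # per-utterance encoder lengths
target_lengths = torch.tensor([U, U - 1], dtype=torch.int32)

loss = torchaudio.functional.rnnt_loss(joint_out, targets, logit_lengths,
                                       target_lengths, blank=blank, reduction="mean")
print(loss)   # scalar; combined with the CTC and attention losses by their weights
```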
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
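# Editor's note: illustrative sketch, not part of the original patch or of the
# deleted file. It checks the length arithmetic of Conv2dSubsampling4 above:
# two Conv2d(kernel=3, stride=2) layers reduce both the time axis and the
# feature axis to ((n - 1) // 2 - 1) // 2, which is why the Linear layer takes
# odim * (((idim - 1) // 2 - 1) // 2) inputs and the mask is sliced twice with
# [:, :, 2::2]. Conv2dSubsampling6/8 follow the same pattern with their strides.
import torch

idim, time = 80, 100
x = torch.randn(1, 1, time, idim)                    # (b, c=1, t, f)
conv = torch.nn.Sequential(
    torch.nn.Conv2d(1, 8, 3, 2), torch.nn.ReLU(),
    torch.nn.Conv2d(8, 8, 3, 2), torch.nn.ReLU(),
)
b, c, t, f = conv(x).size()
assert t == ((time - 1) // 2 - 1) // 2 == 24         # roughly time // 4
assert f == ((idim - 1) // 2 - 1) // 2 == 19         # matches the Linear in_features
mask = torch.ones(1, 1, time, dtype=torch.bool)
assert mask[:, :, 2::2][:, :, 2::2].size(2) == t     # mask subsampled the same way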
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
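# Editor's note: illustrative sketch, not part of the original patch or of the
# deleted file. It applies the reduction performed by _load_json_cmvn above to
# made-up statistics: accumulated sums become a per-dimension mean and an
# inverse standard deviation, with the same 1e-20 variance floor.
import numpy as np

mean_stat = np.array([10.0, 20.0])     # per-dimension sum of features
var_stat = np.array([60.0, 250.0])     # per-dimension sum of squared features
frame_num = 10
means = mean_stat / frame_num                          # [1.0, 2.0]
variance = var_stat / frame_num - means ** 2           # [5.0, 21.0]
istd = 1.0 / np.sqrt(np.maximum(variance, 1.0e-20))    # inverse std, floored
# Downstream (GlobalCMVN, constructed in init_model further below) normalizes
# features roughly as (x - means) * istd.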
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
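# Editor's note: illustrative sketch, not part of the original patch or of the
# deleted file. It walks remove_duplicates_and_blank (defined in common.py
# above) on a toy CTC path: repeated tokens are merged, then blanks (id 0)
# are dropped.
hyp = [0, 3, 3, 0, 4, 4, 0]
collapsed, cur = [], 0
while cur < len(hyp):
    if hyp[cur] != 0:          # keep the first token of every run, unless blank
        collapsed.append(hyp[cur])
    prev = cur
    while cur < len(hyp) and hyp[cur] == hyp[prev]:
        cur += 1               # skip the rest of the run
# collapsed -> [3, 4]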
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
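# Editor's note: illustrative sketch, not part of the original patch or of the
# deleted file. It shows what insert_blank (in ctc_util.py above) produces:
# the label sequence interleaved with the blank id, which is the state
# sequence that forced_align's dynamic program walks over.
import numpy as np

label = np.array([3, 5])
blanks = np.zeros((len(label), 1), dtype=np.int64)        # blank_id = 0
interleaved = np.concatenate([blanks, label[:, None]], axis=1).reshape(-1)
interleaved = np.append(interleaved, interleaved[0])      # trailing blank
# interleaved -> array([0, 3, 0, 5, 0])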
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
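# Editor's note: illustrative sketch, not part of the original patch or of the
# deleted file. It strips Executor.train above down to its gradient
# accumulation core (no DDP, no AMP): the loss is divided by accum_grad and
# the optimizer only steps on every accum_grad-th batch.
import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
accum_grad, clip = 4, 50.0
for batch_idx in range(8):
    x, y = torch.randn(3, 4), torch.randn(3, 2)
    loss = torch.nn.functional.mse_loss(model(x), y) / accum_grad
    loss.backward()                              # gradients accumulate in .grad
    if batch_idx % accum_grad == 0:              # same stepping condition as above
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        if torch.isfinite(grad_norm):
            optimizer.step()
        optimizer.zero_grad()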
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
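# Editor's note: illustrative sketch, not part of the original patch or of the
# deleted file. It checks that the arange-based comparison used in the
# subsequent_mask defined just below is equivalent to the torch.tril version
# kept commented out at the top of this file.
import torch

size = 3
arange = torch.arange(size)
mask = arange.expand(size, size) <= arange.unsqueeze(-1)
assert torch.equal(mask, torch.tril(torch.ones(size, size, dtype=torch.bool)))
# mask -> [[1, 0, 0],
#          [1, 1, 0],
#          [1, 1, 1]]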
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
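# Editor's note (illustrative comment, not part of the original patch): the
# sampling just below first draws chunk_size uniformly from [1, max_len), then
# remaps it so that roughly half of the draws mean "use the full context" and
# the remaining draws land in [1, 25] frames, e.g. with max_len = 200:
#   draw 180  ->  180 > 100, so chunk_size = 200 (full context)
#   draw  60  ->  60 % 25 + 1 = 11 frames per chunk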
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/vkw2021/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
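Putting the three stages described above together, the schedule can be summarized in one small standalone function. This is only an illustrative sketch, mirroring the `_get_warmup_lr` and `_noam_hold_annealing` logic defined earlier in this file; the peak LR, warmup/hold lengths, and minimum LR below are made-up example values:

```python
# Illustrative sketch of the NoamHoldAnnealing phases; parameter values are made up.
def noam_hold_schedule(step, peak_lr=1e-3, warmup_steps=1000,
                       hold_steps=4000, decay_rate=0.5, min_lr=1e-5):
    # `step` is the global training step; `hold_steps` counts only the hold phase.
    if step <= warmup_steps:                   # 1) linear warmup to the peak LR
        return peak_lr * (step + 1) / (warmup_steps + 1)
    if step < warmup_steps + hold_steps:       # 2) hold the peak LR
        return peak_lr
    # 3) polynomial decay; decay_rate=0.5 gives Noam-style step**-0.5 decay and
    #    joins the hold phase continuously at step == warmup_steps + hold_steps.
    t_warmup_decay = max(1, warmup_steps ** decay_rate)
    t_hold_decay = max(1, (step - hold_steps) ** decay_rate)
    return max(peak_lr * t_warmup_decay / t_hold_decay, min_lr)
```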
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/README.md deleted file mode 100644 index 0b84381c047a11f19950ced7d839951c764506cf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# Performance Record - -## Conformer - -* Feature info: using fbank feature, with dither 1.0, with cmvn -* Training info: lr 0.001, batch size 32, 24 gpus on V100, acc_grad 16, 26 epochs -* Decoding info: ctc_weight 0.5, average_num 10 - -| decoding_method | Dev | Test\_Net | Test\_Meeting | -|:-------------------:|:----:|:---------:|:-------------:| -| ctc_greedy_search | 8.88 | 10.29 | 15.96 | -| attention | 9.38 | 10.12 | 17.28 | -| attention_rescoring | 8.69 | 9.7 | 15.59 | - -## Conformer bidecoder - -* Feature info: using fbank feature, with dither 1.0, with cmvn -* Training info: lr 0.001, batch size 32, 24 gpus on V100, acc_grad 16, 26 epochs -* Decoding info: ctc_weight 0.5, average_num 10 - -| decoding_method | Dev | Test\_Net | Test\_Meeting | -|:-------------------:|:----:|:---------:|:-------------:| -| ctc_greedy_search | 8.98 | 9.55 | 16.48 | -| attention | 9.42 | 10.57 | 18.05 | -| attention_rescoring | 8.85 | 9.25 | 16.18 | - -## U2++ conformer - -* Feature info: using fbank feature, with dither 1.0, with cmvn -* Training info: lr 0.001, batch size 48, 8 gpus on A100, acc_grad 16, 50 epochs -* Decoding info: ctc_weight 0.5, reverse_weight 0.3, average_num 10 - -| Decoding mode - Chunk size | Dev | Test\_Net | Test\_Meeting | -|:-----------------------------:|:----:|:---------:|:-------------:| -| ctc greedy search - full | 8.85 | 9.78 | 17.77 | -| ctc greedy search - 16 | 9.32 | 11.02 | 18.79 | -| ctc prefix beam search - full | 8.80 | 9.73 | 17.57 | -| ctc prefix beam search - 16 | 9.25 | 10.96 | 18.62 | -| attention rescoring - full | 8.60 | 9.26 | 17.34 | -| attention rescoring - 16 | 8.87 | 10.22 | 18.11 | diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/conf/train_conformer.yaml deleted file mode 100644 index aaa0d0ab7080fe3d64ab17eaf6c9695820ce6431..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,78 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 512 # dimension of attention - attention_heads: 8 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - cnn_module_norm: 'layer_norm' - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 8 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -dataset_conf: - filter_conf: - max_length: 1200 - min_length: 10 - token_max_length: 100 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: false - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 30 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 1000 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 32 - -grad_clip: 5 -accum_grad: 16 -max_epoch: 26 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 5000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/conf/train_conformer_bidecoder.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/conf/train_conformer_bidecoder.yaml deleted file mode 100644 index 6451e71ae684a76b5774010e84a2a38634514019..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/conf/train_conformer_bidecoder.yaml +++ /dev/null @@ -1,80 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 512 # dimension of attention - attention_heads: 8 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - cnn_module_norm: 'layer_norm' - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: bitransformer -decoder_conf: - attention_heads: 8 - linear_units: 2048 - num_blocks: 3 - r_num_blocks: 3 - dropout_rate: 0.1 - positional_dropout_rate: 
0.1 - self_attention_dropout_rate: 0.0 - src_attention_dropout_rate: 0.0 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - reverse_weight: 0.3 - -dataset_conf: - filter_conf: - max_length: 1200 - min_length: 10 - token_max_length: 100 - token_min_length: 1 - resample_conf: - resample_rate: 16000 - speed_perturb: false - fbank_conf: - num_mel_bins: 80 - frame_shift: 10 - frame_length: 25 - dither: 1.0 - spec_aug: true - spec_aug_conf: - num_t_mask: 2 - num_f_mask: 2 - max_t: 50 - max_f: 30 - shuffle: true - shuffle_conf: - shuffle_size: 1500 - sort: true - sort_conf: - sort_size: 1000 # sort_size should be less than shuffle_size - batch_conf: - batch_type: 'static' # static or dynamic - batch_size: 32 - -grad_clip: 5 -accum_grad: 16 -max_epoch: 26 -log_interval: 100 - -optim: adam -optim_conf: - lr: 0.001 -scheduler: warmuplr # pytorch v1.1.0+ required -scheduler_conf: - warmup_steps: 5000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/local/extract_meta.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/local/extract_meta.py deleted file mode 100644 index ce2871d0b8e5cf14a552175cfe5d1699d8bf226d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/local/extract_meta.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright 2021 Xiaomi Corporation (Author: Yongqing Wang) -# Mobvoi Inc(Author: Di Wu, Binbin Zhang) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -import argparse -import json - - -def get_args(): - parser = argparse.ArgumentParser(description=""" - This script is used to process raw json dataset of WenetSpeech, - where the long wav is splitinto segments and - data of wenet format is generated. 
- """) - parser.add_argument('input_json', help="""Input json file of WenetSpeech""") - parser.add_argument('output_dir', help="""Output dir for prepared data""") - - args = parser.parse_args() - return args - - -def meta_analysis(input_json, output_dir): - input_dir = os.path.dirname(input_json) - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - try: - with open(input_json, 'r') as injson: - json_data = json.load(injson) - except Exception: - sys.exit(f'Failed to load input json file: {input_json}') - else: - if json_data['audios'] is not None: - with open(f'{output_dir}/text', 'w') as utt2text, \ - open(f'{output_dir}/segments', 'w') as segments, \ - open(f'{output_dir}/utt2dur', 'w') as utt2dur, \ - open(f'{output_dir}/wav.scp', 'w') as wavscp, \ - open(f'{output_dir}/utt2subsets', 'w') as utt2subsets, \ - open(f'{output_dir}/reco2dur', 'w') as reco2dur: - for long_audio in json_data['audios']: - try: - long_audio_path = os.path.realpath( - os.path.join(input_dir, long_audio['path'])) - aid = long_audio['aid'] - segments_lists = long_audio['segments'] - duration = long_audio['duration'] - assert (os.path.exists(long_audio_path)) - except AssertionError: - print(f'''Warning: {aid} something is wrong, - maybe AssertionError, skipped''') - continue - except Exception: - print(f'''Warning: {aid} something is wrong, maybe the - error path: {long_audio_path}, skipped''') - continue - else: - wavscp.write(f'{aid}\t{long_audio_path}\n') - reco2dur.write(f'{aid}\t{duration}\n') - for segment_file in segments_lists: - try: - sid = segment_file['sid'] - start_time = segment_file['begin_time'] - end_time = segment_file['end_time'] - dur = end_time - start_time - text = segment_file['text'] - segment_subsets = segment_file["subsets"] - except Exception: - print(f'''Warning: {segment_file} something - is wrong, skipped''') - continue - else: - utt2text.write(f'{sid}\t{text}\n') - segments.write( - f'{sid}\t{aid}\t{start_time}\t{end_time}\n' - ) - utt2dur.write(f'{sid}\t{dur}\n') - segment_sub_names = " ".join(segment_subsets) - utt2subsets.write( - f'{sid}\t{segment_sub_names}\n') - -def main(): - args = get_args() - - meta_analysis(args.input_json, args.output_dir) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/local/process_opus.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/local/process_opus.py deleted file mode 100644 index 9f71eb1a62509739d318b564a3deb2e7acc3347f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/local/process_opus.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright 2021 NPU, ASLP Group (Author: Qijie Shao) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# process_opus.py: segmentation and downsampling of opus audio - -# usage: python3 process_opus.py wav.scp segments output_wav.scp - -from pydub import AudioSegment -import sys -import os - - -def read_file(wav_scp, segments): - wav_scp_dict = {} - with open(wav_scp, 'r', encoding='UTF-8') as fin: - for line_str in fin: - wav_id, path = line_str.strip().split() - wav_scp_dict[wav_id] = path - - utt_list = [] - seg_path_list = [] - start_time_list = [] - end_time_list = [] - with open(segments, 'r', encoding='UTF-8') as fin: - for line_str in fin: - arr = line_str.strip().split() - assert len(arr) == 4 - utt_list.append(arr[0]) - seg_path_list.append(wav_scp_dict[arr[1]]) - start_time_list.append(float(arr[2])) - end_time_list.append(float(arr[3])) - return utt_list, seg_path_list, start_time_list, end_time_list - - -# TODO(Qijie): Fix the process logic -def output(output_wav_scp, utt_list, seg_path_list, start_time_list, - end_time_list): - num_utts = len(utt_list) - step = int(num_utts * 0.01) - with open(output_wav_scp, 'w', encoding='UTF-8') as fout: - previous_wav_path = "" - for i in range(num_utts): - utt_id = utt_list[i] - current_wav_path = seg_path_list[i] - output_dir = (os.path.dirname(current_wav_path)) \ - .replace("audio", 'audio_seg') - seg_wav_path = os.path.join(output_dir, utt_id + '.wav') - - # if not os.path.exists(output_dir): - # os.makedirs(output_dir) - - if current_wav_path != previous_wav_path: - source_wav = AudioSegment.from_file(current_wav_path) - previous_wav_path = current_wav_path - - start = int(start_time_list[i] * 1000) - end = int(end_time_list[i] * 1000) - target_audio = source_wav[start:end].set_frame_rate(16000) \ - .set_sample_width(2) - target_audio.export(seg_wav_path, format="wav") - - fout.write("{} {}\n".format(utt_id, seg_wav_path)) - if i % step == 0: - print("seg wav finished: {}%".format(int(i / step))) - - -def main(): - wav_scp = sys.argv[1] - segments = sys.argv[2] - output_wav_scp = sys.argv[3] - - utt_list, seg_path_list, start_time_list, end_time_list \ - = read_file(wav_scp, segments) - output(output_wav_scp, utt_list, seg_path_list, start_time_list, - end_time_list) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/local/wenetspeech_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/local/wenetspeech_data_prep.sh deleted file mode 100644 index 0fd3b5bc3893f7ef534010203dc3c97337277df7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/local/wenetspeech_data_prep.sh +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2021 Xiaomi Corporation (Author: Yongqing Wang) -# Seasalt AI, Inc (Author: Guoguo Chen) -# Mobvoi Inc(Author: Di Wu, Binbin Zhang) -# NPU, ASLP Group (Author: Qijie Shao) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e -set -o pipefail - -stage=1 -prefix= -train_subset=L - -. 
./tools/parse_options.sh || exit 1; - -filter_by_id () { - idlist=$1 - input=$2 - output=$3 - field=1 - if [ $# -eq 4 ]; then - field=$4 - fi - cat $input | perl -se ' - open(F, "<$idlist") || die "Could not open id-list file $idlist"; - while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; - } - while(<>) { - @A = split; - @A > 0 || die "Invalid file line $_"; - @A >= $field || die "Invalid file line $_"; - if ($seen{$A[$field-1]}) { - print $_; - } - }' -- -idlist="$idlist" -field="$field" > $output ||\ - (echo "$0: filter_by_id() error: $input" && exit 1) || exit 1; -} - -subset_data_dir () { - utt_list=$1 - src_dir=$2 - dest_dir=$3 - mkdir -p $dest_dir || exit 1; - # wav.scp text segments utt2dur - filter_by_id $utt_list $src_dir/utt2dur $dest_dir/utt2dur ||\ - (echo "$0: subset_data_dir() error: $src_dir/utt2dur" && exit 1) || exit 1; - filter_by_id $utt_list $src_dir/text $dest_dir/text ||\ - (echo "$0: subset_data_dir() error: $src_dir/text" && exit 1) || exit 1; - filter_by_id $utt_list $src_dir/segments $dest_dir/segments ||\ - (echo "$0: subset_data_dir() error: $src_dir/segments" && exit 1) || exit 1; - awk '{print $2}' $dest_dir/segments | sort | uniq > $dest_dir/reco - filter_by_id $dest_dir/reco $src_dir/wav.scp $dest_dir/wav.scp ||\ - (echo "$0: subset_data_dir() error: $src_dir/wav.scp" && exit 1) || exit 1; - rm -f $dest_dir/reco -} - -if [ $# -ne 2 ]; then - echo "Usage: $0 [options] " - echo " e.g.: $0 --train-subset L /disk1/audio_data/wenetspeech/ data/" - echo "" - echo "This script takes the WenetSpeech source directory, and prepares the" - echo "WeNet format data directory." - echo " --prefix # Prefix for output data directory." - echo " --stage # Processing stage." - echo " --train-subset # Train subset to be created." - exit 1 -fi - -wenetspeech_dir=$1 -data_dir=$2 - -declare -A subsets -subsets=( - [L]="train_l" - [M]="train_m" - [S]="train_s" - [W]="train_w" - [DEV]="dev" - [TEST_NET]="test_net" - [TEST_MEETING]="test_meeting") - -prefix=${prefix:+${prefix}_} - -corpus_dir=$data_dir/${prefix}corpus/ -if [ $stage -le 1 ]; then - echo "$0: Extract meta into $corpus_dir" - # Sanity check. - [ ! -f $wenetspeech_dir/WenetSpeech.json ] &&\ - echo "$0: Please download $wenetspeech_dir/WenetSpeech.json!" && exit 1; - [ ! -d $wenetspeech_dir/audio ] &&\ - echo "$0: Please download $wenetspeech_dir/audio!" && exit 1; - - [ ! -d $corpus_dir ] && mkdir -p $corpus_dir - - # Files to be created: - # wav.scp text segments utt2dur - python3 local/extract_meta.py \ - $wenetspeech_dir/WenetSpeech.json $corpus_dir || exit 1; -fi - -if [ $stage -le 2 ]; then - echo "$0: Split data to train, dev, test_net, and test_meeting" - [ ! -f $corpus_dir/utt2subsets ] &&\ - echo "$0: No such file $corpus_dir/utt2subsets!" && exit 1; - for label in $train_subset DEV TEST_NET TEST_MEETING; do - if [ ! ${subsets[$label]+set} ]; then - echo "$0: Subset $label is not defined in WenetSpeech.json." && exit 1; - fi - subset=${subsets[$label]} - [ ! 
-d $data_dir/${prefix}$subset ] && mkdir -p $data_dir/${prefix}$subset - cat $corpus_dir/utt2subsets | \ - awk -v s=$label '{for (i=2;i<=NF;i++) if($i==s) print $0;}' \ - > $corpus_dir/${prefix}${subset}_utt_list|| exit 1; - subset_data_dir $corpus_dir/${prefix}${subset}_utt_list \ - $corpus_dir $data_dir/${prefix}$subset || exit 1; - done -fi - -echo "$0: Done" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/run.sh deleted file mode 100644 index 52d288375503321a1a2dd702ee8806a213bb44e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/run.sh +++ /dev/null @@ -1,208 +0,0 @@ -#!/bin/bash - -# Copyright 2021 Mobvoi Inc(Author: Di Wu, Binbin Zhang) -# NPU, ASLP Group (Author: Qijie Shao) - -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -stage=0 -stop_stage=5 - -# The num of nodes -num_nodes=1 -# The rank of current node -node_rank=0 - -# Use your own data path. You need to download the WenetSpeech dataset by yourself. -wenetspeech_data_dir=/ssd/nfs07/binbinzhang/wenetspeech -# Make sure you have 1.2T for ${shards_dir} -shards_dir=/ssd/nfs06/unified_data/wenetspeech_shards - -# WenetSpeech training set -set=L -train_set=train_`echo $set | tr 'A-Z' 'a-z'` -dev_set=dev -test_sets="test_net test_meeting" - -train_config=conf/train_conformer.yaml -checkpoint= -cmvn=true -cmvn_sampling_divisor=20 # 20 means 5% of the training data to estimate cmvn -dir=exp/conformer - -decode_checkpoint= -average_checkpoint=true -average_num=10 -decode_modes="attention_rescoring ctc_greedy_search" - -. tools/parse_options.sh || exit 1; - -set -u -set -o pipefail - -# Data download -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - echo "Please follow https://github.com/wenet-e2e/WenetSpeech to download the data." 
- exit 0; -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - echo "Data preparation" - local/wenetspeech_data_prep.sh \ - --train-subset $set \ - $wenetspeech_data_dir \ - data || exit 1; -fi - -dict=data/dict/lang_char.txt -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - echo "Make a dictionary" - echo "dictionary: ${dict}" - mkdir -p $(dirname $dict) - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - echo "▁ 2" >> ${dict} # ▁ is for space - tools/text2token.py -s 1 -n 1 --space "▁" data/${train_set}/text \ - | cut -f 2- -d" " | tr " " "\n" \ - | sort | uniq | grep -a -v -e '^\s*$' \ - | grep -v "▁" \ - | awk '{print $0 " " NR+2}' >> ${dict} \ - || exit 1; - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict -fi - -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - echo "Compute cmvn" - # Here we use all the training data, you can sample some some data to save time - # BUG!!! We should use the segmented data for CMVN - if $cmvn; then - full_size=`cat data/${train_set}/wav.scp | wc -l` - sampling_size=$((full_size / cmvn_sampling_divisor)) - shuf -n $sampling_size data/$train_set/wav.scp \ - > data/$train_set/wav.scp.sampled - python3 tools/compute_cmvn_stats.py \ - --num_workers 16 \ - --train_config $train_config \ - --in_scp data/$train_set/wav.scp.sampled \ - --out_cmvn data/$train_set/global_cmvn \ - || exit 1; - fi -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "Making shards, please wait..." - RED='\033[0;31m' - NOCOLOR='\033[0m' - echo -e "It requires ${RED}1.2T ${NOCOLOR}space for $shards_dir, please make sure you have enough space" - echo -e "It takes about ${RED}12 ${NOCOLOR}hours with 32 threads" - for x in $dev_set $test_sets ${train_set}; do - dst=$shards_dir/$x - mkdir -p $dst - tools/make_shard_list.py --resample 16000 --num_utts_per_shard 1000 \ - --num_threads 32 --segments data/$x/segments \ - data/$x/wav.scp data/$x/text \ - $(realpath $dst) data/$x/data.list - done -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - echo "Start training" - mkdir -p $dir - # INIT_FILE is for DDP synchronization - INIT_FILE=$dir/ddp_init - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="nccl" - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp data/${train_set}/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - # train.py will write $train_config to $dir/train.yaml with model input - # and output dimension, train.yaml will be used for inference or model - # export later - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. 
- rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type "shard" \ - --symbol_table $dict \ - --train_data data/$train_set/data.list \ - --cv_data data/$dev_set/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - $cmvn_opts \ - --num_workers 8 \ - --pin_memory - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - echo "Test model" - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Specify decoding_chunk_size if it's a unified dynamic chunk trained model - # -1 for full chunk - decoding_chunk_size= - ctc_weight=0.5 - reverse_weight=0.0 - for testset in ${test_sets} ${dev_set}; do - { - for mode in ${decode_modes}; do - { - base=$(basename $decode_checkpoint) - result_dir=$dir/${testset}_${mode}_${base} - mkdir -p $result_dir - python wenet/bin/recognize.py --gpu 0 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type "shard" \ - --test_data data/$testset/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --result_file $result_dir/text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - python tools/compute-wer.py --char=1 --v=1 \ - data/$testset/text $result_dir/text > $result_dir/wer - } - done - wait - } - done -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - echo "Export the best model you want" - python wenet/bin/export_jit.py \ - --config $dir/train.yaml \ - --checkpoint $dir/avg_${average_num}.pt \ - --output_file $dir/final.zip -fi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. 
tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. ./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. 
- elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - 
pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = 
{'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. 
-has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! $skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = 
result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. - format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. 
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - 
parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University 
(author: Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . 
- "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . - " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. 
- } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . $cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. 
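
Before compiling T.fst, the step just below numbers an epsilon symbol, the model units from units.txt, and the disambiguation symbols into a tokens.txt symbol table. A rough Python equivalent of that numbering is given here; the special symbol names are conventional assumptions, since the angle-bracket tokens were dropped from this copy of the script.

```python
# Rough equivalent of the tokens.txt construction below: epsilon first, then
# the model units, then the disambiguation symbols, each with a 0-based index.
# The special symbol names ("<eps>", "<blank>") are conventional assumptions.
units = ["<blank>", "a", "b", "c"]      # first column of units.txt (illustrative)
disambig = ["#0", "#1"]                 # emitted by add_lex_disambig.pl
tokens = ["<eps>"] + units + disambig

with open("tokens.txt", "w", encoding="utf-8") as f:
    for idx, sym in enumerate(tokens):
        f.write(f"{sym} {idx}\n")       # e.g. "<eps> 0", "<blank> 1", ...
```
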
-cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. Will be used in lexicon and language model FST compiling. -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', '')) - node 
+= 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n + 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . 
- "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! 
defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. - if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. 
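
As a preface to the char-level mapping implemented just below: an English word is optionally prefixed with "▁" (when that piece is a known unit) and then split into characters, while Chinese words are split directly. A toy re-statement in Python, with an illustrative unit table that is not taken from this model's dictionary:

```python
# Toy version of the char-level mapping applied just below: prefix English
# words with "▁" when that piece is in the unit table, then split into chars.
# The unit_table contents are illustrative only.
unit_table = {"▁", "h", "i", "你", "好"}

def word_to_chars(word):
    if word.encode("utf-8").isalpha() and "▁" in unit_table:
        word = "▁" + word
    return " ".join(word)

print(word_to_chars("hi"))    # ▁ h i
print(word_to_chars("你好"))   # 你 好
```
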
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you 
may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin 
if PY2 else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- - -# Parse command-line options. -# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! 
conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. $(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! 
-x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - 
stack.enter_context(open(output, "w", encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) 
- - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. 
-[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. - awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . - "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . - "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. 
(Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, 
len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. - -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! 
-d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." 
- exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! 
cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! 
cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. -# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) 
$raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. - -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. 
@col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. 
- if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with 
open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - 
non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = 
feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
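The loop that follows drops any feed that the exported graph no longer declares, for exactly the reason given in the NOTE above. A standalone sketch of the same filtering pattern, with a placeholder model path and made-up feed shapes:

```python
# Standalone sketch of the feed-filtering pattern used by the consistency
# checks: ONNX export can fold away inputs such as `required_cache_size` or
# `att_mask`, so only the inputs the session actually declares are fed.
# "encoder.onnx" and the feed shapes below are placeholders.
import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession("encoder.onnx")
declared = {inp.name for inp in session.get_inputs()}

candidate_feeds = {
    "chunk": np.zeros((1, 67, 80), dtype=np.float32),
    "required_cache_size": np.array(64, dtype=np.int64),  # may have been folded away
}
feeds = {name: value for name, value in candidate_feeds.items() if name in declared}
outputs = session.run(None, feeds)
```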
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
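Each exported graph above is additionally quantized with `quantize_dynamic`, which stores the weights in uint8 while activation scales are computed on the fly at inference time. A minimal sketch of that step on its own, with placeholder file names:

```python
# Sketch of the dynamic-quantization step applied to each exported graph.
# "ctc.onnx" / "ctc.quant.onnx" are placeholder paths.
from onnxruntime.quantization import quantize_dynamic, QuantType

quantize_dynamic(
    model_input="ctc.onnx",
    model_output="ctc.quant.onnx",
    weight_type=QuantType.QUInt8,
)
```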
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
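Every entry of this `arguments` dict is attached to the exported graphs as ONNX metadata and read back through `get_modelmeta()` when the models are checked. A minimal sketch of that round trip, using a placeholder path and made-up key/value pairs:

```python
# Sketch of the metadata round trip used by the export functions: each entry
# is stored in the ONNX file and recovered from the runtime session.
# Path and values are placeholders.
import onnx
import onnxruntime

model = onnx.load("encoder.onnx")
for key, value in {"chunk_size": 16, "left_chunks": 4}.items():
    entry = model.metadata_props.add()
    entry.key, entry.value = str(key), str(value)
onnx.save(model, "encoder.onnx")

meta = onnxruntime.InferenceSession("encoder.onnx").get_modelmeta()
print(meta.custom_metadata_map)  # {'chunk_size': '16', 'left_chunks': '4'}
```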
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
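The `::factor` cache slicing in this loop relies on `calculate_downsampling_factor` defined above. A standalone worked example, assuming illustrative Efficient Conformer settings (`stride_layer_idx=[3, 7]` and `stride=[2, 2]` are not taken from any config here):

```python
# Standalone version of calculate_downsampling_factor with illustrative
# Efficient Conformer settings (assumed, not read from this repository).
stride_layer_idx = [3, 7]
stride = [2, 2]

def downsampling_factor(i: int) -> int:
    factor = 1
    for idx, stride_idx in enumerate(stride_layer_idx):
        if i > stride_idx:
            factor *= stride[idx]
    return factor

print([downsampling_factor(i) for i in range(10)])
# [1, 1, 1, 1, 2, 2, 2, 2, 4, 4] -> layers after each stride layer see a coarser cache
```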
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
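The `Decoder` module exported just below fuses left-to-right and right-to-left attention scores with `reverse_weight`, adds the CTC prefix scores weighted by `ctc_weight`, and picks the best hypothesis per utterance. A tiny numeric illustration with made-up scores:

```python
# Tiny numeric illustration of the rescoring fusion performed by the Decoder
# module. The scores below are per-hypothesis summed log-probs and are made up.
import numpy as np

ctc_weight, reverse_weight = 0.5, 0.3
attn_score = np.array([[-3.2, -4.1]])    # (batch=1, beam=2) left-to-right decoder scores
r_attn_score = np.array([[-3.5, -3.9]])  # right-to-left decoder scores
ctc_score = np.array([[-4.0, -1.0]])     # CTC prefix scores per hypothesis

fused = (1 - reverse_weight) * attn_score + reverse_weight * r_attn_score
fused = fused + ctc_weight * ctc_score
best_index = fused.argmax(axis=1)
print(best_index)  # [1]: the second hypothesis has the highest fused score
```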
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
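The optional FP16 step at the end of the export script above converts the saved FP32 encoder and decoder graphs with onnxmltools. A minimal sketch of that conversion, with placeholder file names:

```python
# Sketch of the optional FP16 conversion applied to the exported graphs.
# "encoder.onnx" / "encoder_fp16.onnx" are placeholder paths.
import onnxmltools
from onnxmltools.utils.float16_converter import convert_float_to_float16

model_fp32 = onnxmltools.utils.load_model("encoder.onnx")
model_fp16 = convert_float_to_float16(model_fp32)
onnxmltools.utils.save_model(model_fp16, "encoder_fp16.onnx")
```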
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
-        else:
-            for p in sp.encode_as_pieces(ch_or_w):
-                tokens.append(p)
-
-    return tokens
-
-
-def tokenize(data,
-             symbol_table,
-             bpe_model=None,
-             non_lang_syms=None,
-             split_with_space=False):
-    """ Decode text to chars or BPE
-        Inplace operation
-
-        Args:
-            data: Iterable[{key, wav, txt, sample_rate}]
-
-        Returns:
-            Iterable[{key, wav, txt, tokens, label, sample_rate}]
-    """
-    if non_lang_syms is not None:
-        non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})")
-    else:
-        non_lang_syms = {}
-        non_lang_syms_pattern = None
-
-    if bpe_model is not None:
-        import sentencepiece as spm
-        sp = spm.SentencePieceProcessor()
-        sp.load(bpe_model)
-    else:
-        sp = None
-
-    for sample in data:
-        assert 'txt' in sample
-        txt = sample['txt'].strip()
-        if non_lang_syms_pattern is not None:
-            parts = non_lang_syms_pattern.split(txt.upper())
-            parts = [w for w in parts if len(w.strip()) > 0]
-        else:
-            parts = [txt]
-
-        label = []
-        tokens = []
-        for part in parts:
-            if part in non_lang_syms:
-                tokens.append(part)
-            else:
-                if bpe_model is not None:
-                    tokens.extend(__tokenize_by_bpe_model(sp, part))
-                else:
-                    if split_with_space:
-                        part = part.split(" ")
-                    for ch in part:
-                        if ch == ' ':
-                            ch = "▁"
-                        tokens.append(ch)
-
-        for ch in tokens:
-            if ch in symbol_table:
-                label.append(symbol_table[ch])
-            elif '<unk>' in symbol_table:
-                label.append(symbol_table['<unk>'])
-
-        sample['tokens'] = tokens
-        sample['label'] = label
-        yield sample
-
-
-def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80):
-    """ Do spec augmentation
-        Inplace operation
-
-        Args:
-            data: Iterable[{key, feat, label}]
-            num_t_mask: number of time mask to apply
-            num_f_mask: number of freq mask to apply
-            max_t: max width of time mask
-            max_f: max width of freq mask
-            max_w: max width of time warp
-
-        Returns
-            Iterable[{key, feat, label}]
-    """
-    for sample in data:
-        assert 'feat' in sample
-        x = sample['feat']
-        assert isinstance(x, torch.Tensor)
-        y = x.clone().detach()
-        max_frames = y.size(0)
-        max_freq = y.size(1)
-        # time mask
-        for i in range(num_t_mask):
-            start = random.randint(0, max_frames - 1)
-            length = random.randint(1, max_t)
-            end = min(max_frames, start + length)
-            y[start:end, :] = 0
-        # freq mask
-        for i in range(num_f_mask):
-            start = random.randint(0, max_freq - 1)
-            length = random.randint(1, max_f)
-            end = min(max_freq, start + length)
-            y[:, start:end] = 0
-        sample['feat'] = y
-        yield sample
-
-
-def spec_sub(data, max_t=20, num_t_sub=3):
-    """ Do spec substitute
-        Inplace operation
-
-        Args:
-            data: Iterable[{key, feat, label}]
-            max_t: max width of time substitute
-            num_t_sub: number of time substitute to apply
-
-        Returns
-            Iterable[{key, feat, label}]
-    """
-    for sample in data:
-        assert 'feat' in sample
-        x = sample['feat']
-        assert isinstance(x, torch.Tensor)
-        y = x.clone().detach()
-        max_frames = y.size(0)
-        for i in range(num_t_sub):
-            start = random.randint(0, max_frames - 1)
-            length = random.randint(1, max_t)
-            end = min(max_frames, start + length)
-            # only substitute the earlier time chosen randomly for current time
-            pos = random.randint(0, start)
-            y[start:end, :] = x[start - pos:end - pos, :]
-        sample['feat'] = y
-        yield sample
-
-
-def spec_trim(data, max_t=20):
-    """ Trim tailing frames. Inplace operation.
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. 
- Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
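# --- Editor's illustrative sketch (not part of the deleted wenet sources or of this patch) ---
# A minimal, self-contained demonstration of the KV-cache bookkeeping described in the
# NOTE above: the cache stores K and V concatenated on the last dimension, each new chunk
# is appended on the time axis, and splitting/concatenating a zero-length cache is a
# harmless no-op (which is what keeps the ONNX export path uniform for the first chunk).
# Shapes and names here are hypothetical.
import torch

head, d_k, chunk = 4, 64, 16
cache = torch.zeros((1, head, 0, d_k * 2))            # empty cache fed for the 1st chunk

for _ in range(3):
    k = torch.randn(1, head, chunk, d_k)              # new keys for this chunk
    v = torch.randn(1, head, chunk, d_k)              # new values for this chunk
    key_cache, value_cache = torch.split(cache, cache.size(-1) // 2, dim=-1)
    k = torch.cat([key_cache, k], dim=2)              # prepend cached keys (no-op when empty)
    v = torch.cat([value_cache, v], dim=2)            # prepend cached values
    cache = torch.cat((k, v), dim=-1)                 # store K|V for the next chunk

assert cache.shape == (1, head, 3 * chunk, d_k * 2)
# --- end of editor's sketch ---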
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
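# --- Editor's illustrative sketch (not part of the deleted wenet sources or of this patch) ---
# A hypothetical demo of the causal-convolution cache described above: a causal depthwise
# conv needs `lorder = kernel_size - 1` frames of left context, supplied either by zero
# padding (first chunk) or by the cached tail of the previous chunk (later chunks). The
# streaming output then matches running the conv over the full signal. Names are assumed.
import torch
import torch.nn as nn

channels, kernel_size = 8, 15
lorder = kernel_size - 1
conv = nn.Conv1d(channels, channels, kernel_size, groups=channels)   # depthwise, no padding

chunk1 = torch.randn(1, channels, 20)
chunk2 = torch.randn(1, channels, 20)

# Chunk 1: no history yet, pad with zeros on the left.
x1 = nn.functional.pad(chunk1, (lorder, 0))
y1 = conv(x1)
cache = x1[:, :, -lorder:]                     # keep the last lorder frames as left context

# Chunk 2: reuse the cached left context instead of zero padding.
y2 = conv(torch.cat((cache, chunk2), dim=2))

# Streaming result equals the non-streaming causal convolution over both chunks.
full = conv(nn.functional.pad(torch.cat((chunk1, chunk2), dim=2), (lorder, 0)))
assert torch.allclose(torch.cat((y1, y2), dim=2), full, atol=1e-6)
# --- end of editor's sketch ---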
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = 
False, - static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, 
max_time_step] - """ - return padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ 
-1,54 +0,0 @@ -from typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - 
batch_size = speech.shape[0] - assert batch_size == 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
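The rescoring rule in `attention_rescoring` reduces to a weighted sum of three log-scores per hypothesis: the left-to-right decoder score, an optional right-to-left decoder score, and the CTC prefix score carried over from beam search. A framework-free restatement of just that combination follows; the function name and the example numbers are illustrative only.

```python
def combine_rescoring_score(attn_score: float,
                            r_attn_score: float,
                            ctc_score: float,
                            ctc_weight: float,
                            reverse_weight: float) -> float:
    """Rank score for one n-best hypothesis in attention rescoring.

    attn_score / r_attn_score are summed log-probs from the left-to-right and
    right-to-left decoders (the latter only matters with a bidirectional
    decoder); ctc_score is the prefix beam search score of the hypothesis.
    """
    score = attn_score
    if reverse_weight > 0:
        score = attn_score * (1 - reverse_weight) + r_attn_score * reverse_weight
    return score + ctc_weight * ctc_score

# Example: pick the best of three (attn, r_attn, ctc) score triples.
hyps = [(-12.3, -13.1, -20.5), (-11.8, -14.0, -22.0), (-13.0, -12.7, -19.9)]
best = max(range(len(hyps)),
           key=lambda i: combine_rescoring_score(*hyps[i], ctc_weight=0.5, reverse_weight=0.3))
```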
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
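The index-gather construction above, which replaces `reverse_pad_list` so the graph stays ONNX-exportable, is easy to verify in isolation. Below is a small standalone reproduction of the same trick using the example values from the comments; the `sos=10` / `eos=11` ids and the helper name are illustrative assumptions.

```python
import torch

def reverse_hyps_for_r_decoder(hyps: torch.Tensor, hyps_lens: torch.Tensor,
                               sos: int, eos: int) -> torch.Tensor:
    """ONNX-friendly reversal of padded hypotheses (no pad_sequence needed)."""
    r_hyps = hyps[:, 1:]                      # drop the leading <sos>
    r_hyps_lens = hyps_lens - 1
    max_len = torch.max(r_hyps_lens)
    index_range = torch.arange(0, max_len, 1)
    seq_len_expand = r_hyps_lens.unsqueeze(1)
    seq_mask = seq_len_expand > index_range   # True on real tokens, False on padding
    index = (seq_len_expand - 1) - index_range
    index = index * seq_mask                  # clamp negative indices to 0
    r_hyps = torch.gather(r_hyps, 1, index)   # reverse each sequence in place
    r_hyps = torch.where(seq_mask, r_hyps, torch.tensor(eos))
    return torch.cat([hyps[:, 0:1], r_hyps], dim=1)

hyps = torch.tensor([[10, 1, 2, 3], [10, 9, 8, 4], [10, 2, 11, 11]])
lens = torch.tensor([4, 4, 2])
print(reverse_hyps_for_r_decoder(hyps, lens, sos=10, eos=11))
# tensor([[10,  3,  2,  1], [10,  4,  8,  9], [10,  2, 11, 11]])
```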
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
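The cache handling in `MultiHeadedAttention.forward` relies on the fact that concatenating a zero-length tensor along the time axis is a no-op, which keeps the exported graph free of data-dependent branches. A minimal demonstration of that key/value cache pattern, detached from the attention math (the helper name is illustrative):

```python
import torch

def update_kv_cache(k: torch.Tensor, v: torch.Tensor, cache: torch.Tensor):
    """Prepend cached keys/values along time and return the new cache.

    k, v:   (batch, head, time, d_k)
    cache:  (batch, head, cache_t, d_k * 2), where cache_t may be 0 for the 1st chunk
    """
    if cache.size(0) > 0:
        key_cache, value_cache = torch.split(cache, cache.size(-1) // 2, dim=-1)
        k = torch.cat([key_cache, k], dim=2)
        v = torch.cat([value_cache, v], dim=2)
    new_cache = torch.cat((k, v), dim=-1)
    return k, v, new_cache

# Concatenating an empty cache leaves k/v untouched, the property the export relies on.
k = torch.randn(1, 4, 16, 64)
v = torch.randn(1, 4, 16, 64)
empty = torch.zeros(1, 4, 0, 128)
k1, v1, cache1 = update_kv_cache(k, v, empty)
assert torch.equal(k1, k) and cache1.shape == (1, 4, 16, 128)
```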
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
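The scoring step at the end of `RelPositionMultiHeadedAttention.forward` is the Transformer-XL formulation with the `rel_shift` term deliberately dropped: a content term and a position term are computed separately and summed before scaling. A compact restatement of just that score, with randomly initialised tensors standing in for the projected inputs (shapes follow the comments in the original code):

```python
import math
import torch

batch, head, d_k, t1, t2 = 2, 4, 64, 10, 10
q = torch.randn(batch, t1, head, d_k)        # queries before the transpose in forward_qkv
k = torch.randn(batch, head, t2, d_k)        # projected keys
p = torch.randn(batch, head, t2, d_k)        # projected relative position embeddings
pos_bias_u = torch.randn(head, d_k)          # learned bias for the content term (matrices a, c)
pos_bias_v = torch.randn(head, d_k)          # learned bias for the position term (matrices b, d)

q_with_bias_u = (q + pos_bias_u).transpose(1, 2)              # (batch, head, t1, d_k)
q_with_bias_v = (q + pos_bias_v).transpose(1, 2)

matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1))  # content-content / content-pos
matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1))  # pos-content / pos-pos
scores = (matrix_ac + matrix_bd) / math.sqrt(d_k)             # (batch, head, t1, t2)
```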
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
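The mask construction at the top of `TransformerDecoder.forward` combines a padding mask with a causal (subsequent) mask before the decoder layers run. A framework-free sketch of the same combination, with the two WeNet mask helpers inlined for clarity; the helper names here are illustrative stand-ins, not the originals.

```python
import torch

def make_non_pad_mask(lengths: torch.Tensor, max_len: int) -> torch.Tensor:
    """(batch, 1, max_len): True on real tokens, False on padding."""
    seq = torch.arange(max_len, device=lengths.device)
    return (seq.unsqueeze(0) < lengths.unsqueeze(1)).unsqueeze(1)

def subsequent_mask(size: int, device=torch.device('cpu')) -> torch.Tensor:
    """(1, size, size): lower-triangular causal mask."""
    return torch.tril(torch.ones(size, size, dtype=torch.bool, device=device)).unsqueeze(0)

ys_in_lens = torch.tensor([4, 2])
maxlen = 4
tgt_mask = make_non_pad_mask(ys_in_lens, maxlen)      # (B, 1, L)
tgt_mask = tgt_mask & subsequent_mask(maxlen)         # (B, L, L) after broadcasting
# Row t of each (L, L) slice marks which positions token t may attend to.
print(tgt_mask[1].int())
# tensor([[1, 0, 0, 0],
#         [1, 1, 0, 0],
#         [1, 1, 0, 0],
#         [1, 1, 0, 0]])
```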
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. 
- dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
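# Illustrative sketch (not from the wenet sources above): the PositionalEncoding
# class deleted in embedding.py precomputes a (1, max_len, d_model) sinusoidal table
# and adds a slice of it, starting at `offset`, to the input scaled by sqrt(d_model).
# The snippet rebuilds that table standalone and checks one entry against the
# closed-form PE(pos, 2i) = sin(pos / 10000^(2i/d_model)); all sizes are toy values.
import math
import torch

d_model, max_len = 8, 100
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)       # (max_len, 1)
div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float32)
                     * -(math.log(10000.0) / d_model))                      # (d_model/2,)
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0)                                                        # (1, max_len, d_model)

pos, i = 5, 2                                                               # arbitrary entry
expected = math.sin(pos / (10000 ** (2 * i / d_model)))
assert abs(pe[0, pos, 2 * i].item() - expected) < 1e-6

# Adding the encoding to a batch, as PositionalEncoding.forward does (dropout omitted):
x = torch.randn(2, 10, d_model)                                             # (batch, time, d_model)
offset = 0
out = x * math.sqrt(d_model) + pe[:, offset:offset + x.size(1)]
print(out.shape)                                                            # torch.Size([2, 10, 8])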
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
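# Illustrative sketch (not from the wenet sources above): a minimal reconstruction of
# the chunk-style attention mask that add_optional_chunk_mask() applies for streaming
# training/decoding, assuming a fixed chunk_size and num_left_chunks. Frame t may
# attend to every frame in its own chunk and in up to `num_left_chunks` previous
# chunks; the real wenet helper also handles random dynamic chunks and padding.
import torch

def chunk_mask(size: int, chunk_size: int, num_left_chunks: int = -1) -> torch.Tensor:
    """Return a (size, size) boolean mask; True means "may attend"."""
    mask = torch.zeros(size, size, dtype=torch.bool)
    for t in range(size):
        if num_left_chunks < 0:
            start = 0                                        # use all left chunks
        else:
            start = max((t // chunk_size - num_left_chunks) * chunk_size, 0)
        end = min((t // chunk_size + 1) * chunk_size, size)  # end of current chunk
        mask[t, start:end] = True
    return mask

print(chunk_mask(6, chunk_size=2, num_left_chunks=1).int())
# tensor([[1, 1, 0, 0, 0, 0],
#         [1, 1, 0, 0, 0, 0],
#         [1, 1, 1, 1, 0, 0],
#         [1, 1, 1, 1, 0, 0],
#         [0, 0, 1, 1, 1, 1],
#         [0, 0, 1, 1, 1, 1]])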
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
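# Illustrative sketch (not from the wenet sources above): BaseEncoder.forward()
# derives its (B, 1, T) attention mask from the raw lengths with
# `~make_pad_mask(xs_lens, T)`. A standalone equivalent, assuming make_pad_mask
# marks padded frames with True:
import torch

def pad_mask(lengths: torch.Tensor, max_len: int) -> torch.Tensor:
    """True at padded positions, shape (B, max_len)."""
    idx = torch.arange(max_len, device=lengths.device)        # (max_len,)
    return idx.unsqueeze(0) >= lengths.unsqueeze(1)           # broadcast to (B, max_len)

xs_lens = torch.tensor([4, 2, 3])
masks = ~pad_mask(xs_lens, max_len=4)                         # True = valid frame
masks = masks.unsqueeze(1)                                    # (B, 1, T), as in the encoder
print(masks.int().squeeze(1))
# tensor([[1, 1, 1, 1],
#         [1, 1, 0, 0],
#         [1, 1, 1, 0]])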
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
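# Illustrative sketch (not from the wenet sources above): forward_chunk_by_chunk()
# slides an overlapping window over the raw feature frames instead of caching the
# subsampling module. The window arithmetic below uses the Conv2dSubsampling4 values
# (subsampling_rate=4, right_context=6) and a decoding chunk of 16 subsampled frames;
# the utterance length is only an example.
subsampling, right_context = 4, 6
decoding_chunk_size = 16                       # in subsampled frames
context = right_context + 1                    # current frame + right context, in raw frames
stride = subsampling * decoding_chunk_size     # raw frames consumed per chunk
decoding_window = (decoding_chunk_size - 1) * subsampling + context

num_frames = 200                               # toy utterance length (raw frames)
windows = []
for cur in range(0, num_frames - context + 1, stride):
    end = min(cur + decoding_window, num_frames)
    windows.append((cur, end))

print(stride, decoding_window)                 # 64 67
print(windows)                                 # [(0, 67), (64, 131), (128, 195), (192, 200)]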
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
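# Illustrative sketch (not from the wenet sources above): the ConformerEncoderLayer
# applies its sub-modules in macaron order, half-step FFN -> self-attention ->
# convolution -> half-step FFN -> final LayerNorm, each with a pre-norm residual and
# ff_scale = 0.5. The toy block below reproduces only that residual wiring; the FFN,
# attention and convolution here are simple stand-ins, not the real wenet modules,
# and dropout/caches/masks are omitted.
import torch
from torch import nn

class ToyMacaronBlock(nn.Module):
    def __init__(self, size: int, hidden: int):
        super().__init__()
        self.ffn1 = nn.Sequential(nn.Linear(size, hidden), nn.SiLU(), nn.Linear(hidden, size))
        self.ffn2 = nn.Sequential(nn.Linear(size, hidden), nn.SiLU(), nn.Linear(hidden, size))
        self.attn = nn.MultiheadAttention(size, num_heads=4, batch_first=True)
        self.conv = nn.Conv1d(size, size, kernel_size=15, padding=7, groups=size)  # depthwise
        self.norm_ff1, self.norm_mha = nn.LayerNorm(size), nn.LayerNorm(size)
        self.norm_conv, self.norm_ff2 = nn.LayerNorm(size), nn.LayerNorm(size)
        self.norm_final = nn.LayerNorm(size)
        self.ff_scale = 0.5                                    # macaron half-step FFNs

    def forward(self, x: torch.Tensor) -> torch.Tensor:       # x: (batch, time, size)
        x = x + self.ff_scale * self.ffn1(self.norm_ff1(x))   # first half FFN
        y = self.norm_mha(x)
        x = x + self.attn(y, y, y, need_weights=False)[0]     # self-attention
        y = self.norm_conv(x).transpose(1, 2)                  # (B, size, time) for Conv1d
        x = x + self.conv(y).transpose(1, 2)                   # convolution module
        x = x + self.ff_scale * self.ffn2(self.norm_ff2(x))   # second half FFN
        return self.norm_final(x)

block = ToyMacaronBlock(size=64, hidden=256)
print(block(torch.randn(2, 30, 64)).shape)                     # torch.Size([2, 30, 64])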
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
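# Illustrative sketch (not from the wenet sources above): LabelSmoothingLoss replaces
# each one-hot row with confidence = 1 - smoothing on the true class and
# smoothing / (size - 1) on the rest, then measures KL divergence against the
# log-softmax outputs. Rebuilding the smoothed target for the docstring example
# (3 classes, smoothing = 0.1):
import torch

size, smoothing = 3, 0.1
target = torch.tensor([0, 1, 2])
true_dist = torch.full((target.size(0), size), smoothing / (size - 1))
true_dist.scatter_(1, target.unsqueeze(1), 1.0 - smoothing)
print(true_dist)
# tensor([[0.9000, 0.0500, 0.0500],
#         [0.0500, 0.9000, 0.0500],
#         [0.0500, 0.0500, 0.9000]])

# The loss itself is the masked, normalized KL divergence (padding handling omitted):
logits = torch.randn(target.size(0), size)
kl = torch.nn.KLDivLoss(reduction="none")(torch.log_softmax(logits, dim=1), true_dist)
loss = kl.sum() / target.size(0)   # here averaged over rows; wenet divides by batch or token count
print(loss.item())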
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). 
- - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
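# Illustrative sketch (not from the wenet sources above): save_checkpoint() and
# load_checkpoint() pair every `foo.pt` weights file with a `foo.yaml` side file
# holding metadata (epoch, cv loss, save time, ...). A minimal round trip with a toy
# model; the paths and metadata values here are purely illustrative, and PyYAML is
# assumed to be installed.
import os
import re
import tempfile
import yaml
import torch

model = torch.nn.Linear(4, 2)
with tempfile.TemporaryDirectory() as work_dir:
    path = os.path.join(work_dir, "epoch_1.pt")
    torch.save(model.state_dict(), path)                         # weights
    with open(re.sub(r"\.pt$", ".yaml", path), "w") as fout:      # metadata side file
        fout.write(yaml.dump({"epoch": 1, "cv_loss": 3.21}))

    # Loading mirrors load_checkpoint(): weights into the module, yaml into a dict.
    model.load_state_dict(torch.load(path, map_location="cpu"), strict=False)
    with open(re.sub(r"\.pt$", ".yaml", path)) as fin:
        infos = yaml.safe_load(fin)
    print(infos)                                                  # {'cv_loss': 3.21, 'epoch': 1}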
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
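# Illustrative sketch (not from the wenet sources above): _load_json_cmvn() turns
# accumulated statistics (sum, sum of squares, frame count) into a per-dimension mean
# and inverse standard deviation. The snippet reproduces that computation from raw
# toy frames and checks it against numpy; the GlobalCMVN module (not part of this
# file) would then normalize features as (x - mean) * istd.
import numpy as np

frames = np.random.randn(1000, 4) * 2.0 + 5.0              # toy features: 1000 frames, 4 dims
mean_stat = frames.sum(axis=0)
var_stat = (frames ** 2).sum(axis=0)
count = frames.shape[0]

mean = mean_stat / count
var = np.maximum(var_stat / count - mean ** 2, 1.0e-20)    # floor tiny variances, as above
istd = 1.0 / np.sqrt(var)

assert np.allclose(mean, frames.mean(axis=0))
assert np.allclose(istd, 1.0 / frames.std(axis=0))
normalized = (frames - mean) * istd
print(normalized.mean(axis=0).round(6), normalized.std(axis=0).round(6))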
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
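# Illustrative sketch (not from the wenet sources above): remove_duplicates_and_blank()
# implements the standard CTC collapse rule (merge repeated ids, then drop the blank
# id 0), and log_add() is a numerically stable log-sum-exp. Both are easy to
# sanity-check with the equivalent helpers below.
import math
from typing import List

def ctc_collapse(ids: List[int], blank: int = 0) -> List[int]:
    out, prev = [], None
    for i in ids:
        if i != blank and i != prev:      # keep the first of each run, skip blanks
            out.append(i)
        prev = i
    return out

# frame-level argmax ids -> token ids (blank = 0)
print(ctc_collapse([0, 3, 3, 0, 0, 5, 5, 5, 0, 3]))          # [3, 5, 3]

def log_add(args: List[float]) -> float:
    if all(a == -float("inf") for a in args):
        return -float("inf")
    a_max = max(args)
    return a_max + math.log(sum(math.exp(a - a_max) for a in args))

print(round(log_add([math.log(0.25), math.log(0.25)]), 6))   # log(0.5) = -0.693147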
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
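The `forced_align` helper in the `ctc_util.py` removed above is driven with per-frame CTC log-posteriors and a reference label sequence; a rough sketch with toy inputs (illustrative only; assumes the bundled `wenet` package is importable):

```python
import torch

from wenet.utils.ctc_util import forced_align

T, D = 6, 5                                        # 6 frames, vocab of 5 units (0 = blank)
ctc_probs = torch.randn(T, D).log_softmax(dim=-1)  # per-frame log-posteriors (T, D)
y = torch.tensor([2, 3, 2])                        # reference label sequence (L,)

# alignment[t] is the unit (blank or label) assigned to frame t; len(alignment) == T.
alignment = forced_align(ctc_probs, y, blank_id=0)
print(alignment)
```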
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
-
-    """
-    if non_lang_sym_path is None:
-        return None
-    else:
-        syms = read_lists(non_lang_sym_path)
-        non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})")
-        for sym in syms:
-            if non_lang_syms_pattern.fullmatch(sym) is None:
-                class BadSymbolFormat(Exception):
-                    pass
-                raise BadSymbolFormat(
-                    "Non-linguistic symbols should be "
-                    "formatted in {xxx}/<xxx>/[xxx], consider"
-                    " modifying '%s' to meet the requirement. "
-                    "More details can be found in discussions here : "
-                    "https://github.com/wenet-e2e/wenet/pull/819" % (sym))
-        return syms
-
-
-def read_symbol_table(symbol_table_file):
-    symbol_table = {}
-    with open(symbol_table_file, 'r', encoding='utf8') as fin:
-        for line in fin:
-            arr = line.strip().split()
-            assert len(arr) == 2
-            symbol_table[arr[0]] = int(arr[1])
-    return symbol_table
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/init_model.py
deleted file mode 100644
index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/init_model.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
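For reference, the `file_utils.py` helpers removed above read simple line-oriented files; a sketch with hypothetical paths (assumes the bundled `wenet` package is importable):

```python
from wenet.utils.file_utils import read_lists, read_symbol_table

# units.txt holds "<token> <id>" pairs, e.g. "<blank> 0" on the first line.
symbol_table = read_symbol_table("data/dict/units.txt")  # hypothetical path
print(symbol_table.get("<blank>"))                       # typically 0

# data.list simply holds one entry per line.
entries = read_lists("data/train/data.list")             # hypothetical path
print(len(entries))
```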
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wenetspeech/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/README.md b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/README.md deleted file mode 100644 index d5a7b6da20712f81354ea6f25b309804d4df3e71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# Performance Record - -## Conformer Result - -* Feature info: dither + specaug + speed perturb -* Training info: lr 0.002, warmup_steps 20000 batch size 16, 1 gpu, acc_grad 4, 120 epochs -* Decoding info: average_num 20 - -| decoding mode | dev93 (cer) | dev93 (wer) | -|:----------------------:|:-------------:|:-------------:| -| ctc_greedy_search | 5.25% | 13.16% | -| ctc_prefix_beam_search | 5.17% | 13.10% | -| attention_rescoring | 5.11% | 12.17% | \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/conf/train_conformer.yaml b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/conf/train_conformer.yaml deleted file mode 100644 index 785911d09e74d84f516915dc11354c164d5e0554..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/conf/train_conformer.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# network architecture -# encoder related -encoder: conformer -encoder_conf: - output_size: 256 # dimension of attention - attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.0 - input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 - normalize_before: true - cnn_module_kernel: 15 - use_cnn_module: True - activation_type: 'swish' - pos_enc_layer_type: 'rel_pos' - selfattention_layer_type: 'rel_selfattn' - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0.0 - 
src_attention_dropout_rate: 0.0
-
-# hybrid CTC/attention
-model_conf:
-    ctc_weight: 0.3
-    lsm_weight: 0.1     # label smoothing option
-    length_normalized_loss: false
-
-dataset_conf:
-    filter_conf:
-        max_length: 40960
-        min_length: 0
-        token_max_length: 200
-        token_min_length: 1
-    resample_conf:
-        resample_rate: 16000
-    speed_perturb: true
-    fbank_conf:
-        num_mel_bins: 80
-        frame_shift: 10
-        frame_length: 25
-        dither: 0.1
-    spec_aug: true
-    spec_aug_conf:
-        num_t_mask: 2
-        num_f_mask: 2
-        max_t: 50
-        max_f: 10
-    shuffle: true
-    shuffle_conf:
-        shuffle_size: 1500
-    sort: true
-    sort_conf:
-        sort_size: 500  # sort_size should be less than shuffle_size
-    batch_conf:
-        batch_type: 'static' # static or dynamic
-        batch_size: 16
-
-grad_clip: 5
-accum_grad: 4
-max_epoch: 120
-log_interval: 100
-
-optim: adam
-optim_conf:
-    lr: 0.002
-scheduler: warmuplr     # pytorch v1.1.0+ required
-scheduler_conf:
-    warmup_steps: 20000
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/find_transcripts.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/find_transcripts.pl
deleted file mode 100644
index e6d93027c5a29dd20293d9eada3bdaee192457f4..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/find_transcripts.pl
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-
-# This program takes on its standard input a list of utterance
-# id's, one for each line. (e.g. 4k0c030a is an utterance id).
-# It takes as its argument a list of the .dot files, and extracts
-# from the dot files the transcripts for a given
-# dataset (represented by a file list).
-
-
-@ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts";
-$dot_flist = shift @ARGV;
-
-open(L, "<$dot_flist") || die "Opening file list of dot files: $dot_flist\n";
-while(<L>){
-    chop;
-    m:\S+/(\w{6})00.dot: || die "Bad line in dot file list: $_";
-    $spk = $1;
-    $spk2dot{$spk} = $_;
-}
-
-
-while(<STDIN>){
-    chop;
-    $uttid = $_;
-    $uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_";
-    $spk = $1;
-    if($spk ne $curspk) {
-        %utt2trans = { }; # Don't keep all the transcripts in memory...
-        $curspk = $spk;
-        $dotfile = $spk2dot{$spk};
-        defined $dotfile || die "No dot file for speaker $spk\n";
-        open(F, "<$dotfile") || die "Error opening dot file $dotfile\n";
-        while(<F>) {
-            $_ =~ m:(.+)\((\w{8})\)\s*$: || die "Bad line $_ in dot file $dotfile (line $.)\n";
-            $trans = $1;
-            $utt = $2;
-            $utt2trans{$utt} = $trans;
-        }
-    }
-    if(!defined $utt2trans{$uttid}) {
-        print STDERR "No transcript for utterance $uttid (current dot file is $dotfile)\n";
-    } else {
-        print "$uttid $utt2trans{$uttid}\n";
-    }
-}
\ No newline at end of file
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/flist2scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/flist2scp.pl
deleted file mode 100644
index 7edf1e3f1f44e4ac3b97b39361a46ba8c453c88d..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/flist2scp.pl
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# takes in a file list with lines like
-# /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
-# and outputs an scp in kaldi format with lines like
-# 4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
-# (the first thing is the utterance-id, which is the same as the basename of the file.)
-
-
-while(<>){
-    m:^\S+/(\w+)\.[wW][vV]1$: || die "Bad line $_";
-    $id = $1;
-    $id =~ tr/A-Z/a-z/; # Necessary because of weirdness on disk 13-16.1 (uppercase filenames)
-    print "$id $_";
-}
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/ndx2flist.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/ndx2flist.pl
deleted file mode 100644
index 0dae1556858250654016920ed98a71fea5440a02..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/ndx2flist.pl
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This program takes as its standard input an .ndx file from the WSJ corpus that looks
-# like this:
-#;; File: tr_s_wv1.ndx, updated 04/26/94
-#;;
-#;; Index for WSJ0 SI-short Sennheiser training data
-#;; Data is read WSJ sentences, Sennheiser mic.
-#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts
-#;; per speaker TI) = 7236 utts
-#;;
-#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1
-#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1
-#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1
-
-#and as command-line arguments it takes the names of the WSJ disk locations, e.g.:
-#/mnt/matylda2/data/WSJ0/11-1.1 /mnt/matylda2/data/WSJ0/11-10.1 ... etc.
-# It outputs a list of absolute pathnames (it does this by replacing e.g. 11_1_1 with
-# /mnt/matylda2/data/WSJ0/11-1.1.
-# It also does a slight fix because one of the WSJ disks (WSJ1/13-16.1) was distributed with
-# uppercase rather than lower case filenames.
-
-foreach $fn (@ARGV) {
-    $fn =~ m:.+/([0-9\.\-]+)/?$: || die "Bad command-line argument $fn\n";
-    $disk_id=$1;
-    $disk_id =~ tr/-\./__/; # replace - and . with _ so 11-10.1 becomes 11_10_1
-    $fn =~ s:/$::; # Remove final slash, just in case it is present.
-    $disk2fn{$disk_id} = $fn;
-}
-
-while(<STDIN>){
-    if(m/^;/){ next; } # Comment. Ignore it.
-    else {
-        m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_";
-        $disk=$1;
-        if(!defined $disk2fn{$disk}) {
-            die "Disk id $disk not found";
-        }
-        $filename = $2; # as a subdirectory of the distributed disk.
-        if($disk eq "13_16_1" && `hostname` =~ m/fit.vutbr.cz/) {
-            # The disk 13-16.1 has been uppercased for some reason, on the
-            # BUT system. This is a fix specifically for that case.
-            $filename =~ tr/a-z/A-Z/; # This disk contains all uppercase filenames. Why?
-        }
-        print "$disk2fn{$disk}/$filename\n";
-    }
-}
\ No newline at end of file
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/normalize_transcript.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/normalize_transcript.pl
deleted file mode 100644
index 7a696956cebd4e8de4281b57eabbf48f62b751a6..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/normalize_transcript.pl
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This takes data from the standard input that's unnormalized transcripts in the format
-# 4k2c0308 Of course there isn\'t any guarantee the company will keep its hot hand [misc_noise]
-# 4k2c030a [loud_breath] And new hardware such as the set of personal computers I\. B\. M\. introduced last week can lead to unexpected changes in the software business [door_slam]
-# and outputs normalized transcripts.
-# c.f. /mnt/matylda2/data/WSJ0/11-10.1/wsj0/transcrp/doc/dot_spec.doc
-
-@ARGV == 1 || die "usage: normalize_transcript.pl noise_word < transcript > transcript2";
-$noise_word = shift @ARGV;
-
-while(<STDIN>) {
-    $_ =~ m:^(\S+) (.+): || die "bad line $_";
-    $utt = $1;
-    $trans = $2;
-    print "$utt";
-    foreach $w (split (" ",$trans)) {
-        $w =~ tr:a-z:A-Z:; # Upcase everything to match the CMU dictionary.
-        $w =~ s:\\::g; # Remove backslashes. We don't need the quoting.
-        $w =~ s:^\%PERCENT$:PERCENT:; # Normalization for Nov'93 test transcripts.
-        $w =~ s:^\.POINT$:POINT:; # Normalization for Nov'93 test transcripts.
-        if($w =~ m:^\[\<\w+\]$: || # E.g. [<door_slam], this means a door slammed in the preceding word. Delete.
-           $w =~ m:^\[\w+\>\]$: || # E.g. [door_slam>], this means a door slammed in the next word. Delete.
-           $w =~ m:\[\w+/\]$: || # E.g. [phone_ring/], which indicates the start of this phenomenon.
-           $w =~ m:\[\/\w+]$: || # E.g. [/phone_ring], which indicates the end of this phenomenon.
-           $w eq "~" || # This is used to indicate truncation of an utterance. Not a word.
-           $w eq ".") { # "." is used to indicate a pause. Silence is optional anyway so not much
-            # point including this in the transcript.
-            next; # we won't print this word.
-        } elsif($w =~ m:\[\w+\]:) { # Other noises, e.g. [loud_breath].
-            print " $noise_word";
-        } elsif($w =~ m:^\<([\w\']+)\>$:) {
-            # e.g. replace <and> with and. (the <> means verbal deletion of a word).. but it's pronounced.
-            print " $1";
-        } elsif($w eq "--DASH") {
-            print " -DASH"; # This is a common issue; the CMU dictionary has it as -DASH.
-#        } elsif($w =~ m:(.+)\-DASH$:) { # E.g. INCORPORATED-DASH... seems the DASH gets combined with previous word
-#            print " $1 -DASH";
-        } else {
-            print " $w";
-        }
-    }
-    print "\n";
-}
\ No newline at end of file
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/wsj_data_prep.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/wsj_data_prep.sh
deleted file mode 100644
index 9272cdc3208011c9538a1f8cd1ba2e333d1533bb..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/wsj_data_prep.sh
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright 2009-2012  Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
-# Apache 2.0.
-
-
-# set -eu
-
-if [ $# -le 3 ]; then
-  echo "Arguments should be a list of WSJ directories, see ../run.sh for example."
-  exit 1;
-fi
-
-dir=`pwd`/data/local/data
-mkdir -p $dir
-local=`pwd`/local
-
-cd $dir
-# Make directory of links to the WSJ disks such as 11-13.1. This relies on the command
-# line arguments being absolute pathnames.
-rm -r links/ 2>/dev/null
-mkdir links/
-ln -s $* links
-
-# Do some basic checks that we have what we expected.
-if [ ! -d links/11-13.1 -o ! -d links/13-34.1 -o ! -d links/11-2.1 ]; then
-  echo "wsj_data_prep.sh: Spot check of command line arguments failed"
-  echo "Command line arguments must be absolute pathnames to WSJ directories"
-  echo "with names like 11-13.1."
-  echo "Note: if you have old-style WSJ distribution,"
-  echo "local/cstr_wsj_data_prep.sh may work instead, see run.sh for example."
- exit 1; -fi - -# This version for SI-284 -cat links/13-34.1/wsj1/doc/indices/si_tr_s.ndx \ - links/11-13.1/wsj0/doc/indices/train/tr_s_wv1.ndx | \ - $local/ndx2flist.pl $* | sort | \ - grep -v -i 11-2.1/wsj0/si_tr_s/401 > train_si284.flist - -nl=`cat train_si284.flist | wc -l` -[ "$nl" -eq 37416 ] || echo "Warning: expected 37416 lines in train_si284.flist, got $nl" - - -# Nov'92 (333 utts) -# These index files have a slightly different format; -# have to add .wv1, which is done in cstr_ndx2flist.pl -cat links/11-13.1/wsj0/doc/indices/test/nvp/si_et_20.ndx | \ - $local/ndx2flist.pl $* | awk '{printf("%s.wv1\n", $1)}' | \ - sort > test_eval92.flist - -# Dev-set for Nov'93 (503 utts) -cat links/13-34.1/wsj1/doc/indices/h1_p0.ndx | \ - $local/ndx2flist.pl $* | sort > test_dev93.flist - -# Finding the transcript files: -for x in $*; do find -L $x -iname '*.dot'; done > dot_files.flist - -# Convert the transcripts into our format (no normalization yet) -for x in train_si284 test_eval92 test_dev93; do - $local/flist2scp.pl $x.flist | sort > ${x}_sph.scp - cat ${x}_sph.scp | awk '{print $1}' | $local/find_transcripts.pl dot_files.flist > $x.trans1 -done - -# Do some basic normalization steps. At this point we don't remove OOVs-- -# that will be done inside the training scripts, as we'd like to make the -# data-preparation stage independent of the specific lexicon used. -noiseword=""; -for x in train_si284 test_eval92 test_dev93; do - cat $x.trans1 | $local/normalize_transcript.pl $noiseword | sort > $x.txt || exit 1; -done - -# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.) - -sph2pipe=/home/lsq/kaldi/tools/sph2pipe_v2.5/sph2pipe -for x in train_si284 test_eval92 test_dev93; do - awk '{printf("%s '$sph2pipe' -f wav %s \n", $1, $2);}' < ${x}_sph.scp > ${x}_wav.scp -done - -echo "Data preparation succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/wsj_format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/wsj_format_data.sh deleted file mode 100644 index 7e2096b9061e0996824c541f11de7952691bcbe8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/wsj_format_data.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -# 2015 Guoguo Chen -# Apache 2.0 - -# This script takes data prepared in a corpus-dependent way -# in data/local/, and converts it into the "canonical" form, -# in various subdirectories of data/, e.g. data/lang, data/lang_test_ug, -# data/train_si284, data/train_si84, etc. - -# Don't bother doing train_si84 separately (although we have the file lists -# in data/local/) because it's just the first 7138 utterances in train_si284. -# We'll create train_si84 after doing the feature extraction. - -echo "$0 $@" # Print the command line for logging -. ./tools/parse_options.sh || exit 1; - -. ./path.sh || exit 1; - -echo "Preparing train and test data" -srcdir=data/local/data - -for x in train_si284 test_eval92 test_dev93; do - mkdir -p data/$x - cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1; - cp $srcdir/$x.txt data/$x/text || exit 1; -done - -echo "Succeeded in formatting data." 
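
The deleted `wsj_data_prep.sh` / `wsj_format_data.sh` scripts above ultimately emit Kaldi-style `wav.scp` and `text` files keyed by utterance ID (one `<utt-id> <value>` pair per line). As a rough, standalone illustration only — not part of this patch or of the removed scripts — the sketch below shows the consistency check that layout implies; the `read_kv`/`check_data_dir` helpers and the `data/test_eval92` path are hypothetical.

```python
# Illustrative sketch only -- not part of this repository or the deleted scripts.
# Assumes the Kaldi-style layout produced above: each line of wav.scp is
# "<utt-id> <wav-path-or-command>" and each line of text is "<utt-id> <transcript>".
from pathlib import Path


def read_kv(path: Path) -> dict:
    """Read a space-separated 'key rest-of-line' file into a dict."""
    table = {}
    for line in path.read_text(encoding="utf-8").splitlines():
        if not line.strip():
            continue
        key, _, value = line.partition(" ")
        table[key] = value.strip()
    return table


def check_data_dir(data_dir: str) -> None:
    """Verify that wav.scp and text cover the same utterance IDs."""
    d = Path(data_dir)                      # e.g. data/test_eval92 (hypothetical)
    wavs = read_kv(d / "wav.scp")
    texts = read_kv(d / "text")
    missing_text = sorted(set(wavs) - set(texts))
    missing_wav = sorted(set(texts) - set(wavs))
    if missing_text or missing_wav:
        raise ValueError(f"mismatch: {len(missing_text)} utts lack text, "
                         f"{len(missing_wav)} utts lack audio")
    print(f"{len(wavs)} utterances OK in {data_dir}")


if __name__ == "__main__":
    check_data_dir("data/test_eval92")      # hypothetical path
```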
\ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/wsj_gen_wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/wsj_gen_wav.sh deleted file mode 100644 index 056c4be418e64f761bf319b1c62e9321996f37f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/local/wsj_gen_wav.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash - -set -eu - -[ $# -ne 2 ] && echo "Script format error: $0 " && exit 0 - -data_dir=$1 -dump_dir=$2 - -mkdir -p $dump_dir - -num_utts=$(cat $data_dir/wav.scp | wc -l) -echo "Orginal utterances (.wav + .wv1): $num_utts" - -# cat $data_dir/wav.scp | grep "sph2pipe" | \ -# awk -v dir=$dump_dir '{printf("%s -f wav %s %s/%s.wav\n", $2, $5, dir, $1)}' | bash - -awk '{print $1,$5}' $data_dir/wav.scp > $data_dir/raw_wav.scp -find $dump_dir -name "*.wav" | awk -F '/' '{printf("%s %s\n", $NF, $0)}' | \ - sed 's:\.wav::' > $data_dir/wav.scp - -num_utts=$(cat $data_dir/wav.scp | wc -l) -echo "Wave utterances (.wav): $num_utts" - -echo "$0: Generate wav => $dump_dir done" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/path.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/path.sh deleted file mode 100644 index 73fc1c56602086182f66201870e28d46a0cada55..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/path.sh +++ /dev/null @@ -1,8 +0,0 @@ -export WENET_DIR=$PWD/../../.. -export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build -export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix -export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH - -# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=../../../:$PYTHONPATH diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/run.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/run.sh deleted file mode 100644 index 1b240d7186ba5da3ec9936accb1a7e55afa1db1c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/run.sh +++ /dev/null @@ -1,227 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -# Use this to control how many gpu you use, It's 1-gpu training if you specify -# just 1gpu, otherwise it's is multiple gpu training based on DDP in pytorch -export CUDA_VISIBLE_DEVICES="0" -# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl -# communication. More details can be found in -# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html -# export NCCL_SOCKET_IFNAME=ens4f1 -export NCCL_DEBUG=INFO -stage=0 # start from 0 if you need to start from data preparation -stop_stage=4 -# The num of nodes or machines used for multi-machine training -# Default 1 for single machine/node -# NFS will be needed if you want run multi-machine training -num_nodes=1 -# The rank of each node or machine, range from 0 to num_nodes -1 -# The first node/machine sets node_rank 0, the second one sets node_rank 1 -# the third one set node_rank 2, and so on. 
Default 0 -node_rank=0 -# data -WSJ0=/home/lsq/corpus/WSJ/wsj0 -WSJ1=/home/lsq/corpus/WSJ/wsj1 - - -nj=16 - -train_set=train_si284 -valid_set=test_dev93 -test_sets="test_dev93" - -data_type=raw -# for lm training -other_text=data/local/other_text/text - -# Optional train_config -# 1. conf/train_transformer.yaml: Standard transformer -# 2. conf/train_conformer.yaml: Standard conformer -# 3. conf/train_unified_conformer.yaml: Unified dynamic chunk causal conformer -# 4. conf/train_unified_transformer.yaml: Unified dynamic chunk transformer -# 5. conf/train_conformer_no_pos.yaml: Conformer without relative positional encoding -# 6. conf/train_u2++_conformer.yaml: U2++ conformer -# 7. conf/train_u2++_transformer.yaml: U2++ transformer -train_config=conf/train_conformer.yaml -cmvn=true -dir=/home/lsq/exp_dir/exp_wenet/wsj/conformer_1202 -dump_wav_dir=/home/lsq/corpus/wsj_wav -checkpoint= - - -# use average_checkpoint will get better result -average_checkpoint=true -decode_checkpoint=$dir/final.pt -average_num=20 -decode_modes="ctc_greedy_search ctc_prefix_beam_search attention attention_rescoring" - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - - echo "stage 0: Data preparation" - local/wsj_data_prep.sh ${WSJ0}/??-{?,??}.? ${WSJ1}/??-{?,??}.? - local/wsj_format_data.sh - - for x in ${valid_set} ${train_set}; do - { - ./local/wsj_gen_wav.sh data/$x $dump_wav_dir/$x - } - done - - echo "Prepare text from lng_modl dir: ${WSJ1}/13-32.1/wsj1/doc/lng_modl/lm_train/np_data/{87,88,89}/*.z -> ${other_text}" - mkdir -p "$(dirname ${other_text})" - # NOTE(kamo): Give utterance id to each texts. - zcat ${WSJ1}/13-32.1/wsj1/doc/lng_modl/lm_train/np_data/{87,88,89}/*.z | \ - grep -v "<" | tr "[:lower:]" "[:upper:]" | \ - awk '{ printf("wsj1_lng_%07d %s\n",NR,$0) } ' > ${other_text} -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - - # compute cmvn - tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \ - --in_scp data/${train_set}/wav.scp \ - --out_cmvn data/${train_set}/global_cmvn - -fi - -dict=data/dict/${train_set}_units.txt -nlsyms=data/nlsyms.txt - -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - # Make train dict - echo "Make a dictionary" - mkdir -p $(dirname $dict) - echo " 0" > ${dict} # 0 will be used for "blank" in CTC - echo " 1" >> ${dict} # must be 1 - - echo "make a non-linguistic symbol list" - cut -f 2- data/${train_set}/text | tr " " "\n" | sort | uniq | grep "<" > ${nlsyms} - cat ${nlsyms} - - tools/text2token.py -s 1 -n 1 -l ${nlsyms} --space ▁ data/${train_set}/text | cut -f 2- -d" " | tr " " "\n" \ - | sort | uniq | grep -v -e '^\s*$' | awk '{print $0 " " NR+1}' >> ${dict} - wc -l ${dict} - num_token=$(cat $dict | wc -l) - echo " $num_token" >> $dict # -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - echo "Prepare data, prepare required format" - for x in ${valid_set} ${train_set}; do - if [ $data_type == "shard" ]; then - tools/make_shard_list.py --num_utts_per_shard $num_utts_per_shard \ - --num_threads 16 data/$x/wav.scp data/$x/text \ - $(realpath data/$x/shards) data/$x/data.list - else - tools/make_raw_list.py data/$x/wav.scp data/$x/text \ - data/$x/data.list - fi - done -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - mkdir -p $dir - # You have to rm `INIT_FILE` manually when you resume or restart a - # multi-machine training. 
- INIT_FILE=$dir/ddp_init - init_method=file://$(readlink -f $INIT_FILE) - echo "$0: init method is $init_method" - num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') - # Use "nccl" if it works, otherwise use "gloo" - dist_backend="gloo" - world_size=`expr $num_gpus \* $num_nodes` - echo "total gpus is: $world_size" - cmvn_opts= - $cmvn && cp data/${train_set}/global_cmvn $dir - $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn" - - # train.py rewrite $train_config to $dir/train.yaml with model input - # and output dimension, and $dir/train.yaml will be used for inference - # and export. - for ((i = 0; i < $num_gpus; ++i)); do - { - gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1]) - # Rank of each gpu/process used for knowing whether it is - # the master of a worker. - rank=`expr $node_rank \* $num_gpus + $i` - python wenet/bin/train.py --gpu $gpu_id \ - --config $train_config \ - --data_type $data_type \ - --symbol_table $dict \ - --train_data data/$train_set/data.list \ - --cv_data data/$valid_set/data.list \ - ${checkpoint:+--checkpoint $checkpoint} \ - --model_dir $dir \ - --ddp.init_method $init_method \ - --ddp.world_size $world_size \ - --ddp.rank $rank \ - --ddp.dist_backend $dist_backend \ - --num_workers 1 \ - $cmvn_opts \ - --pin_memory \ - --non_lang_syms ${nlsyms} - } & - done - wait -fi - -if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then - # Test model, please specify the model you want to test by --checkpoint - if [ ${average_checkpoint} == true ]; then - decode_checkpoint=$dir/avg_${average_num}.pt - echo "do model average and final checkpoint is $decode_checkpoint" - python wenet/bin/average_model.py \ - --dst_model $decode_checkpoint \ - --src_path $dir \ - --num ${average_num} \ - --val_best - fi - # Please specify decoding_chunk_size for unified streaming and - # non-streaming model. The default value is -1, which is full chunk - # for non-streaming inference. - decoding_chunk_size= - ctc_weight=0.5 - reverse_weight=0.0 - for mode in ${decode_modes}; do - { - test_dir=$dir/test_${mode} - result_text=$test_dir/text - mkdir -p $(dirname $result_text) - python wenet/bin/recognize.py --gpu 3 \ - --mode $mode \ - --config $dir/train.yaml \ - --data_type $data_type \ - --test_data data/test_dev93/data.list \ - --checkpoint $decode_checkpoint \ - --beam_size 10 \ - --batch_size 1 \ - --penalty 0.0 \ - --dict $dict \ - --non_lang_syms $nlsyms \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --result_file $result_text \ - ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size} - python tools/compute-wer.py --char=1 --v=1 \ - data/test_dev93/text $test_dir/text > $test_dir/wer - } & - done - wait -fi - -if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then - # compute wer - for mode in ${decode_modes}; do - for test_set in $test_sets; do - test_dir=$dir/test_${mode} - sed 's:▁: :g' $test_dir/text > $test_dir/text.norm - python tools/compute-wer.py --char=1 --v=1 \ - data/$test_set/text $test_dir/text.norm > $test_dir/wer - done - done -fi - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2019 Mobvoi Inc. 
All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. 
./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. - elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in 
range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, 
ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! 
cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. -has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! 
$skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - 
min_error = error - dist = self.space[i][j - 1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. 
- format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 
100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif (token[0] == '<' and token[len(token) - 1] == '>' and - cluster_id == ''): - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('=======================================' - '====================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute-wer.py deleted file mode 100644 index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute-wer.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import re, sys, unicodedata -import codecs - -remove_tag = True -spacelist= [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - #https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - 
for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return 
sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . / - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if 
sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - 
print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... 
- else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = "Print log after every log_interval audios are processed." 
- parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 
Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . 
- " --sil-probs [should be with --pron-probs option]\n" . - " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. Will be used in lexicon and language model FST compiling. 
-cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n + 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with 
open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). - -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. 
- if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! 
-f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
-
-rm -rf $tgt_lang
-cp -r $src_lang $tgt_lang
-
-# Compose the language model to FST
-cat $arpa_lm | \
-   grep -v '<s> <s>' | \
-   grep -v '</s> <s>' | \
-   grep -v '</s> </s>' | \
-   grep -v -i '<unk>' | \
-   grep -v -i '<spoken_noise>' | \
-   arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \
-   tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \
-     --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-   fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst
-
-
-echo "Checking how stochastic G is (the first of these numbers should be small):"
-fstisstochastic $tgt_lang/G.fst
-
-# Compose the token, lexicon and language-model FST into the final decoding graph
-fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \
-  fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1;
-fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1;
-
-echo "Composing decoding graph TLG.fst succeeded"
-#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/prepare_dict.py
deleted file mode 100644
index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/prepare_dict.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-# encoding: utf-8
-
-import sys
-
-# sys.argv[1]: e2e model unit file(lang_char.txt)
-# sys.argv[2]: raw lexicon file
-# sys.argv[3]: output lexicon file
-# sys.argv[4]: bpemodel
-
-unit_table = set()
-with open(sys.argv[1], 'r', encoding='utf8') as fin:
-    for line in fin:
-        unit = line.split()[0]
-        unit_table.add(unit)
-
-
-def contain_oov(units):
-    for unit in units:
-        if unit not in unit_table:
-            return True
-    return False
-
-
-bpemode = len(sys.argv) > 4
-if bpemode:
-    import sentencepiece as spm
-    sp = spm.SentencePieceProcessor()
-    sp.Load(sys.argv[4])
-lexicon_table = set()
-with open(sys.argv[2], 'r', encoding='utf8') as fin, \
-        open(sys.argv[3], 'w', encoding='utf8') as fout:
-    for line in fin:
-        word = line.split()[0]
-        if word == 'SIL' and not bpemode:  # `sil` might be a valid piece in bpemodel
-            continue
-        elif word == '<SPOKEN_NOISE>':
-            continue
-        else:
-            # each word only has one pronunciation for e2e system
-            if word in lexicon_table:
-                continue
-            if bpemode:
-                # We assume that the lexicon does not contain code-switch,
-                # i.e. the word contains both English and Chinese.
-                # see PR https://github.com/wenet-e2e/wenet/pull/1693
-                # and Issue https://github.com/wenet-e2e/wenet/issues/1653
-                if word.encode('utf8').isalpha():
-                    pieces = sp.EncodeAsPieces(word)
-                else:
-                    pieces = word
-                if contain_oov(pieces):
-                    print(
-                        'Ignoring words {}, which contains oov unit'.format(
-                            ''.join(word).strip('▁'))
-                    )
-                    continue
-                chars = ' '.join(
-                    [p if p in unit_table else '<unk>' for p in pieces])
-            else:
-                # ignore words with OOV
-                if contain_oov(word):
-                    print('Ignoring words {}, which contains oov unit'.format(word))
-                    continue
-                # Optional, append ▁ in front of english word
-                # we assume the model unit of our e2e system is char now.
- if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). - -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . "\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! -d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. - """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. 
- """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. - """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 else 
sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/onnx2horizonbin.py deleted file mode 100644 index 
a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 
'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index]
-            scores = topk_prob.max(1)
-            hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps]
-            for i, key in enumerate(keys):
-                content = ''
-                for w in hyps[i]:
-                    if w == eos:
-                        break
-                    content += char_dict[w]
-                file_obj.write('{} {}\n'.format(key, content))
-            return key, content
-
-        if len(torch_out) > 0 and len(onnx_out) > 0:
-            key, content = post_process(torch_out, torch_file, keys)
-            logger.info('torch: {} {}'.format(key, content))
-            key, content = post_process(onnx_out, onnx_file, keys)
-            logger.info('onnx : {} {}'.format(key, content))
-    torch_file.close()
-    onnx_file.close()
-
-
-def generate_config(enc_session, ctc_session, args):
-    template = """
-# Model parameters
-model_parameters:
-  # Original ONNX floating-point model file
-  onnx_model: '{}'
-  # Target AI chip architecture for conversion
-  march: 'bernoulli2'
-  # Name prefix of the converted model file used for on-board execution
-  output_model_file_prefix: '{}'
-  # Directory where the conversion results are stored
-  working_dir: '{}'
-  # Whether the converted hybrid model keeps the ability to dump per-layer intermediate outputs
-  layer_out_dump: False
-  # Log level during conversion
-  log_level: 'debug'
-# Input parameters
-input_parameters:
-  # Input node names of the original float model
-  input_name: '{}'
-  # Input data types of the original float model (count/order must match input_name)
-  input_type_train: '{}'
-  # Input data layouts of the original float model (count/order must match input_name)
-  input_layout_train: '{}'
-  # Input data shapes of the original float model
-  input_shape: '{}'
-  # batch_size fed to the network at runtime, defaults to 1
-  # input_batch: 1
-  # Input preprocessing method added to the model
-  norm_type: '{}'
-  # Mean value subtracted in preprocessing; per-channel values must be separated by spaces
-  # mean_value: ''
-  # Scale factor applied in preprocessing; per-channel values must be separated by spaces
-  # scale_value: ''
-  # Input data types expected by the converted hybrid model (count/order must match input_name)
-  input_type_rt: '{}'
-  # Special format of the input data
-  input_space_and_range: ''
-  # Input data layouts expected by the converted hybrid model (count/order must match input_name)
-  input_layout_rt: '{}'
-# Calibration parameters
-calibration_parameters:
-  # Directory containing the calibration samples
-  cal_data_dir: '{}'
-  # Automatically preprocess calibration images (skimage read + resize to the input node size)
-  preprocess_on: False
-  # Calibration algorithm
-  calibration_type: '{}'
-  # Parameter for the max calibration method
-  max_percentile: 1.0
-  # Force the listed ops to run on CPU
-  run_on_cpu: '{}'
-  # Force the listed ops to run on BPU
-  run_on_bpu: '{}'
-# Compiler parameters
-compiler_parameters:
-  # Compilation strategy
-  compile_mode: 'latency'
-  # Whether to enable compiler debug info
-  debug: False
-  # Number of cores used when running the model
-  core_num: 1
-  # Compiler optimization level
-  optimize_level: 'O3'
-"""
-    output_dir = os.path.realpath(args.output_dir)
-    cal_data_dir = os.path.join(output_dir, 'cal_data_dir')
-    os.makedirs(cal_data_dir, exist_ok=True)
-    enc_dic = enc_session.get_modelmeta().custom_metadata_map
-    enc_onnx_path = os.path.join(output_dir, 'encoder.onnx')
-    enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder')
-    enc_cal_data = ";".join(
-        [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')])
-    ctc_dic = ctc_session.get_modelmeta().custom_metadata_map
-    ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx')
-    ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc')
-    ctc_cal_data = ";".join(
-        [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')])
-    enc_config = template.format(
-        enc_onnx_path, "encoder", enc_log_path,
-        enc_dic['input_name'], enc_dic['input_type'],
-        enc_dic['input_layout_train'], enc_dic['input_shape'],
-        enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'],
-        enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "")
-    ctc_config = template.format(
-        ctc_onnx_path, "ctc", ctc_log_path,
-        ctc_dic['input_name'], ctc_dic['input_type'],
-        ctc_dic['input_layout_train'], ctc_dic['input_shape'],
-        ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'],
-        ctc_cal_data, "default", "", "")
-    with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml:
-        enc_yaml.write(enc_config)
-    with open(output_dir +
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Parse command-line options. 
-# To be sourced by another script (as in ". parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/segment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. 
$(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! -x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -while(<>){ - @A = split(" ", $_); - @A > 1 || die "Invalid line in spk2utt file: $_"; - $s = shift @A; - foreach $u ( @A ) { - print "$u $s\n"; - } -} - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spm_decode deleted file mode 100644 index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spm_decode +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for decoding") - parser.add_argument("--input", default=None, help="input file to decode") - parser.add_argument("--input_format", choices=["piece", "id"], default="piece") - args = parser.parse_args() - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.input_format == "piece": - def decode(l): - return "".join(sp.DecodePieces(l)) - elif args.input_format == "id": - def decode(l): - return "".join(sp.DecodeIds(l)) - else: - raise NotImplementedError - - def tok2int(tok): - # remap reference-side (represented as <>) to 0 - return int(tok) if tok != "<>" else 0 - - if args.input is None: - h = sys.stdin - else: - h = open(args.input, "r", encoding="utf-8") - for line in h: - print(decode(line.split())) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spm_encode deleted file mode 100644 index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spm_encode +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in -# https://github.com/pytorch/fairseq/blob/master/LICENSE - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import contextlib -import sys - -import sentencepiece as spm - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", required=True, - help="sentencepiece model to use for encoding") - parser.add_argument("--inputs", nargs="+", default=['-'], - help="input files to filter/encode") - parser.add_argument("--outputs", nargs="+", default=['-'], - help="path to save encoded outputs") - parser.add_argument("--output_format", choices=["piece", "id"], default="piece") - parser.add_argument("--min-len", type=int, metavar="N", - help="filter sentence pairs with fewer than N tokens") - parser.add_argument("--max-len", type=int, metavar="N", - help="filter sentence pairs with more than N tokens") - args = parser.parse_args() - - assert len(args.inputs) == len(args.outputs), \ - "number of input and output paths should match" - - sp = spm.SentencePieceProcessor() - sp.Load(args.model) - - if args.output_format == "piece": - def encode(l): - return sp.EncodeAsPieces(l) - elif args.output_format == "id": - def encode(l): - return list(map(str, sp.EncodeAsIds(l))) - else: - raise NotImplementedError - - if args.min_len is not None or args.max_len is not None: - def valid(line): - return ( - (args.min_len is None or len(line) >= args.min_len) and - (args.max_len is None or len(line) <= args.max_len) - ) - else: - def valid(lines): - return True - - with contextlib.ExitStack() as stack: - inputs = [ - stack.enter_context(open(input, "r", encoding="utf-8")) - if input != "-" else sys.stdin - for input in args.inputs - ] - outputs = [ - stack.enter_context(open(output, "w", encoding="utf-8")) - if output != "-" else sys.stdout - for output in args.outputs - ] - - stats = { - "num_empty": 0, - "num_filtered": 0, - } - - def encode_line(line): - line = line.strip() - if len(line) > 0: - line = encode(line) - if valid(line): - return line - else: - stats["num_filtered"] += 1 - else: - stats["num_empty"] += 1 - return None - - for i, lines in enumerate(zip(*inputs), start=1): - enc_lines = list(map(encode_line, lines)) - if not any(enc_line is None for enc_line in enc_lines): - for enc_line, output_h in zip(enc_lines, outputs): - print(" ".join(enc_line), file=output_h) - if i % 10000 == 0: - print("processed {} lines".format(i), file=sys.stderr) - - print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) - print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spm_train deleted file mode 100644 index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/spm_train +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. 
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) - - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." 
- exit 1 -fi - -if $shortest && [ ! -f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. -[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. 
- awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . 
- "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . 
- "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. (Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. 
e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. 
- -if ( @ARGV > 1 ) { - die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; -} - -while(<>){ - @A = split(" ", $_); - @A == 2 || die "Invalid line in utt2spk file: $_"; - ($u,$s) = @A; - if(!$seen_spk{$s}) { - $seen_spk{$s} = 1; - push @spklist, $s; - } - push (@{$spk_hash{$s}}, "$u"); -} -foreach $s (@spklist) { - $l = join(' ',@{$spk_hash{$s}}); - print "$s $l\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/validate_data_dir.sh deleted file mode 100644 index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/validate_data_dir.sh +++ /dev/null @@ -1,383 +0,0 @@ -#!/bin/bash - -cmd="$@" - -no_feats=false -no_wav=false -no_text=false -no_spk_sort=false - -for x in `seq 4`; do - if [ "$1" == "--no-feats" ]; then - no_feats=true - shift; - fi - if [ "$1" == "--no-text" ]; then - no_text=true - shift; - fi - if [ "$1" == "--no-wav" ]; then - no_wav=true - shift; - fi - if [ "$1" == "--no-spk-sort" ]; then - no_spk_sort=true - shift; - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] " - echo "The --no-xxx options mean that the script does not require " - echo "xxx.scp to be present, but it will check it if it is present." - echo "--no-spk-sort means that the script does not require the utt2spk to be " - echo "sorted by the speaker-id in addition to being sorted by utterance-id." - echo "By default, utt2spk is expected to be sorted by both, which can be " - echo "achieved by making the speaker-id prefixes of the utterance-ids" - echo "e.g.: $0 data/train" - exit 1; -fi - -data=$1 - -if [ ! -d $data ]; then - echo "$0: no such directory $data" - exit 1; -fi - -if [ -f $data/images.scp ]; then - cmd=${cmd/--no-wav/} # remove --no-wav if supplied - image/validate_data_dir.sh $cmd - exit $? -fi - -for f in spk2utt utt2spk; do - if [ ! -f $data/$f ]; then - echo "$0: no such file $f" - exit 1; - fi - if [ ! -s $data/$f ]; then - echo "$0: empty file $f" - exit 1; - fi -done - -! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \ - echo "$0: $data/utt2spk has wrong format." && exit; - -ns=$(wc -l < $data/spk2utt) -if [ "$ns" == 1 ]; then - echo "$0: WARNING: you have only one speaker. This probably a bad idea." - echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html" - echo " for more information." -fi - - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted_and_uniq { - ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1; - ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \ - echo "$0: file $1 is not in sorted order or has duplicates" && exit 1; -} - -function partial_diff { - diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6) - n1=`cat $1 | wc -l` - n2=`cat $2 | wc -l` - echo "[Lengths are $1=$n1 versus $2=$n2]" -} - -check_sorted_and_uniq $data/utt2spk - -if ! $no_spk_sort; then - ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \ - echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; -fi - -check_sorted_and_uniq $data/spk2utt - -! 
cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." - exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! 
cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! 
cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. 
-# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. 
-
-sub set_to_fail { $exit = 1; $success = 0; }
-
-# Checking silence_phones.txt -------------------------------
-print "Checking $dict/silence_phones.txt ...\n";
-if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;}
-if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;}
-$idx = 1;
-%silence = ();
-$crlf = 1;
-
-print "--> reading $dict/silence_phones.txt\n";
-check_allowed_whitespace(\*S) || set_to_fail();
-while(<S>) {
-  if (! s/\n$//) {
-    print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n";
-    set_to_fail();
-  }
-  if ($crlf == 1 && m/\r/) {
-    print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n";
-    set_to_fail();
-    $crlf = 0;
-  }
-  my @col = split(" ", $_);
-  if (@col == 0) {
-    set_to_fail();
-    print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n";
-  }
-  foreach(0 .. @col-1) {
-    my $p = $col[$_];
-    if($silence{$p}) {
-      set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n";
-    } else {
-      $silence{$p} = 1;
-    }
-    # disambiguation symbols; phones ending in _B, _E, _S or _I will cause
-    # problems with word-position-dependent systems, and is obviously
-    # confusable with epsilon.
-    if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq "<eps>"){
-      set_to_fail();
-      print "--> ERROR: phone \"$p\" has disallowed written form\n";
-    }
-  }
-  $idx ++;
-}
-close(S);
-$success == 0 || print "--> $dict/silence_phones.txt is OK\n";
-print "\n";
-
-# Checking optional_silence.txt -------------------------------
-print "Checking $dict/optional_silence.txt ...\n";
-if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;}
-if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;}
-$idx = 1;
-$success = 1;
-$crlf = 1;
-print "--> reading $dict/optional_silence.txt\n";
-check_allowed_whitespace(\*OS) or exit 1;
-while(<OS>) {
-  chomp;
-  my @col = split(" ", $_);
-  if ($idx > 1 or @col > 1) {
-    set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n";
-  } elsif (!$silence{$col[0]}) {
-    set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n";
-  }
-  if ($crlf == 1 && m/\r/) {
-    print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n";
-    set_to_fail();
-    $crlf = 0;
-  }
-  $idx ++;
-}
-close(OS);
-$success == 0 || print "--> $dict/optional_silence.txt is OK\n";
-print "\n";
-
-# Checking nonsilence_phones.txt -------------------------------
-print "Checking $dict/nonsilence_phones.txt ...\n";
-if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;}
-if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;}
-$idx = 1;
-%nonsilence = ();
-$success = 1;
-$crlf = 1;
-print "--> reading $dict/nonsilence_phones.txt\n";
-check_allowed_whitespace(\*NS) or set_to_fail();
-while(<NS>) {
-  if ($crlf == 1 && m/\r/) {
-    print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n";
-    set_to_fail();
-    $crlf = 0;
-  }
-  if (! s/\n$//) {
-    print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n";
-    set_to_fail();
-  }
-  my @col = split(" ", $_);
-  if (@col == 0) {
-    set_to_fail();
-    print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n";
-  }
-  foreach(0 .. @col-1) {
-    my $p = $col[$_];
-    if($nonsilence{$p}) {
-      set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n";
-    } else {
-      $nonsilence{$p} = 1;
-    }
-    # phones that start with the pound sign/hash may be mistaken for
-    # disambiguation symbols; phones ending in _B, _E, _S or _I will cause
-    # problems with word-position-dependent systems, and is obviously
-    # confusable with epsilon.
-    if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq "<eps>"){
-      set_to_fail();
-      print "--> ERROR: phone \"$p\" has disallowed written form\n";
-    }
-  }
-  $idx ++;
-}
-close(NS);
-$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n";
-print "\n";
-
-# Checking disjoint -------------------------------
-sub intersect {
-  my ($a, $b) = @_;
-  @itset = ();
-  %itset = ();
-  foreach(keys %$a) {
-    if(exists $b->{$_} and !$itset{$_}) {
-      push(@itset, $_);
-      $itset{$_} = 1;
-    }
-  }
-  return @itset;
-}
-
-print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n";
-@itset = intersect(\%silence, \%nonsilence);
-if(@itset == 0) {print "--> disjoint property is OK.\n";}
-else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";}
-print "\n";
-
-
-sub check_lexicon {
-  my ($lex, $num_prob_cols, $num_skipped_cols) = @_;
-  print "Checking $lex\n";
-  !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail();
-  my %seen_line = {};
-  $idx = 1; $success = 1; $crlf = 1;
-  print "--> reading $lex\n";
-  check_allowed_whitespace(\*L) or set_to_fail();
-  while (<L>) {
-    if ($crlf == 1 && m/\r/) {
-      print "--> ERROR: $lex contains Carriage Return (^M) characters.\n";
-      set_to_fail();
-      $crlf = 0;
-    }
-    if (defined $seen_line{$_}) {
-      print "--> ERROR: line '$_' of $lex is repeated\n";
-      set_to_fail();
-    }
-    $seen_line{$_} = 1;
-    if (! s/\n$//) {
-      print "--> ERROR: last line '$_' of $lex does not end in newline.\n";
-      set_to_fail();
-    }
-    my @col = split(" ", $_);
-    $word = shift @col;
-    if (!defined $word) {
-      print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail();
-    }
-    if ($word eq "<s>" || $word eq "</s>" || $word eq "<eps>" || $word eq "#0") {
-      print "--> ERROR: lexicon.txt contains forbidden word $word\n";
-      set_to_fail();
-    }
-    for ($n = 0; $n < $num_prob_cols; $n++) {
-      $prob = shift @col;
-      if (!($prob > 0.0 && $prob <= 1.0)) {
-        print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n";
-        set_to_fail();
-      }
-    }
-    for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; }
-    if (@col == 0) {
-      print "--> ERROR: lexicon.txt contains word $word with empty ";
-      print "pronunciation.\n";
-      set_to_fail();
-    }
-    foreach (0 .. @col-1) {
-      if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) {
-        print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt ";
-        print "(line $idx)\n";
-        set_to_fail();
-      }
-    }
-    $idx ++;
-  }
-  close(L);
-  $success == 0 || print "--> $lex is OK\n";
-  print "\n";
-}
-
-if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); }
-if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); }
-if (-f "$dict/lexiconp_silprob.txt") {
-  # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also
-  # exist.
-  check_lexicon("$dict/lexiconp_silprob.txt", 2, 2);
-  if (-f "$dict/silprob.txt") {
-    !open(SP, "<$dict/silprob.txt") &&
-      print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail();
-    $crlf = 1;
-    while (<SP>) {
-      if ($crlf == 1 && m/\r/) {
-        print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n";
-        set_to_fail();
-        $crlf = 0;
-      }
-      chomp; my @col = split;
-      @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail();
-      if ($col[0] eq "<s>" || $col[0] eq "overall") {
-        if (!($col[1] > 0.0 && $col[1] <= 1.0)) {
-          set_to_fail();
-          print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n";
-        }
-      } elsif ($col[0] eq "</s>_s" || $col[0] eq "</s>_n") {
-        if ($col[1] <= 0.0) {
-          set_to_fail();
-          print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n";
-        }
-      } else {
-        print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n";
-        set_to_fail();
-      }
-    }
-    close(SP);
-  } else {
-    set_to_fail();
-    print "--> ERROR: expecting $dict/silprob.txt to exist\n";
-  }
-}
-
-if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) {
-  print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n";
-  set_to_fail();
-}
-
-sub check_lexicon_pair {
-  my ($lex1, $num_prob_cols1, $num_skipped_cols1,
-      $lex2, $num_prob_cols2, $num_skipped_cols2) = @_;
-  # We have checked individual lexicons already.
-  open(L1, "<$lex1"); open(L2, "<$lex2");
-  print "Checking lexicon pair $lex1 and $lex2\n";
-  my $line_num = 0;
-  while(<L1>) {
-    $line_num++;
-    @A = split;
-    $line_B = <L2>;
-    if (!defined $line_B) {
-      print "--> ERROR: $lex1 and $lex2 have different number of lines.\n";
-      set_to_fail(); last;
-    }
-    @B = split(" ", $line_B);
-    # Check if the word matches.
-    if ($A[0] ne $B[0]) {
-      print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n";
-      set_to_fail(); last;
-    }
-    shift @A; shift @B;
-    for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; }
-    for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; }
-    # Check if the pronunciation matches
-    if (join(" ", @A) ne join(" ", @B)) {
-      print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n";
-      set_to_fail(); last;
-    }
-  }
-  $line_B = <L2>;
-  if (defined $line_B && $exit == 0) {
-    print "--> ERROR: $lex1 and $lex2 have different number of lines.\n";
-    set_to_fail();
-  }
-  $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n";
-}
-
-# If more than one lexicon exist, we have to check if they correspond to each
-# other. It could be that the user overwrote one and we need to regenerate the
-# other, but we do not know which is which.
-if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") {
-  check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0);
-}
-if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") {
-  check_lexicon_pair("$dict/lexiconp.txt", 1, 0,
-                     "$dict/lexiconp_silprob.txt", 2, 2);
-}
-
-# Checking extra_questions.txt -------------------------------
-%distinguished = (); # Keep track of all phone-pairs including nonsilence that
-                     # are distinguished (split apart) by extra_questions.txt,
-                     # as $distinguished{$p1,$p2} = 1. This will be used to
-                     # make sure that we don't have pairs of phones on the same
-                     # line in nonsilence_phones.txt that can never be
-                     # distinguished from each other by questions. (If any two
(If any two - # phones appear on the same line in nonsilence_phones.txt, - # they share a tree root, and since the automatic - # question-building treats all phones that appear on the - # same line of nonsilence_phones.txt as being in the same - # group, we can never distinguish them without resorting to - # questions in extra_questions.txt. -print "Checking $dict/extra_questions.txt ...\n"; -if (-s "$dict/extra_questions.txt") { - if (!open(EX, "<$dict/extra_questions.txt")) { - set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n"; - } - $idx = 1; - $success = 1; - $crlf = 1; - print "--> reading $dict/extra_questions.txt\n"; - check_allowed_whitespace(\*EX) or set_to_fail(); - while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n"; - } - foreach (0 .. @col-1) { - if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n"; - } - $idx ++; - } - %col_hash = (); - foreach $p (@col) { $col_hash{$p} = 1; } - foreach $p1 (@col) { - # Update %distinguished hash. - foreach $p2 (keys %nonsilence) { - if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not - # in this question (and in nonsilence - # phones)... mark p1,p2 as being split apart - $distinguished{$p1,$p2} = 1; - $distinguished{$p2,$p1} = 1; - } - } - } - } - close(EX); - $success == 0 || print "--> $dict/extra_questions.txt is OK\n"; -} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";} - -if (-f "$dict/nonterminals.txt") { - open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt"; - my %nonterminals = (); - my $line_number = 1; - while () { - chop; - my @line = split(" ", $_); - if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) { - print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1; - } - $nonterminals{$line[0]} = 1; - $line_number++; - } - print "--> $dict/nonterminals.txt is OK\n"; -} - - -# check nonsilence_phones.txt again for phone-pairs that are never -# distnguishable. (note: this situation is normal and expected for silence -# phones, so we don't check it.) -if(!open(NS, "<$dict/nonsilence_phones.txt")) { - print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1; -} - -$num_warn_nosplit = 0; -$num_warn_nosplit_limit = 10; -while() { - my @col = split(" ", $_); - foreach $p1 (@col) { - foreach $p2 (@col) { - if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) { - set_to_fail(); - if ($num_warn_nosplit <= $num_warn_nosplit_limit) { - print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n"; - } - if ($num_warn_nosplit == $num_warn_nosplit_limit) { - print "... Not warning any more times about this issue.\n"; - } - if ($num_warn_nosplit == 0) { - print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). 
-sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . 
- "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python 
../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - 
labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. 
- """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def __iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. 
- - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! 
To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. 
- """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! 
- if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. * (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. 
- - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). 
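# A minimal sketch (illustrative, not from the original file) of the
# 'Posterior' structure produced by read_post() above: a plain nested list,
# one inner list of (index, value) tuples per frame.
example_post = [
    [(3, 0.9), (7, 0.1)],   # frame 0: two active indices with their values
    [(3, 1.0)],             # frame 1: a single active index
]
num_frames = len(example_post)                          # -> 2
frame0_best = max(example_post[0], key=lambda t: t[1])  # -> (3, 0.9)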
-# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. - - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - 
filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. - - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/attention.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/attention.py deleted file mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. 
- # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. 
- self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - 
static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = 
torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp 
= exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, 
recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return padding * pad_value + input * 
(1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 @@ -from 
typing import List - -import torch - - -def basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - assert batch_size 
== 1 - - # 1. Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
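The rescoring loop that follows sums the attention decoder's log-probabilities over each n-best hypothesis (plus the end-of-sentence term), optionally blends in the right-to-left decoder via `reverse_weight`, and finally adds the CTC prefix-beam score scaled by `ctc_weight`. A minimal standalone sketch of that score fusion, assuming the inputs are already-computed log-prob arrays (the function and argument names here are illustrative, not part of the wenet API):

```python
def fuse_scores(hyps, decoder_out, r_decoder_out, ctc_scores,
                eos: int, ctc_weight: float = 0.5, reverse_weight: float = 0.0):
    """Pick the best hypothesis after attention rescoring.

    hyps:          list of token-id lists (one per beam entry)
    decoder_out:   (beam, max_len + 1, vocab) log-probs, left-to-right decoder
    r_decoder_out: (beam, max_len + 1, vocab) log-probs, right-to-left decoder
    ctc_scores:    (beam,) scores from CTC prefix beam search
    """
    best_score, best_index = -float('inf'), 0
    for i, hyp in enumerate(hyps):
        # left-to-right decoder score: per-token log-probs plus the eos term
        score = sum(decoder_out[i][j][w] for j, w in enumerate(hyp))
        score += decoder_out[i][len(hyp)][eos]
        if reverse_weight > 0:
            # right-to-left decoder reads the hypothesis backwards
            r_score = sum(r_decoder_out[i][len(hyp) - j - 1][w]
                          for j, w in enumerate(hyp))
            r_score += r_decoder_out[i][len(hyp)][eos]
            score = score * (1 - reverse_weight) + r_score * reverse_weight
        score += ctc_scores[i] * ctc_weight
        if score > best_score:
            best_score, best_index = score, i
    return hyps[best_index], best_score
```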
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
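Further down, the per-path total is a weighted sum of the lattice acoustic score, the n-gram LM score, and the two attention-decoder scores. A toy illustration of that combination for a three-path n-best list (all numbers below are made up purely for the example):

```python
import torch

am_scores        = torch.tensor([-12.0, -13.5, -11.8])  # acoustic / lattice scores
ngram_lm_scores  = torch.tensor([ -4.2,  -3.9,  -5.1])  # n-gram LM scores
decoder_scores   = torch.tensor([ -6.0,  -7.2,  -6.4])  # left-to-right decoder scores
r_decoder_scores = torch.tensor([ -6.1,  -7.0,  -6.6])  # right-to-left decoder scores

lm_scale, decoder_scale, r_decoder_scale = 0.5, 0.3, 0.2
tot_scores = (am_scores
              + lm_scale * ngram_lm_scores
              + decoder_scale * decoder_scores
              + r_decoder_scale * r_decoder_scores)
print(int(tot_scores.argmax()))  # index of the best path
```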
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
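The gather-based reversal implemented below avoids `pad_sequence`/`reverse_pad_list` so the decoder remains exportable to ONNX. The same trick, extracted into a small self-contained function and exercised with the toy values from the inline comments (the eos id used here is illustrative):

```python
import torch

def reverse_padded(r_hyps: torch.Tensor, r_hyps_lens: torch.Tensor, eos: int) -> torch.Tensor:
    """Reverse each right-padded hypothesis without pad_sequence."""
    max_len = torch.max(r_hyps_lens)
    index_range = torch.arange(0, max_len, 1)
    seq_len_expand = r_hyps_lens.unsqueeze(1)
    seq_mask = seq_len_expand > index_range        # True on valid positions
    index = (seq_len_expand - 1) - index_range     # reversed positions (may go negative)
    index = index * seq_mask                       # padding positions clamped to 0
    reversed_hyps = torch.gather(r_hyps, 1, index)
    return torch.where(seq_mask, reversed_hyps, torch.tensor(eos))

r_hyps = torch.tensor([[1, 2, 3], [9, 8, 4], [2, -1, -1]])
r_hyps_lens = torch.tensor([3, 3, 1])
print(reverse_padded(r_hyps, r_hyps_lens, eos=5000))
# tensor([[   3,    2,    1],
#         [   4,    8,    9],
#         [   2, 5000, 5000]])
```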
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
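The cache described for this forward pass packs keys and values along the last dimension, and for the first chunk its time axis may be zero-length; concatenating it with the new keys and values still works, which is what lets the ONNX export always take the same code path. A quick standalone check of that behavior with toy shapes:

```python
import torch

head, d_k, chunk = 4, 16, 8
cache = torch.zeros(1, head, 0, d_k * 2)   # empty cache fed for the first chunk
k_new = torch.randn(1, head, chunk, d_k)
v_new = torch.randn(1, head, chunk, d_k)

# Unpack the cache into key/value halves, extend them, and re-pack.
key_cache, value_cache = torch.split(cache, cache.size(-1) // 2, dim=-1)
k = torch.cat([key_cache, k_new], dim=2)   # (1, head, cache_t + chunk, d_k)
v = torch.cat([value_cache, v_new], dim=2)
new_cache = torch.cat((k, v), dim=-1)
print(new_cache.shape)                     # torch.Size([1, 4, 8, 32])
```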
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
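The `NOTE(xcsong)` comments above lean on concatenating and splitting zero-sized cache tensors being safe no-ops; a short self-contained check of that behaviour with assumed shapes:

```python
import torch

empty_cache = torch.zeros((1, 2, 0, 4))      # (batch, head, cache_t=0, d_k * 2)
new_kv = torch.ones((1, 2, 3, 4))            # (batch, head, time, d_k * 2)

merged = torch.cat((empty_cache, new_kv), dim=2)
assert torch.equal(merged, new_kv)           # concatenating an empty cache changes nothing

key_cache, value_cache = torch.split(empty_cache, empty_cache.size(-1) // 2, dim=-1)
assert key_cache.shape == value_cache.shape == (1, 2, 0, 2)   # splitting it is also safe
```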
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
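A hedged usage sketch of the module defined above (import path as used by the encoder in this tree; default ReLU activation and batch norm assumed). The block runs pointwise conv, GLU, depthwise conv, norm plus activation, and a final pointwise conv, preserving the (batch, time, channels) shape:

```python
import torch
from wenet.transformer.convolution import ConvolutionModule

conv = ConvolutionModule(channels=256, kernel_size=15)   # non-causal, batch_norm
x = torch.randn(2, 100, 256)                             # (batch, time, channels)
y, new_cache = conv(x)                                   # fake mask_pad/cache defaults
assert y.shape == x.shape                                # time and channels preserved
assert new_cache.numel() == 0                            # no cache for non-causal conv
```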
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. 
- normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
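A quick numeric check, with assumed small dimensions, that the `div_term` construction above realises the stated formula PE(pos, 2i) = sin(pos / 10000^(2i / d_model)):

```python
import math
import torch

d_model, max_len = 8, 16
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float32)
                     * -(math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)   # even dims: sine
pe[:, 1::2] = torch.cos(position * div_term)   # odd dims: cosine

pos, i = 5, 1
assert abs(pe[pos, 2 * i].item() - math.sin(pos / 10000 ** (2 * i / d_model))) < 1e-5
```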
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
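A hedged sketch of the padding-mask convention used in `forward` above; `make_pad_mask` is re-implemented here in simplified form for illustration (the real helper lives in `wenet.utils.mask`), marking padded frames True so the encoder keeps its negation:

```python
import torch

def make_pad_mask(lengths: torch.Tensor, max_len: int) -> torch.Tensor:
    # Simplified for illustration: True marks padded positions.
    steps = torch.arange(max_len, device=lengths.device).unsqueeze(0)   # (1, T)
    return steps >= lengths.unsqueeze(1)                                # (B, T)

xs_lens = torch.tensor([3, 5])
masks = ~make_pad_mask(xs_lens, 5).unsqueeze(1)   # (B, 1, T), True = valid frame
# masks[0]: [[ True,  True,  True, False, False]]
# masks[1]: [[ True,  True,  True,  True,  True]]
```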
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
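The stride and window arithmetic from `forward_chunk_by_chunk` above, worked through with assumed `Conv2dSubsampling4` values (subsampling rate 4, right context 6):

```python
subsampling = 4                      # assumed Conv2dSubsampling4 rate
context = 6 + 1                      # assumed right_context + the current frame
decoding_chunk_size = 16             # encoder-output frames produced per chunk

stride = subsampling * decoding_chunk_size                            # 64 input frames per step
decoding_window = (decoding_chunk_size - 1) * subsampling + context   # 67 input frames read

num_frames = 300
chunk_starts = list(range(0, num_frames - context + 1, stride))       # [0, 64, 128, 192, 256]
```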
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. 
- dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. 
- odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. 
- pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). 
- pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). - - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/init_model.py deleted file mode 100644 index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/wsj/s0/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/file_utils.py similarity index 100% rename from models/audio/speech_recognition/conformer/igie/wenet/examples/aishell/rnnt/wenet/utils/file_utils.py rename to models/audio/speech_recognition/conformer/igie/wenet/file_utils.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/processor.py similarity index 83% rename from models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/processor.py rename to models/audio/speech_recognition/conformer/igie/wenet/processor.py index b4bd07ce674eb3288cd1b13a09085eec48d40845..9a542a3d204cdb3def8cf61ce0b0fd8bb31af32e 100644 --- a/models/audio/speech_recognition/conformer/igie/wenet/examples/aishell2/rnnt/wenet/dataset/processor.py +++ b/models/audio/speech_recognition/conformer/igie/wenet/processor.py @@ -49,7 +49,7 @@ def url_opener(data): stream = open(url, 'rb') # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP else: - cmd = f'wget -q -O - {url}' + cmd = f'curl -s -L {url}' process = Popen(cmd, shell=True, stdout=PIPE) sample.update(process=process) stream = process.stdout @@ -282,43 +282,6 @@ def compute_fbank(data, yield dict(key=sample['key'], label=sample['label'], feat=mat) -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - 
high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - def __tokenize_by_bpe_model(sp, txt): tokens = [] # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: @@ -342,10 +305,7 @@ def __tokenize_by_bpe_model(sp, txt): return tokens -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, +def tokenize(data, symbol_table, bpe_model=None, non_lang_syms=None, split_with_space=False): """ Decode text to chars or BPE Inplace operation @@ -443,58 +403,6 @@ def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): yield sample -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. - ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - def shuffle(data, shuffle_size=10000): """ Local shuffle the data @@ -634,24 +542,6 @@ def padding(data): padded_feats = pad_sequence(sorted_feats, batch_first=True, padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) padding_labels = pad_sequence(sorted_labels, batch_first=True, padding_value=-1) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/requirements.txt b/models/audio/speech_recognition/conformer/igie/wenet/requirements.txt deleted file mode 100644 index 72c857232732836576c12ccf4a7e590f907370fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -Pillow -pyyaml>=5.1 -sentencepiece -tensorboard -tensorboardX -typeguard -textgrid -pytest -flake8==3.8.2 -flake8-bugbear -flake8-comprehensions -flake8-executable -flake8-pyi==20.5.0 -mccabe -pycodestyle==2.6.0 -pyflakes==2.2.0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/README.md deleted file mode 100644 index 
8275416dcf01de842d9969c8fe666c4eed06ecf4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# Runtime on WeNet - -This is the runtime of WeNet. - -We are going to support the following platforms: - -1. Various deep learning inference engines, such as LibTorch, ONNX, OpenVINO, TVM, and so on. -2. Various OS, such as android, iOS, Harmony, and so on. -3. Various AI chips, such as GPU, Horzion BPU, and so on. -4. Various hardware platforms, such as Raspberry Pi. -5. Various language binding, such as python and go. - -Feel free to volunteer yourself if you are interested in trying out some items(they do not have to be on the list). - -## Introduction - -Here is a brief summary of all platforms and OSs. please note the corresponding working `OS` and `inference engine`. - -| runtime | OS | inference engine | Description | -|-----------------|---------------------|----------------------|--------------------------------------------------------------------------------------------------| -| core | / | / | common core code of all runtime | -| android | android | libtorch | android demo, [English demo](https://www.youtube.com/shorts/viEnvmZf03s ), [Chinese demo](TODO) | -| bingding/python | linux, windows, mac | libtorch | python binding of wenet, mac M1/M2 are is not supported now. | -| gpu | linux | onnxruntime/tensorrt | GPU inference with NV's Triton and TensorRT | -| horizonbpu | linux | bpu runtime | Horizon BPU runtime | -| ios | ios | libtorch | ios demo, [link](TODO) | -| kunlun | linux | xpu runtime | Kunlun XPU runtime | -| libtorch | linux, windows, mac | libtorch | c++ build with libtorch | -| onnxrutnime | linux, windows, mac | onnxruntime | c++ build with onnxruntime | -| raspberrypi | linux | onnxruntime | c++ build on raspberrypi with onnxruntime | -| web | linux, windows, mac | libtorch | web demo with gradio and python binding, [link]() | - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/README.md deleted file mode 100644 index 44fbf619a43c687e0c8132d2f79b3f3ce8bbdfe2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/README.md +++ /dev/null @@ -1,55 +0,0 @@ -# WeNet On-device ASR Android Demo - -This Android demo shows we can run on-device streaming ASR with WeNet. You can download our prebuilt APK or build your APK from source code. - -## Prebuilt APK - -* [Chinese ASR Demo APK, with model trained on AIShell data](http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20210202_app.apk) -* [English ASR Demo APK, with model trained on GigaSpeech data](http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/gigaspeech/20210823_app.apk) - -## Build your APK from source code - -### 1) Build model - -You can use our pretrained model (click the following link to download): - -[中文(WenetSpeech)](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/wenetspeech/wenetspeech_u2pp_conformer_libtorch_quant.tar.gz) -| [English(GigaSpeech)](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/gigaspeech/gigaspeech_u2pp_conformer_libtorch_quant.tar.gz) - -Or you can train your own model using WeNet training pipeline on your data. - -### 2) Build APK - -When your model is ready, put `final.zip` and `units.txt` into Android assets (`app/src/main/assets`) folder, -then just build and run the APK. 
Here is a gif demo, which shows how our on-device streaming e2e ASR runs with low latency. -Please note the wifi and data has been disabled in the demo so there is no network connection ^\_^. - -![Runtime android demo](../../../../docs/images/runtime_android.gif) - -## Compute the RTF - -Step 1, connect your Android phone, and use `adb push` command to push your model, wav scp, and waves to the sdcard. - -Step 2, build the binary and the APK with Android Studio directly, or with the commands as follows: - -``` sh -cd runtime/android -./gradlew build -``` - -Step 3, push your binary and the dynamic library to `/data/local/tmp` as follows: - -``` sh -adb push app/.cxx/cmake/release/arm64-v8a/decoder_main /data/local/tmp -adb push app/build/pytorch_android-1.10.0.aar/jni/arm64-v8a/* /data/local/tmp -``` - -Step 4, change to the directory `/data/local/tmp` of your phone, and export the library path by: - -``` sh -adb shell -cd /data/local/tmp -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:. -``` - -Step 5, execute the same command as the [x86 demo](../../../libtorch) to run the binary to decode and compute the RTF. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/build.gradle b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/build.gradle deleted file mode 100644 index 8f760052d552cb6eff57419761b8cff9d8cd93d2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/build.gradle +++ /dev/null @@ -1,103 +0,0 @@ -plugins { - id 'com.android.application' -} - -repositories { - jcenter() - maven { - url "https://oss.sonatype.org/content/repositories/snapshots" - } -} - -android { - signingConfigs { - release { - storeFile file('wenet.keystore') - storePassword '123456' - keyAlias 'wenet' - keyPassword '123456' - } - } - packagingOptions { - pickFirst 'lib/arm64-v8a/libc++_shared.so' - } - configurations { - extractForNativeBuild - } - compileSdkVersion 30 - buildToolsVersion "30.0.3" - - defaultConfig { - applicationId "com.mobvoi.wenet" - minSdkVersion 21 - targetSdkVersion 30 - versionCode 1 - versionName "1.0" - - testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" - externalNativeBuild { - cmake { - targets "wenet", "decoder_main" - cppFlags "-std=c++14", "-DC10_USE_GLOG", "-DC10_USE_MINIMAL_GLOG", "-DANDROID", "-Wno-c++11-narrowing", "-fexceptions" - } - } - - ndkVersion '21.1.6352462' - ndk { - abiFilters 'armeabi-v7a', 'arm64-v8a', 'x86', 'x86_64' - } - } - - buildTypes { - release { - minifyEnabled false - signingConfig signingConfigs.release - proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' - } - } - externalNativeBuild { - cmake { - path "src/main/cpp/CMakeLists.txt" - } - } - compileOptions { - sourceCompatibility JavaVersion.VERSION_1_8 - targetCompatibility JavaVersion.VERSION_1_8 - } -} - -dependencies { - - implementation 'androidx.appcompat:appcompat:1.2.0' - implementation 'com.google.android.material:material:1.2.1' - implementation 'androidx.constraintlayout:constraintlayout:2.0.4' - testImplementation 'junit:junit:4.+' - androidTestImplementation 'androidx.test.ext:junit:1.1.2' - androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0' - - implementation 'org.pytorch:pytorch_android:1.10.0' - extractForNativeBuild 'org.pytorch:pytorch_android:1.10.0' - - implementation 'com.github.pengzhendong:wenet-openfst-android:1.0.2' - extractForNativeBuild 
'com.github.pengzhendong:wenet-openfst-android:1.0.2' -} - -task extractAARForNativeBuild { - doLast { - configurations.extractForNativeBuild.files.each { - def file = it.absoluteFile - copy { - from zipTree(file) - into "$buildDir/$file.name" - include "headers/**" - include "jni/**" - } - } - } -} - -tasks.whenTaskAdded { task -> - if (task.name.contains('externalNativeBuild')) { - task.dependsOn(extractAARForNativeBuild) - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/proguard-rules.pro b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/proguard-rules.pro deleted file mode 100644 index 481bb434814107eb79d7a30b676d344b0df2f8ce..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/proguard-rules.pro +++ /dev/null @@ -1,21 +0,0 @@ -# Add project specific ProGuard rules here. -# You can control the set of applied configuration files using the -# proguardFiles setting in build.gradle. -# -# For more details, see -# http://developer.android.com/guide/developing/tools/proguard.html - -# If your project uses WebView with JS, uncomment the following -# and specify the fully qualified class name to the JavaScript interface -# class: -#-keepclassmembers class fqcn.of.javascript.interface.for.webview { -# public *; -#} - -# Uncomment this to preserve the line number information for -# debugging stack traces. -#-keepattributes SourceFile,LineNumberTable - -# If you keep the line number information, uncomment this to -# hide the original source file name. -#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/androidTest/java/com/mobvoi/wenet/ExampleInstrumentedTest.java b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/androidTest/java/com/mobvoi/wenet/ExampleInstrumentedTest.java deleted file mode 100644 index e1943606a656b95647a415d66aee79f0c97c2232..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/androidTest/java/com/mobvoi/wenet/ExampleInstrumentedTest.java +++ /dev/null @@ -1,26 +0,0 @@ -package com.mobvoi.wenet; - -import android.content.Context; - -import androidx.test.platform.app.InstrumentationRegistry; -import androidx.test.ext.junit.runners.AndroidJUnit4; - -import org.junit.Test; -import org.junit.runner.RunWith; - -import static org.junit.Assert.*; - -/** - * Instrumented test, which will execute on an Android device. - * - * @see Testing documentation - */ -@RunWith(AndroidJUnit4.class) -public class ExampleInstrumentedTest { - @Test - public void useAppContext() { - // Context of the app under test. 
- Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext(); - assertEquals("com.mobvoi.wenet", appContext.getPackageName()); - } -} \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/AndroidManifest.xml b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/AndroidManifest.xml deleted file mode 100644 index 6da613a4c20b330f30853241ac1a30501e6d5cc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/AndroidManifest.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/assets/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/assets/README.md deleted file mode 100644 index 2d78237d0f670185692a1a292c7acdedd4730e90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/assets/README.md +++ /dev/null @@ -1 +0,0 @@ -put final.zip and units.txt here. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/CMakeLists.txt deleted file mode 100644 index d548d38dde926240ede8c090bd2aa4663b166102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -cmake_minimum_required(VERSION 3.4.1) -set(TARGET wenet) -project(${TARGET} CXX) -set(CMAKE_CXX_STANDARD 14) -include(ExternalProject) - -option(TORCH "whether to build with Torch" ON) -option(ONNX "whether to build with ONNX" OFF) -set(CMAKE_VERBOSE_MAKEFILE on) -set(build_DIR ${CMAKE_SOURCE_DIR}/../../../build) -list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) -string(REPLACE "-Wl,--exclude-libs,libgcc_real.a" "" CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") - -include(libtorch) -include(openfst) - -include_directories( - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/kaldi -) - -add_subdirectory(utils) -add_subdirectory(frontend) -add_subdirectory(post_processor) -add_subdirectory(kaldi) # kaldi: wfst based decoder -add_subdirectory(decoder) - -link_libraries(frontend decoder android) -add_library(${TARGET} SHARED wenet.cc) - -add_executable(decoder_main bin/decoder_main.cc) -target_link_libraries(decoder_main PUBLIC libc++_shared.so) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/CMakeLists.txt deleted file mode 100644 index a117b8bcb580c8738a7ce72f88bc10ff0a450e98..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -add_executable(decoder_main decoder_main.cc) -target_link_libraries(decoder_main PUBLIC decoder) - -add_executable(label_checker_main label_checker_main.cc) -target_link_libraries(label_checker_main PUBLIC decoder) - -# if(TORCH) -# add_executable(api_main api_main.cc) -# target_link_libraries(api_main PUBLIC wenet_api) -# endif() - -if(WEBSOCKET) - add_executable(websocket_client_main websocket_client_main.cc) - 
target_link_libraries(websocket_client_main PUBLIC websocket) - add_executable(websocket_server_main websocket_server_main.cc) - target_link_libraries(websocket_server_main PUBLIC websocket) -endif() - -if(GRPC) - add_executable(grpc_server_main grpc_server_main.cc) - target_link_libraries(grpc_server_main PUBLIC wenet_grpc) - add_executable(grpc_client_main grpc_client_main.cc) - target_link_libraries(grpc_client_main PUBLIC wenet_grpc) -endif() - -if(HTTP) - add_executable(http_client_main http_client_main.cc) - target_link_libraries(http_client_main PUBLIC http) - add_executable(http_server_main http_server_main.cc) - target_link_libraries(http_server_main PUBLIC http) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/api_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/api_main.cc deleted file mode 100644 index 94b20d52a7b8eee5c39a12af4e1e25324d7d880f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/api_main.cc +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "api/wenet_api.h" -#include "frontend/wav.h" -#include "utils/flags.h" - -DEFINE_string(model_dir, "", "model dir path"); -DEFINE_string(wav_path, "", "single wave path"); -DEFINE_bool(enable_timestamp, false, "enable timestamps"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - wenet_set_log_level(2); - - void* decoder = wenet_init(FLAGS_model_dir.c_str()); - wenet_set_timestamp(decoder, FLAGS_enable_timestamp == true ? 1 : 0); - wenet::WavReader wav_reader(FLAGS_wav_path); - std::vector data(wav_reader.num_samples()); - for (int i = 0; i < wav_reader.num_samples(); i++) { - data[i] = static_cast(*(wav_reader.data() + i)); - } - - for (int i = 0; i < 10; i++) { - // Return the final result when last is 1 - wenet_decode(decoder, reinterpret_cast(data.data()), - data.size() * 2, 1); - const char* result = wenet_get_result(decoder); - LOG(INFO) << i << " " << result; - wenet_reset(decoder); - } - wenet_free(decoder); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/decoder_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/decoder_main.cc deleted file mode 100644 index b8f1dbae6b88390504cc9ce63f33dc9bd54a2d6a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/decoder_main.cc +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include - -#include "decoder/params.h" -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/string.h" -#include "utils/thread_pool.h" -#include "utils/timer.h" -#include "utils/utils.h" - -DEFINE_bool(simulate_streaming, false, "simulate streaming input"); -DEFINE_bool(output_nbest, false, "output n-best of decode result"); -DEFINE_string(wav_path, "", "single wave path"); -DEFINE_string(wav_scp, "", "input wav scp"); -DEFINE_string(result, "", "result output file"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); -DEFINE_int32(thread_num, 1, "num of decode thread"); -DEFINE_int32(warmup, 0, "num of warmup decode, 0 means no warmup"); - -std::shared_ptr g_decode_config; -std::shared_ptr g_feature_config; -std::shared_ptr g_decode_resource; - -std::ofstream g_result; -std::mutex g_mutex; -int g_total_waves_dur = 0; -int g_total_decode_time = 0; - -void decode(std::pair wav, bool warmup = false) { - wenet::WavReader wav_reader(wav.second); - int num_samples = wav_reader.num_samples(); - CHECK_EQ(wav_reader.sample_rate(), FLAGS_sample_rate); - - auto feature_pipeline = - std::make_shared(*g_feature_config); - feature_pipeline->AcceptWaveform(wav_reader.data(), num_samples); - feature_pipeline->set_input_finished(); - LOG(INFO) << "num frames " << feature_pipeline->num_frames(); - - wenet::AsrDecoder decoder(feature_pipeline, g_decode_resource, - *g_decode_config); - - int wave_dur = static_cast(static_cast(num_samples) / - wav_reader.sample_rate() * 1000); - int decode_time = 0; - std::string final_result; - while (true) { - wenet::Timer timer; - wenet::DecodeState state = decoder.Decode(); - if (state == wenet::DecodeState::kEndFeats) { - decoder.Rescoring(); - } - int chunk_decode_time = timer.Elapsed(); - decode_time += chunk_decode_time; - if (decoder.DecodedSomething()) { - LOG(INFO) << "Partial result: " << decoder.result()[0].sentence; - } - - if (FLAGS_continuous_decoding && state == wenet::DecodeState::kEndpoint) { - if (decoder.DecodedSomething()) { - decoder.Rescoring(); - LOG(INFO) << "Final result (continuous decoding): " - << decoder.result()[0].sentence; - final_result.append(decoder.result()[0].sentence); - } - decoder.ResetContinuousDecoding(); - } - - if (state == wenet::DecodeState::kEndFeats) { - break; - } else if (FLAGS_chunk_size > 0 && FLAGS_simulate_streaming) { - float frame_shift_in_ms = - static_cast(g_feature_config->frame_shift) / - wav_reader.sample_rate() * 1000; - auto wait_time = - decoder.num_frames_in_current_chunk() * frame_shift_in_ms - - chunk_decode_time; - if (wait_time > 0) { - LOG(INFO) << "Simulate streaming, waiting for " << wait_time << "ms"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(wait_time))); - } - } - } - if (decoder.DecodedSomething()) { - final_result.append(decoder.result()[0].sentence); - } - LOG(INFO) << wav.first << " Final result: " << final_result << std::endl; - LOG(INFO) << "Decoded " << wave_dur << "ms audio taken " << decode_time - << "ms."; - - if (!warmup) { - g_mutex.lock(); - std::ostream& buffer = FLAGS_result.empty() 
? std::cout : g_result; - if (!FLAGS_output_nbest) { - buffer << wav.first << " " << final_result << std::endl; - } else { - buffer << "wav " << wav.first << std::endl; - auto& results = decoder.result(); - for (auto& r : results) { - if (r.sentence.empty()) continue; - buffer << "candidate " << r.score << " " << r.sentence << std::endl; - } - } - g_total_waves_dur += wave_dur; - g_total_decode_time += decode_time; - g_mutex.unlock(); - } -} - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - g_decode_config = wenet::InitDecodeOptionsFromFlags(); - g_feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - g_decode_resource = wenet::InitDecodeResourceFromFlags(); - - if (FLAGS_wav_path.empty() && FLAGS_wav_scp.empty()) { - LOG(FATAL) << "Please provide the wave path or the wav scp."; - } - std::vector> waves; - if (!FLAGS_wav_path.empty()) { - waves.emplace_back(make_pair("test", FLAGS_wav_path)); - } else { - std::ifstream wav_scp(FLAGS_wav_scp); - std::string line; - while (getline(wav_scp, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - CHECK_GE(strs.size(), 2); - waves.emplace_back(make_pair(strs[0], strs[1])); - } - - if (waves.empty()) { - LOG(FATAL) << "Please provide non-empty wav scp."; - } - } - - if (!FLAGS_result.empty()) { - g_result.open(FLAGS_result, std::ios::out); - } - - // Warmup - if (FLAGS_warmup > 0) { - LOG(INFO) << "Warming up..."; - { - ThreadPool pool(FLAGS_thread_num); - auto wav = waves[0]; - for (int i = 0; i < FLAGS_warmup; i++) { - pool.enqueue(decode, wav, true); - } - } - LOG(INFO) << "Warmup done."; - } - - { - ThreadPool pool(FLAGS_thread_num); - for (auto& wav : waves) { - pool.enqueue(decode, wav, false); - } - } - - LOG(INFO) << "Total: decoded " << g_total_waves_dur << "ms audio taken " - << g_total_decode_time << "ms."; - LOG(INFO) << "RTF: " << std::setprecision(4) - << static_cast(g_total_decode_time) / g_total_waves_dur; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/grpc_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/grpc_client_main.cc deleted file mode 100644 index f2d226d48d3757c5f095335eff3288f5d227282b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/grpc_client_main.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "frontend/wav.h" -#include "grpc/grpc_client.h" -#include "utils/flags.h" -#include "utils/timer.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of websocket server"); -DEFINE_int32(port, 10086, "port of websocket server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - wenet::GrpcClient client(FLAGS_hostname, FLAGS_port, FLAGS_nbest, - FLAGS_continuous_decoding); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - std::vector pcm_data(wav_reader.data(), - wav_reader.data() + num_samples); - // Send data every 0.5 second - const float interval = 0.5; - const int sample_interval = interval * sample_rate; - for (int start = 0; start < num_samples; start += sample_interval) { - if (client.done()) { - break; - } - int end = std::min(start + sample_interval, num_samples); - // Convert to short - std::vector data; - data.reserve(end - start); - for (int j = start; j < end; j++) { - data.push_back(static_cast(pcm_data[j])); - } - // Send PCM data - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Send " << data.size() << " samples"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(interval * 1000))); - } - wenet::Timer timer; - - client.Join(); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/grpc_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/grpc_server_main.cc deleted file mode 100644 index b00f3cbade1ee70dadfb49829e9ca73fd50c2be2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/grpc_server_main.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include - -#include "decoder/params.h" -#include "grpc/grpc_server.h" -#include "utils/log.h" - -DEFINE_int32(port, 10086, "grpc listening port"); -DEFINE_int32(workers, 4, "grpc num workers"); - -using grpc::Server; -using grpc::ServerBuilder; - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::GrpcServer service(feature_config, decode_config, decode_resource); - grpc::EnableDefaultHealthCheckService(true); - grpc::reflection::InitProtoReflectionServerBuilderPlugin(); - ServerBuilder builder; - std::string address("0.0.0.0:" + std::to_string(FLAGS_port)); - builder.AddListeningPort(address, grpc::InsecureServerCredentials()); - builder.RegisterService(&service); - builder.SetSyncServerOption(ServerBuilder::SyncServerOption::NUM_CQS, - FLAGS_workers); - std::unique_ptr server(builder.BuildAndStart()); - LOG(INFO) << "Listening at port " << FLAGS_port; - server->Wait(); - google::ShutdownGoogleLogging(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/http_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/http_client_main.cc deleted file mode 100644 index b59ee3f5f32bf08552416b183802029ac5d5afa5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/http_client_main.cc +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/timer.h" -#include "http/http_client.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of http server"); -DEFINE_int32(port, 10086, "port of http server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - // Convert to short - std::vector data; - data.reserve(num_samples); - for (int j = 0; j < num_samples; j++) { - data.push_back(static_cast(wav_reader.data()[j])); - } - // Send data - wenet::HttpClient client(FLAGS_hostname, FLAGS_port); - client.set_nbest(FLAGS_nbest); - wenet::Timer timer; - VLOG(2) << "Send " << data.size() << " samples"; - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/http_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/http_server_main.cc deleted file mode 100644 index e30cf2bcdf746c2072f023e90f470ccba5467c2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/http_server_main.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "decoder/params.h" -#include "utils/log.h" -#include "http/http_server.h" - -DEFINE_int32(port, 10086, "http listening port"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::HttpServer server(FLAGS_port, feature_config, decode_config, - decode_resource); - LOG(INFO) << "Listening at port " << FLAGS_port; - server.Start(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/label_checker_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/label_checker_main.cc deleted file mode 100644 index e36e3d5c29a38a7ebee80606ebd8e69ae8b1eb96..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/label_checker_main.cc +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include -#include -#include -#include - -#include "decoder/params.h" -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/string.h" - -DEFINE_string(text, "", "kaldi style text input file"); -DEFINE_string(wav_scp, "", "kaldi style wav scp"); -DEFINE_double(is_penalty, 1.0, - "insertion/substitution penalty for align insertion"); -DEFINE_double(del_penalty, 1.0, "deletion penalty for align insertion"); -DEFINE_string(result, "", "result output file"); -DEFINE_string(timestamp, "", "timestamp output file"); - -namespace wenet { - -const char* kDeletion = ""; -// Is: Insertion and substitution -const char* kIsStart = ""; -const char* kIsEnd = ""; - -bool MapToLabel(const std::string& text, - std::shared_ptr symbol_table, - std::vector* labels) { - labels->clear(); - // Split label to char sequence - std::vector chars; - SplitUTF8StringToChars(text, &chars); - for (size_t i = 0; i < chars.size(); i++) { - // ▁ is special symbol for white space - std::string label = chars[i] != " " ? 
chars[i] : "▁"; - int id = symbol_table->Find(label); - if (id != -1) { // fst::kNoSymbol - // LOG(INFO) << label << " " << id; - labels->push_back(id); - } - } - return true; -} - -std::shared_ptr MakeSymbolTableForFst( - std::shared_ptr isymbol_table) { - LOG(INFO) << isymbol_table; - CHECK(isymbol_table != nullptr); - auto osymbol_table = std::make_shared(); - osymbol_table->AddSymbol("", 0); - CHECK_EQ(isymbol_table->Find(""), 0); - osymbol_table->AddSymbol("", 1); - for (int i = 1; i < isymbol_table->NumSymbols(); i++) { - std::string symbol = isymbol_table->Find(i); - osymbol_table->AddSymbol(symbol, i + 1); - } - osymbol_table->AddSymbol(kDeletion, isymbol_table->NumSymbols() + 1); - osymbol_table->AddSymbol(kIsStart, isymbol_table->NumSymbols() + 2); - osymbol_table->AddSymbol(kIsEnd, isymbol_table->NumSymbols() + 3); - return osymbol_table; -} - -void CompileCtcFst(std::shared_ptr symbol_table, - fst::StdVectorFst* ofst) { - ofst->DeleteStates(); - int start = ofst->AddState(); - ofst->SetStart(start); - CHECK_EQ(symbol_table->Find(""), 0); - CHECK_EQ(symbol_table->Find(""), 1); - ofst->AddArc(start, fst::StdArc(1, 0, 0.0, start)); - // Exclude kDeletion and kInsertion - for (int i = 2; i < symbol_table->NumSymbols() - 3; i++) { - int s = ofst->AddState(); - ofst->AddArc(start, fst::StdArc(i, i, 0.0, s)); - ofst->AddArc(s, fst::StdArc(i, 0, 0.0, s)); - ofst->AddArc(s, fst::StdArc(0, 0, 0.0, start)); - } - ofst->SetFinal(start, fst::StdArc::Weight::One()); - fst::ArcSort(ofst, fst::StdOLabelCompare()); -} - -void CompileAlignFst(std::vector labels, - std::shared_ptr symbol_table, - fst::StdVectorFst* ofst) { - ofst->DeleteStates(); - int deletion = symbol_table->Find(kDeletion); - int insertion_start = symbol_table->Find(kIsStart); - int insertion_end = symbol_table->Find(kIsEnd); - - int start = ofst->AddState(); - ofst->SetStart(start); - // Filler State - int filler_start = ofst->AddState(); - int filler_end = ofst->AddState(); - for (int i = 2; i < symbol_table->NumSymbols() - 3; i++) { - ofst->AddArc(filler_start, fst::StdArc(i, i, FLAGS_is_penalty, filler_end)); - } - ofst->AddArc(filler_end, fst::StdArc(0, 0, 0.0, filler_start)); - - int prev = start; - // Alignment path and optional filler - for (size_t i = 0; i < labels.size(); i++) { - int cur = ofst->AddState(); - // 1. Insertion or Substitution - ofst->AddArc(prev, fst::StdArc(0, insertion_start, 0.0, filler_start)); - ofst->AddArc(filler_end, fst::StdArc(0, insertion_end, 0.0, prev)); - // 2. Correct - ofst->AddArc(prev, fst::StdArc(labels[i], labels[i], 0.0, cur)); - // 3. 
Deletion - ofst->AddArc(prev, fst::StdArc(0, deletion, FLAGS_del_penalty, cur)); - - prev = cur; - } - // Optional add endding filler - ofst->AddArc(prev, fst::StdArc(0, insertion_start, 0.0, filler_start)); - ofst->AddArc(filler_end, fst::StdArc(0, insertion_end, 0.0, prev)); - ofst->SetFinal(prev, fst::StdArc::Weight::One()); - fst::ArcSort(ofst, fst::StdILabelCompare()); -} - -} // namespace wenet - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - CHECK(decode_resource->unit_table != nullptr); - - auto wfst_symbol_table = - wenet::MakeSymbolTableForFst(decode_resource->unit_table); - // wfst_symbol_table->WriteText("fst.txt"); - // Reset symbol_table to on-the-fly generated wfst_symbol_table - decode_resource->symbol_table = wfst_symbol_table; - - // Compile ctc FST - fst::StdVectorFst ctc_fst; - wenet::CompileCtcFst(wfst_symbol_table, &ctc_fst); - // ctc_fst.Write("ctc.fst"); - - std::unordered_map wav_table; - std::ifstream wav_is(FLAGS_wav_scp); - std::string line; - while (std::getline(wav_is, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - CHECK_EQ(strs.size(), 2); - wav_table[strs[0]] = strs[1]; - } - - std::ifstream text_is(FLAGS_text); - std::ofstream result_os(FLAGS_result, std::ios::out); - std::ofstream timestamp_out; - if (!FLAGS_timestamp.empty()) { - timestamp_out.open(FLAGS_timestamp, std::ios::out); - } - std::ostream& timestamp_os = - FLAGS_timestamp.empty() ? std::cout : timestamp_out; - - while (std::getline(text_is, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - if (strs.size() < 2) continue; - std::string key = strs[0]; - LOG(INFO) << "Processing " << key; - if (wav_table.find(key) != wav_table.end()) { - strs.erase(strs.begin()); - std::string text = wenet::JoinString(" ", strs); - std::vector labels; - wenet::MapToLabel(text, wfst_symbol_table, &labels); - // Prepare FST for alignment decoding - fst::StdVectorFst align_fst; - wenet::CompileAlignFst(labels, wfst_symbol_table, &align_fst); - // align_fst.Write("align.fst"); - auto decoding_fst = std::make_shared(); - fst::Compose(ctc_fst, align_fst, decoding_fst.get()); - // decoding_fst->Write("decoding.fst"); - // Preapre feature pipeline - wenet::WavReader wav_reader; - if (!wav_reader.Open(wav_table[key])) { - LOG(WARNING) << "Error in reading " << wav_table[key]; - continue; - } - int num_samples = wav_reader.num_samples(); - CHECK_EQ(wav_reader.sample_rate(), FLAGS_sample_rate); - auto feature_pipeline = - std::make_shared(*feature_config); - feature_pipeline->AcceptWaveform(wav_reader.data(), num_samples); - feature_pipeline->set_input_finished(); - decode_resource->fst = decoding_fst; - LOG(INFO) << "num frames " << feature_pipeline->num_frames(); - wenet::AsrDecoder decoder(feature_pipeline, decode_resource, - *decode_config); - while (true) { - wenet::DecodeState state = decoder.Decode(); - if (state == wenet::DecodeState::kEndFeats) { - decoder.Rescoring(); - break; - } - } - std::string final_result; - std::string timestamp_str; - if (decoder.DecodedSomething()) { - const wenet::DecodeResult& result = decoder.result()[0]; - final_result = result.sentence; - std::stringstream ss; - for (const auto& w : result.word_pieces) { - ss << " " << w.word << " " << w.start << " " << w.end; - } - timestamp_str 
= ss.str(); - } - result_os << key << " " << final_result << std::endl; - timestamp_os << key << " " << timestamp_str << std::endl; - LOG(INFO) << key << " " << final_result; - } else { - LOG(WARNING) << "No wav file for " << key; - } - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/websocket_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/websocket_client_main.cc deleted file mode 100644 index 3eaa96069dc5f57673fbb2819bf7d4883e0d5ffa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/websocket_client_main.cc +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/timer.h" -#include "websocket/websocket_client.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of websocket server"); -DEFINE_int32(port, 10086, "port of websocket server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - wenet::WebSocketClient client(FLAGS_hostname, FLAGS_port); - client.set_nbest(FLAGS_nbest); - client.set_continuous_decoding(FLAGS_continuous_decoding); - client.SendStartSignal(); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - // Send data every 0.5 second - const float interval = 0.5; - const int sample_interval = interval * sample_rate; - for (int start = 0; start < num_samples; start += sample_interval) { - if (client.done()) { - break; - } - int end = std::min(start + sample_interval, num_samples); - // Convert to short - std::vector data; - data.reserve(end - start); - for (int j = start; j < end; j++) { - data.push_back(static_cast(wav_reader.data()[j])); - } - // TODO(Binbin Zhang): Network order? 
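The websocket_client_main.cc removed here (its send loop continues just below) streams the waveform in 0.5 s chunks and sleeps between sends so the server receives audio at roughly real-time rate. A standalone sketch of that pacing loop, with the network call stubbed out behind a callback:

```cpp
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <thread>
#include <vector>

// Illustrative pacing loop: send 0.5 s of 16 kHz PCM, then sleep for the same
// wall-clock interval. `send` is a stand-in for client.SendBinaryData().
void StreamInChunks(const std::vector<int16_t>& samples,
                    const std::function<void(const int16_t*, std::size_t)>& send) {
  const int sample_rate = 16000;
  const float interval = 0.5f;
  const std::size_t sample_interval =
      static_cast<std::size_t>(interval * sample_rate);
  for (std::size_t start = 0; start < samples.size(); start += sample_interval) {
    std::size_t end = std::min(start + sample_interval, samples.size());
    send(samples.data() + start, end - start);
    std::this_thread::sleep_for(
        std::chrono::milliseconds(static_cast<int>(interval * 1000)));
  }
}
```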
- // Send PCM data - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Send " << data.size() << " samples"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(interval * 1000))); - } - wenet::Timer timer; - client.SendEndSignal(); - client.Join(); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/websocket_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/websocket_server_main.cc deleted file mode 100644 index 796d9d2e6d151f7c08b43d66b7245c58ee086cc2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/bin/websocket_server_main.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/params.h" -#include "utils/log.h" -#include "websocket/websocket_server.h" - -DEFINE_int32(port, 10086, "websocket listening port"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::WebSocketServer server(FLAGS_port, feature_config, decode_config, - decode_resource); - LOG(INFO) << "Listening at port " << FLAGS_port; - server.Start(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/boost.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/boost.cmake deleted file mode 100644 index 8684c0ec43960da213da923dc57416f04301ea2b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/boost.cmake +++ /dev/null @@ -1,10 +0,0 @@ -FetchContent_Declare(boost - URL https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.gz - URL_HASH SHA256=aeb26f80e80945e82ee93e5939baebdca47b9dee80a07d3144be1e1a6a66dd6a -) -FetchContent_MakeAvailable(boost) -include_directories(${boost_SOURCE_DIR}) - -if(MSVC) - add_definitions(-DBOOST_ALL_DYN_LINK -DBOOST_ALL_NO_LIB) -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/bpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/bpu.cmake deleted file mode 100644 index 350d76c19d6f656fb130de09877d649cf49972a4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/bpu.cmake +++ /dev/null @@ -1,30 +0,0 @@ -if(BPU) - if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - 
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(EASY_DNN_URL "https://github.com/xingchensong/toolchain_pkg/releases/download/easy_dnn/easy_dnn.0.4.11.tar.gz") - set(URL_HASH "SHA256=a1a6f77d1baae7181d75ec5d37a2ee529ac4e1c4400babd6ceb1c007392a4904") - else() - message(FATAL_ERROR "Unsupported CMake System Processor '${CMAKE_SYSTEM_PROCESSOR}' (expected 'aarch64')") - endif() - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Linux')") - endif() - - FetchContent_Declare(easy_dnn - URL ${EASY_DNN_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(easy_dnn) - include_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/include) - link_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/lib) - link_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/lib) - link_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/lib) - - add_definitions(-DUSE_BPU) - # NOTE(xcsong): Reasons for adding flag `-fuse-ld=gold`: - # https://stackoverflow.com/questions/59915966/unknown-gcc-linker-error-but-builds-sucessfully/59916438#59916438 - # https://github.com/tensorflow/tensorflow/issues/47849 - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold") -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/gflags.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/gflags.cmake deleted file mode 100644 index 53ae5763b5a8c860b7e64d35b380eee5429f539d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/gflags.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(gflags - URL https://github.com/gflags/gflags/archive/v2.2.2.zip - URL_HASH SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5 -) -FetchContent_MakeAvailable(gflags) -include_directories(${gflags_BINARY_DIR}/include) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/glog.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/glog.cmake deleted file mode 100644 index 447ab4132f669ee2c3a52c37959dd684a39ff21b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/glog.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(glog - URL https://github.com/google/glog/archive/v0.4.0.zip - URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc -) -FetchContent_MakeAvailable(glog) -include_directories(${glog_SOURCE_DIR}/src ${glog_BINARY_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/grpc.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/grpc.cmake deleted file mode 100644 index 644093a4bf8191f3a45b0df0a72c000981c48f58..0000000000000000000000000000000000000000 --- 
a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/grpc.cmake +++ /dev/null @@ -1,9 +0,0 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/grpc) -# third_party: grpc -# On how to build grpc, you may refer to https://github.com/grpc/grpc -# We recommend manually recursive clone the repo to avoid internet connection problem -FetchContent_Declare(gRPC - GIT_REPOSITORY https://github.com/grpc/grpc - GIT_TAG v1.37.1 -) -FetchContent_MakeAvailable(gRPC) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/gtest.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/gtest.cmake deleted file mode 100644 index 30dc7c1a31d8b83991841a4dc33f61ed078b532a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/gtest.cmake +++ /dev/null @@ -1,8 +0,0 @@ -FetchContent_Declare(googletest - URL https://github.com/google/googletest/archive/release-1.11.0.zip - URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a -) -if(MSVC) - set(gtest_force_shared_crt ON CACHE BOOL "Always use msvcrt.dll" FORCE) -endif() -FetchContent_MakeAvailable(googletest) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/libtorch.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/libtorch.cmake deleted file mode 100644 index 3cd9245b2da52f8be206d27164de5f411bff171b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/libtorch.cmake +++ /dev/null @@ -1,79 +0,0 @@ -if(TORCH) - add_definitions(-DUSE_TORCH) - if(NOT ANDROID) - if(GPU) - if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - message(FATAL_ERROR "GPU is supported only Linux, you can use CPU version") - else() - add_definitions(-DUSE_GPU) - endif() - endif() - - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - if(${CMAKE_BUILD_TYPE} MATCHES "Release") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bece54d36377990257e9d028c687c5b6759c5cfec0a0153da83cf6f0f71f648f") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-debug-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=3cc7ba3c3865d86f03d78c2f0878fdbed8b764359476397a5c95cf3bba0d665a") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CXX11_ABI) - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=d52f63577a07adb0bfd6d77c90f7da21896e94f71eb7dcd55ed7835ccb3b2b59") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.12.0%2Bcu113.zip") - set(URL_HASH "SHA256=80f089939de20e68e3fcad4dfa72a26c8bf91b5e77b11042f671f39ebac35865") - endif() - else() - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bee1b7be308792aa60fc95a4f5274d9658cb7248002d0e333d49eb81ec88430c") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.11.0%2Bcu113.zip") - set(URL_HASH 
"SHA256=90159ecce3ff451f3ef3f657493b6c7c96759c3b74bbd70c1695f2ea2f81e1ad") - endif() - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-macos-1.13.0.zip") - set(URL_HASH "SHA256=a8f80050b95489b4e002547910410c2c230e9f590ffab2482e19e809afe4f7aa") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") - add_definitions(-DIOS) - else() - message(FATAL_ERROR "Unsupported System '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux', 'Darwin' or 'iOS')") - endif() - - # iOS use LibTorch from pod install - if(NOT IOS) - FetchContent_Declare(libtorch - URL ${LIBTORCH_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(libtorch) - find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS} -DC10_USE_GLOG") - endif() - - if(MSVC) - file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") - file(COPY ${TORCH_DLLS} DESTINATION ${CMAKE_BINARY_DIR}) - endif() - else() - # Change version in runtime/android/app/build.gradle. - file(GLOB PYTORCH_INCLUDE_DIRS "${build_DIR}/pytorch_android*.aar/headers") - file(GLOB PYTORCH_LINK_DIRS "${build_DIR}/pytorch_android*.aar/jni/${ANDROID_ABI}") - find_library(PYTORCH_LIBRARY pytorch_jni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - find_library(FBJNI_LIBRARY fbjni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - include_directories( - ${PYTORCH_INCLUDE_DIRS} - ${PYTORCH_INCLUDE_DIRS}/torch/csrc/api/include - ) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/onnx.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/onnx.cmake deleted file mode 100644 index bd55402cb2a6024620fa6ff8b5c413207041adfa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/onnx.cmake +++ /dev/null @@ -1,35 +0,0 @@ -if(ONNX) - set(ONNX_VERSION "1.12.0") - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-win-x64-${ONNX_VERSION}.zip") - set(URL_HASH "SHA256=8b5d61204989350b7904ac277f5fbccd3e6736ddbb6ec001e412723d71c9c176") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-aarch64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5820d9f343df73c63b6b2b174a1ff62575032e171c9564bcf92060f46827d0ac") - else() - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-x64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5d503ce8540358b59be26c675e42081be14a3e833a5301926f555451046929c5") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-x86_64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=09b17f712f8c6f19bb63da35d508815b443cbb473e16c6192abfaa297c02f600") - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux' or 'Darwin')") - endif() - - FetchContent_Declare(onnxruntime - URL ${ONNX_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(onnxruntime) - include_directories(${onnxruntime_SOURCE_DIR}/include) - 
link_directories(${onnxruntime_SOURCE_DIR}/lib) - - if(MSVC) - file(GLOB ONNX_DLLS "${onnxruntime_SOURCE_DIR}/lib/*.dll") - file(COPY ${ONNX_DLLS} DESTINATION ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}) - endif() - - add_definitions(-DUSE_ONNX) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/openfst.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/openfst.cmake deleted file mode 100644 index 490a3da6b571ec228114167fb9c0d9e9b4043bd2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/openfst.cmake +++ /dev/null @@ -1,45 +0,0 @@ -if(NOT ANDROID) - include(gflags) - # We can't build glog with gflags, unless gflags is pre-installed. - # If build glog with pre-installed gflags, there will be conflict. - set(WITH_GFLAGS OFF CACHE BOOL "whether build glog with gflags" FORCE) - include(glog) - - if(NOT GRAPH_TOOLS) - set(HAVE_BIN OFF CACHE BOOL "Build the fst binaries" FORCE) - set(HAVE_SCRIPT OFF CACHE BOOL "Build the fstscript" FORCE) - endif() - set(HAVE_COMPACT OFF CACHE BOOL "Build compact" FORCE) - set(HAVE_CONST OFF CACHE BOOL "Build const" FORCE) - set(HAVE_GRM OFF CACHE BOOL "Build grm" FORCE) - set(HAVE_FAR OFF CACHE BOOL "Build far" FORCE) - set(HAVE_PDT OFF CACHE BOOL "Build pdt" FORCE) - set(HAVE_MPDT OFF CACHE BOOL "Build mpdt" FORCE) - set(HAVE_LINEAR OFF CACHE BOOL "Build linear" FORCE) - set(HAVE_LOOKAHEAD OFF CACHE BOOL "Build lookahead" FORCE) - set(HAVE_NGRAM OFF CACHE BOOL "Build ngram" FORCE) - set(HAVE_SPECIAL OFF CACHE BOOL "Build special" FORCE) - - if(MSVC) - add_compile_options(/W0 /wd4244 /wd4267) - endif() - - # "OpenFST port for Windows" builds openfst with cmake for multiple platforms. - # Openfst is compiled with glog/gflags to avoid log and flag conflicts with log and flags in wenet/libtorch. - # To build openfst with gflags and glog, we comment out some vars of {flags, log}.h and flags.cc. 
- set(openfst_SOURCE_DIR ${fc_base}/openfst-src CACHE PATH "OpenFST source directory") - FetchContent_Declare(openfst - URL https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz - URL_HASH SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e - PATCH_COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/patch/openfst ${openfst_SOURCE_DIR} - ) - FetchContent_MakeAvailable(openfst) - add_dependencies(fst gflags glog) - target_link_libraries(fst PUBLIC gflags_nothreads_static glog) - include_directories(${openfst_SOURCE_DIR}/src/include) -else() - set(openfst_BINARY_DIR ${build_DIR}/wenet-openfst-android-1.0.2.aar/jni) - include_directories(${openfst_BINARY_DIR}/include) - link_directories(${openfst_BINARY_DIR}/${ANDROID_ABI}) - link_libraries(log gflags_nothreads glog fst) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/pybind11.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/pybind11.cmake deleted file mode 100644 index 6bdae202c1c4d94228e5f92dab051c118dba7d3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/pybind11.cmake +++ /dev/null @@ -1,7 +0,0 @@ -FetchContent_Declare(pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.zip - URL_HASH SHA256=d1646e6f70d8a3acb2ddd85ce1ed543b5dd579c68b8fb8e9638282af20edead8 -) -FetchContent_MakeAvailable(pybind11) - -add_subdirectory(${pybind11_SOURCE_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/xpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/xpu.cmake deleted file mode 100644 index 38418671b0237550cd01d4d95e8743067e113e56..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/cmake/xpu.cmake +++ /dev/null @@ -1,37 +0,0 @@ -if(NOT WIN32) - string(ASCII 27 Esc) - set(ColourReset "${Esc}[m") - set(ColourBold "${Esc}[1m") - set(Red "${Esc}[31m") - set(Green "${Esc}[32m") - set(Yellow "${Esc}[33m") - set(Blue "${Esc}[34m") - set(Magenta "${Esc}[35m") - set(Cyan "${Esc}[36m") - set(White "${Esc}[37m") - set(BoldRed "${Esc}[1;31m") - set(BoldGreen "${Esc}[1;32m") - set(BoldYellow "${Esc}[1;33m") - set(BoldBlue "${Esc}[1;34m") - set(BoldMagenta "${Esc}[1;35m") - set(BoldCyan "${Esc}[1;36m") - set(BoldWhite "${Esc}[1;37m") -endif() - -if(XPU) - set(RUNTIME_KUNLUN_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - message(STATUS "RUNTIME_KUNLUN_PATH is ${RUNTIME_KUNLUN_PATH} .\n") - set(KUNLUN_XPU_PATH ${RUNTIME_KUNLUN_PATH}/xpu) - if(NOT DEFINED ENV{XPU_API_PATH}) - message(FATAL_ERROR "${BoldRed}NO ENV{XPU_API_PATH} in your env. Please set XPU_API_PATH.${ColourReset}\n") - else() - set(XPU_API_PATH $ENV{XPU_API_PATH}) - message("set XPU_API_PATH from env_var. 
Val is $ENV{XPU_API_PATH}.") - endif() - - include_directories(${RUNTIME_KUNLUN_PATH} ${KUNLUN_XPU_PATH}/ - ${XPU_API_PATH}/output/include ${XPU_API_PATH}/../runtime/include) - link_directories(${XPU_API_PATH}/output/so/ ${XPU_API_PATH}/../runtime/output/so/) - - add_definitions(-DUSE_XPU) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/CMakeLists.txt deleted file mode 100644 index fe03efb288eb1c7ae3d05e896e95855e5865472f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -set(decoder_srcs - asr_decoder.cc - asr_model.cc - context_graph.cc - ctc_prefix_beam_search.cc - ctc_wfst_beam_search.cc - ctc_endpoint.cc -) - -if(NOT TORCH AND NOT ONNX AND NOT XPU AND NOT IOS AND NOT BPU) - message(FATAL_ERROR "Please build with TORCH or ONNX or XPU or IOS or BPU!!!") -endif() -if(TORCH OR IOS) - list(APPEND decoder_srcs torch_asr_model.cc) -endif() -if(ONNX) - list(APPEND decoder_srcs onnx_asr_model.cc) -endif() - -add_library(decoder STATIC ${decoder_srcs}) -target_link_libraries(decoder PUBLIC kaldi-decoder frontend - post_processor utils) - -if(ANDROID) - target_link_libraries(decoder PUBLIC ${PYTORCH_LIBRARY} ${FBJNI_LIBRARY}) -else() - if(TORCH) - target_link_libraries(decoder PUBLIC ${TORCH_LIBRARIES}) - endif() - if(ONNX) - target_link_libraries(decoder PUBLIC onnxruntime) - endif() - if(BPU) - target_link_libraries(decoder PUBLIC bpu_asr_model) - endif() - if(XPU) - target_link_libraries(decoder PUBLIC xpu_conformer) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_decoder.cc deleted file mode 100644 index 34de7550ea287b37d2cb707e148f5d6853b3d804..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_decoder.cc +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/asr_decoder.h" - -#include - -#include -#include -#include - -#include "utils/timer.h" - -namespace wenet { - -AsrDecoder::AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts) - : feature_pipeline_(std::move(feature_pipeline)), - // Make a copy of the model ASR model since we will change the inner - // status of the model - model_(resource->model->Copy()), - post_processor_(resource->post_processor), - symbol_table_(resource->symbol_table), - fst_(resource->fst), - unit_table_(resource->unit_table), - opts_(opts), - ctc_endpointer_(new CtcEndpoint(opts.ctc_endpoint_config)) { - if (opts_.reverse_weight > 0) { - // Check if model has a right to left decoder - CHECK(model_->is_bidirectional_decoder()); - } - if (nullptr == fst_) { - searcher_.reset(new CtcPrefixBeamSearch(opts.ctc_prefix_search_opts, - resource->context_graph)); - } else { - searcher_.reset(new CtcWfstBeamSearch(*fst_, opts.ctc_wfst_search_opts, - resource->context_graph)); - } - ctc_endpointer_->frame_shift_in_ms(frame_shift_in_ms()); -} - -void AsrDecoder::Reset() { - start_ = false; - result_.clear(); - num_frames_ = 0; - global_frame_offset_ = 0; - model_->Reset(); - searcher_->Reset(); - feature_pipeline_->Reset(); - ctc_endpointer_->Reset(); -} - -void AsrDecoder::ResetContinuousDecoding() { - global_frame_offset_ = num_frames_; - start_ = false; - result_.clear(); - model_->Reset(); - searcher_->Reset(); - ctc_endpointer_->Reset(); -} - -DecodeState AsrDecoder::Decode(bool block) { - return this->AdvanceDecoding(block); -} - -void AsrDecoder::Rescoring() { - // Do attention rescoring - Timer timer; - AttentionRescoring(); - VLOG(2) << "Rescoring cost latency: " << timer.Elapsed() << "ms."; -} - -DecodeState AsrDecoder::AdvanceDecoding(bool block) { - DecodeState state = DecodeState::kEndBatch; - model_->set_chunk_size(opts_.chunk_size); - model_->set_num_left_chunks(opts_.num_left_chunks); - int num_required_frames = model_->num_frames_for_chunk(start_); - std::vector> chunk_feats; - // Return immediately if we do not want to block - if (!block && !feature_pipeline_->input_finished() && - feature_pipeline_->NumQueuedFrames() < num_required_frames) { - return DecodeState::kWaitFeats; - } - // If not okay, that means we reach the end of the input - if (!feature_pipeline_->Read(num_required_frames, &chunk_feats)) { - state = DecodeState::kEndFeats; - } - - num_frames_ += chunk_feats.size(); - VLOG(2) << "Required " << num_required_frames << " get " - << chunk_feats.size(); - Timer timer; - std::vector> ctc_log_probs; - model_->ForwardEncoder(chunk_feats, &ctc_log_probs); - int forward_time = timer.Elapsed(); - if (opts_.ctc_wfst_search_opts.blank_scale != 1.0) { - for (int i = 0; i < ctc_log_probs.size(); i++) { - ctc_log_probs[i][0] = ctc_log_probs[i][0] - + std::log(opts_.ctc_wfst_search_opts.blank_scale); - } - } - timer.Reset(); - searcher_->Search(ctc_log_probs); - int search_time = timer.Elapsed(); - VLOG(3) << "forward takes " << forward_time << " ms, search takes " - << search_time << " ms"; - UpdateResult(); - - if (state != DecodeState::kEndFeats) { - if (ctc_endpointer_->IsEndpoint(ctc_log_probs, DecodedSomething())) { - VLOG(1) << "Endpoint is detected at " << num_frames_; - state = DecodeState::kEndpoint; - } - } - - start_ = true; - return state; -} - -void AsrDecoder::UpdateResult(bool finish) { - const auto& hypotheses = searcher_->Outputs(); - const auto& inputs = searcher_->Inputs(); - const auto& likelihood = 
searcher_->Likelihood(); - const auto& times = searcher_->Times(); - result_.clear(); - - CHECK_EQ(hypotheses.size(), likelihood.size()); - for (size_t i = 0; i < hypotheses.size(); i++) { - const std::vector& hypothesis = hypotheses[i]; - - DecodeResult path; - path.score = likelihood[i]; - int offset = global_frame_offset_ * feature_frame_shift_in_ms(); - for (size_t j = 0; j < hypothesis.size(); j++) { - std::string word = symbol_table_->Find(hypothesis[j]); - // A detailed explanation of this if-else branch can be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - if (searcher_->Type() == kWfstBeamSearch) { - path.sentence += (' ' + word); - } else { - path.sentence += (word); - } - } - - // TimeStamp is only supported in final result - // TimeStamp of the output of CtcWfstBeamSearch may be inaccurate due to - // various FST operations when building the decoding graph. So here we use - // time stamp of the input(e2e model unit), which is more accurate, and it - // requires the symbol table of the e2e model used in training. - if (unit_table_ != nullptr && finish) { - const std::vector& input = inputs[i]; - const std::vector& time_stamp = times[i]; - CHECK_EQ(input.size(), time_stamp.size()); - for (size_t j = 0; j < input.size(); j++) { - std::string word = unit_table_->Find(input[j]); - int start = time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ > 0 - ? time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ - : 0; - if (j > 0) { - start = (time_stamp[j] - time_stamp[j - 1]) * frame_shift_in_ms() < - time_stamp_gap_ - ? (time_stamp[j - 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : start; - } - int end = time_stamp[j] * frame_shift_in_ms(); - if (j < input.size() - 1) { - end = (time_stamp[j + 1] - time_stamp[j]) * frame_shift_in_ms() < - time_stamp_gap_ - ? 
(time_stamp[j + 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : end; - } - WordPiece word_piece(word, offset + start, offset + end); - path.word_pieces.emplace_back(word_piece); - } - } - - if (post_processor_ != nullptr) { - path.sentence = post_processor_->Process(path.sentence, finish); - } - result_.emplace_back(path); - } - - if (DecodedSomething()) { - VLOG(1) << "Partial CTC result " << result_[0].sentence; - } -} - -void AsrDecoder::AttentionRescoring() { - searcher_->FinalizeSearch(); - UpdateResult(true); - // No need to do rescoring - if (0.0 == opts_.rescoring_weight) { - return; - } - // Inputs() returns N-best input ids, which is the basic unit for rescoring - // In CtcPrefixBeamSearch, inputs are the same to outputs - const auto& hypotheses = searcher_->Inputs(); - int num_hyps = hypotheses.size(); - if (num_hyps <= 0) { - return; - } - - std::vector rescoring_score; - model_->AttentionRescoring(hypotheses, opts_.reverse_weight, - &rescoring_score); - - // Combine ctc score and rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - result_[i].score = opts_.rescoring_weight * rescoring_score[i] + - opts_.ctc_weight * result_[i].score; - } - std::sort(result_.begin(), result_.end(), DecodeResult::CompareFunc); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_decoder.h deleted file mode 100644 index df71f5b7bad7b2ffdc69bbd7ab11f576bed464d2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_decoder.h +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_ASR_DECODER_H_ -#define DECODER_ASR_DECODER_H_ - -#include -#include -#include -#include - -#include "fst/fstlib.h" -#include "fst/symbol-table.h" - -#include "decoder/asr_model.h" -#include "decoder/context_graph.h" -#include "decoder/ctc_endpoint.h" -#include "decoder/ctc_prefix_beam_search.h" -#include "decoder/ctc_wfst_beam_search.h" -#include "decoder/search_interface.h" -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/utils.h" - -namespace wenet { - -struct DecodeOptions { - // chunk_size is the frame number of one chunk after subsampling. - // e.g. if subsample rate is 4 and chunk_size = 16, the frames in - // one chunk are 64 = 16*4 - int chunk_size = 16; - int num_left_chunks = -1; - - // final_score = rescoring_weight * rescoring_score + ctc_weight * ctc_score; - // rescoring_score = left_to_right_score * (1 - reverse_weight) + - // right_to_left_score * reverse_weight - // Please note the concept of ctc_scores in the following two search - // methods are different. 
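The DecodeOptions comment above summarizes how the final score mixes CTC and attention-rescoring scores (it continues below with how ctc_score differs between the two search methods), and AttentionRescoring() earlier applies exactly this mix. A self-contained sketch of the arithmetic, using the DecodeOptions defaults; the struct and field names are illustrative:

```cpp
#include <vector>

struct Hyp {
  float ctc_score;  // score from the CTC search
  float l2r_score;  // left-to-right attention decoder score
  float r2l_score;  // right-to-left score (only with a bidirectional decoder)
};

// rescoring_score = l2r * (1 - reverse_weight) + r2l * reverse_weight
// final_score     = rescoring_weight * rescoring_score + ctc_weight * ctc_score
std::vector<float> CombineScores(const std::vector<Hyp>& hyps,
                                 float rescoring_weight = 1.0f,
                                 float ctc_weight = 0.5f,
                                 float reverse_weight = 0.0f) {
  std::vector<float> final_scores;
  final_scores.reserve(hyps.size());
  for (const auto& h : hyps) {
    float rescoring = h.l2r_score * (1.0f - reverse_weight) +
                      h.r2l_score * reverse_weight;
    final_scores.push_back(rescoring_weight * rescoring +
                           ctc_weight * h.ctc_score);
  }
  return final_scores;
}
```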
- // For CtcPrefixBeamSearch, it's a sum(prefix) score + context score - // For CtcWfstBeamSearch, it's a max(viterbi) path score + context score - // So we should carefully set ctc_weight according to the search methods. - float ctc_weight = 0.5; - float rescoring_weight = 1.0; - float reverse_weight = 0.0; - CtcEndpointConfig ctc_endpoint_config; - CtcPrefixBeamSearchOptions ctc_prefix_search_opts; - CtcWfstBeamSearchOptions ctc_wfst_search_opts; -}; - -struct WordPiece { - std::string word; - int start = -1; - int end = -1; - - WordPiece(std::string word, int start, int end) - : word(std::move(word)), start(start), end(end) {} -}; - -struct DecodeResult { - float score = -kFloatMax; - std::string sentence; - std::vector word_pieces; - - static bool CompareFunc(const DecodeResult& a, const DecodeResult& b) { - return a.score > b.score; - } -}; - -enum DecodeState { - kEndBatch = 0x00, // End of current decoding batch, normal case - kEndpoint = 0x01, // Endpoint is detected - kEndFeats = 0x02, // All feature is decoded - kWaitFeats = 0x03 // Feat is not enough for one chunk inference, wait -}; - -// DecodeResource is thread safe, which can be shared for multiple -// decoding threads -struct DecodeResource { - std::shared_ptr model = nullptr; - std::shared_ptr symbol_table = nullptr; - std::shared_ptr> fst = nullptr; - std::shared_ptr unit_table = nullptr; - std::shared_ptr context_graph = nullptr; - std::shared_ptr post_processor = nullptr; -}; - -// Torch ASR decoder -class AsrDecoder { - public: - AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts); - // @param block: if true, block when feature is not enough for one chunk - // inference. Otherwise, return kWaitFeats. - DecodeState Decode(bool block = true); - void Rescoring(); - void Reset(); - void ResetContinuousDecoding(); - bool DecodedSomething() const { - return !result_.empty() && !result_[0].sentence.empty(); - } - - // This method is used for time benchmark - int num_frames_in_current_chunk() const { - return num_frames_in_current_chunk_; - } - int frame_shift_in_ms() const { - return model_->subsampling_rate() * - feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - int feature_frame_shift_in_ms() const { - return feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - const std::vector& result() const { return result_; } - - private: - DecodeState AdvanceDecoding(bool block = true); - void AttentionRescoring(); - - void UpdateResult(bool finish = false); - - std::shared_ptr feature_pipeline_; - std::shared_ptr model_; - std::shared_ptr post_processor_; - - std::shared_ptr> fst_ = nullptr; - // output symbol table - std::shared_ptr symbol_table_; - // e2e unit symbol table - std::shared_ptr unit_table_ = nullptr; - const DecodeOptions& opts_; - // cache feature - bool start_ = false; - // For continuous decoding - int num_frames_ = 0; - int global_frame_offset_ = 0; - const int time_stamp_gap_ = 100; // timestamp gap between words in a sentence - - std::unique_ptr searcher_; - std::unique_ptr ctc_endpointer_; - - int num_frames_in_current_chunk_ = 0; - std::vector result_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(AsrDecoder); -}; - -} // namespace wenet - -#endif // DECODER_ASR_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_model.cc 
b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_model.cc deleted file mode 100644 index 8c7b0fb1195cf07bac6c3ff1bb8cb0e187e977da..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_model.cc +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#include "decoder/asr_model.h" - -#include -#include - -namespace wenet { - -int AsrModel::num_frames_for_chunk(bool start) const { - int num_required_frames = 0; - if (chunk_size_ > 0) { - if (!start) { // First batch - int context = right_context_ + 1; // Add current frame - num_required_frames = (chunk_size_ - 1) * subsampling_rate_ + context; - } else { - num_required_frames = chunk_size_ * subsampling_rate_; - } - } else { - num_required_frames = std::numeric_limits::max(); - } - return num_required_frames; -} - -void AsrModel::CacheFeature( - const std::vector>& chunk_feats) { - // Cache feature for next chunk - const int cached_feature_size = 1 + right_context_ - subsampling_rate_; - if (chunk_feats.size() >= cached_feature_size) { - // TODO(Binbin Zhang): Only deal the case when - // chunk_feats.size() > cached_feature_size here, and it's consistent - // with our current model, refine it later if we have new model or - // new requirements - cached_feature_.resize(cached_feature_size); - for (int i = 0; i < cached_feature_size; ++i) { - cached_feature_[i] = - chunk_feats[chunk_feats.size() - cached_feature_size + i]; - } - } -} - -void AsrModel::ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) { - ctc_prob->clear(); - int num_frames = cached_feature_.size() + chunk_feats.size(); - if (num_frames >= right_context_ + 1) { - this->ForwardEncoderFunc(chunk_feats, ctc_prob); - this->CacheFeature(chunk_feats); - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_model.h deleted file mode 100644 index d100dd818551014fa4769c1766bc3b1b626e8453..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/asr_model.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#ifndef DECODER_ASR_MODEL_H_ -#define DECODER_ASR_MODEL_H_ - -#include -#include -#include -#include - -#include "utils/timer.h" -#include "utils/utils.h" - -namespace wenet { - -class AsrModel { - public: - virtual int right_context() const { return right_context_; } - virtual int subsampling_rate() const { return subsampling_rate_; } - virtual int sos() const { return sos_; } - virtual int eos() const { return eos_; } - virtual bool is_bidirectional_decoder() const { - return is_bidirectional_decoder_; - } - virtual int offset() const { return offset_; } - - // If chunk_size > 0, streaming case. 
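AsrModel::num_frames_for_chunk() above decides how many raw feature frames a chunk needs before the encoder can run (the header continues below). A sketch of that arithmetic; the example values for subsampling rate and right context are assumptions typical of a Conformer front end, not values read from this model:

```cpp
#include <cassert>
#include <limits>

// Mirrors num_frames_for_chunk(): the first chunk of an utterance also needs
// the lookahead (right) context, later chunks only chunk_size * subsampling.
int NumFramesForChunk(int chunk_size, int subsampling_rate, int right_context,
                      bool start) {
  if (chunk_size <= 0) {  // non-streaming: consume everything at once
    return std::numeric_limits<int>::max();
  }
  if (!start) {  // first chunk of the utterance
    int context = right_context + 1;  // +1 for the current frame
    return (chunk_size - 1) * subsampling_rate + context;
  }
  return chunk_size * subsampling_rate;  // subsequent chunks
}

int main() {
  // Assumed values: chunk_size = 16, subsampling = 4, right_context = 6.
  assert(NumFramesForChunk(16, 4, 6, /*start=*/false) == 67);  // first chunk
  assert(NumFramesForChunk(16, 4, 6, /*start=*/true) == 64);   // later chunks
  return 0;
}
```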
Otherwise, none streaming case - virtual void set_chunk_size(int chunk_size) { chunk_size_ = chunk_size; } - virtual void set_num_left_chunks(int num_left_chunks) { - num_left_chunks_ = num_left_chunks; - } - // start: if it is the start chunk of one sentence - virtual int num_frames_for_chunk(bool start) const; - - virtual void Reset() = 0; - - virtual void ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob); - - virtual void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) = 0; - - virtual std::shared_ptr Copy() const = 0; - - protected: - virtual void ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) = 0; - virtual void CacheFeature(const std::vector>& chunk_feats); - - int right_context_ = 1; - int subsampling_rate_ = 1; - int sos_ = 0; - int eos_ = 0; - bool is_bidirectional_decoder_ = false; - int chunk_size_ = 16; - int num_left_chunks_ = -1; // -1 means all left chunks - int offset_ = 0; - - std::vector> cached_feature_; -}; - -} // namespace wenet - -#endif // DECODER_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/context_graph.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/context_graph.cc deleted file mode 100644 index adc59c506de2afa7087815887295e4d8735d2a35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/context_graph.cc +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/context_graph.h" - -#include - -#include "fst/determinize.h" - -#include "utils/string.h" -#include "utils/utils.h" - -namespace wenet { - -ContextGraph::ContextGraph(ContextConfig config) : config_(config) {} - -void ContextGraph::BuildContextGraph( - const std::vector& query_contexts, - const std::shared_ptr& symbol_table) { - CHECK(symbol_table != nullptr) << "Symbols table should not be nullptr!"; - start_tag_id_ = symbol_table->AddSymbol(""); - end_tag_id_ = symbol_table->AddSymbol(""); - symbol_table_ = symbol_table; - if (query_contexts.empty()) { - if (graph_ != nullptr) graph_.reset(); - return; - } - - std::unique_ptr ofst(new fst::StdVectorFst()); - // State 0 is the start state and the final state. - int start_state = ofst->AddState(); - ofst->SetStart(start_state); - ofst->SetFinal(start_state, fst::StdArc::Weight::One()); - - LOG(INFO) << "Contexts count size: " << query_contexts.size(); - int count = 0; - for (const auto& context : query_contexts) { - if (context.size() > config_.max_context_length) { - LOG(INFO) << "Skip long context: " << context; - continue; - } - if (++count > config_.max_contexts) break; - - std::vector words; - // Split context to words by symbol table, and build the context graph. 
- bool no_oov = SplitUTF8StringToWords(Trim(context), symbol_table, &words); - if (!no_oov) { - LOG(WARNING) << "Ignore unknown word found during compilation."; - continue; - } - - int prev_state = start_state; - int next_state = start_state; - float escape_score = 0; - for (size_t i = 0; i < words.size(); ++i) { - int word_id = symbol_table_->Find(words[i]); - float score = (i * config_.incremental_context_score - + config_.context_score) * UTF8StringLength(words[i]); - next_state = (i < words.size() - 1) ? ofst->AddState() : start_state; - ofst->AddArc(prev_state, - fst::StdArc(word_id, word_id, score, next_state)); - // Add escape arc to clean the previous context score. - if (i > 0) { - // ilabel and olabel of the escape arc is 0 (). - ofst->AddArc(prev_state, fst::StdArc(0, 0, -escape_score, start_state)); - } - prev_state = next_state; - escape_score += score; - } - } - std::unique_ptr det_fst(new fst::StdVectorFst()); - fst::Determinize(*ofst, det_fst.get()); - graph_ = std::move(det_fst); -} - -int ContextGraph::GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary) { - int next_state = 0; - for (fst::ArcIterator aiter(*graph_, cur_state); !aiter.Done(); - aiter.Next()) { - const fst::StdArc& arc = aiter.Value(); - if (arc.ilabel == 0) { - // escape score, will be overwritten when ilabel equals to word id. - *score = arc.weight.Value(); - } else if (arc.ilabel == word_id) { - next_state = arc.nextstate; - *score = arc.weight.Value(); - if (cur_state == 0) { - *is_start_boundary = true; - } - if (graph_->Final(arc.nextstate) == fst::StdArc::Weight::One()) { - *is_end_boundary = true; - } - break; - } - } - return next_state; -} - -bool ContextGraph::SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - bool no_oov = true; - for (size_t start = 0; start < chars.size();) { - for (size_t end = chars.size(); end > start; --end) { - std::string word; - for (size_t i = start; i < end; i++) { - word += chars[i]; - } - // Skip space. - if (word == " ") { - start = end; - continue; - } - // Add '▁' at the beginning of English word. - if (IsAlpha(word)) { - word = kSpaceSymbol + word; - } - - if (symbol_table->Find(word) != -1) { - words->emplace_back(word); - start = end; - continue; - } - if (end == start + 1) { - ++start; - no_oov = false; - LOG(WARNING) << word << " is oov."; - } - } - } - return no_oov; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/context_graph.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/context_graph.h deleted file mode 100644 index 41b59206987cfe22d421f40506057830b6311f8e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/context_graph.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
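BuildContextGraph() above weights each matched word and adds escape arcs so that abandoning a phrase part-way gives back the boost collected so far; only fully matched phrases keep their score. A standalone sketch of those arc weights, using the default ContextConfig scores declared in the header below; word lengths stand in for UTF8StringLength():

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

struct ContextArcWeights {
  std::vector<float> word_boost;      // weight for matching word i
  std::vector<float> escape_penalty;  // escape weight after matching i+1 words
};

ContextArcWeights BuildWeights(const std::vector<int>& word_lengths,
                               float context_score = 3.0f,
                               float incremental_context_score = 0.0f) {
  ContextArcWeights w;
  float earned = 0.0f;
  for (std::size_t i = 0; i < word_lengths.size(); ++i) {
    float boost = (i * incremental_context_score + context_score) *
                  word_lengths[i];
    w.word_boost.push_back(boost);
    earned += boost;
    // The state after the last word loops back to start, so no escape arc.
    if (i + 1 < word_lengths.size()) {
      w.escape_penalty.push_back(-earned);
    }
  }
  return w;
}

int main() {
  ContextArcWeights w = BuildWeights({2, 1, 3});  // a 3-word phrase
  for (float b : w.word_boost) std::cout << b << ' ';      // 6 3 9
  std::cout << "| escapes: ";
  for (float e : w.escape_penalty) std::cout << e << ' ';  // -6 -9
  std::cout << '\n';
  return 0;
}
```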
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CONTEXT_GRAPH_H_ -#define DECODER_CONTEXT_GRAPH_H_ - -#include -#include -#include - -#include "fst/compose.h" -#include "fst/fst.h" -#include "fst/vector-fst.h" - -namespace wenet { - -using StateId = fst::StdArc::StateId; - -struct ContextConfig { - int max_contexts = 5000; - int max_context_length = 100; - float context_score = 3.0; - float incremental_context_score = 0.0; -}; - -class ContextGraph { - public: - explicit ContextGraph(ContextConfig config); - void BuildContextGraph(const std::vector& query_context, - const std::shared_ptr& symbol_table); - int GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary); - - int start_tag_id() { return start_tag_id_; } - int end_tag_id() { return end_tag_id_; } - - private: - bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - - int start_tag_id_ = -1; - int end_tag_id_ = -1; - ContextConfig config_; - std::shared_ptr symbol_table_ = nullptr; - std::unique_ptr graph_ = nullptr; - DISALLOW_COPY_AND_ASSIGN(ContextGraph); -}; - -} // namespace wenet - -#endif // DECODER_CONTEXT_GRAPH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_endpoint.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_endpoint.cc deleted file mode 100644 index 4a64dd048f32401ab0dca468836cfac8be943d26..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_endpoint.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_endpoint.h" - -#include - -#include -#include - -#include "utils/log.h" - -namespace wenet { - -CtcEndpoint::CtcEndpoint(const CtcEndpointConfig& config) : config_(config) { - Reset(); -} - -void CtcEndpoint::Reset() { - num_frames_decoded_ = 0; - num_frames_trailing_blank_ = 0; -} - -static bool RuleActivated(const CtcEndpointRule& rule, - const std::string& rule_name, bool decoded_sth, - int trailing_silence, int utterance_length) { - bool ans = (decoded_sth || !rule.must_decoded_sth) && - trailing_silence >= rule.min_trailing_silence && - utterance_length >= rule.min_utterance_length; - if (ans) { - VLOG(2) << "Endpointing rule " << rule_name - << " activated: " << (decoded_sth ? 
"true" : "false") << ',' - << trailing_silence << ',' << utterance_length; - } - return ans; -} - -bool CtcEndpoint::IsEndpoint( - const std::vector>& ctc_log_probs, - bool decoded_something) { - for (int t = 0; t < ctc_log_probs.size(); ++t) { - const auto& logp_t = ctc_log_probs[t]; - float blank_prob = expf(logp_t[config_.blank]); - - num_frames_decoded_++; - if (blank_prob > config_.blank_threshold) { - num_frames_trailing_blank_++; - } else { - num_frames_trailing_blank_ = 0; - } - } - CHECK_GE(num_frames_decoded_, num_frames_trailing_blank_); - CHECK_GT(frame_shift_in_ms_, 0); - int utterance_length = num_frames_decoded_ * frame_shift_in_ms_; - int trailing_silence = num_frames_trailing_blank_ * frame_shift_in_ms_; - if (RuleActivated(config_.rule1, "rule1", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule2, "rule2", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule3, "rule3", decoded_something, trailing_silence, - utterance_length)) - return true; - return false; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_endpoint.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_endpoint.h deleted file mode 100644 index 56d9e08e7d3fab5562028e956f7b1d6ebac7b9e4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_endpoint.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_ENDPOINT_H_ -#define DECODER_CTC_ENDPOINT_H_ - -#include - -namespace wenet { - -struct CtcEndpointRule { - bool must_decoded_sth; - int min_trailing_silence; - int min_utterance_length; - - CtcEndpointRule(bool must_decoded_sth = true, int min_trailing_silence = 1000, - int min_utterance_length = 0) - : must_decoded_sth(must_decoded_sth), - min_trailing_silence(min_trailing_silence), - min_utterance_length(min_utterance_length) {} -}; - -struct CtcEndpointConfig { - /// We consider blank as silence for purposes of endpointing. - int blank = 0; // blank id - float blank_threshold = 0.8; // blank threshold to be silence - /// We support three rules. We terminate decoding if ANY of these rules - /// evaluates to "true". If you want to add more rules, do it by changing this - /// code. If you want to disable a rule, you can set the silence-timeout for - /// that rule to a very large number. - - /// rule1 times out after 5000 ms of silence, even if we decoded nothing. - CtcEndpointRule rule1; - /// rule2 times out after 1000 ms of silence after decoding something. - CtcEndpointRule rule2; - /// rule3 times out after the utterance is 20000 ms long, regardless of - /// anything else. 
- CtcEndpointRule rule3; - - CtcEndpointConfig() - : rule1(false, 5000, 0), rule2(true, 1000, 0), rule3(false, 0, 20000) {} -}; - -class CtcEndpoint { - public: - explicit CtcEndpoint(const CtcEndpointConfig& config); - - void Reset(); - /// This function returns true if this set of endpointing rules thinks we - /// should terminate decoding. - bool IsEndpoint(const std::vector>& ctc_log_probs, - bool decoded_something); - - void frame_shift_in_ms(int frame_shift_in_ms) { - frame_shift_in_ms_ = frame_shift_in_ms; - } - - private: - CtcEndpointConfig config_; - int frame_shift_in_ms_ = -1; - int num_frames_decoded_ = 0; - int num_frames_trailing_blank_ = 0; -}; - -} // namespace wenet - -#endif // DECODER_CTC_ENDPOINT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_prefix_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_prefix_beam_search.cc deleted file mode 100644 index 154c8864ba98255528a33a80a35b18eee8fa5dc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_prefix_beam_search.cc +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
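The endpoint detector deleted just above counts trailing frames whose blank posterior exceeds `blank_threshold` and declares an endpoint as soon as any rule in `CtcEndpointConfig` fires. Below is a compact Python restatement of that rule check, using the default settings from the header (rule1: 5000 ms of silence even with nothing decoded, rule2: 1000 ms of trailing silence after something was decoded, rule3: a 20000 ms cap on utterance length). It is an illustrative sketch, not code from the repository.

```python
# Illustrative restatement of the endpoint rules from ctc_endpoint.h above.
# Frame counts are assumed to be converted to milliseconds with the model's
# frame shift before these checks run.

def rule_activated(decoded_sth, trailing_silence_ms, utterance_ms,
                   must_decoded_sth, min_trailing_silence, min_utterance_length):
    return ((decoded_sth or not must_decoded_sth)
            and trailing_silence_ms >= min_trailing_silence
            and utterance_ms >= min_utterance_length)

def is_endpoint(decoded_sth, trailing_silence_ms, utterance_ms):
    # rule1: long silence even if nothing was decoded yet.
    if rule_activated(decoded_sth, trailing_silence_ms, utterance_ms, False, 5000, 0):
        return True
    # rule2: shorter silence once something has been decoded.
    if rule_activated(decoded_sth, trailing_silence_ms, utterance_ms, True, 1000, 0):
        return True
    # rule3: hard cap on utterance length, regardless of silence.
    return rule_activated(decoded_sth, trailing_silence_ms, utterance_ms, False, 0, 20000)

print(is_endpoint(decoded_sth=True, trailing_silence_ms=1200, utterance_ms=4000))  # True via rule2
```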
- - -#include "decoder/ctc_prefix_beam_search.h" - -#include -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -CtcPrefixBeamSearch::CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : opts_(opts), context_graph_(context_graph) { - Reset(); -} - -void CtcPrefixBeamSearch::Reset() { - hypotheses_.clear(); - likelihood_.clear(); - cur_hyps_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - outputs_.clear(); - abs_time_step_ = 0; - PrefixScore prefix_score; - prefix_score.s = 0.0; - prefix_score.ns = -kFloatMax; - prefix_score.v_s = 0.0; - prefix_score.v_ns = 0.0; - std::vector empty; - cur_hyps_[empty] = prefix_score; - outputs_.emplace_back(empty); - hypotheses_.emplace_back(empty); - likelihood_.emplace_back(prefix_score.total_score()); - times_.emplace_back(empty); -} - -static bool PrefixScoreCompare( - const std::pair, PrefixScore>& a, - const std::pair, PrefixScore>& b) { - return a.second.total_score() > b.second.total_score(); -} - -void CtcPrefixBeamSearch::UpdateOutputs( - const std::pair, PrefixScore>& prefix) { - const std::vector& input = prefix.first; - const std::vector& start_boundaries = prefix.second.start_boundaries; - const std::vector& end_boundaries = prefix.second.end_boundaries; - - std::vector output; - int s = 0; - int e = 0; - for (int i = 0; i < input.size(); ++i) { - if (s < start_boundaries.size() && i == start_boundaries[s]) { - output.emplace_back(context_graph_->start_tag_id()); - ++s; - } - output.emplace_back(input[i]); - if (e < end_boundaries.size() && i == end_boundaries[e]) { - output.emplace_back(context_graph_->end_tag_id()); - ++e; - } - } - outputs_.emplace_back(output); -} - -void CtcPrefixBeamSearch::UpdateHypotheses( - const std::vector, PrefixScore>>& hpys) { - cur_hyps_.clear(); - outputs_.clear(); - hypotheses_.clear(); - likelihood_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - for (auto& item : hpys) { - cur_hyps_[item.first] = item.second; - UpdateOutputs(item); - hypotheses_.emplace_back(std::move(item.first)); - likelihood_.emplace_back(item.second.total_score()); - viterbi_likelihood_.emplace_back(item.second.viterbi_score()); - times_.emplace_back(item.second.times()); - } -} - -// Please refer https://robin1001.github.io/2020/12/11/ctc-search -// for how CTC prefix beam search works, and there is a simple graph demo in -// it. -void CtcPrefixBeamSearch::Search(const std::vector>& logp) { - if (logp.size() == 0) return; - int first_beam_size = - std::min(static_cast(logp[0].size()), opts_.first_beam_size); - for (int t = 0; t < logp.size(); ++t, ++abs_time_step_) { - const std::vector& logp_t = logp[t]; - std::unordered_map, PrefixScore, PrefixHash> next_hyps; - // 1. First beam prune, only select topk candidates - std::vector topk_score; - std::vector topk_index; - TopK(logp_t, first_beam_size, &topk_score, &topk_index); - - // 2. Token passing - for (int i = 0; i < topk_index.size(); ++i) { - int id = topk_index[i]; - auto prob = topk_score[i]; - for (const auto& it : cur_hyps_) { - const std::vector& prefix = it.first; - const PrefixScore& prefix_score = it.second; - // If prefix doesn't exist in next_hyps, next_hyps[prefix] will insert - // PrefixScore(-inf, -inf) by default, since the default constructor - // of PrefixScore will set fields s(blank ending score) and - // ns(none blank ending score) to -inf, respectively. 
- if (id == opts_.blank) { - // Case 0: *a + ε => *a - PrefixScore& next_score = next_hyps[prefix]; - next_score.s = LogAdd(next_score.s, prefix_score.score() + prob); - next_score.v_s = prefix_score.viterbi_score() + prob; - next_score.times_s = prefix_score.times(); - // Prefix not changed, copy the context from prefix. - if (context_graph_ && !next_score.has_context) { - next_score.CopyContext(prefix_score); - next_score.has_context = true; - } - } else if (!prefix.empty() && id == prefix.back()) { - // Case 1: *a + a => *a - PrefixScore& next_score1 = next_hyps[prefix]; - next_score1.ns = LogAdd(next_score1.ns, prefix_score.ns + prob); - if (next_score1.v_ns < prefix_score.v_ns + prob) { - next_score1.v_ns = prefix_score.v_ns + prob; - if (next_score1.cur_token_prob < prob) { - next_score1.cur_token_prob = prob; - next_score1.times_ns = prefix_score.times_ns; - CHECK_GT(next_score1.times_ns.size(), 0); - next_score1.times_ns.back() = abs_time_step_; - } - } - if (context_graph_ && !next_score1.has_context) { - next_score1.CopyContext(prefix_score); - next_score1.has_context = true; - } - - // Case 2: *aε + a => *aa - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score2 = next_hyps[new_prefix]; - next_score2.ns = LogAdd(next_score2.ns, prefix_score.s + prob); - if (next_score2.v_ns < prefix_score.v_s + prob) { - next_score2.v_ns = prefix_score.v_s + prob; - next_score2.cur_token_prob = prob; - next_score2.times_ns = prefix_score.times_s; - next_score2.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score2.has_context) { - // Prefix changed, calculate the context score. - next_score2.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score2.has_context = true; - } - } else { - // Case 3: *a + b => *ab, *aε + b => *ab - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score = next_hyps[new_prefix]; - next_score.ns = LogAdd(next_score.ns, prefix_score.score() + prob); - if (next_score.v_ns < prefix_score.viterbi_score() + prob) { - next_score.v_ns = prefix_score.viterbi_score() + prob; - next_score.cur_token_prob = prob; - next_score.times_ns = prefix_score.times(); - next_score.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score.has_context) { - // Calculate the context score. - next_score.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score.has_context = true; - } - } - } - } - - // 3. Second beam prune, only keep top n best paths - std::vector, PrefixScore>> arr(next_hyps.begin(), - next_hyps.end()); - int second_beam_size = - std::min(static_cast(arr.size()), opts_.second_beam_size); - std::nth_element(arr.begin(), arr.begin() + second_beam_size, arr.end(), - PrefixScoreCompare); - arr.resize(second_beam_size); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // 4. Update cur_hyps_ and get new result - UpdateHypotheses(arr); - } -} - -void CtcPrefixBeamSearch::FinalizeSearch() { UpdateFinalContext(); } - -void CtcPrefixBeamSearch::UpdateFinalContext() { - if (context_graph_ == nullptr) return; - CHECK_EQ(hypotheses_.size(), cur_hyps_.size()); - CHECK_EQ(hypotheses_.size(), likelihood_.size()); - // We should backoff the context score/state when the context is - // not fully matched at the last time. 
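The `Search` loop above is the standard CTC prefix beam search recursion described in the blog post linked in its comments: every surviving prefix keeps a blank-ending log score `s` and a non-blank-ending log score `ns`, and each frame either merges a token into the prefix or extends it. The sketch below condenses that recursion in Python, dropping the first beam prune, the Viterbi time stamps, and the context scoring that the C++ class also maintains; it is illustrative only.

```python
import math
from collections import defaultdict

NEG_INF = -float("inf")

def log_add(a, b):
    if a == NEG_INF: return b
    if b == NEG_INF: return a
    m = max(a, b)
    return m + math.log(math.exp(a - m) + math.exp(b - m))

def ctc_prefix_beam_search(log_probs, beam_size=10, blank=0):
    """log_probs: T x V per-frame log posteriors; returns (prefix, score) pairs."""
    # prefix -> (log p ending in blank, log p ending in non-blank)
    cur = {(): (0.0, NEG_INF)}
    for logp_t in log_probs:
        nxt = defaultdict(lambda: (NEG_INF, NEG_INF))
        for prefix, (s, ns) in cur.items():
            for token, p in enumerate(logp_t):
                if token == blank:
                    # Case 0: *a + blank -> *a (blank ending)
                    ps, pns = nxt[prefix]
                    nxt[prefix] = (log_add(ps, log_add(s, ns) + p), pns)
                elif prefix and token == prefix[-1]:
                    # Case 1: *a + a -> *a (merge the repeat, non-blank ending)
                    ps, pns = nxt[prefix]
                    nxt[prefix] = (ps, log_add(pns, ns + p))
                    # Case 2: *a[blank] + a -> *aa (repeat only after a blank)
                    new = prefix + (token,)
                    ps2, pns2 = nxt[new]
                    nxt[new] = (ps2, log_add(pns2, s + p))
                else:
                    # Case 3: *a + b -> *ab
                    new = prefix + (token,)
                    ps, pns = nxt[new]
                    nxt[new] = (ps, log_add(pns, log_add(s, ns) + p))
        # Second beam prune: keep the best prefixes by total score.
        cur = dict(sorted(nxt.items(),
                          key=lambda kv: log_add(*kv[1]),
                          reverse=True)[:beam_size])
    return [(list(p), log_add(s, ns)) for p, (s, ns) in cur.items()]
```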
- for (const auto& prefix : hypotheses_) { - PrefixScore& prefix_score = cur_hyps_[prefix]; - if (prefix_score.context_state != 0) { - prefix_score.UpdateContext(context_graph_, prefix_score, 0, - prefix.size()); - } - } - std::vector, PrefixScore>> arr(cur_hyps_.begin(), - cur_hyps_.end()); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // Update cur_hyps_ and get new result - UpdateHypotheses(arr); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_prefix_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_prefix_beam_search.h deleted file mode 100644 index f44ec23c37af517c9e45140f89ef7346768f5d35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_prefix_beam_search.h +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_PREFIX_BEAM_SEARCH_H_ -#define DECODER_CTC_PREFIX_BEAM_SEARCH_H_ - -#include -#include -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "utils/utils.h" - -namespace wenet { - -struct CtcPrefixBeamSearchOptions { - int blank = 0; // blank id - int first_beam_size = 10; - int second_beam_size = 10; -}; - -struct PrefixScore { - float s = -kFloatMax; // blank ending score - float ns = -kFloatMax; // none blank ending score - float v_s = -kFloatMax; // viterbi blank ending score - float v_ns = -kFloatMax; // viterbi none blank ending score - float cur_token_prob = -kFloatMax; // prob of current token - std::vector times_s; // times of viterbi blank path - std::vector times_ns; // times of viterbi none blank path - - float score() const { return LogAdd(s, ns); } - float viterbi_score() const { return v_s > v_ns ? v_s : v_ns; } - const std::vector& times() const { - return v_s > v_ns ? 
times_s : times_ns; - } - - bool has_context = false; - int context_state = 0; - float context_score = 0; - std::vector start_boundaries; - std::vector end_boundaries; - - void CopyContext(const PrefixScore& prefix_score) { - context_state = prefix_score.context_state; - context_score = prefix_score.context_score; - start_boundaries = prefix_score.start_boundaries; - end_boundaries = prefix_score.end_boundaries; - } - - void UpdateContext(const std::shared_ptr& context_graph, - const PrefixScore& prefix_score, int word_id, - int prefix_len) { - this->CopyContext(prefix_score); - - float score = 0; - bool is_start_boundary = false; - bool is_end_boundary = false; - - context_state = - context_graph->GetNextState(prefix_score.context_state, word_id, &score, - &is_start_boundary, &is_end_boundary); - context_score += score; - if (is_start_boundary) start_boundaries.emplace_back(prefix_len); - if (is_end_boundary) end_boundaries.emplace_back(prefix_len); - } - - float total_score() const { return score() + context_score; } -}; - -struct PrefixHash { - size_t operator()(const std::vector& prefix) const { - size_t hash_code = 0; - // here we use KB&DR hash code - for (int id : prefix) { - hash_code = id + 31 * hash_code; - } - return hash_code; - } -}; - -class CtcPrefixBeamSearch : public SearchInterface { - public: - explicit CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph = nullptr); - - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kPrefixBeamSearch; } - void UpdateOutputs(const std::pair, PrefixScore>& prefix); - void UpdateHypotheses( - const std::vector, PrefixScore>>& hpys); - void UpdateFinalContext(); - - const std::vector& viterbi_likelihood() const { - return viterbi_likelihood_; - } - const std::vector>& Inputs() const override { - return hypotheses_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - int abs_time_step_ = 0; - - // N-best list and corresponding likelihood_, in sorted order - std::vector> hypotheses_; - std::vector likelihood_; - std::vector viterbi_likelihood_; - std::vector> times_; - - std::unordered_map, PrefixScore, PrefixHash> cur_hyps_; - std::shared_ptr context_graph_ = nullptr; - // Outputs contain the hypotheses_ and tags like: and - std::vector> outputs_; - const CtcPrefixBeamSearchOptions& opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(CtcPrefixBeamSearch); -}; - -} // namespace wenet - -#endif // DECODER_CTC_PREFIX_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_wfst_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_wfst_beam_search.cc deleted file mode 100644 index 10e93f387e87b5f16fb7784d7060c50f227bf58e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_wfst_beam_search.cc +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_wfst_beam_search.h" - -#include - -namespace wenet { - -void DecodableTensorScaled::Reset() { - num_frames_ready_ = 0; - done_ = false; - // Give an empty initialization, will throw error when - // AcceptLoglikes is not called - logp_.clear(); -} - -void DecodableTensorScaled::AcceptLoglikes(const std::vector& logp) { - ++num_frames_ready_; - // TODO(Binbin Zhang): Avoid copy here - logp_ = logp; -} - -float DecodableTensorScaled::LogLikelihood(int32 frame, int32 index) { - CHECK_GT(index, 0); - CHECK_LT(frame, num_frames_ready_); - return scale_ * logp_[index - 1]; -} - -bool DecodableTensorScaled::IsLastFrame(int32 frame) const { - CHECK_LT(frame, num_frames_ready_); - return done_ && (frame == num_frames_ready_ - 1); -} - -int32 DecodableTensorScaled::NumIndices() const { - LOG(FATAL) << "Not implement"; - return 0; -} - -CtcWfstBeamSearch::CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : decodable_(opts.acoustic_scale), - decoder_(fst, opts, context_graph), - context_graph_(context_graph), - opts_(opts) { - Reset(); -} - -void CtcWfstBeamSearch::Reset() { - num_frames_ = 0; - decoded_frames_mapping_.clear(); - is_last_frame_blank_ = false; - last_best_ = 0; - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - decodable_.Reset(); - decoder_.InitDecoding(); -} - -void CtcWfstBeamSearch::Search(const std::vector>& logp) { - if (0 == logp.size()) { - return; - } - // Every time we get the log posterior, we decode it all before return - for (int i = 0; i < logp.size(); i++) { - float blank_score = std::exp(logp[i][0]); - if (blank_score > opts_.blank_skip_thresh * opts_.blank_scale) { - VLOG(3) << "skipping frame " << num_frames_ << " score " << blank_score; - is_last_frame_blank_ = true; - last_frame_prob_ = logp[i]; - } else { - // Get the best symbol - int cur_best = - std::max_element(logp[i].begin(), logp[i].end()) - logp[i].begin(); - // Optional, adding one blank frame if we has skipped it in two same - // symbols - if (cur_best != 0 && is_last_frame_blank_ && cur_best == last_best_) { - decodable_.AcceptLoglikes(last_frame_prob_); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_ - 1); - VLOG(2) << "Adding blank frame at symbol " << cur_best; - } - last_best_ = cur_best; - - decodable_.AcceptLoglikes(logp[i]); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_); - is_last_frame_blank_ = false; - } - num_frames_++; - } - // Get the best path - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - if (decoded_frames_mapping_.size() > 0) { - inputs_.resize(1); - outputs_.resize(1); - likelihood_.resize(1); - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, false); - std::vector alignment; - kaldi::LatticeWeight weight; - fst::GetLinearSymbolSequence(lat, &alignment, &outputs_[0], &weight); - ConvertToInputs(alignment, &inputs_[0]); - RemoveContinuousTags(&outputs_[0]); - VLOG(3) << weight.Value1() << " " << weight.Value2(); - likelihood_[0] = 
-(weight.Value1() + weight.Value2()); - } -} - -void CtcWfstBeamSearch::FinalizeSearch() { - decodable_.SetFinish(); - decoder_.FinalizeDecoding(); - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - if (decoded_frames_mapping_.size() > 0) { - std::vector nbest_lats; - if (opts_.nbest == 1) { - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, true); - nbest_lats.push_back(std::move(lat)); - } else { - // Get N-best path by lattice(CompactLattice) - kaldi::CompactLattice clat; - decoder_.GetLattice(&clat, true); - kaldi::Lattice lat, nbest_lat; - fst::ConvertLattice(clat, &lat); - // TODO(Binbin Zhang): it's n-best word lists here, not character n-best - fst::ShortestPath(lat, &nbest_lat, opts_.nbest); - fst::ConvertNbestToVector(nbest_lat, &nbest_lats); - } - int nbest = nbest_lats.size(); - inputs_.resize(nbest); - outputs_.resize(nbest); - likelihood_.resize(nbest); - times_.resize(nbest); - for (int i = 0; i < nbest; i++) { - kaldi::LatticeWeight weight; - std::vector alignment; - fst::GetLinearSymbolSequence(nbest_lats[i], &alignment, &outputs_[i], - &weight); - ConvertToInputs(alignment, &inputs_[i], ×_[i]); - RemoveContinuousTags(&outputs_[i]); - likelihood_[i] = -(weight.Value1() + weight.Value2()); - } - } -} - -void CtcWfstBeamSearch::ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time) { - input->clear(); - if (time != nullptr) time->clear(); - for (int cur = 0; cur < alignment.size(); ++cur) { - // ignore blank - if (alignment[cur] - 1 == 0) continue; - // merge continuous same label - if (cur > 0 && alignment[cur] == alignment[cur - 1]) continue; - - input->push_back(alignment[cur] - 1); - if (time != nullptr) { - time->push_back(decoded_frames_mapping_[cur]); - } - } -} - -void CtcWfstBeamSearch::RemoveContinuousTags(std::vector* output) { - if (context_graph_) { - for (auto it = output->begin(); it != output->end();) { - if (*it == context_graph_->start_tag_id() || - *it == context_graph_->end_tag_id()) { - if (it + 1 != output->end() && *it == *(it + 1)) { - it = output->erase(it); - continue; - } - } - ++it; - } - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_wfst_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_wfst_beam_search.h deleted file mode 100644 index 204a0c8db1254035b7e3bd4a6e02b65d66b756f3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/ctc_wfst_beam_search.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
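`CtcWfstBeamSearch::Search` above avoids running the WFST decoder on frames that are almost certainly blank: a frame is skipped when its blank probability exceeds `blank_skip_thresh * blank_scale`, and one skipped blank frame is put back whenever the same non-blank symbol would otherwise repeat across the gap. The Python sketch below isolates just that frame-filtering step; the Kaldi lattice decoding it feeds is omitted and the function name is hypothetical.

```python
import math

def filter_frames_for_decoder(log_probs, blank=0, blank_skip_thresh=0.98, blank_scale=1.0):
    """Return (frames_to_decode, frame_indices) after blank skipping,
    mirroring the pre-filtering in CtcWfstBeamSearch::Search above."""
    kept, mapping = [], []
    last_frame_blank = False
    last_frame_logp = None
    last_best = 0
    for t, logp in enumerate(log_probs):
        blank_prob = math.exp(logp[blank])
        if blank_prob > blank_skip_thresh * blank_scale:
            # Frame is dominated by blank: skip it, but remember it.
            last_frame_blank = True
            last_frame_logp = logp
            continue
        best = max(range(len(logp)), key=lambda i: logp[i])
        # If the same symbol repeats across a skipped blank, put one blank
        # frame back so the decoder does not merge the two occurrences.
        if best != blank and last_frame_blank and best == last_best:
            kept.append(last_frame_logp)
            mapping.append(t - 1)
        last_best = best
        kept.append(logp)
        mapping.append(t)
        last_frame_blank = False
    return kept, mapping
```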
- - -#ifndef DECODER_CTC_WFST_BEAM_SEARCH_H_ -#define DECODER_CTC_WFST_BEAM_SEARCH_H_ - -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "kaldi/decoder/lattice-faster-online-decoder.h" -#include "utils/utils.h" - -namespace wenet { - -class DecodableTensorScaled : public kaldi::DecodableInterface { - public: - explicit DecodableTensorScaled(float scale = 1.0) : scale_(scale) { Reset(); } - - void Reset(); - int32 NumFramesReady() const override { return num_frames_ready_; } - bool IsLastFrame(int32 frame) const override; - float LogLikelihood(int32 frame, int32 index) override; - int32 NumIndices() const override; - void AcceptLoglikes(const std::vector& logp); - void SetFinish() { done_ = true; } - - private: - int num_frames_ready_ = 0; - float scale_ = 1.0; - bool done_ = false; - std::vector logp_; -}; - -// LatticeFasterDecoderConfig has the following key members -// beam: decoding beam -// max_active: Decoder max active states -// lattice_beam: Lattice generation beam -struct CtcWfstBeamSearchOptions : public kaldi::LatticeFasterDecoderConfig { - float acoustic_scale = 1.0; - float nbest = 10; - // When blank score is greater than this thresh, skip the frame in viterbi - // search - float blank_skip_thresh = 0.98; - float blank_scale = 1.0; -}; - -class CtcWfstBeamSearch : public SearchInterface { - public: - explicit CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph); - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kWfstBeamSearch; } - // For CTC prefix beam search, both inputs and outputs are hypotheses_ - const std::vector>& Inputs() const override { - return inputs_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - // Sub one and remove - void ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time = nullptr); - void RemoveContinuousTags(std::vector* output); - - int num_frames_ = 0; - std::vector decoded_frames_mapping_; - - int last_best_ = 0; // last none blank best id - std::vector last_frame_prob_; - bool is_last_frame_blank_ = false; - std::vector> inputs_, outputs_; - std::vector likelihood_; - std::vector> times_; - DecodableTensorScaled decodable_; - kaldi::LatticeFasterOnlineDecoder decoder_; - std::shared_ptr context_graph_; - const CtcWfstBeamSearchOptions& opts_; -}; - -} // namespace wenet - -#endif // DECODER_CTC_WFST_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/onnx_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/onnx_asr_model.cc deleted file mode 100644 index fc7afc704febbde3b7e350e392dc46763c453e74..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/onnx_asr_model.cc +++ /dev/null @@ -1,430 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance 
with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/onnx_asr_model.h" - -#include -#include -#include - -#include "utils/string.h" - -namespace wenet { - -Ort::Env OnnxAsrModel::env_ = Ort::Env(ORT_LOGGING_LEVEL_WARNING, ""); -Ort::SessionOptions OnnxAsrModel::session_options_ = Ort::SessionOptions(); - -void OnnxAsrModel::InitEngineThreads(int num_threads) { - session_options_.SetIntraOpNumThreads(num_threads); -} - -void OnnxAsrModel::GetInputOutputInfo( - const std::shared_ptr& session, - std::vector* in_names, std::vector* out_names) { - Ort::AllocatorWithDefaultOptions allocator; - // Input info - int num_nodes = session->GetInputCount(); - in_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetInputName(i, allocator); - Ort::TypeInfo type_info = session->GetInputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tInput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*in_names)[i] = name; - } - // Output info - num_nodes = session->GetOutputCount(); - out_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetOutputName(i, allocator); - Ort::TypeInfo type_info = session->GetOutputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tOutput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*out_names)[i] = name; - } -} - -void OnnxAsrModel::Read(const std::string& model_dir) { - std::string encoder_onnx_path = model_dir + "/encoder.onnx"; - std::string rescore_onnx_path = model_dir + "/decoder.onnx"; - std::string ctc_onnx_path = model_dir + "/ctc.onnx"; - - // 1. Load sessions - try { -#ifdef _MSC_VER - encoder_session_ = std::make_shared( - env_, ToWString(encoder_onnx_path).c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, ToWString(rescore_onnx_path).c_str(), session_options_); - ctc_session_ = std::make_shared( - env_, ToWString(ctc_onnx_path).c_str(), session_options_); -#else - encoder_session_ = std::make_shared( - env_, encoder_onnx_path.c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, rescore_onnx_path.c_str(), session_options_); - ctc_session_ = std::make_shared(env_, ctc_onnx_path.c_str(), - session_options_); -#endif - } catch (std::exception const& e) { - LOG(ERROR) << "error when load onnx model: " << e.what(); - exit(0); - } - - // 2. 
Read metadata - auto model_metadata = encoder_session_->GetModelMetadata(); - - Ort::AllocatorWithDefaultOptions allocator; - encoder_output_size_ = - atoi(model_metadata.LookupCustomMetadataMap("output_size", allocator)); - num_blocks_ = - atoi(model_metadata.LookupCustomMetadataMap("num_blocks", allocator)); - head_ = atoi(model_metadata.LookupCustomMetadataMap("head", allocator)); - cnn_module_kernel_ = atoi( - model_metadata.LookupCustomMetadataMap("cnn_module_kernel", allocator)); - subsampling_rate_ = atoi( - model_metadata.LookupCustomMetadataMap("subsampling_rate", allocator)); - right_context_ = - atoi(model_metadata.LookupCustomMetadataMap("right_context", allocator)); - sos_ = atoi(model_metadata.LookupCustomMetadataMap("sos_symbol", allocator)); - eos_ = atoi(model_metadata.LookupCustomMetadataMap("eos_symbol", allocator)); - is_bidirectional_decoder_ = atoi(model_metadata.LookupCustomMetadataMap( - "is_bidirectional_decoder", allocator)); - chunk_size_ = - atoi(model_metadata.LookupCustomMetadataMap("chunk_size", allocator)); - num_left_chunks_ = - atoi(model_metadata.LookupCustomMetadataMap("left_chunks", allocator)); - - LOG(INFO) << "Onnx Model Info:"; - LOG(INFO) << "\tencoder_output_size " << encoder_output_size_; - LOG(INFO) << "\tnum_blocks " << num_blocks_; - LOG(INFO) << "\thead " << head_; - LOG(INFO) << "\tcnn_module_kernel " << cnn_module_kernel_; - LOG(INFO) << "\tsubsampling_rate " << subsampling_rate_; - LOG(INFO) << "\tright_context " << right_context_; - LOG(INFO) << "\tsos " << sos_; - LOG(INFO) << "\teos " << eos_; - LOG(INFO) << "\tis bidirectional decoder " << is_bidirectional_decoder_; - LOG(INFO) << "\tchunk_size " << chunk_size_; - LOG(INFO) << "\tnum_left_chunks " << num_left_chunks_; - - // 3. Read model nodes - LOG(INFO) << "Onnx Encoder:"; - GetInputOutputInfo(encoder_session_, &encoder_in_names_, &encoder_out_names_); - LOG(INFO) << "Onnx CTC:"; - GetInputOutputInfo(ctc_session_, &ctc_in_names_, &ctc_out_names_); - LOG(INFO) << "Onnx Rescore:"; - GetInputOutputInfo(rescore_session_, &rescore_in_names_, &rescore_out_names_); -} - -OnnxAsrModel::OnnxAsrModel(const OnnxAsrModel& other) { - // metadatas - encoder_output_size_ = other.encoder_output_size_; - num_blocks_ = other.num_blocks_; - head_ = other.head_; - cnn_module_kernel_ = other.cnn_module_kernel_; - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - - // sessions - encoder_session_ = other.encoder_session_; - ctc_session_ = other.ctc_session_; - rescore_session_ = other.rescore_session_; - - // node names - encoder_in_names_ = other.encoder_in_names_; - encoder_out_names_ = other.encoder_out_names_; - ctc_in_names_ = other.ctc_in_names_; - ctc_out_names_ = other.ctc_out_names_; - rescore_in_names_ = other.rescore_in_names_; - rescore_out_names_ = other.rescore_out_names_; -} - -std::shared_ptr OnnxAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void OnnxAsrModel::Reset() { - offset_ = 0; - encoder_outs_.clear(); - cached_feature_.clear(); - // Reset att_cache - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - if (num_left_chunks_ > 0) { - int required_cache_size = chunk_size_ * 
num_left_chunks_; - offset_ = required_cache_size; - att_cache_.resize(num_blocks_ * head_ * required_cache_size * - encoder_output_size_ / head_ * 2, - 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, required_cache_size, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } else { - att_cache_.resize(0, 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, 0, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } - - // Reset cnn_cache - cnn_cache_.resize( - num_blocks_ * encoder_output_size_ * (cnn_module_kernel_ - 1), 0.0); - const int64_t cnn_cache_shape[] = {num_blocks_, 1, encoder_output_size_, - cnn_module_kernel_ - 1}; - cnn_cache_ort_ = Ort::Value::CreateTensor( - memory_info, cnn_cache_.data(), cnn_cache_.size(), cnn_cache_shape, 4); -} - -void OnnxAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - // 1. Prepare onnx required data, splice cached_feature_ and chunk_feats - // chunk - int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - std::vector feats; - for (size_t i = 0; i < cached_feature_.size(); ++i) { - feats.insert(feats.end(), cached_feature_[i].begin(), - cached_feature_[i].end()); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - feats.insert(feats.end(), chunk_feats[i].begin(), chunk_feats[i].end()); - } - const int64_t feats_shape[3] = {1, num_frames, feature_dim}; - Ort::Value feats_ort = Ort::Value::CreateTensor( - memory_info, feats.data(), feats.size(), feats_shape, 3); - // offset - int64_t offset_int64 = static_cast(offset_); - Ort::Value offset_ort = Ort::Value::CreateTensor( - memory_info, &offset_int64, 1, std::vector{}.data(), 0); - // required_cache_size - int64_t required_cache_size = chunk_size_ * num_left_chunks_; - Ort::Value required_cache_size_ort = Ort::Value::CreateTensor( - memory_info, &required_cache_size, 1, std::vector{}.data(), 0); - // att_mask - Ort::Value att_mask_ort{nullptr}; - std::vector att_mask(required_cache_size + chunk_size_, 1); - if (num_left_chunks_ > 0) { - int chunk_idx = offset_ / chunk_size_ - num_left_chunks_; - if (chunk_idx < num_left_chunks_) { - for (int i = 0; i < (num_left_chunks_ - chunk_idx) * chunk_size_; ++i) { - att_mask[i] = 0; - } - } - const int64_t att_mask_shape[] = {1, 1, required_cache_size + chunk_size_}; - att_mask_ort = Ort::Value::CreateTensor( - memory_info, reinterpret_cast(att_mask.data()), att_mask.size(), - att_mask_shape, 3); - } - - // 2. 
Encoder chunk forward - std::vector inputs; - for (auto name : encoder_in_names_) { - if (!strcmp(name, "chunk")) { - inputs.emplace_back(std::move(feats_ort)); - } else if (!strcmp(name, "offset")) { - inputs.emplace_back(std::move(offset_ort)); - } else if (!strcmp(name, "required_cache_size")) { - inputs.emplace_back(std::move(required_cache_size_ort)); - } else if (!strcmp(name, "att_cache")) { - inputs.emplace_back(std::move(att_cache_ort_)); - } else if (!strcmp(name, "cnn_cache")) { - inputs.emplace_back(std::move(cnn_cache_ort_)); - } else if (!strcmp(name, "att_mask")) { - inputs.emplace_back(std::move(att_mask_ort)); - } - } - - std::vector ort_outputs = encoder_session_->Run( - Ort::RunOptions{nullptr}, encoder_in_names_.data(), inputs.data(), - inputs.size(), encoder_out_names_.data(), encoder_out_names_.size()); - - offset_ += static_cast( - ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[1]); - att_cache_ort_ = std::move(ort_outputs[1]); - cnn_cache_ort_ = std::move(ort_outputs[2]); - - std::vector ctc_inputs; - ctc_inputs.emplace_back(std::move(ort_outputs[0])); - - std::vector ctc_ort_outputs = ctc_session_->Run( - Ort::RunOptions{nullptr}, ctc_in_names_.data(), ctc_inputs.data(), - ctc_inputs.size(), ctc_out_names_.data(), ctc_out_names_.size()); - encoder_outs_.push_back(std::move(ctc_inputs[0])); - - float* logp_data = ctc_ort_outputs[0].GetTensorMutableData(); - auto type_info = ctc_ort_outputs[0].GetTensorTypeAndShapeInfo(); - - int num_outputs = type_info.GetShape()[1]; - int output_dim = type_info.GetShape()[2]; - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), logp_data + i * output_dim, - sizeof(float) * output_dim); - } -} - -float OnnxAsrModel::ComputeAttentionScore(const float* prob, - const std::vector& hyp, int eos, - int decode_out_len) { - float score = 0.0f; - for (size_t j = 0; j < hyp.size(); ++j) { - score += *(prob + j * decode_out_len + hyp[j]); - } - score += *(prob + hyp.size() * decode_out_len + eos); - return score; -} - -void OnnxAsrModel::AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - std::vector hyps_lens; - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_lens.emplace_back(static_cast(length)); - } - - std::vector rescore_input; - int encoder_len = 0; - for (int i = 0; i < encoder_outs_.size(); i++) { - float* encoder_outs_data = encoder_outs_[i].GetTensorMutableData(); - auto type_info = encoder_outs_[i].GetTensorTypeAndShapeInfo(); - for (int j = 0; j < type_info.GetElementCount(); j++) { - rescore_input.emplace_back(encoder_outs_data[j]); - } - encoder_len += type_info.GetShape()[1]; - } - - const int64_t decode_input_shape[] = {1, encoder_len, encoder_output_size_}; - - std::vector hyps_pad; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_pad.emplace_back(sos_); - size_t j = 0; - for (; j < hyp.size(); ++j) { - hyps_pad.emplace_back(hyp[j]); - } - if (j == max_hyps_len - 1) { - continue; - } - for (; j < max_hyps_len - 1; ++j) { 
- hyps_pad.emplace_back(0); - } - } - - const int64_t hyps_pad_shape[] = {num_hyps, max_hyps_len}; - - const int64_t hyps_lens_shape[] = {num_hyps}; - - Ort::Value decode_input_tensor_ = Ort::Value::CreateTensor( - memory_info, rescore_input.data(), rescore_input.size(), - decode_input_shape, 3); - Ort::Value hyps_pad_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_pad.data(), hyps_pad.size(), hyps_pad_shape, 2); - Ort::Value hyps_lens_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_lens.data(), hyps_lens.size(), hyps_lens_shape, 1); - - std::vector rescore_inputs; - - rescore_inputs.emplace_back(std::move(hyps_pad_tensor_)); - rescore_inputs.emplace_back(std::move(hyps_lens_tensor_)); - rescore_inputs.emplace_back(std::move(decode_input_tensor_)); - - std::vector rescore_outputs = rescore_session_->Run( - Ort::RunOptions{nullptr}, rescore_in_names_.data(), rescore_inputs.data(), - rescore_inputs.size(), rescore_out_names_.data(), - rescore_out_names_.size()); - - float* decoder_outs_data = rescore_outputs[0].GetTensorMutableData(); - float* r_decoder_outs_data = rescore_outputs[1].GetTensorMutableData(); - - auto type_info = rescore_outputs[0].GetTensorTypeAndShapeInfo(); - int decode_out_len = type_info.GetShape()[2]; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left to right decoder score - score = ComputeAttentionScore( - decoder_outs_data + max_hyps_len * decode_out_len * i, hyp, eos_, - decode_out_len); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore( - r_decoder_outs_data + max_hyps_len * decode_out_len * i, r_hyp, eos_, - decode_out_len); - } - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/onnx_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/onnx_asr_model.h deleted file mode 100644 index f5d9e9a0c61d728f2fb6d45d1428234abae98c90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/onnx_asr_model.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
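`AttentionRescoring` above pads the CTC n-best hypotheses with `sos`/`eos`, runs them through the attention decoder, and, for a bidirectional decoder, blends the left-to-right and right-to-left scores with `reverse_weight`. Below is a short Python sketch of that scoring step, assuming `decoder_logp` and `r_decoder_logp` are the per-position log-probability rows produced for one hypothesis; it is illustrative only.

```python
def attention_score(decoder_logp, hyp, eos):
    """Sum log p(token_j) along the hypothesis, plus log p(eos) at the end,
    as in OnnxAsrModel::ComputeAttentionScore above."""
    score = sum(decoder_logp[j][tok] for j, tok in enumerate(hyp))
    return score + decoder_logp[len(hyp)][eos]

def rescore(hyp, decoder_logp, r_decoder_logp, eos, reverse_weight=0.0,
            is_bidirectional=True):
    score = attention_score(decoder_logp, hyp, eos)
    r_score = 0.0
    if is_bidirectional and reverse_weight > 0:
        # The right-to-left decoder scores the reversed hypothesis.
        r_score = attention_score(r_decoder_logp, list(reversed(hyp)), eos)
    # Combined left-to-right and right-to-left score, as in the loop above.
    return score * (1 - reverse_weight) + r_score * reverse_weight
```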
- -#ifndef DECODER_ONNX_ASR_MODEL_H_ -#define DECODER_ONNX_ASR_MODEL_H_ - -#include -#include -#include - -#include "onnxruntime_cxx_api.h" // NOLINT - -#include "decoder/asr_model.h" -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -class OnnxAsrModel : public AsrModel { - public: - static void InitEngineThreads(int num_threads = 1); - - public: - OnnxAsrModel() = default; - OnnxAsrModel(const OnnxAsrModel& other); - void Read(const std::string& model_dir); - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - void GetInputOutputInfo(const std::shared_ptr& session, - std::vector* in_names, - std::vector* out_names); - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const float* prob, const std::vector& hyp, - int eos, int decode_out_len); - - private: - int encoder_output_size_ = 0; - int num_blocks_ = 0; - int cnn_module_kernel_ = 0; - int head_ = 0; - - // sessions - // NOTE(Mddct): The Env holds the logging state used by all other objects. - // One Env must be created before using any other Onnxruntime functionality. - static Ort::Env env_; // shared environment across threads. - static Ort::SessionOptions session_options_; - std::shared_ptr encoder_session_ = nullptr; - std::shared_ptr rescore_session_ = nullptr; - std::shared_ptr ctc_session_ = nullptr; - - // node names - std::vector encoder_in_names_, encoder_out_names_; - std::vector ctc_in_names_, ctc_out_names_; - std::vector rescore_in_names_, rescore_out_names_; - - // caches - Ort::Value att_cache_ort_{nullptr}; - Ort::Value cnn_cache_ort_{nullptr}; - std::vector encoder_outs_; - // NOTE: Instead of making a copy of the xx_cache, ONNX only maintains - // its data pointer when initializing xx_cache_ort (see https://github.com/ - // microsoft/onnxruntime/blob/master/onnxruntime/core/framework - // /tensor.cc#L102-L129), so we need the following variables to keep - // our data "alive" during the lifetime of decoder. - std::vector att_cache_; - std::vector cnn_cache_; -}; - -} // namespace wenet - -#endif // DECODER_ONNX_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/params.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/params.h deleted file mode 100644 index 3edc877f1bb6d876ca087cab8e4ed00d42e97e63..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/params.h +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_PARAMS_H_ -#define DECODER_PARAMS_H_ - -#include -#include -#include -#include - -#include "decoder/asr_decoder.h" -#ifdef USE_ONNX -#include "decoder/onnx_asr_model.h" -#endif -#ifdef USE_TORCH -#include "decoder/torch_asr_model.h" -#endif -#ifdef USE_XPU -#include "xpu/xpu_asr_model.h" -#endif -#ifdef USE_BPU -#include "bpu/bpu_asr_model.h" -#endif -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/flags.h" -#include "utils/string.h" - -DEFINE_int32(device_id, 0, "set XPU DeviceID for ASR model"); - -// TorchAsrModel flags -DEFINE_string(model_path, "", "pytorch exported model path"); -// OnnxAsrModel flags -DEFINE_string(onnx_dir, "", "directory where the onnx model is saved"); -// XPUAsrModel flags -DEFINE_string(xpu_model_dir, "", - "directory where the XPU model and weights is saved"); -// BPUAsrModel flags -DEFINE_string(bpu_model_dir, "", - "directory where the HORIZON BPU model is saved"); - -// FeaturePipelineConfig flags -DEFINE_int32(num_bins, 80, "num mel bins for fbank feature"); -DEFINE_int32(sample_rate, 16000, "sample rate for audio"); - -// TLG fst -DEFINE_string(fst_path, "", "TLG fst path"); - -// DecodeOptions flags -DEFINE_int32(chunk_size, 16, "decoding chunk size"); -DEFINE_int32(num_left_chunks, -1, "left chunks in decoding"); -DEFINE_double(ctc_weight, 0.5, - "ctc weight when combining ctc score and rescoring score"); -DEFINE_double(rescoring_weight, 1.0, - "rescoring weight when combining ctc score and rescoring score"); -DEFINE_double(reverse_weight, 0.0, - "used for bitransformer rescoring. it must be 0.0 if decoder is" - "conventional transformer decoder, and only reverse_weight > 0.0" - "dose the right to left decoder will be calculated and used"); -DEFINE_int32(max_active, 7000, "max active states in ctc wfst search"); -DEFINE_int32(min_active, 200, "min active states in ctc wfst search"); -DEFINE_double(beam, 16.0, "beam in ctc wfst search"); -DEFINE_double(lattice_beam, 10.0, "lattice beam in ctc wfst search"); -DEFINE_double(acoustic_scale, 1.0, "acoustic scale for ctc wfst search"); -DEFINE_double(blank_skip_thresh, 1.0, - "blank skip thresh for ctc wfst search, 1.0 means no skip"); -DEFINE_double(blank_scale, 1.0, "blank scale for ctc wfst search"); -DEFINE_double(length_penalty, 0.0, - "length penalty ctc wfst search, will not" - "apply on self-loop arc, for balancing the del/ins ratio, " - "suggest set to -3.0"); -DEFINE_int32(nbest, 10, "nbest for ctc wfst or prefix search"); - -// SymbolTable flags -DEFINE_string(dict_path, "", - "dict symbol table path, required when LM is enabled"); -DEFINE_string(unit_path, "", - "e2e model unit symbol table, it is used in both " - "with/without LM scenarios for context/timestamp"); - -// Context flags -DEFINE_string(context_path, "", "context path, is used to build context graph"); -DEFINE_double(context_score, 3.0, "is used to rescore the decoded result"); - -// PostProcessOptions flags -DEFINE_int32(language_type, 0, - "remove spaces according to language type" - "0x00 = kMandarinEnglish, " - "0x01 = kIndoEuropean"); -DEFINE_bool(lowercase, true, "lowercase final result if needed"); - -namespace wenet { -std::shared_ptr InitFeaturePipelineConfigFromFlags() { - auto feature_config = std::make_shared( - FLAGS_num_bins, FLAGS_sample_rate); - return feature_config; -} - -std::shared_ptr InitDecodeOptionsFromFlags() { - auto decode_config = std::make_shared(); - decode_config->chunk_size = FLAGS_chunk_size; - decode_config->num_left_chunks = 
FLAGS_num_left_chunks; - decode_config->ctc_weight = FLAGS_ctc_weight; - decode_config->reverse_weight = FLAGS_reverse_weight; - decode_config->rescoring_weight = FLAGS_rescoring_weight; - decode_config->ctc_wfst_search_opts.max_active = FLAGS_max_active; - decode_config->ctc_wfst_search_opts.min_active = FLAGS_min_active; - decode_config->ctc_wfst_search_opts.beam = FLAGS_beam; - decode_config->ctc_wfst_search_opts.lattice_beam = FLAGS_lattice_beam; - decode_config->ctc_wfst_search_opts.acoustic_scale = FLAGS_acoustic_scale; - decode_config->ctc_wfst_search_opts.blank_skip_thresh = - FLAGS_blank_skip_thresh; - decode_config->ctc_wfst_search_opts.blank_scale = FLAGS_blank_scale; - decode_config->ctc_wfst_search_opts.length_penalty = FLAGS_length_penalty; - decode_config->ctc_wfst_search_opts.nbest = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.first_beam_size = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.second_beam_size = FLAGS_nbest; - return decode_config; -} - -std::shared_ptr InitDecodeResourceFromFlags() { - auto resource = std::make_shared(); - const int kNumGemmThreads = 1; - if (!FLAGS_onnx_dir.empty()) { -#ifdef USE_ONNX - LOG(INFO) << "Reading onnx model "; - OnnxAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_onnx_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DONNX=ON'."; -#endif - } else if (!FLAGS_model_path.empty()) { -#ifdef USE_TORCH - LOG(INFO) << "Reading torch model " << FLAGS_model_path; - TorchAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_model_path); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DTORCH=ON'."; -#endif - } else if (!FLAGS_xpu_model_dir.empty()) { -#ifdef USE_XPU - LOG(INFO) << "Reading XPU WeNet model weight from " << FLAGS_xpu_model_dir; - auto model = std::make_shared(); - model->SetEngineThreads(kNumGemmThreads); - model->SetDeviceId(FLAGS_device_id); - model->Read(FLAGS_xpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DXPU=ON'."; -#endif - } else if (!FLAGS_bpu_model_dir.empty()) { -#ifdef USE_BPU - LOG(INFO) << "Reading Horizon BPU model from " << FLAGS_bpu_model_dir; - auto model = std::make_shared(); - model->Read(FLAGS_bpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DBPU=ON'."; -#endif - } else { - LOG(FATAL) << "Please set ONNX, TORCH, XPU or BPU model path!!!"; - } - - LOG(INFO) << "Reading unit table " << FLAGS_unit_path; - auto unit_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_unit_path)); - CHECK(unit_table != nullptr); - resource->unit_table = unit_table; - - if (!FLAGS_fst_path.empty()) { // With LM - CHECK(!FLAGS_dict_path.empty()); - LOG(INFO) << "Reading fst " << FLAGS_fst_path; - auto fst = std::shared_ptr>( - fst::Fst::Read(FLAGS_fst_path)); - CHECK(fst != nullptr); - resource->fst = fst; - - LOG(INFO) << "Reading symbol table " << FLAGS_dict_path; - auto symbol_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_dict_path)); - CHECK(symbol_table != nullptr); - resource->symbol_table = symbol_table; - } else { // Without LM, symbol_table is the same as unit_table - resource->symbol_table = unit_table; - } - - if (!FLAGS_context_path.empty()) { - LOG(INFO) << "Reading context " << FLAGS_context_path; - std::vector contexts; - std::ifstream infile(FLAGS_context_path); - std::string context; - 
while (getline(infile, context)) { - contexts.emplace_back(Trim(context)); - } - ContextConfig config; - config.context_score = FLAGS_context_score; - resource->context_graph = std::make_shared(config); - resource->context_graph->BuildContextGraph(contexts, - resource->symbol_table); - } - - PostProcessOptions post_process_opts; - post_process_opts.language_type = - FLAGS_language_type == 0 ? kMandarinEnglish : kIndoEuropean; - post_process_opts.lowercase = FLAGS_lowercase; - resource->post_processor = - std::make_shared(std::move(post_process_opts)); - return resource; -} - -} // namespace wenet - -#endif // DECODER_PARAMS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/search_interface.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/search_interface.h deleted file mode 100644 index 25bad26705f8be44561d2c686f50a63035b14bbf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/search_interface.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_SEARCH_INTERFACE_H_ -#define DECODER_SEARCH_INTERFACE_H_ - -namespace wenet { - -#include - -enum SearchType { - kPrefixBeamSearch = 0x00, - kWfstBeamSearch = 0x01, -}; - -class SearchInterface { - public: - virtual ~SearchInterface() {} - virtual void Search(const std::vector>& logp) = 0; - virtual void Reset() = 0; - virtual void FinalizeSearch() = 0; - - virtual SearchType Type() const = 0; - // N-best inputs id - virtual const std::vector>& Inputs() const = 0; - // N-best outputs id - virtual const std::vector>& Outputs() const = 0; - // N-best likelihood - virtual const std::vector& Likelihood() const = 0; - // N-best timestamp - virtual const std::vector>& Times() const = 0; -}; - -} // namespace wenet - -#endif // DECODER_SEARCH_INTERFACE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/torch_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/torch_asr_model.cc deleted file mode 100644 index 3abca283e12f5c173c9511707229ea82b31f26d8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/torch_asr_model.cc +++ /dev/null @@ -1,278 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
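The `--ctc_weight` and `--rescoring_weight` flags defined in params.h above describe how the CTC search score and the attention rescoring score are combined when picking the final hypothesis; the combination itself happens in `asr_decoder.cc`, which is outside this hunk. The one-line Python sketch below shows the weighted sum those flag descriptions imply, with the flag defaults; treat it as an assumption rather than the exact formula.

```python
def combined_score(ctc_score, rescoring_score, ctc_weight=0.5, rescoring_weight=1.0):
    # Weighted sum implied by the --ctc_weight / --rescoring_weight descriptions
    # above; the authoritative formula lives in asr_decoder.cc (not in this diff).
    return rescoring_weight * rescoring_score + ctc_weight * ctc_score
```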
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/torch_asr_model.h" - -#include -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -namespace wenet { - -#ifndef IOS -void TorchAsrModel::InitEngineThreads(int num_threads) { - // For multi-thread performance - at::set_num_threads(num_threads); - VLOG(1) << "Num intra-op threads: " << at::get_num_threads(); -} -#endif - -void TorchAsrModel::Read(const std::string& model_path) { - torch::DeviceType device = at::kCPU; -#ifdef USE_GPU - if (!torch::cuda::is_available()) { - VLOG(1) << "CUDA is not available! Please check your GPU settings"; - throw std::runtime_error("CUDA is not available!"); - } else { - VLOG(1) << "CUDA available! Running on GPU"; - device = at::kCUDA; - } -#endif - torch::jit::script::Module model = torch::jit::load(model_path, device); - model_ = std::make_shared(std::move(model)); - torch::NoGradGuard no_grad; - model_->eval(); - torch::jit::IValue o1 = model_->run_method("subsampling_rate"); - CHECK_EQ(o1.isInt(), true); - subsampling_rate_ = o1.toInt(); - torch::jit::IValue o2 = model_->run_method("right_context"); - CHECK_EQ(o2.isInt(), true); - right_context_ = o2.toInt(); - torch::jit::IValue o3 = model_->run_method("sos_symbol"); - CHECK_EQ(o3.isInt(), true); - sos_ = o3.toInt(); - torch::jit::IValue o4 = model_->run_method("eos_symbol"); - CHECK_EQ(o4.isInt(), true); - eos_ = o4.toInt(); - torch::jit::IValue o5 = model_->run_method("is_bidirectional_decoder"); - CHECK_EQ(o5.isBool(), true); - is_bidirectional_decoder_ = o5.toBool(); - - VLOG(1) << "Torch Model Info:"; - VLOG(1) << "\tsubsampling_rate " << subsampling_rate_; - VLOG(1) << "\tright context " << right_context_; - VLOG(1) << "\tsos " << sos_; - VLOG(1) << "\teos " << eos_; - VLOG(1) << "\tis bidirectional decoder " << is_bidirectional_decoder_; -} - -TorchAsrModel::TorchAsrModel(const TorchAsrModel& other) { - // 1. Init the model info - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - // 2. Model copy, just copy the model ptr since: - // PyTorch allows using multiple CPU threads during TorchScript model - // inference, please see https://pytorch.org/docs/stable/notes/cpu_ - // threading_torchscript_inference.html - model_ = other.model_; - - // NOTE(Binbin Zhang): - // inner states for forward are not copied here. -} - -std::shared_ptr TorchAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void TorchAsrModel::Reset() { - offset_ = 0; - att_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - cnn_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - encoder_outs_.clear(); - cached_feature_.clear(); -} - -void TorchAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - // 1. 
Prepare libtorch required data, splice cached_feature_ and chunk_feats - // The first dimension is for batchsize, which is 1. - int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - torch::Tensor feats = - torch::zeros({1, num_frames, feature_dim}, torch::kFloat); - for (size_t i = 0; i < cached_feature_.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(cached_feature_[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][i] = std::move(row); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(chunk_feats[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][cached_feature_.size() + i] = std::move(row); - } - - // 2. Encoder chunk forward -#ifdef USE_GPU - feats = feats.to(at::kCUDA); - att_cache_ = att_cache_.to(at::kCUDA); - cnn_cache_ = cnn_cache_.to(at::kCUDA); -#endif - int required_cache_size = chunk_size_ * num_left_chunks_; - torch::NoGradGuard no_grad; - std::vector inputs = {feats, offset_, required_cache_size, - att_cache_, cnn_cache_}; - - // Refer interfaces in wenet/transformer/asr_model.py - auto outputs = - model_->get_method("forward_encoder_chunk")(inputs).toTuple()->elements(); - CHECK_EQ(outputs.size(), 3); -#ifdef USE_GPU - torch::Tensor chunk_out = outputs[0].toTensor().to(at::kCPU); - att_cache_ = outputs[1].toTensor().to(at::kCPU); - cnn_cache_ = outputs[2].toTensor().to(at::kCPU); -#else - torch::Tensor chunk_out = outputs[0].toTensor(); - att_cache_ = outputs[1].toTensor(); - cnn_cache_ = outputs[2].toTensor(); -#endif - offset_ += chunk_out.size(1); - - // The first dimension of returned value is for batchsize, which is 1 -#ifdef USE_GPU - chunk_out = chunk_out.to(at::kCUDA); - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor(); - ctc_log_probs = ctc_log_probs.to(at::kCPU)[0]; - encoder_outs_.push_back(std::move(chunk_out.to(at::kCPU))); -#else - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor()[0]; - encoder_outs_.push_back(std::move(chunk_out)); -#endif - - // Copy to output - int num_outputs = ctc_log_probs.size(0); - int output_dim = ctc_log_probs.size(1); - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), ctc_log_probs[i].data_ptr(), - sizeof(float) * output_dim); - } -} - -float TorchAsrModel::ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, - int eos) { - float score = 0.0f; - auto accessor = prob.accessor(); - for (size_t j = 0; j < hyp.size(); ++j) { - score += accessor[j][hyp[j]]; - } - score += accessor[hyp.size()][eos]; - return score; -} - -void TorchAsrModel::AttentionRescoring( - const std::vector>& hyps, float reverse_weight, - std::vector* rescoring_score) { - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - torch::NoGradGuard no_grad; - // Step 1: Prepare input for libtorch - torch::Tensor hyps_length = torch::zeros({num_hyps}, torch::kLong); - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_length[i] = static_cast(length); - } - torch::Tensor hyps_tensor = - torch::zeros({num_hyps, max_hyps_len}, torch::kLong); - for 
(size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_tensor[i][0] = sos_; - for (size_t j = 0; j < hyp.size(); ++j) { - hyps_tensor[i][j + 1] = hyp[j]; - } - } - - // Step 2: Forward attention decoder by hyps and corresponding encoder_outs_ - torch::Tensor encoder_out = torch::cat(encoder_outs_, 1); -#ifdef USE_GPU - hyps_tensor = hyps_tensor.to(at::kCUDA); - hyps_length = hyps_length.to(at::kCUDA); - encoder_out = encoder_out.to(at::kCUDA); -#endif - auto outputs = model_ - ->run_method("forward_attention_decoder", hyps_tensor, - hyps_length, encoder_out, reverse_weight) - .toTuple() - ->elements(); -#ifdef USE_GPU - auto probs = outputs[0].toTensor().to(at::kCPU); - auto r_probs = outputs[1].toTensor().to(at::kCPU); -#else - auto probs = outputs[0].toTensor(); - auto r_probs = outputs[1].toTensor(); -#endif - CHECK_EQ(probs.size(0), num_hyps); - CHECK_EQ(probs.size(1), max_hyps_len); - - // Step 3: Compute rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left-to-right decoder score - score = ComputeAttentionScore(probs[i], hyp, eos_); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - // right-to-left score - CHECK_EQ(r_probs.size(0), num_hyps); - CHECK_EQ(r_probs.size(1), max_hyps_len); - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore(r_probs[i], r_hyp, eos_); - } - - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/torch_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/torch_asr_model.h deleted file mode 100644 index a3cebe08798f1cad60ca4cd73c7b2488173b6114..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/decoder/torch_asr_model.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_TORCH_ASR_MODEL_H_ -#define DECODER_TORCH_ASR_MODEL_H_ - -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -#include "decoder/asr_model.h" -#include "utils/utils.h" - -namespace wenet { - -class TorchAsrModel : public AsrModel { - public: -#ifndef IOS - static void InitEngineThreads(int num_threads = 1); -#endif - - public: - using TorchModule = torch::jit::script::Module; - TorchAsrModel() = default; - TorchAsrModel(const TorchAsrModel& other); - void Read(const std::string& model_path); - std::shared_ptr torch_model() const { return model_; } - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, int eos); - - private: - std::shared_ptr model_ = nullptr; - std::vector encoder_outs_; - // transformer/conformer attention cache - torch::Tensor att_cache_ = torch::zeros({0, 0, 0, 0}); - // conformer-only conv_module cache - torch::Tensor cnn_cache_ = torch::zeros({0, 0, 0, 0}); -}; - -} // namespace wenet - -#endif // DECODER_TORCH_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/CMakeLists.txt deleted file mode 100644 index 78872257e43bb9a6ffcedaae977bf0173817ae50..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library(frontend STATIC - feature_pipeline.cc - fft.cc -) -target_link_libraries(frontend PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/fbank.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/fbank.h deleted file mode 100644 index 5a650dc035b8e244388cc1f2e0b9512654de7fda..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/fbank.h +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef FRONTEND_FBANK_H_ -#define FRONTEND_FBANK_H_ - -#include -#include -#include -#include -#include - -#include "frontend/fft.h" -#include "utils/log.h" - -namespace wenet { - -// This code is based on kaldi Fbank implementation, please see -// https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.cc -class Fbank { - public: - Fbank(int num_bins, int sample_rate, int frame_length, int frame_shift) - : num_bins_(num_bins), - sample_rate_(sample_rate), - frame_length_(frame_length), - frame_shift_(frame_shift), - use_log_(true), - remove_dc_offset_(true), - generator_(0), - distribution_(0, 1.0), - dither_(0.0) { - fft_points_ = UpperPowerOfTwo(frame_length_); - // generate bit reversal table and trigonometric function table - const int fft_points_4 = fft_points_ / 4; - bitrev_.resize(fft_points_); - sintbl_.resize(fft_points_ + fft_points_4); - make_sintbl(fft_points_, sintbl_.data()); - make_bitrev(fft_points_, bitrev_.data()); - - int num_fft_bins = fft_points_ / 2; - float fft_bin_width = static_cast(sample_rate_) / fft_points_; - int low_freq = 20, high_freq = sample_rate_ / 2; - float mel_low_freq = MelScale(low_freq); - float mel_high_freq = MelScale(high_freq); - float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1); - bins_.resize(num_bins_); - center_freqs_.resize(num_bins_); - for (int bin = 0; bin < num_bins; ++bin) { - float left_mel = mel_low_freq + bin * mel_freq_delta, - center_mel = mel_low_freq + (bin + 1) * mel_freq_delta, - right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; - center_freqs_[bin] = InverseMelScale(center_mel); - std::vector this_bin(num_fft_bins); - int first_index = -1, last_index = -1; - for (int i = 0; i < num_fft_bins; ++i) { - float freq = (fft_bin_width * i); // Center frequency of this fft - // bin. 
- float mel = MelScale(freq); - if (mel > left_mel && mel < right_mel) { - float weight; - if (mel <= center_mel) - weight = (mel - left_mel) / (center_mel - left_mel); - else - weight = (right_mel - mel) / (right_mel - center_mel); - this_bin[i] = weight; - if (first_index == -1) first_index = i; - last_index = i; - } - } - CHECK(first_index != -1 && last_index >= first_index); - bins_[bin].first = first_index; - int size = last_index + 1 - first_index; - bins_[bin].second.resize(size); - for (int i = 0; i < size; ++i) { - bins_[bin].second[i] = this_bin[first_index + i]; - } - } - - // povey window - povey_window_.resize(frame_length_); - double a = M_2PI / (frame_length - 1); - for (int i = 0; i < frame_length; ++i) { - povey_window_[i] = pow(0.5 - 0.5 * cos(a * i), 0.85); - } - } - - void set_use_log(bool use_log) { use_log_ = use_log; } - - void set_remove_dc_offset(bool remove_dc_offset) { - remove_dc_offset_ = remove_dc_offset; - } - - void set_dither(float dither) { dither_ = dither; } - - int num_bins() const { return num_bins_; } - - static inline float InverseMelScale(float mel_freq) { - return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f); - } - - static inline float MelScale(float freq) { - return 1127.0f * logf(1.0f + freq / 700.0f); - } - - static int UpperPowerOfTwo(int n) { - return static_cast(pow(2, ceil(log(n) / log(2)))); - } - - // pre emphasis - void PreEmphasis(float coeff, std::vector* data) const { - if (coeff == 0.0) return; - for (int i = data->size() - 1; i > 0; i--) - (*data)[i] -= coeff * (*data)[i - 1]; - (*data)[0] -= coeff * (*data)[0]; - } - - // Apply povey window on data in place - void Povey(std::vector* data) const { - CHECK_GE(data->size(), povey_window_.size()); - for (size_t i = 0; i < povey_window_.size(); ++i) { - (*data)[i] *= povey_window_[i]; - } - } - - // Compute fbank feat, return num frames - int Compute(const std::vector& wave, - std::vector>* feat) { - int num_samples = wave.size(); - if (num_samples < frame_length_) return 0; - int num_frames = 1 + ((num_samples - frame_length_) / frame_shift_); - feat->resize(num_frames); - std::vector fft_real(fft_points_, 0), fft_img(fft_points_, 0); - std::vector power(fft_points_ / 2); - for (int i = 0; i < num_frames; ++i) { - std::vector data(wave.data() + i * frame_shift_, - wave.data() + i * frame_shift_ + frame_length_); - // optional add noise - if (dither_ != 0.0) { - for (size_t j = 0; j < data.size(); ++j) - data[j] += dither_ * distribution_(generator_); - } - // optinal remove dc offset - if (remove_dc_offset_) { - float mean = 0.0; - for (size_t j = 0; j < data.size(); ++j) mean += data[j]; - mean /= data.size(); - for (size_t j = 0; j < data.size(); ++j) data[j] -= mean; - } - - PreEmphasis(0.97, &data); - Povey(&data); - // copy data to fft_real - memset(fft_img.data(), 0, sizeof(float) * fft_points_); - memset(fft_real.data() + frame_length_, 0, - sizeof(float) * (fft_points_ - frame_length_)); - memcpy(fft_real.data(), data.data(), sizeof(float) * frame_length_); - fft(bitrev_.data(), sintbl_.data(), fft_real.data(), fft_img.data(), - fft_points_); - // power - for (int j = 0; j < fft_points_ / 2; ++j) { - power[j] = fft_real[j] * fft_real[j] + fft_img[j] * fft_img[j]; - } - - (*feat)[i].resize(num_bins_); - // cepstral coefficients, triangle filter array - for (int j = 0; j < num_bins_; ++j) { - float mel_energy = 0.0; - int s = bins_[j].first; - for (size_t k = 0; k < bins_[j].second.size(); ++k) { - mel_energy += bins_[j].second[k] * power[s + k]; - } - // optional use log - if 
(use_log_) { - if (mel_energy < std::numeric_limits::epsilon()) - mel_energy = std::numeric_limits::epsilon(); - mel_energy = logf(mel_energy); - } - - (*feat)[i][j] = mel_energy; - } - } - return num_frames; - } - - private: - int num_bins_; - int sample_rate_; - int frame_length_, frame_shift_; - int fft_points_; - bool use_log_; - bool remove_dc_offset_; - std::vector center_freqs_; - std::vector>> bins_; - std::vector povey_window_; - std::default_random_engine generator_; - std::normal_distribution distribution_; - float dither_; - - // bit reversal table - std::vector bitrev_; - // trigonometric function table - std::vector sintbl_; -}; - -} // namespace wenet - -#endif // FRONTEND_FBANK_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/feature_pipeline.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/feature_pipeline.cc deleted file mode 100644 index ab450b15cd35ebd8101a3bcdec4f963a73bed10c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/feature_pipeline.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "frontend/feature_pipeline.h" - -#include -#include - -namespace wenet { - -FeaturePipeline::FeaturePipeline(const FeaturePipelineConfig& config) - : config_(config), - feature_dim_(config.num_bins), - fbank_(config.num_bins, config.sample_rate, config.frame_length, - config.frame_shift), - num_frames_(0), - input_finished_(false) {} - -void FeaturePipeline::AcceptWaveform(const float* pcm, const int size) { - std::vector> feats; - std::vector waves; - waves.insert(waves.end(), remained_wav_.begin(), remained_wav_.end()); - waves.insert(waves.end(), pcm, pcm + size); - int num_frames = fbank_.Compute(waves, &feats); - feature_queue_.Push(std::move(feats)); - num_frames_ += num_frames; - - int left_samples = waves.size() - config_.frame_shift * num_frames; - remained_wav_.resize(left_samples); - std::copy(waves.begin() + config_.frame_shift * num_frames, waves.end(), - remained_wav_.begin()); - // We are still adding wave, notify input is not finished - finish_condition_.notify_one(); -} - -void FeaturePipeline::AcceptWaveform(const int16_t* pcm, const int size) { - auto* float_pcm = new float[size]; - for (size_t i = 0; i < size; i++) { - float_pcm[i] = static_cast(pcm[i]); - } - this->AcceptWaveform(float_pcm, size); - delete[] float_pcm; -} - -void FeaturePipeline::set_input_finished() { - CHECK(!input_finished_); - { - std::lock_guard lock(mutex_); - input_finished_ = true; - } - finish_condition_.notify_one(); -} - -bool FeaturePipeline::ReadOne(std::vector* feat) { - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or set_input_finished() - finish_condition_.wait(lock); - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - return false; - } - } -} - -bool FeaturePipeline::Read(int num_frames, - std::vector>* feats) { - feats->clear(); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or set_input_finished() - finish_condition_.wait(lock); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. 
- if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - *feats = std::move(feature_queue_.Pop(feature_queue_.Size())); - return false; - } - } -} - -void FeaturePipeline::Reset() { - input_finished_ = false; - num_frames_ = 0; - remained_wav_.clear(); - feature_queue_.Clear(); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/feature_pipeline.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/feature_pipeline.h deleted file mode 100644 index 9918d6b573255795e0e665f0a9598c44be625c19..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/feature_pipeline.h +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef FRONTEND_FEATURE_PIPELINE_H_ -#define FRONTEND_FEATURE_PIPELINE_H_ - -#include -#include -#include -#include - -#include "frontend/fbank.h" -#include "utils/blocking_queue.h" -#include "utils/log.h" - -namespace wenet { - -struct FeaturePipelineConfig { - int num_bins; - int sample_rate; - int frame_length; - int frame_shift; - FeaturePipelineConfig(int num_bins, int sample_rate) - : num_bins(num_bins), // 80 dim fbank - sample_rate(sample_rate) { // 16k sample rate - frame_length = sample_rate / 1000 * 25; // frame length 25ms - frame_shift = sample_rate / 1000 * 10; // frame shift 10ms - } - - void Info() const { - LOG(INFO) << "feature pipeline config" - << " num_bins " << num_bins << " frame_length " << frame_length - << " frame_shift " << frame_shift; - } -}; - -// Typically, FeaturePipeline is used in two threads: one thread A calls -// AcceptWaveform() to add raw wav data and set_input_finished() to notice -// the end of input wav, another thread B (decoder thread) calls Read() to -// consume features.So a BlockingQueue is used to make this class thread safe. - -// The Read() is designed as a blocking method when there is no feature -// in feature_queue_ and the input is not finished. - -// See bin/decoder_main.cc, websocket/websocket_server.cc and -// decoder/torch_asr_decoder.cc for usage - -class FeaturePipeline { - public: - explicit FeaturePipeline(const FeaturePipelineConfig& config); - - // The feature extraction is done in AcceptWaveform(). - void AcceptWaveform(const float* pcm, const int size); - void AcceptWaveform(const int16_t* pcm, const int size); - - // Current extracted frames number. - int num_frames() const { return num_frames_; } - int feature_dim() const { return feature_dim_; } - const FeaturePipelineConfig& config() const { return config_; } - - // The caller should call this method when speech input is end. - // Never call AcceptWaveform() after calling set_input_finished() ! 
- void set_input_finished(); - bool input_finished() const { return input_finished_; } - - // Return False if input is finished and no feature could be read. - // Return True if a feature is read. - // This function is a blocking method. It will block the thread when - // there is no feature in feature_queue_ and the input is not finished. - bool ReadOne(std::vector* feat); - - // Read #num_frames frame features. - // Return False if less than #num_frames features are read and the - // input is finished. - // Return True if #num_frames features are read. - // This function is a blocking method when there is no feature - // in feature_queue_ and the input is not finished. - bool Read(int num_frames, std::vector>* feats); - - void Reset(); - bool IsLastFrame(int frame) const { - return input_finished_ && (frame == num_frames_ - 1); - } - - int NumQueuedFrames() const { return feature_queue_.Size(); } - - private: - const FeaturePipelineConfig& config_; - int feature_dim_; - Fbank fbank_; - - BlockingQueue> feature_queue_; - int num_frames_; - bool input_finished_; - - // The feature extraction is done in AcceptWaveform(). - // This waveform sample points are consumed by frame size. - // The residual waveform sample points after framing are - // kept to be used in next AcceptWaveform() calling. - std::vector remained_wav_; - - // Used to block the Read when there is no feature in feature_queue_ - // and the input is not finished. - mutable std::mutex mutex_; - std::condition_variable finish_condition_; -}; - -} // namespace wenet - -#endif // FRONTEND_FEATURE_PIPELINE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/fft.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/fft.cc deleted file mode 100644 index 9e05f854e79ea733d0411045385e924c2670b7f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/fft.cc +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include -#include -#include - -#include "frontend/fft.h" - -namespace wenet { - -void make_sintbl(int n, float* sintbl) { - int i, n2, n4, n8; - float c, s, dc, ds, t; - - n2 = n / 2; - n4 = n / 4; - n8 = n / 8; - t = sin(M_PI / n); - dc = 2 * t * t; - ds = sqrt(dc * (2 - dc)); - t = 2 * dc; - c = sintbl[n4] = 1; - s = sintbl[0] = 0; - for (i = 1; i < n8; ++i) { - c -= dc; - dc += t * c; - s += ds; - ds -= t * s; - sintbl[i] = s; - sintbl[n4 - i] = c; - } - if (n8 != 0) sintbl[n8] = sqrt(0.5); - for (i = 0; i < n4; ++i) sintbl[n2 - i] = sintbl[i]; - for (i = 0; i < n2 + n4; ++i) sintbl[i + n2] = -sintbl[i]; -} - -void make_bitrev(int n, int* bitrev) { - int i, j, k, n2; - - n2 = n / 2; - i = j = 0; - for (;;) { - bitrev[i] = j; - if (++i >= n) break; - k = n2; - while (k <= j) { - j -= k; - k /= 2; - } - j += k; - } -} - -// bitrev: bit reversal table -// sintbl: trigonometric function table -// x:real part -// y:image part -// n: fft length -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n) { - int i, j, k, ik, h, d, k2, n4, inverse; - float t, s, c, dx, dy; - - /* preparation */ - if (n < 0) { - n = -n; - inverse = 1; /* inverse transform */ - } else { - inverse = 0; - } - n4 = n / 4; - if (n == 0) { - return 0; - } - - /* bit reversal */ - for (i = 0; i < n; ++i) { - j = bitrev[i]; - if (i < j) { - t = x[i]; - x[i] = x[j]; - x[j] = t; - t = y[i]; - y[i] = y[j]; - y[j] = t; - } - } - - /* transformation */ - for (k = 1; k < n; k = k2) { - h = 0; - k2 = k + k; - d = n / k2; - for (j = 0; j < k; ++j) { - c = sintbl[h + n4]; - if (inverse) - s = -sintbl[h]; - else - s = sintbl[h]; - for (i = j; i < n; i += k2) { - ik = i + k; - dx = s * y[ik] + c * x[ik]; - dy = c * y[ik] - s * x[ik]; - x[ik] = x[i] - dx; - x[i] += dx; - y[ik] = y[i] - dy; - y[i] += dy; - } - h += d; - } - } - if (inverse) { - /* divide by n in case of the inverse transformation */ - for (i = 0; i < n; ++i) { - x[i] /= n; - y[i] /= n; - } - } - return 0; /* finished successfully */ -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/fft.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/fft.h deleted file mode 100644 index 6b92e406c44b4768eaee6e734f55bb39cd9af28b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/fft.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#ifndef FRONTEND_FFT_H_ -#define FRONTEND_FFT_H_ - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -namespace wenet { - -// Fast Fourier Transform - -void make_sintbl(int n, float* sintbl); - -void make_bitrev(int n, int* bitrev); - -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n); - -} // namespace wenet - -#endif // FRONTEND_FFT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/wav.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/wav.h deleted file mode 100644 index 688a049a940ebbdc83f24e59134fff22b7b09bfd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/frontend/wav.h +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright (c) 2016 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef FRONTEND_WAV_H_ -#define FRONTEND_WAV_H_ - -#include -#include -#include -#include -#include - -#include - -#include "utils/log.h" - -namespace wenet { - -struct WavHeader { - char riff[4] = {'R', 'I', 'F', 'F'}; - unsigned int size = 0; - char wav[4] = {'W', 'A', 'V', 'E'}; - char fmt[4] = {'f', 'm', 't', ' '}; - unsigned int fmt_size = 16; - uint16_t format = 1; - uint16_t channels = 0; - unsigned int sample_rate = 0; - unsigned int bytes_per_second = 0; - uint16_t block_size = 0; - uint16_t bit = 0; - char data[4] = {'d', 'a', 't', 'a'}; - unsigned int data_size = 0; - - WavHeader() {} - - WavHeader(int num_samples, int num_channel, int sample_rate, - int bits_per_sample) { - data_size = num_samples * num_channel * (bits_per_sample / 8); - size = sizeof(WavHeader) - 8 + data_size; - channels = num_channel; - this->sample_rate = sample_rate; - bytes_per_second = sample_rate * num_channel * (bits_per_sample / 8); - block_size = num_channel * (bits_per_sample / 8); - bit = bits_per_sample; - } -}; - -class WavReader { - public: - WavReader() : data_(nullptr) {} - explicit WavReader(const std::string& filename) { Open(filename); } - - bool Open(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "rb"); - if (NULL == fp) { - LOG(WARNING) << "Error in read " << filename; - return false; - } - - WavHeader header; - fread(&header, 1, sizeof(header), fp); - if (header.fmt_size < 16) { - fprintf(stderr, - "WaveData: expect PCM format data " - "to have fmt chunk of at least size 16.\n"); - return false; - } else if (header.fmt_size > 16) { - int offset = 44 - 8 + header.fmt_size - 16; - fseek(fp, offset, SEEK_SET); - fread(header.data, 8, sizeof(char), fp); - } - // check "RIFF" "WAVE" "fmt " "data" - - // Skip any sub-chunks between "fmt" and "data". Usually there will - // be a single "fact" sub chunk, but on Windows there can also be a - // "list" sub chunk. 
- while (0 != strncmp(header.data, "data", 4)) { - // We will just ignore the data in these chunks. - fseek(fp, header.data_size, SEEK_CUR); - // read next sub chunk - fread(header.data, 8, sizeof(char), fp); - } - - num_channel_ = header.channels; - sample_rate_ = header.sample_rate; - bits_per_sample_ = header.bit; - int num_data = header.data_size / (bits_per_sample_ / 8); - data_ = new float[num_data]; - num_samples_ = num_data / num_channel_; - - for (int i = 0; i < num_data; ++i) { - switch (bits_per_sample_) { - case 8: { - char sample; - fread(&sample, 1, sizeof(char), fp); - data_[i] = static_cast(sample); - break; - } - case 16: { - int16_t sample; - fread(&sample, 1, sizeof(int16_t), fp); - data_[i] = static_cast(sample); - break; - } - case 32: { - int sample; - fread(&sample, 1, sizeof(int), fp); - data_[i] = static_cast(sample); - break; - } - default: - fprintf(stderr, "unsupported quantization bits"); - exit(1); - } - } - fclose(fp); - return true; - } - - int num_channel() const { return num_channel_; } - int sample_rate() const { return sample_rate_; } - int bits_per_sample() const { return bits_per_sample_; } - int num_samples() const { return num_samples_; } - - ~WavReader() { - delete[] data_; - } - - const float* data() const { return data_; } - - private: - int num_channel_; - int sample_rate_; - int bits_per_sample_; - int num_samples_; // sample points per channel - float* data_; -}; - -class WavWriter { - public: - WavWriter(const float* data, int num_samples, int num_channel, - int sample_rate, int bits_per_sample) - : data_(data), - num_samples_(num_samples), - num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample) {} - - void Write(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "wb"); - WavHeader header(num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fwrite(&header, 1, sizeof(header), fp); - - for (int i = 0; i < num_samples_; ++i) { - for (int j = 0; j < num_channel_; ++j) { - switch (bits_per_sample_) { - case 8: { - char sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 16: { - int16_t sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 32: { - int sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - } - } - } - fclose(fp); - } - - private: - const float* data_; - int num_samples_; // total float points in data_ - int num_channel_; - int sample_rate_; - int bits_per_sample_; -}; - -class StreamWavWriter { - public: - StreamWavWriter(int num_channel, int sample_rate, int bits_per_sample) - : num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample), - total_num_samples_(0) {} - - StreamWavWriter(const std::string& filename, int num_channel, - int sample_rate, int bits_per_sample) - : StreamWavWriter(num_channel, sample_rate, bits_per_sample) { - Open(filename); - } - - void Open(const std::string& filename) { - fp_ = fopen(filename.c_str(), "wb"); - fseek(fp_, sizeof(WavHeader), SEEK_SET); - } - - void Write(const int16_t* sample_data, size_t num_samples) { - fwrite(sample_data, sizeof(int16_t), num_samples, fp_); - total_num_samples_ += num_samples; - } - - void Close() { - WavHeader header(total_num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fseek(fp_, 0L, SEEK_SET); - fwrite(&header, 1, sizeof(header), fp_); - fclose(fp_); - } - - private: - FILE* fp_; - 
int num_channel_; - int sample_rate_; - int bits_per_sample_; - size_t total_num_samples_; -}; - -} // namespace wenet - -#endif // FRONTEND_WAV_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/CMakeLists.txt deleted file mode 100644 index b072309e44b90dcee44ea31e9bcbc1741e73f151..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -cmake_minimum_required(VERSION 3.10 FATAL_ERROR) - -project(kaldi) - -# include_directories() is called in the root CMakeLists.txt - -add_library(kaldi-util - base/kaldi-error.cc - base/kaldi-math.cc - util/kaldi-io.cc - util/parse-options.cc - util/simple-io-funcs.cc - util/text-utils.cc -) -target_link_libraries(kaldi-util PUBLIC utils) - -add_library(kaldi-decoder - lat/determinize-lattice-pruned.cc - lat/lattice-functions.cc - decoder/lattice-faster-decoder.cc - decoder/lattice-faster-online-decoder.cc -) -target_link_libraries(kaldi-decoder PUBLIC kaldi-util) - -if(GRAPH_TOOLS) - # Arpa binary - add_executable(arpa2fst - lm/arpa-file-parser.cc - lm/arpa-lm-compiler.cc - lmbin/arpa2fst.cc - ) - target_link_libraries(arpa2fst PUBLIC kaldi-util) - - # FST tools binary - set(FST_BINS - fstaddselfloops - fstdeterminizestar - fstisstochastic - fstminimizeencoded - fsttablecompose - ) - - if(NOT MSVC) - # dl is for dynamic linking, otherwise there is a linking error on linux - link_libraries(dl) - endif() - foreach(name IN LISTS FST_BINS) - add_executable(${name} - fstbin/${name}.cc - fstext/kaldi-fst-io.cc - ) - target_link_libraries(${name} PUBLIC kaldi-util) - endforeach() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/README.md deleted file mode 100644 index 4eb9c9173b747686f00b658afc5e1e0dfdc17e68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/README.md +++ /dev/null @@ -1,21 +0,0 @@ -We use Kaldi decoder to implement TLG based language model integration, -so we copied related files to this directory. -The main changes are: - -1. To minimize the change, we use the same directories tree as Kaldi. - -2. We replace Kaldi log system with glog in the following way. - -``` c++ -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_INFO \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) -``` - -3. We lint all the files to satisfy the lint in WeNet. 
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/io-funcs-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/io-funcs-inl.h deleted file mode 100644 index 9397400833676b323492321183c989cec2f41c3f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/io-funcs-inl.h +++ /dev/null @@ -1,329 +0,0 @@ -// base/io-funcs-inl.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian; -// Johns Hopkins University (Author: Daniel Povey) -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_INL_H_ -#define KALDI_BASE_IO_FUNCS_INL_H_ 1 - -// Do not include this file directly. It is included by base/io-funcs.h - -#include -#include -#include - -namespace kaldi { - -// Template that covers integers. -template -void WriteBasicType(std::ostream &os, bool binary, T t) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char len_c = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(t)); - os.put(len_c); - os.write(reinterpret_cast(&t), sizeof(t)); - } else { - if (sizeof(t) == 1) - os << static_cast(t) << " "; - else - os << t << " "; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteBasicType."; - } -} - -// Template that covers integers. -template -inline void ReadBasicType(std::istream &is, bool binary, T *t) { - KALDI_PARANOID_ASSERT(t != NULL); - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - int len_c_in = is.get(); - if (len_c_in == -1) - KALDI_ERR << "ReadBasicType: encountered end of stream."; - char len_c = static_cast(len_c_in), - len_c_expected = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(*t)); - if (len_c != len_c_expected) { - KALDI_ERR << "ReadBasicType: did not get expected integer type, " - << static_cast(len_c) << " vs. " - << static_cast(len_c_expected) - << ". You can change this code to successfully" - << " read it later, if needed."; - // insert code here to read "wrong" type. Might have a switch statement. - } - is.read(reinterpret_cast(t), sizeof(*t)); - } else { - if (sizeof(*t) == 1) { - int16 i; - is >> i; - *t = i; - } else { - is >> *t; - } - } - if (is.fail()) { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << is.peek(); - } -} - -// Template that covers integers. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v) { - // Compile time assertion that this is not called with a wrong type. 
- KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. - os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz * 2); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector >::const_iterator iter = v.begin(), - end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(iter->first) << ',' - << static_cast(iter->second) << ' '; - else - os << iter->first << ',' << iter->second << ' '; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerPairVector."; - } -} - -// Template that covers integers. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerPairVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz * 2); - } - } else { - std::vector > tmp_v; // use temporary so v doesn't use - // extra memory due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. - int16 next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::make_pair((T)next_t1, (T)next_t2)); - } else { - T next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::pair(next_t1, next_t2)); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. - } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerPairVector: read failure at file position " - << is.tellg(); -} - -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. 
- os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(*iter) << " "; - else - os << *iter << " "; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerVector."; - } -} - -template -inline void ReadIntegerVector(std::istream &is, bool binary, - std::vector *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz); - } - } else { - std::vector tmp_v; // use temporary so v doesn't use extra memory - // due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. - int16 next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back((T)next_t); - } else { - T next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(next_t); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. - } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerVector: read failure at file position " - << is.tellg(); -} - -// Initialize an opened stream for writing by writing an optional binary -// header and modifying the floating-point precision. -inline void InitKaldiOutputStream(std::ostream &os, bool binary) { - // This does not throw exceptions (does not check for errors). - if (binary) { - os.put('\0'); - os.put('B'); - } - // Note, in non-binary mode we may at some point want to mess with - // the precision a bit. - // 7 is a bit more than the precision of float.. - if (os.precision() < 7) os.precision(7); -} - -/// Initialize an opened stream for reading by detecting the binary header and -// setting the "binary" value appropriately. -inline bool InitKaldiInputStream(std::istream &is, bool *binary) { - // Sets the 'binary' variable. - // Throws exception in the very unusual situation that stream - // starts with '\0' but not then 'B'. - - if (is.peek() == '\0') { // seems to be binary - is.get(); - if (is.peek() != 'B') { - return false; - } - is.get(); - *binary = true; - return true; - } else { - *binary = false; - return true; - } -} - -} // end namespace kaldi. 
- -#endif // KALDI_BASE_IO_FUNCS_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/io-funcs.cc deleted file mode 100644 index bd6c350780d1096ff8c452fd00864aa07a30ac65..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/io-funcs.cc +++ /dev/null @@ -1,215 +0,0 @@ -// base/io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" - -namespace kaldi { - -template <> -void WriteBasicType(std::ostream &os, bool binary, bool b) { - os << (b ? "T" : "F"); - if (!binary) os << " "; - if (os.fail()) KALDI_ERR << "Write failure in WriteBasicType"; -} - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b) { - KALDI_PARANOID_ASSERT(b != NULL); - if (!binary) is >> std::ws; // eat up whitespace. - char c = is.peek(); - if (c == 'T') { - *b = true; - is.get(); - } else if (c == 'F') { - *b = false; - is.get(); - } else { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << CharToString(c); - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, float f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f) { - KALDI_PARANOID_ASSERT(f != NULL); - if (binary) { - double d; - int c = is.peek(); - if (c == sizeof(*f)) { - is.get(); - is.read(reinterpret_cast(f), sizeof(*f)); - } else if (c == sizeof(d)) { - ReadBasicType(is, binary, &d); - *f = d; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *f; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at file position " - << is.tellg(); - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, double *d) { - KALDI_PARANOID_ASSERT(d != NULL); - if (binary) { - float f; - int c = is.peek(); - if (c == sizeof(*d)) { - is.get(); - is.read(reinterpret_cast(d), sizeof(*d)); - } else if (c == sizeof(f)) { - ReadBasicType(is, binary, &f); - *d = f; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *d; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at 
file position " - << is.tellg(); - } -} - -void CheckToken(const char *token) { - if (*token == '\0') KALDI_ERR << "Token is empty (not a valid token)"; - const char *orig_token = token; - while (*token != '\0') { - if (::isspace(*token)) - KALDI_ERR << "Token is not a valid token (contains space): '" - << orig_token << "'"; - token++; - } -} - -void WriteToken(std::ostream &os, bool binary, const char *token) { - // binary mode is ignored; - // we use space as termination character in either case. - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - os << token << " "; - if (os.fail()) { - KALDI_ERR << "Write failure in WriteToken."; - } -} - -int Peek(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // eat up whitespace. - return is.peek(); -} - -void WriteToken(std::ostream &os, bool binary, const std::string &token) { - WriteToken(os, binary, token.c_str()); -} - -void ReadToken(std::istream &is, bool binary, std::string *str) { - KALDI_ASSERT(str != NULL); - if (!binary) is >> std::ws; // consume whitespace. - is >> *str; - if (is.fail()) { - KALDI_ERR << "ReadToken, failed to read token at file position " - << is.tellg(); - } - if (!isspace(is.peek())) { - KALDI_ERR << "ReadToken, expected space after token, saw instead " - << CharToString(static_cast(is.peek())) - << ", at file position " << is.tellg(); - } - is.get(); // consume the space. -} - -int PeekToken(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // consume whitespace. - bool read_bracket; - if (static_cast(is.peek()) == '<') { - read_bracket = true; - is.get(); - } else { - read_bracket = false; - } - int ans = is.peek(); - if (read_bracket) { - if (!is.unget()) { - // Clear the bad bit. This code can be (and is in fact) reached, since the - // C++ standard does not guarantee that a call to unget() must succeed. - is.clear(); - } - } - return ans; -} - -void ExpectToken(std::istream &is, bool binary, const char *token) { - int pos_at_start = is.tellg(); - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - if (!binary) is >> std::ws; // consume whitespace. - std::string str; - is >> str; - is.get(); // consume the space. - if (is.fail()) { - KALDI_ERR << "Failed to read token [started at file position " - << pos_at_start << "], expected " << token; - } - // The second half of the '&&' expression below is so that if we're expecting - // "", we will accept "Foo>" instead. This is so that the model-reading - // code will tolerate errors in PeekToken where is.unget() failed; search for - // is.clear() in PeekToken() for an explanation. 
- if (strcmp(str.c_str(), token) != 0 && - !(token[0] == '<' && strcmp(str.c_str(), token + 1) == 0)) { - KALDI_ERR << "Expected token \"" << token << "\", got instead \"" << str - << "\"."; - } -} - -void ExpectToken(std::istream &is, bool binary, const std::string &token) { - ExpectToken(is, binary, token.c_str()); -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/io-funcs.h deleted file mode 100644 index 06ad1e3d2d8dc8385886a7c6653f620642c7c05a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/io-funcs.h +++ /dev/null @@ -1,246 +0,0 @@ -// base/io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_H_ -#define KALDI_BASE_IO_FUNCS_H_ - -// This header only contains some relatively low-level I/O functions. -// The full Kaldi I/O declarations are in ../util/kaldi-io.h -// and ../util/kaldi-table.h -// They were put in util/ in order to avoid making the Matrix library -// dependent on them. - -#include -#include -#include -#include - -#include "base/io-funcs-inl.h" -#include "base/kaldi-common.h" - -namespace kaldi { - -/* - This comment describes the Kaldi approach to I/O. All objects can be written - and read in two modes: binary and text. In addition we want to make the I/O - work if we redefine the typedef "BaseFloat" between floats and doubles. - We also want to have control over whitespace in text mode without affecting - the meaning of the file, for pretty-printing purposes. - - Errors are handled by throwing a KaldiFatalError exception. - - For integer and floating-point types (and boolean values): - - WriteBasicType(std::ostream &, bool binary, const T&); - ReadBasicType(std::istream &, bool binary, T*); - - and we expect these functions to be defined in such a way that they work when - the type T changes between float and double, so you can read float into double - and vice versa]. Note that for efficiency and space-saving reasons, the - Vector and Matrix classes do not use these functions [but they preserve the - type interchangeability in their own way] - - For a class (or struct) C: - class C { - .. - Write(std::ostream &, bool binary, [possibly extra optional args for - specific classes]) const; Read(std::istream &, bool binary, [possibly extra - optional args for specific classes]); - .. - } - NOTE: The only actual optional args we used are the "add" arguments in - Vector/Matrix classes, which specify whether we should sum the data already - in the class with the data being read. 
- - For types which are typedef's involving stl classes, I/O is as follows: - typedef std::vector > MyTypedefName; - - The user should define something like: - - WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t); - ReadMyTypedefName(std::ostream &, bool binary, MyTypedefName *t); - - The user would have to write these functions. - - For a type std::vector: - - void WriteIntegerVector(std::ostream &os, bool binary, const std::vector - &v); void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - - For other types, e.g. vectors of pairs, the user should create a routine of - the type WriteMyTypedefName. This is to avoid introducing confusing templated - functions; we could easily create templated functions to handle most of these - cases but they would have to share the same name. - - It also often happens that the user needs to write/read special tokens as part - of a file. These might be class headers, or separators/identifiers in the - class. We provide special functions for manipulating these. These special - tokens must be nonempty and must not contain any whitespace. - - void WriteToken(std::ostream &os, bool binary, const char*); - void WriteToken(std::ostream &os, bool binary, const std::string & token); - int Peek(std::istream &is, bool binary); - void ReadToken(std::istream &is, bool binary, std::string *str); - void PeekToken(std::istream &is, bool binary, std::string *str); - - WriteToken writes the token and one space (whether in binary or text mode). - - Peek returns the first character of the next token, by consuming whitespace - (in text mode) and then returning the peek() character. It returns -1 at EOF; - it doesn't throw. It's useful if a class can have various forms based on - typedefs and virtual classes, and wants to know which version to read. - - ReadToken allows the caller to obtain the next token. PeekToken works just - like ReadToken, but seeks back to the beginning of the token. A subsequent - call to ReadToken will read the same token again. This is useful when - different object types are written to the same file; using PeekToken one can - decide which of the objects to read. - - There is currently no special functionality for writing/reading strings (where - the strings contain data rather than "special tokens" that are whitespace-free - and nonempty). This is because Kaldi is structured in such a way that strings - don't appear, except as OpenFst symbol table entries (and these have their own - format). - - - NOTE: you should not call ReadIntegerType and WriteIntegerType with types, - such as int and size_t, that are machine-independent -- at least not - if you want your file formats to port between machines. Use int32 and - int64 where necessary. There is no way to detect this using compile-time - assertions because C++ only keeps track of the internal representation of - the type. -*/ - -/// \addtogroup io_funcs_basic -/// @{ - -/// WriteBasicType is the name of the write function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void WriteBasicType(std::ostream &os, bool binary, T t); - -/// ReadBasicType is the name of the read function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void ReadBasicType(std::istream &is, bool binary, T *t); - -// Declare specialization for bool. 
-template <> -void WriteBasicType(std::ostream &os, bool binary, bool b); - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b); - -// Declare specializations for float and double. -template <> -void WriteBasicType(std::ostream &os, bool binary, float f); - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f); - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f); - -template <> -void ReadBasicType(std::istream &is, bool binary, double *f); - -// Define ReadBasicType that accepts an "add" parameter to add to -// the destination. Caution: if used in Read functions, be careful -// to initialize the parameters concerned to zero in the default -// constructor. -template -inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) { - if (!add) { - ReadBasicType(is, binary, t); - } else { - T tmp = T(0); - ReadBasicType(is, binary, &tmp); - *t += tmp; - } -} - -/// Function for writing STL vectors of integer types. -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v); - -/// Function for reading STL vector of integer types. -template -inline void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - -/// Function for writing STL vectors of pairs of integer types. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v); - -/// Function for reading STL vector of pairs of integer types. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v); - -/// The WriteToken functions are for writing nonempty sequences of non-space -/// characters. They are not for general strings. -void WriteToken(std::ostream &os, bool binary, const char *token); -void WriteToken(std::ostream &os, bool binary, const std::string &token); - -/// Peek consumes whitespace (if binary == false) and then returns the peek() -/// value of the stream. -int Peek(std::istream &is, bool binary); - -/// ReadToken gets the next token and puts it in str (exception on failure). If -/// PeekToken() had been previously called, it is possible that the stream had -/// failed to unget the starting '<' character. In this case ReadToken() returns -/// the token string without the leading '<'. You must be prepared to handle -/// this case. ExpectToken() handles this internally, and is not affected. -void ReadToken(std::istream &is, bool binary, std::string *token); - -/// PeekToken will return the first character of the next token, or -1 if end of -/// file. It's the same as Peek(), except if the first character is '<' it will -/// skip over it and will return the next character. It will attempt to unget -/// the '<' so the stream is where it was before you did PeekToken(), however, -/// this is not guaranteed (see ReadToken()). -int PeekToken(std::istream &is, bool binary); - -/// ExpectToken tries to read in the given token, and throws an exception -/// on failure. -void ExpectToken(std::istream &is, bool binary, const char *token); -void ExpectToken(std::istream &is, bool binary, const std::string &token); - -/// ExpectPretty attempts to read the text in "token", but only in non-binary -/// mode. Throws exception on failure. It expects an exact match except that -/// arbitrary whitespace matches arbitrary whitespace. 
-void ExpectPretty(std::istream &is, bool binary, const char *token); -void ExpectPretty(std::istream &is, bool binary, const std::string &token); - -/// @} end "addtogroup io_funcs_basic" - -/// InitKaldiOutputStream initializes an opened stream for writing by writing an -/// optional binary header and modifying the floating-point precision; it will -/// typically not be called by users directly. -inline void InitKaldiOutputStream(std::ostream &os, bool binary); - -/// InitKaldiInputStream initializes an opened stream for reading by detecting -/// the binary header and setting the "binary" value appropriately; -/// It will typically not be called by users directly. -inline bool InitKaldiInputStream(std::istream &is, bool *binary); - -} // end namespace kaldi. -#endif // KALDI_BASE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-common.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-common.h deleted file mode 100644 index eee5f34d7234e7c029e6bb59584d3ee65ff5a875..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-common.h +++ /dev/null @@ -1,41 +0,0 @@ -// base/kaldi-common.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_COMMON_H_ -#define KALDI_BASE_KALDI_COMMON_H_ 1 - -#include -#include -#include // C string stuff like strcpy -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-utils.h" -#include "base/kaldi-error.h" -#include "base/kaldi-types.h" -// #include "base/io-funcs.h" -#include "base/kaldi-math.h" -// #include "base/timer.h" - -#endif // KALDI_BASE_KALDI_COMMON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-error.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-error.cc deleted file mode 100644 index 77edc6af6e56bb8fa3431d519e58fda9ee0bac6a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-error.cc +++ /dev/null @@ -1,42 +0,0 @@ -// base/kaldi-error.cc - -// Copyright 2019 LAIX (Yi Sun) -// Copyright 2019 SmartAction LLC (kkm) -// Copyright 2016 Brno University of Technology (author: Karel Vesely) -// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-error.h" - -#include - -namespace kaldi { - -/***** GLOBAL VARIABLES FOR LOGGING *****/ - -int32 g_kaldi_verbose_level = 0; -static std::string program_name; // NOLINT - -void SetProgramName(const char *basename) { - // Using the 'static std::string' for the program name is mostly harmless, - // because (a) Kaldi logging is undefined before main(), and (b) no stdc++ - // string implementation has been found in the wild that would not be just - // an empty string when zero-initialized but not yet constructed. - program_name = basename; -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-error.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-error.h deleted file mode 100644 index 0f65db372b5f05a8017433eed7c95badc819a0a6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-error.h +++ /dev/null @@ -1,57 +0,0 @@ -// base/kaldi-error.h - -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_ERROR_H_ -#define KALDI_BASE_KALDI_ERROR_H_ 1 - -#include "utils/log.h" - -namespace kaldi { - -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_LOG \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) - - -/***** PROGRAM NAME AND VERBOSITY LEVEL *****/ - -/// Called by ParseOptions to set base name (no directory) of the executing -/// program. The name is printed in logging code along with every message, -/// because in our scripts, we often mix together the stderr of many programs. -/// This function is very thread-unsafe. -void SetProgramName(const char *basename); - -/// This is set by util/parse-options.{h,cc} if you set --verbose=? option. -/// Do not use directly, prefer {Get,Set}VerboseLevel(). -extern int32 g_kaldi_verbose_level; - -/// Get verbosity level, usually set via command line '--verbose=' switch. -inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; } - -/// This should be rarely used, except by programs using Kaldi as library; -/// command-line programs set the verbose level automatically from ParseOptions. 
-inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; } - -} // namespace kaldi - -#endif // KALDI_BASE_KALDI_ERROR_H_ - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-math.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-math.cc deleted file mode 100644 index 175d9f49b6c5216645e90e146f4e2eab5572c342..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-math.cc +++ /dev/null @@ -1,164 +0,0 @@ -// base/kaldi-math.cc - -// Copyright 2009-2011 Microsoft Corporation; Yanmin Qian; -// Saarland University; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-math.h" -#ifndef _MSC_VER -#include -#include -#endif -#include -#include - -namespace kaldi { -// These routines are tested in matrix/matrix-test.cc - -int32 RoundUpToNearestPowerOfTwo(int32 n) { - KALDI_ASSERT(n > 0); - n--; - n |= n >> 1; - n |= n >> 2; - n |= n >> 4; - n |= n >> 8; - n |= n >> 16; - return n+1; -} - -static std::mutex _RandMutex; - -int Rand(struct RandomState* state) { -#if !defined(_POSIX_THREAD_SAFE_FUNCTIONS) - // On Windows and Cygwin, just call Rand() - return rand(); -#else - if (state) { - return rand_r(&(state->seed)); - } else { - std::lock_guard lock(_RandMutex); - return rand(); - } -#endif -} - -RandomState::RandomState() { - // we initialize it as Rand() + 27437 instead of just Rand(), because on some - // systems, e.g. at the very least Mac OSX Yosemite and later, it seems to be - // the case that rand_r when initialized with rand() will give you the exact - // same sequence of numbers that rand() will give if you keep calling rand() - // after that initial call. This can cause problems with repeated sequences. - // For example if you initialize two RandomState structs one after the other - // without calling rand() in between, they would give you the same sequence - // offset by one (if we didn't have the "+ 27437" in the code). 27437 is just - // a randomly chosen prime number. - seed = unsigned(Rand()) + 27437; -} - -bool WithProb(BaseFloat prob, struct RandomState* state) { - KALDI_ASSERT(prob >= 0 && prob <= 1.1); // prob should be <= 1.0, - // but we allow slightly larger values that could arise from roundoff in - // previous calculations. - KALDI_COMPILE_TIME_ASSERT(RAND_MAX > 128 * 128); - if (prob == 0) { - return false; - } else if (prob == 1.0) { - return true; - } else if (prob * RAND_MAX < 128.0) { - // prob is very small but nonzero, and the "main algorithm" - // wouldn't work that well. So: with probability 1/128, we - // return WithProb (prob * 128), else return false. - if (Rand(state) < RAND_MAX / 128) { // with probability 128... 
- // Note: we know that prob * 128.0 < 1.0, because - // we asserted RAND_MAX > 128 * 128. - return WithProb(prob * 128.0); - } else { - return false; - } - } else { - return (Rand(state) < ((RAND_MAX + static_cast(1.0)) * prob)); - } -} - -int32 RandInt(int32 min_val, int32 max_val, struct RandomState* state) { - // This is not exact. - KALDI_ASSERT(max_val >= min_val); - if (max_val == min_val) return min_val; - -#ifdef _MSC_VER - // RAND_MAX is quite small on Windows -> may need to handle larger numbers. - if (RAND_MAX > (max_val-min_val)*8) { - // *8 to avoid large inaccuracies in probability, from the modulus... - return min_val + - ((unsigned int)Rand(state) % (unsigned int)(max_val+1-min_val)); - } else { - if ((unsigned int)(RAND_MAX*RAND_MAX) > - (unsigned int)((max_val+1-min_val)*8)) { - // *8 to avoid inaccuracies in probability, from the modulus... - return min_val + ( (unsigned int)( (Rand(state)+RAND_MAX*Rand(state))) - % (unsigned int)(max_val+1-min_val)); - } else { - KALDI_ERR << "rand_int failed because we do not support such large " - "random numbers. (Extend this function)."; - } - } -#else - return min_val + - (static_cast(Rand(state)) % static_cast(max_val+1-min_val)); -#endif -} - -// Returns poisson-distributed random number. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state) { - // Knuth's algorithm. - KALDI_ASSERT(lambda >= 0); - float L = expf(-lambda), p = 1.0; - int32 k = 0; - do { - k++; - float u = RandUniform(state); - p *= u; - } while (p > L); - return k-1; -} - -void RandGauss2(float *a, float *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float u1 = RandUniform(state); - float u2 = RandUniform(state); - u1 = sqrtf(-2.0f * logf(u1)); - u2 = 2.0f * M_PI * u2; - *a = u1 * cosf(u2); - *b = u1 * sinf(u2); -} - -void RandGauss2(double *a, double *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float a_float, b_float; - // Just because we're using doubles doesn't mean we need super-high-quality - // random numbers, so we just use the floating-point version internally. - RandGauss2(&a_float, &b_float, state); - *a = a_float; - *b = b_float; -} - - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-math.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-math.h deleted file mode 100644 index 93c265ee96e704893da26b9083a44a9e60c6c192..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-math.h +++ /dev/null @@ -1,363 +0,0 @@ -// base/kaldi-math.h - -// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Yanmin Qian; -// Jan Silovsky; Saarland University -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_MATH_H_ -#define KALDI_BASE_KALDI_MATH_H_ 1 - -#ifdef _MSC_VER -#include -#endif - -#include -#include -#include - -#include "base/kaldi-types.h" -#include "base/kaldi-common.h" - - -#ifndef DBL_EPSILON -#define DBL_EPSILON 2.2204460492503131e-16 -#endif -#ifndef FLT_EPSILON -#define FLT_EPSILON 1.19209290e-7f -#endif - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif - -#ifndef M_SQRT2 -#define M_SQRT2 1.4142135623730950488016887 -#endif - -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -#ifndef M_SQRT1_2 -#define M_SQRT1_2 0.7071067811865475244008443621048490 -#endif - -#ifndef M_LOG_2PI -#define M_LOG_2PI 1.8378770664093454835606594728112 -#endif - -#ifndef M_LN2 -#define M_LN2 0.693147180559945309417232121458 -#endif - -#ifndef M_LN10 -#define M_LN10 2.302585092994045684017991454684 -#endif - - -#define KALDI_ISNAN std::isnan -#define KALDI_ISINF std::isinf -#define KALDI_ISFINITE(x) std::isfinite(x) - -#if !defined(KALDI_SQR) -# define KALDI_SQR(x) ((x) * (x)) -#endif - -namespace kaldi { - -#if !defined(_MSC_VER) || (_MSC_VER >= 1900) -inline double Exp(double x) { return exp(x); } -#ifndef KALDI_NO_EXPF -inline float Exp(float x) { return expf(x); } -#else -inline float Exp(float x) { return exp(static_cast(x)); } -#endif // KALDI_NO_EXPF -#else -inline double Exp(double x) { return exp(x); } -#if !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -// Microsoft CL v18.0 buggy 64-bit implementation of -// expf() incorrectly returns -inf for exp(-inf). -inline float Exp(float x) { return exp(static_cast(x)); } -#else -inline float Exp(float x) { return expf(x); } -#endif // !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) - -inline double Log(double x) { return log(x); } -inline float Log(float x) { return logf(x); } - -#if !defined(_MSC_VER) || (_MSC_VER >= 1700) -inline double Log1p(double x) { return log1p(x); } -inline float Log1p(float x) { return log1pf(x); } -#else -inline double Log1p(double x) { - const double cutoff = 1.0e-08; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} - -inline float Log1p(float x) { - const float cutoff = 1.0e-07; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} -#endif - -static const double kMinLogDiffDouble = Log(DBL_EPSILON); // negative! -static const float kMinLogDiffFloat = Log(FLT_EPSILON); // negative! - -// -infinity -const float kLogZeroFloat = -std::numeric_limits::infinity(); -const double kLogZeroDouble = -std::numeric_limits::infinity(); -const BaseFloat kLogZeroBaseFloat = -std::numeric_limits::infinity(); - -// Returns a random integer between 0 and RAND_MAX, inclusive -int Rand(struct RandomState* state = NULL); - -// State for thread-safe random number generator -struct RandomState { - RandomState(); - unsigned seed; -}; - -// Returns a random integer between first and last inclusive. -int32 RandInt(int32 first, int32 last, struct RandomState* state = NULL); - -// Returns true with probability "prob", -bool WithProb(BaseFloat prob, struct RandomState* state = NULL); -// with 0 <= prob <= 1 [we check this]. -// Internally calls Rand(). This function is carefully implemented so -// that it should work even if prob is very small. - -/// Returns a random number strictly between 0 and 1. 
-inline float RandUniform(struct RandomState* state = NULL) { - return static_cast((Rand(state) + 1.0) / (RAND_MAX+2.0)); -} - -inline float RandGauss(struct RandomState* state = NULL) { - return static_cast(sqrtf (-2 * Log(RandUniform(state))) - * cosf(2*M_PI*RandUniform(state))); -} - -// Returns poisson-distributed random number. Uses Knuth's algorithm. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state = NULL); - -// Returns a pair of gaussian random numbers. Uses Box-Muller transform -void RandGauss2(float *a, float *b, RandomState *state = NULL); -void RandGauss2(double *a, double *b, RandomState *state = NULL); - -// Also see Vector::RandCategorical(). - -// This is a randomized pruning mechanism that preserves expectations, -// that we typically use to prune posteriors. -template -inline Float RandPrune(Float post, BaseFloat prune_thresh, - struct RandomState* state = NULL) { - KALDI_ASSERT(prune_thresh >= 0.0); - if (post == 0.0 || std::abs(post) >= prune_thresh) - return post; - return (post >= 0 ? 1.0 : -1.0) * - (RandUniform(state) <= fabs(post)/prune_thresh ? prune_thresh : 0.0); -} - -// returns log(exp(x) + exp(y)). -inline double LogAdd(double x, double y) { - double diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffDouble) { - double res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) + exp(y)). -inline float LogAdd(float x, float y) { - float diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffFloat) { - float res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) - exp(y)). -inline double LogSub(double x, double y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - double diff = y - x; // Will be negative. - double res = x + Log(1.0 - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroDouble; - return res; -} - - -// returns log(exp(x) - exp(y)). -inline float LogSub(float x, float y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - float diff = y - x; // Will be negative. - float res = x + Log(1.0f - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroFloat; - return res; -} - -/// return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)). -static inline bool ApproxEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. - if (a == b) return true; - float diff = std::abs(a-b); - if (diff == std::numeric_limits::infinity() - || diff != diff) return false; // diff is +inf or nan. - return (diff <= relative_tolerance*(std::abs(a)+std::abs(b))); -} - -/// assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b)) -static inline void AssertEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. 
- KALDI_ASSERT(ApproxEqual(a, b, relative_tolerance)); -} - - -// RoundUpToNearestPowerOfTwo does the obvious thing. It crashes if n <= 0. -int32 RoundUpToNearestPowerOfTwo(int32 n); - -/// Returns a / b, rounding towards negative infinity in all cases. -static inline int32 DivideRoundingDown(int32 a, int32 b) { - KALDI_ASSERT(b != 0); - if (a * b >= 0) - return a / b; - else if (a < 0) - return (a - b + 1) / b; - else - return (a - b - 1) / b; -} - -template I Gcd(I m, I n) { - if (m == 0 || n == 0) { - if (m == 0 && n == 0) { // gcd not defined, as all integers are divisors. - KALDI_ERR << "Undefined GCD since m = 0, n = 0."; - } - return (m == 0 ? (n > 0 ? n : -n) : ( m > 0 ? m : -m)); - // return absolute value of whichever is nonzero - } - // could use compile-time assertion - // but involves messing with complex template stuff. - KALDI_ASSERT(std::numeric_limits::is_integer); - while (1) { - m %= n; - if (m == 0) return (n > 0 ? n : -n); - n %= m; - if (n == 0) return (m > 0 ? m : -m); - } -} - -/// Returns the least common multiple of two integers. Will -/// crash unless the inputs are positive. -template I Lcm(I m, I n) { - KALDI_ASSERT(m > 0 && n > 0); - I gcd = Gcd(m, n); - return gcd * (m/gcd) * (n/gcd); -} - - -template void Factorize(I m, std::vector *factors) { - // Splits a number into its prime factors, in sorted order from - // least to greatest, with duplication. A very inefficient - // algorithm, which is mainly intended for use in the - // mixed-radix FFT computation (where we assume most factors - // are small). - KALDI_ASSERT(factors != NULL); - KALDI_ASSERT(m >= 1); // Doesn't work for zero or negative numbers. - factors->clear(); - I small_factors[10] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 }; - - // First try small factors. - for (I i = 0; i < 10; i++) { - if (m == 1) return; // We're done. - while (m % small_factors[i] == 0) { - m /= small_factors[i]; - factors->push_back(small_factors[i]); - } - } - // Next try all odd numbers starting from 31. - for (I j = 31;; j += 2) { - if (m == 1) return; - while (m % j == 0) { - m /= j; - factors->push_back(j); - } - } -} - -inline double Hypot(double x, double y) { return hypot(x, y); } -inline float Hypot(float x, float y) { return hypotf(x, y); } - - - - -} // namespace kaldi - - -#endif // KALDI_BASE_KALDI_MATH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-types.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-types.h deleted file mode 100644 index 7ebf4f85386192a65e176d8f0ecde9bb348af4a0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-types.h +++ /dev/null @@ -1,75 +0,0 @@ -// base/kaldi-types.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_BASE_KALDI_TYPES_H_
-#define KALDI_BASE_KALDI_TYPES_H_ 1
-
-namespace kaldi {
-// TYPEDEFS ..................................................................
-#if (KALDI_DOUBLEPRECISION != 0)
-typedef double BaseFloat;
-#else
-typedef float BaseFloat;
-#endif
-}
-
-#ifdef _MSC_VER
-#include <basetsd.h>
-#define ssize_t SSIZE_T
-#endif
-
-// we can do this a different way if some platform
-// we find in the future lacks stdint.h
-#include <stdint.h>
-
-// for discussion on what to do if you need compile kaldi
-// without OpenFST, see the bottom of this this file
-#include <fst/types.h>
-
-namespace kaldi {
-  using ::int16;
-  using ::int32;
-  using ::int64;
-  using ::uint16;
-  using ::uint32;
-  using ::uint64;
-  typedef float float32;
-  typedef double double64;
-}  // end namespace kaldi
-
-// In a theoretical case you decide compile Kaldi without the OpenFST
-// comment the previous namespace statement and uncomment the following
-/*
-namespace kaldi {
-  typedef int8_t int8;
-  typedef int16_t int16;
-  typedef int32_t int32;
-  typedef int64_t int64;
-
-  typedef uint8_t uint8;
-  typedef uint16_t uint16;
-  typedef uint32_t uint32;
-  typedef uint64_t uint64;
-  typedef float float32;
-  typedef double double64;
-}  // end namespace kaldi
-*/
-
-#endif  // KALDI_BASE_KALDI_TYPES_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-utils.h
deleted file mode 100644
index bd434d09ed92ec94bc4208f53a4416f941edfdb0..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/base/kaldi-utils.h
+++ /dev/null
@@ -1,155 +0,0 @@
-// base/kaldi-utils.h
-
-// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation;
-// Saarland University; Karel Vesely; Yanmin Qian
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
- -#ifndef KALDI_BASE_KALDI_UTILS_H_ -#define KALDI_BASE_KALDI_UTILS_H_ 1 - -#if defined(_MSC_VER) -# define WIN32_LEAN_AND_MEAN -# define NOMINMAX -# include -#endif - -#ifdef _MSC_VER -#include -#define unlink _unlink -#else -#include -#endif - -#include -#include - -#if defined(_MSC_VER) -#pragma warning(disable: 4244 4056 4305 4800 4267 4996 4756 4661) -#if _MSC_VER < 1400 -#define __restrict__ -#else -#define __restrict__ __restrict -#endif -#endif - -#if defined(_MSC_VER) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = _aligned_malloc(size, align)) -# define KALDI_MEMALIGN_FREE(x) _aligned_free(x) -#elif defined(__CYGWIN__) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = aligned_alloc(align, size)) -# define KALDI_MEMALIGN_FREE(x) free(x) -#else -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (!posix_memalign(pp_orig, align, size) ? *(pp_orig) : NULL) -# define KALDI_MEMALIGN_FREE(x) free(x) -#endif - -#ifdef __ICC -#pragma warning(disable: 383) // ICPC remark we don't want. -#pragma warning(disable: 810) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. -#pragma warning(disable: 1418) // ICPC remark we don't want. -#pragma warning(disable: 444) // ICPC remark we don't want. -#pragma warning(disable: 869) // ICPC remark we don't want. -#pragma warning(disable: 1287) // ICPC remark we don't want. -#pragma warning(disable: 279) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. -#endif - - -namespace kaldi { - - -// CharToString prints the character in a human-readable form, for debugging. -std::string CharToString(const char &c); - - -inline int MachineIsLittleEndian() { - int check = 1; - return (*reinterpret_cast(&check) != 0); -} - -// This function kaldi::Sleep() provides a portable way -// to sleep for a possibly fractional -// number of seconds. On Windows it's only accurate to microseconds. -void Sleep(float seconds); -} // namespace kaldi - -#define KALDI_SWAP8(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[7];\ - (reinterpret_cast(&a))[7] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[6];\ - (reinterpret_cast(&a))[6] = t;\ - t = (reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=(reinterpret_cast(&a))[5];\ - (reinterpret_cast(&a))[5] = t;\ - t = (reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3]=(reinterpret_cast(&a))[4];\ - (reinterpret_cast(&a))[4] = t;} while (0) -#define KALDI_SWAP4(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=t;} while (0) -#define KALDI_SWAP2(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1] = t;} while (0) - - -// Makes copy constructor and operator= private. 
-#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \ - type(const type&); \ - void operator = (const type&) - -template class KaldiCompileTimeAssert { }; -template<> class KaldiCompileTimeAssert { - public: - static inline void Check() { } -}; - -#define KALDI_COMPILE_TIME_ASSERT(b) KaldiCompileTimeAssert<(b)>::Check() - -#define KALDI_ASSERT_IS_INTEGER_TYPE(I) \ - KaldiCompileTimeAssert::is_specialized \ - && std::numeric_limits::is_integer>::Check() - -#define KALDI_ASSERT_IS_FLOATING_TYPE(F) \ - KaldiCompileTimeAssert::is_specialized \ - && !std::numeric_limits::is_integer>::Check() - -#if defined(_MSC_VER) -#define KALDI_STRCASECMP _stricmp -#elif defined(__CYGWIN__) -#include -#define KALDI_STRCASECMP strcasecmp -#else -#define KALDI_STRCASECMP strcasecmp -#endif -#ifdef _MSC_VER -# define KALDI_STRTOLL(cur_cstr, end_cstr) _strtoi64(cur_cstr, end_cstr, 10); -#else -# define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10); -#endif - -#endif // KALDI_BASE_KALDI_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-decoder.cc deleted file mode 100644 index 06f77557fa49a23f6a44d07c327a1b3b081c6dec..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-decoder.cc +++ /dev/null @@ -1,1101 +0,0 @@ -// decoder/lattice-faster-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2018 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "decoder/lattice-faster-decoder.h" -// #include "lat/lattice-functions.h" - -namespace kaldi { - -// instantiate this class once for each thing you have to decode. -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : fst_(&fst), - delete_fst_(false), - config_(config), - num_toks_(0), - context_graph_(context_graph) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. -} - -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const LatticeFasterDecoderConfig &config, FST *fst) - : fst_(fst), delete_fst_(true), config_(config), num_toks_(0) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. 
-} - -template -LatticeFasterDecoderTpl::~LatticeFasterDecoderTpl() { - DeleteElems(toks_.Clear()); - ClearActiveTokens(); - if (delete_fst_) delete fst_; -} - -template -void LatticeFasterDecoderTpl::InitDecoding() { - // clean up from last time: - DeleteElems(toks_.Clear()); - cost_offsets_.clear(); - ClearActiveTokens(); - warned_ = false; - num_toks_ = 0; - decoding_finalized_ = false; - final_costs_.clear(); - StateId start_state = fst_->Start(); - KALDI_ASSERT(start_state != fst::kNoStateId); - active_toks_.resize(1); - Token *start_tok = new Token(0.0, 0.0, NULL, NULL, NULL); - active_toks_[0].toks = start_tok; - toks_.Insert(start_state, start_tok); - num_toks_++; - ProcessNonemitting(config_.beam); -} - -// Returns true if any kind of traceback is available (not necessarily from -// a final state). It should only very rarely return false; this indicates -// an unusual search error. -template -bool LatticeFasterDecoderTpl::Decode( - DecodableInterface *decodable) { - InitDecoding(); - // We use 1-based indexing for frames in this decoder (if you view it in - // terms of features), but note that the decodable object uses zero-based - // numbering, which we have to correct for when we call it. - AdvanceDecoding(decodable); - FinalizeDecoding(); - - // Returns true if we have any kind of traceback available (not necessarily - // to the end state; query ReachedFinal() for that). - return !active_toks_.empty() && active_toks_.back().toks != NULL; -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - Lattice raw_lat; - GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, olat); - return (olat->NumStates() != 0); -} - -// Outputs an FST corresponding to the raw, state-level lattice -template -bool LatticeFasterDecoderTpl::GetRawLattice( - Lattice *ofst, bool use_final_probs) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (decoding_finalized_ ? final_costs_ : final_costs_local); - if (!decoding_finalized_ && use_final_probs) - ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - const int32 bucket_count = num_toks_ / 2 + 3; - unordered_map tok_map(bucket_count); - // First create all states. - std::vector token_list; - for (int32 f = 0; f <= num_frames; f++) { - if (active_toks_[f].toks == NULL) { - KALDI_WARN << "GetRawLattice: no tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - TopSortTokens(active_toks_[f].toks, &token_list); - for (size_t i = 0; i < token_list.size(); i++) - if (token_list[i] != NULL) tok_map[token_list[i]] = ofst->AddState(); - } - // The next statement sets the start state of the output FST. Because we - // topologically sorted the tokens, state zero must be the start-state. 
- ofst->SetStart(0); - - KALDI_VLOG(4) << "init:" << num_toks_ / 2 + 3 - << " buckets:" << tok_map.bucket_count() - << " load:" << tok_map.load_factor() - << " max:" << tok_map.max_load_factor(); - // Now create all arcs. - for (int32 f = 0; f <= num_frames; f++) { - for (Token *tok = active_toks_[f].toks; tok != NULL; tok = tok->next) { - StateId cur_state = tok_map[tok]; - for (ForwardLinkT *l = tok->links; l != NULL; l = l->next) { - typename unordered_map::const_iterator iter = - tok_map.find(l->next_tok); - StateId nextstate = iter->second; - KALDI_ASSERT(iter != tok_map.end()); - BaseFloat cost_offset = 0.0; - if (l->ilabel != 0) { // emitting.. - KALDI_ASSERT(f >= 0 && f < cost_offsets_.size()); - cost_offset = cost_offsets_[f]; - } - - StateId state = cur_state; - if (l->is_start_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->start_tag_id(), Weight(0, 0), tmp); - ofst->AddArc(state, arc); - state = tmp; - } - if (l->is_end_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->end_tag_id(), Weight(0, 0), nextstate); - ofst->AddArc(tmp, arc); - nextstate = tmp; - } - - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(state, arc); - } - if (f == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - } - - fst::TopSort(ofst); - return (ofst->NumStates() > 0); -} - -// This function is now deprecated, since now we do determinization from outside -// the LatticeFasterDecoder class. Outputs an FST corresponding to the -// lattice-determinized lattice (one path per word sequence). -template -bool LatticeFasterDecoderTpl::GetLattice( - CompactLattice *ofst, bool use_final_probs) const { - Lattice raw_fst; - GetRawLattice(&raw_fst, use_final_probs); - Invert(&raw_fst); // make it so word labels are on the input. - // (in phase where we get backward-costs). - fst::ILabelCompare ilabel_comp; - ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes - // lattice-determinization more efficient. - - fst::DeterminizeLatticePrunedOptions lat_opts; - lat_opts.max_mem = config_.det_opts.max_mem; - - DeterminizeLatticePruned(raw_fst, config_.lattice_beam, ofst, lat_opts); - raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed. - Connect(ofst); // Remove unreachable states... there might be - // a small number of these, in some cases. - // Note: if something went wrong and the raw lattice was empty, - // we should still get to this point in the code without warnings or failures. - return (ofst->NumStates() != 0); -} - -template -void LatticeFasterDecoderTpl::PossiblyResizeHash(size_t num_toks) { - size_t new_sz = static_cast(static_cast(num_toks) * - config_.hash_ratio); - if (new_sz > toks_.Size()) { - toks_.SetSize(new_sz); - } -} - -/* - A note on the definition of extra_cost. - - extra_cost is used in pruning tokens, to save memory. - - extra_cost can be thought of as a beta (backward) cost assuming - we had set the betas on currently-active tokens to all be the negative - of the alphas for those tokens. (So all currently active tokens would - be on (tied) best paths). - - We can use the extra_cost to accurately prune away tokens that we know will - never appear in the lattice. 
If the extra_cost is greater than the desired - lattice beam, the token would provably never appear in the lattice, so we can - prune away the token. - - (Note: we don't update all the extra_costs every time we update a frame; we - only do it every 'config_.prune_interval' frames). - */ - -// FindOrAddToken either locates a token in hash of toks_, -// or if necessary inserts a new, empty token (i.e. with no forward links) -// for the current frame. [note: it's inserted if necessary into hash toks_ -// and also into the singly linked list of tokens active on this frame -// (whose head is at active_toks_[frame]). -template -inline typename LatticeFasterDecoderTpl::Elem * -LatticeFasterDecoderTpl::FindOrAddToken(StateId state, - int32 frame_plus_one, - BaseFloat tot_cost, - Token *backpointer, - bool *changed) { - // Returns the Token pointer. Sets "changed" (if non-NULL) to true - // if the token was newly created or the cost changed. - KALDI_ASSERT(frame_plus_one < active_toks_.size()); - Token *&toks = active_toks_[frame_plus_one].toks; - Elem *e_found = toks_.Insert(state, NULL); - if (e_found->val == NULL) { // no such token presently. - const BaseFloat extra_cost = 0.0; - // tokens on the currently final frame have zero extra_cost - // as any of them could end up - // on the winning path. - Token *new_tok = new Token(tot_cost, extra_cost, NULL, toks, backpointer); - // NULL: no forward links yet - toks = new_tok; - num_toks_++; - e_found->val = new_tok; - if (changed) *changed = true; - return e_found; - } else { - Token *tok = e_found->val; // There is an existing Token for this state. - if (tok->tot_cost > tot_cost) { // replace old token - tok->tot_cost = tot_cost; - // SetBackpointer() just does tok->backpointer = backpointer in - // the case where Token == BackpointerToken, else nothing. - tok->SetBackpointer(backpointer); - // we don't allocate a new token, the old stays linked in active_toks_ - // we only replace the tot_cost - // in the current frame, there are no forward links (and no extra_cost) - // only in ProcessNonemitting we have to delete forward links - // in case we visit a state for the second time - // those forward links, that lead to this replaced token before: - // they remain and will hopefully be pruned later (PruneForwardLinks...) - if (changed) *changed = true; - } else { - if (changed) *changed = false; - } - return e_found; - } -} - -// prunes outgoing links for all tokens in active_toks_[frame] -// it's called by PruneActiveTokens -// all links, that have link_extra_cost > lattice_beam are pruned -template -void LatticeFasterDecoderTpl::PruneForwardLinks( - int32 frame_plus_one, bool *extra_costs_changed, bool *links_pruned, - BaseFloat delta) { - // delta is the amount by which the extra_costs must change - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - - *extra_costs_changed = false; - *links_pruned = false; - KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size()); - if (active_toks_[frame_plus_one].toks == - NULL) { // empty list; should not happen. - if (!warned_) { - KALDI_WARN << "No tokens alive [doing pruning].. warning first " - "time only for each utterance\n"; - warned_ = true; - } - } - - // We have to iterate until there is no more change, because the links - // are not guaranteed to be in topological order. 
- bool changed = true; // difference new minus old extra cost >= delta ? - while (changed) { - changed = false; - for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL; - tok = tok->next) { - ForwardLinkT *link, *prev_link = NULL; - // will recompute tok_extra_cost for tok. - BaseFloat tok_extra_cost = std::numeric_limits::infinity(); - // tok_extra_cost is the best (min) of link_extra_cost of outgoing links - for (link = tok->links; link != NULL;) { - // See if we need to excise this link... - Token *next_tok = link->next_tok; - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); // difference in brackets is >= 0 - // link_exta_cost is the difference in score between the best paths - // through link source state and through link destination state - KALDI_ASSERT(link_extra_cost == link_extra_cost); // check for NaN - // the graph_cost contatins the context score - // if it's the score of the backoff arc, it should be removed. - if (link->context_score < 0) { - link_extra_cost += link->context_score; - } - if (link_extra_cost > config_.lattice_beam) { // excise link - ForwardLinkT *next_link = link->next; - if (prev_link != NULL) - prev_link->next = next_link; - else - tok->links = next_link; - delete link; - link = next_link; // advance link but leave prev_link the same. - *links_pruned = true; - } else { // keep the link and update the tok_extra_cost if needed. - if (link_extra_cost < 0.0) { // this is just a precaution. - // if (link_extra_cost < -0.01) - // KALDI_WARN << "Negative extra_cost: " << link_extra_cost; - link_extra_cost = 0.0; - } - if (link_extra_cost < tok_extra_cost) - tok_extra_cost = link_extra_cost; - prev_link = link; // move to next link - link = link->next; - } - } // for all outgoing links - if (fabs(tok_extra_cost - tok->extra_cost) > delta) - changed = true; // difference new minus old is bigger than delta - tok->extra_cost = tok_extra_cost; - // will be +infinity or <= lattice_beam_. - // infinity indicates, that no forward link survived pruning - } // for all Token on active_toks_[frame] - if (changed) *extra_costs_changed = true; - - // Note: it's theoretically possible that aggressive compiler - // optimizations could cause an infinite loop here for small delta and - // high-dynamic-range scores. - } // while changed -} - -// PruneForwardLinksFinal is a version of PruneForwardLinks that we call -// on the final frame. If there are final tokens active, it uses -// the final-probs for pruning, otherwise it treats all tokens as final. -template -void LatticeFasterDecoderTpl::PruneForwardLinksFinal() { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame_plus_one = active_toks_.size() - 1; - - if (active_toks_[frame_plus_one].toks == - NULL) // empty list; should not happen. - KALDI_WARN << "No tokens alive at end of file"; - - typedef typename unordered_map::const_iterator IterType; - ComputeFinalCosts(&final_costs_, &final_relative_cost_, &final_best_cost_); - decoding_finalized_ = true; - // We call DeleteElems() as a nicety, not because it's really necessary; - // otherwise there would be a time, after calling PruneTokensForFrame() on the - // final frame, when toks_.GetList() or toks_.Clear() would contain pointers - // to nonexistent tokens. - DeleteElems(toks_.Clear()); - - // Now go through tokens on this frame, pruning forward links... may have to - // iterate a few times until there is no more change, because the list is not - // in topological order. 
This is a modified version of the code in - // PruneForwardLinks, but here we also take account of the final-probs. - bool changed = true; - BaseFloat delta = 1.0e-05; - while (changed) { - changed = false; - for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL; - tok = tok->next) { - ForwardLinkT *link, *prev_link = NULL; - // will recompute tok_extra_cost. It has a term in it that corresponds - // to the "final-prob", so instead of initializing tok_extra_cost to - // infinity below we set it to the difference between the - // (score+final_prob) of this token, and the best such (score+final_prob). - BaseFloat final_cost; - if (final_costs_.empty()) { - final_cost = 0.0; - } else { - IterType iter = final_costs_.find(tok); - if (iter != final_costs_.end()) - final_cost = iter->second; - else - final_cost = std::numeric_limits::infinity(); - } - BaseFloat tok_extra_cost = tok->tot_cost + final_cost - final_best_cost_; - // tok_extra_cost will be a "min" over either directly being final, or - // being indirectly final through other links, and the loop below may - // decrease its value: - for (link = tok->links; link != NULL;) { - // See if we need to excise this link... - Token *next_tok = link->next_tok; - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); - if (link_extra_cost > config_.lattice_beam) { // excise link - ForwardLinkT *next_link = link->next; - if (prev_link != NULL) - prev_link->next = next_link; - else - tok->links = next_link; - delete link; - link = next_link; // advance link but leave prev_link the same. - } else { // keep the link and update the tok_extra_cost if needed. - if (link_extra_cost < 0.0) { // this is just a precaution. - // if (link_extra_cost < -0.01) - // KALDI_WARN << "Negative extra_cost: " << link_extra_cost; - link_extra_cost = 0.0; - } - if (link_extra_cost < tok_extra_cost) - tok_extra_cost = link_extra_cost; - prev_link = link; - link = link->next; - } - } - // prune away tokens worse than lattice_beam above best path. This step - // was not necessary in the non-final case because then, this case - // showed up as having no forward links. Here, the tok_extra_cost has - // an extra component relating to the final-prob. - if (tok_extra_cost > config_.lattice_beam) - tok_extra_cost = std::numeric_limits::infinity(); - // to be pruned in PruneTokensForFrame - - if (!ApproxEqual(tok->extra_cost, tok_extra_cost, delta)) changed = true; - tok->extra_cost = - tok_extra_cost; // will be +infinity or <= lattice_beam_. - } - } // while changed -} - -template -BaseFloat LatticeFasterDecoderTpl::FinalRelativeCost() const { - if (!decoding_finalized_) { - BaseFloat relative_cost; - ComputeFinalCosts(NULL, &relative_cost, NULL); - return relative_cost; - } else { - // we're not allowed to call that function if FinalizeDecoding() has - // been called; return a cached value. - return final_relative_cost_; - } -} - -// Prune away any tokens on this frame that have no forward links. -// [we don't do this in PruneForwardLinks because it would give us -// a problem with dangling pointers]. 
-// It's called by PruneActiveTokens if any forward links have been pruned -template -void LatticeFasterDecoderTpl::PruneTokensForFrame( - int32 frame_plus_one) { - KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size()); - Token *&toks = active_toks_[frame_plus_one].toks; - if (toks == NULL) KALDI_WARN << "No tokens alive [doing pruning]"; - Token *tok, *next_tok, *prev_tok = NULL; - for (tok = toks; tok != NULL; tok = next_tok) { - next_tok = tok->next; - if (tok->extra_cost == std::numeric_limits::infinity()) { - // token is unreachable from end of graph; (no forward links survived) - // excise tok from list and delete tok. - if (prev_tok != NULL) - prev_tok->next = tok->next; - else - toks = tok->next; - delete tok; - num_toks_--; - } else { // fetch next Token - prev_tok = tok; - } - } -} - -// Go backwards through still-alive tokens, pruning them, starting not from -// the current frame (where we want to keep all tokens) but from the frame -// before that. We go backwards through the frames and stop when we reach a -// point where the delta-costs are not changing (and the delta controls when we -// consider a cost to have "not changed"). -template -void LatticeFasterDecoderTpl::PruneActiveTokens(BaseFloat delta) { - int32 cur_frame_plus_one = NumFramesDecoded(); - int32 num_toks_begin = num_toks_; - // The index "f" below represents a "frame plus one", i.e. you'd have to - // subtract one to get the corresponding index for the decodable object. - for (int32 f = cur_frame_plus_one - 1; f >= 0; f--) { - // Reason why we need to prune forward links in this situation: - // (1) we have never pruned them (new TokenList) - // (2) we have not yet pruned the forward links to the next f, - // after any of those tokens have changed their extra_cost. 
- if (active_toks_[f].must_prune_forward_links) { - bool extra_costs_changed = false, links_pruned = false; - PruneForwardLinks(f, &extra_costs_changed, &links_pruned, delta); - if (extra_costs_changed && f > 0) // any token has changed extra_cost - active_toks_[f - 1].must_prune_forward_links = true; - if (links_pruned) // any link was pruned - active_toks_[f].must_prune_tokens = true; - active_toks_[f].must_prune_forward_links = false; // job done - } - if (f + 1 < cur_frame_plus_one && // except for last f (no forward links) - active_toks_[f + 1].must_prune_tokens) { - PruneTokensForFrame(f + 1); - active_toks_[f + 1].must_prune_tokens = false; - } - } - KALDI_VLOG(4) << "PruneActiveTokens: pruned tokens from " << num_toks_begin - << " to " << num_toks_; -} - -template -void LatticeFasterDecoderTpl::ComputeFinalCosts( - unordered_map *final_costs, - BaseFloat *final_relative_cost, BaseFloat *final_best_cost) const { - KALDI_ASSERT(!decoding_finalized_); - if (final_costs != NULL) final_costs->clear(); - const Elem *final_toks = toks_.GetList(); - BaseFloat infinity = std::numeric_limits::infinity(); - BaseFloat best_cost = infinity, best_cost_with_final = infinity; - - while (final_toks != NULL) { - StateId state = final_toks->key; - Token *tok = final_toks->val; - const Elem *next = final_toks->tail; - BaseFloat final_cost = fst_->Final(state).Value(); - BaseFloat cost = tok->tot_cost, cost_with_final = cost + final_cost; - best_cost = std::min(cost, best_cost); - best_cost_with_final = std::min(cost_with_final, best_cost_with_final); - if (final_costs != NULL && final_cost != infinity) - (*final_costs)[tok] = final_cost; - final_toks = next; - } - if (final_relative_cost != NULL) { - if (best_cost == infinity && best_cost_with_final == infinity) { - // Likely this will only happen if there are no tokens surviving. - // This seems the least bad way to handle it. - *final_relative_cost = infinity; - } else { - *final_relative_cost = best_cost_with_final - best_cost; - } - } - if (final_best_cost != NULL) { - if (best_cost_with_final != infinity) { // final-state exists. - *final_best_cost = best_cost_with_final; - } else { // no final-state exists. - *final_best_cost = best_cost; - } - } -} - -template -void LatticeFasterDecoderTpl::AdvanceDecoding( - DecodableInterface *decodable, int32 max_num_frames) { - if (std::is_same >::value) { - // if the type 'FST' is the FST base-class, then see if the FST type of fst_ - // is actually VectorFst or ConstFst. If so, call the AdvanceDecoding() - // function after casting *this to the more specific type. - if (fst_->Type() == "const") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } else if (fst_->Type() == "vector") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } - } - - KALDI_ASSERT(!active_toks_.empty() && !decoding_finalized_ && - "You must call InitDecoding() before AdvanceDecoding"); - int32 num_frames_ready = decodable->NumFramesReady(); - // num_frames_ready must be >= num_frames_decoded, or else - // the number of frames ready must have decreased (which doesn't - // make sense) or the decodable object changed between calls - // (which isn't allowed). 
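// --- editorial aside (not part of the deleted file) -------------------------
// ComputeFinalCosts() above tracks two minima over the surviving tokens: the
// best forward cost, and the best forward cost plus the final-prob of the
// token's state. Their difference is the "final relative cost" consulted by
// ReachedFinal(); infinity means no token reached a final state. A minimal
// sketch of that reduction over a plain vector (FinalCandidate and
// FinalRelativeCost are illustrative names, not the decoder's types).
#include <algorithm>
#include <limits>
#include <vector>

namespace editorial_sketch {

struct FinalCandidate {
  float forward_cost;  // tok->tot_cost in the decoder
  float final_cost;    // fst.Final(state).Value(); +inf if the state is not final
};

inline float FinalRelativeCost(const std::vector<FinalCandidate> &toks) {
  const float inf = std::numeric_limits<float>::infinity();
  float best = inf, best_with_final = inf;
  for (const FinalCandidate &t : toks) {
    best = std::min(best, t.forward_cost);
    best_with_final = std::min(best_with_final, t.forward_cost + t.final_cost);
  }
  if (best == inf) return inf;      // no surviving tokens at all
  return best_with_final - best;    // +inf if no final state was reached
}

}  // namespace editorial_sketch
// --- end editorial aside -----------------------------------------------------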
- KALDI_ASSERT(num_frames_ready >= NumFramesDecoded()); - int32 target_frames_decoded = num_frames_ready; - if (max_num_frames >= 0) - target_frames_decoded = - std::min(target_frames_decoded, NumFramesDecoded() + max_num_frames); - while (NumFramesDecoded() < target_frames_decoded) { - if (NumFramesDecoded() % config_.prune_interval == 0) { - PruneActiveTokens(config_.lattice_beam * config_.prune_scale); - } - BaseFloat cost_cutoff = ProcessEmitting(decodable); - ProcessNonemitting(cost_cutoff); - } -} - -// FinalizeDecoding() is a version of PruneActiveTokens that we call -// (optionally) on the final frame. Takes into account the final-prob of -// tokens. This function used to be called PruneActiveTokensFinal(). -template -void LatticeFasterDecoderTpl::FinalizeDecoding() { - int32 final_frame_plus_one = NumFramesDecoded(); - int32 num_toks_begin = num_toks_; - // PruneForwardLinksFinal() prunes final frame (with final-probs), and - // sets decoding_finalized_. - PruneForwardLinksFinal(); - for (int32 f = final_frame_plus_one - 1; f >= 0; f--) { - bool b1, b2; // values not used. - BaseFloat dontcare = 0.0; // delta of zero means we must always update - PruneForwardLinks(f, &b1, &b2, dontcare); - PruneTokensForFrame(f + 1); - } - PruneTokensForFrame(0); - KALDI_VLOG(4) << "pruned tokens from " << num_toks_begin << " to " - << num_toks_; -} - -/// Gets the weight cutoff. Also counts the active tokens. -template -BaseFloat LatticeFasterDecoderTpl::GetCutoff( - Elem *list_head, size_t *tok_count, BaseFloat *adaptive_beam, - Elem **best_elem) { - BaseFloat best_weight = std::numeric_limits::infinity(); - // positive == high cost == bad. - size_t count = 0; - if (config_.max_active == std::numeric_limits::max() && - config_.min_active == 0) { - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = static_cast(e->val->tot_cost); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - if (adaptive_beam != NULL) *adaptive_beam = config_.beam; - return best_weight + config_.beam; - } else { - tmp_array_.clear(); - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = e->val->tot_cost; - tmp_array_.push_back(w); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - - BaseFloat beam_cutoff = best_weight + config_.beam, - min_active_cutoff = std::numeric_limits::infinity(), - max_active_cutoff = std::numeric_limits::infinity(); - - KALDI_VLOG(6) << "Number of tokens active on frame " << NumFramesDecoded() - << " is " << tmp_array_.size(); - - if (tmp_array_.size() > static_cast(config_.max_active)) { - std::nth_element(tmp_array_.begin(), - tmp_array_.begin() + config_.max_active, - tmp_array_.end()); - max_active_cutoff = tmp_array_[config_.max_active]; - } - if (max_active_cutoff < beam_cutoff) { // max_active is tighter than beam. - if (adaptive_beam) - *adaptive_beam = max_active_cutoff - best_weight + config_.beam_delta; - return max_active_cutoff; - } - if (tmp_array_.size() > static_cast(config_.min_active)) { - if (config_.min_active == 0) { - min_active_cutoff = best_weight; - } else { - std::nth_element( - tmp_array_.begin(), tmp_array_.begin() + config_.min_active, - tmp_array_.size() > static_cast(config_.max_active) - ? 
tmp_array_.begin() + config_.max_active - : tmp_array_.end()); - min_active_cutoff = tmp_array_[config_.min_active]; - } - } - if (min_active_cutoff > beam_cutoff) { // min_active is looser than beam. - if (adaptive_beam) - *adaptive_beam = min_active_cutoff - best_weight + config_.beam_delta; - return min_active_cutoff; - } else { - *adaptive_beam = config_.beam; - return beam_cutoff; - } - } -} - -template -BaseFloat LatticeFasterDecoderTpl::ProcessEmitting( - DecodableInterface *decodable) { - KALDI_ASSERT(active_toks_.size() > 0); - int32 frame = - active_toks_.size() - 1; // frame is the frame-index - // (zero-based) used to get likelihoods - // from the decodable object. - active_toks_.resize(active_toks_.size() + 1); - - Elem *final_toks = - toks_.Clear(); // analogous to swapping prev_toks_ / cur_toks_ - // in simple-decoder.h. Removes the Elems from - // being indexed in the hash in toks_. - Elem *best_elem = NULL; - BaseFloat adaptive_beam; - size_t tok_cnt; - BaseFloat cur_cutoff = - GetCutoff(final_toks, &tok_cnt, &adaptive_beam, &best_elem); - KALDI_VLOG(6) << "Adaptive beam on frame " << NumFramesDecoded() << " is " - << adaptive_beam; - - PossiblyResizeHash( - tok_cnt); // This makes sure the hash is always big enough. - - BaseFloat next_cutoff = std::numeric_limits::infinity(); - // pruning "online" before having seen all tokens - - BaseFloat cost_offset = 0.0; // Used to keep probabilities in a good - // dynamic range. - - // First process the best token to get a hopefully - // reasonably tight bound on the next cutoff. The only - // products of the next block are "next_cutoff" and "cost_offset". - if (best_elem) { - StateId state = best_elem->key; - Token *tok = best_elem->val; - cost_offset = -tok->tot_cost; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. - BaseFloat new_weight = arc.weight.Value() + cost_offset - - decodable->LogLikelihood(frame, arc.ilabel) + - tok->tot_cost; - if (state != arc.nextstate) { - new_weight += config_.length_penalty; - } - if (new_weight + adaptive_beam < next_cutoff) - next_cutoff = new_weight + adaptive_beam; - } - } - } - - // Store the offset on the acoustic likelihoods that we're applying. - // Could just do cost_offsets_.push_back(cost_offset), but we - // do it this way as it's more robust to future code changes. - cost_offsets_.resize(frame + 1, 0.0); - cost_offsets_[frame] = cost_offset; - - // the tokens are now owned here, in final_toks, and the hash is empty. - // 'owned' is a complex thing here; the point is we need to call DeleteElem - // on each elem 'e' to let toks_ know we're done with them. - for (Elem *e = final_toks, *e_tail; e != NULL; e = e_tail) { - // loop this way because we delete "e" as we go. - StateId state = e->key; - Token *tok = e->val; - if (tok->tot_cost <= cur_cutoff) { - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. 
- BaseFloat ac_cost = cost_offset - - decodable->LogLikelihood(frame, arc.ilabel), - graph_cost = arc.weight.Value(); - if (state != arc.nextstate) { - graph_cost += config_.length_penalty; - } - BaseFloat cur_cost = tok->tot_cost, - tot_cost = cur_cost + ac_cost + graph_cost; - if (tot_cost >= next_cutoff) - continue; - else if (tot_cost + adaptive_beam < next_cutoff) - next_cutoff = - tot_cost + adaptive_beam; // prune by best current token - // Note: the frame indexes into active_toks_ are one-based, - // hence the + 1. - Elem *e_next = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, NULL); - // NULL: no change indicator needed - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_next->val->context_state = tok->context_state; - } else { - e_next->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - // Add ForwardLink from tok to next_tok (put on head of list - // tok->links) - tok->links = new ForwardLinkT(e_next->val, arc.ilabel, arc.olabel, - graph_cost, ac_cost, is_start_boundary, - is_end_boundary, tok->links); - tok->links->context_score = context_score; - } - } // for all arcs - } - e_tail = e->tail; - toks_.Delete(e); // delete Elem - } - return next_cutoff; -} - -// static inline -template -void LatticeFasterDecoderTpl::DeleteForwardLinks(Token *tok) { - ForwardLinkT *l = tok->links, *m; - while (l != NULL) { - m = l->next; - delete l; - l = m; - } - tok->links = NULL; -} - -template -void LatticeFasterDecoderTpl::ProcessNonemitting(BaseFloat cutoff) { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame = static_cast(active_toks_.size()) - 2; - // Note: "frame" is the time-index we just processed, or -1 if - // we are processing the nonemitting transitions before the - // first frame (called from InitDecoding()). - - // Processes nonemitting arcs for one frame. Propagates within toks_. - // Note-- this queue structure is not very optimal as - // it may cause us to process states unnecessarily (e.g. more than once), - // but in the baseline code, turning this vector into a set to fix this - // problem did not improve overall speed. - - KALDI_ASSERT(queue_.empty()); - - if (toks_.GetList() == NULL) { - if (!warned_) { - KALDI_WARN << "Error, no surviving tokens: frame is " << frame; - warned_ = true; - } - } - - int before = 0, after = 0; - for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail) { - StateId state = e->key; - if (fst_->NumInputEpsilons(state) != 0) queue_.push_back(e); - ++before; - } - - while (!queue_.empty()) { - ++after; - const Elem *e = queue_.back(); - queue_.pop_back(); - - StateId state = e->key; - Token *tok = - e->val; // would segfault if e is a NULL pointer but this can't happen. - BaseFloat cur_cost = tok->tot_cost; - if (cur_cost >= cutoff) // Don't bother processing successors. - continue; - // If "tok" has any existing forward links, delete them, - // because we're about to regenerate them. This is a kind - // of non-optimality (remember, this is the simple decoder), - // but since most states are emitting it's not a huge issue. - DeleteForwardLinks(tok); // necessary when re-visiting - tok->links = NULL; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel == 0) { // propagate nonemitting only... 
- BaseFloat graph_cost = arc.weight.Value(), - tot_cost = cur_cost + graph_cost; - if (tot_cost < cutoff) { - bool changed; - - Elem *e_new = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, &changed); - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_new->val->context_state = tok->context_state; - } else { - e_new->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - - tok->links = - new ForwardLinkT(e_new->val, 0, arc.olabel, graph_cost, 0, - is_start_boundary, is_end_boundary, tok->links); - tok->links->context_score = context_score; - - // "changed" tells us whether the new token has a different - // cost from before, or is new [if so, add into queue]. - if (changed && fst_->NumInputEpsilons(arc.nextstate) != 0) - queue_.push_back(e_new); - } - } - } // for all arcs - } // while queue not empty - KALDI_VLOG(3) << "ProcessNonemitting " << before << " " << after; -} - -template -void LatticeFasterDecoderTpl::DeleteElems(Elem *list) { - for (Elem *e = list, *e_tail; e != NULL; e = e_tail) { - e_tail = e->tail; - toks_.Delete(e); - } -} - -template -void LatticeFasterDecoderTpl< - FST, Token>::ClearActiveTokens() { // a cleanup routine, at utt end/begin - for (size_t i = 0; i < active_toks_.size(); i++) { - // Delete all tokens alive on this frame, and any forward - // links they may have. - for (Token *tok = active_toks_[i].toks; tok != NULL;) { - DeleteForwardLinks(tok); - Token *next_tok = tok->next; - delete tok; - num_toks_--; - tok = next_tok; - } - } - active_toks_.clear(); - KALDI_ASSERT(num_toks_ == 0); -} - -// static -template -void LatticeFasterDecoderTpl::TopSortTokens( - Token *tok_list, std::vector *topsorted_list) { - unordered_map token2pos; - using std::unordered_set; - typedef typename unordered_map::iterator IterType; - int32 num_toks = 0; - for (Token *tok = tok_list; tok != NULL; tok = tok->next) num_toks++; - int32 cur_pos = 0; - // We assign the tokens numbers num_toks - 1, ... , 2, 1, 0. - // This is likely to be in closer to topological order than - // if we had given them ascending order, because of the way - // new tokens are put at the front of the list. - for (Token *tok = tok_list; tok != NULL; tok = tok->next) - token2pos[tok] = num_toks - ++cur_pos; - - unordered_set reprocess; - - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) { - Token *tok = iter->first; - int32 pos = iter->second; - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - // We only need to consider epsilon links, since non-epsilon links - // transition between frames and this function only needs to sort a list - // of tokens from a single frame. - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { // another token on this - // frame, so must consider it. - int32 next_pos = following_iter->second; - if (next_pos < pos) { // reassign the position of the next Token. - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - // In case we had previously assigned this token to be reprocessed, we can - // erase it from that set because it's "happy now" (we just processed it). - reprocess.erase(tok); - } - - size_t max_loop = 1000000, - loop_count; // max_loop is to detect epsilon cycles. 
- for (loop_count = 0; !reprocess.empty() && loop_count < max_loop; - ++loop_count) { - std::vector reprocess_vec; - for (typename unordered_set::iterator iter = reprocess.begin(); - iter != reprocess.end(); ++iter) - reprocess_vec.push_back(*iter); - reprocess.clear(); - for (typename std::vector::iterator iter = reprocess_vec.begin(); - iter != reprocess_vec.end(); ++iter) { - Token *tok = *iter; - int32 pos = token2pos[tok]; - // Repeat the processing we did above (for comments, see above). - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { - int32 next_pos = following_iter->second; - if (next_pos < pos) { - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - } - } - KALDI_ASSERT(loop_count < max_loop && - "Epsilon loops exist in your decoding " - "graph (this is not allowed!)"); - - topsorted_list->clear(); - topsorted_list->resize(cur_pos, - NULL); // create a list with NULLs in between. - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) - (*topsorted_list)[iter->second] = iter->first; -} - -// Instantiate the template for the combination of token types and FST types -// that we'll need. -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; - -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-decoder.h deleted file mode 100644 index 0152b85447e354b770745b748d266b1ca2d57024..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-decoder.h +++ /dev/null @@ -1,558 +0,0 @@ -// decoder/lattice-faster-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
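// --- editorial aside (not part of the deleted file) -------------------------
// TopSortTokens() above orders the tokens of one frame so that every epsilon
// (input-label 0) link goes from an earlier position to a later one, bailing
// out if epsilon cycles are detected. The decoder does this with an in-place
// renumber-and-reprocess scheme over its intrusive token list; the same
// ordering problem on an explicit adjacency list is classically solved with
// Kahn's algorithm, sketched below. This is the textbook algorithm, not the
// decoder's exact code.
#include <cstddef>
#include <queue>
#include <utility>
#include <vector>

namespace editorial_sketch {

// Returns a topological order of nodes 0..n-1, or an empty vector if the
// epsilon graph contains a cycle (which the decoder treats as a fatal error).
inline std::vector<int> TopoSort(int n,
                                 const std::vector<std::pair<int, int>> &edges) {
  std::vector<std::vector<int>> adj(n);
  std::vector<int> indegree(n, 0);
  for (const auto &e : edges) {
    adj[e.first].push_back(e.second);
    ++indegree[e.second];
  }
  std::queue<int> ready;
  for (int v = 0; v < n; ++v)
    if (indegree[v] == 0) ready.push(v);
  std::vector<int> order;
  while (!ready.empty()) {
    int v = ready.front();
    ready.pop();
    order.push_back(v);
    for (int w : adj[v])
      if (--indegree[w] == 0) ready.push(w);
  }
  if (static_cast<int>(order.size()) != n) order.clear();  // cycle detected
  return order;
}

}  // namespace editorial_sketch
// --- end editorial aside -----------------------------------------------------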
- -#ifndef KALDI_DECODER_LATTICE_FASTER_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_DECODER_H_ - -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "decoder/context_graph.h" -#include "fst/fstlib.h" -#include "fstext/fstext-lib.h" -#include "itf/decodable-itf.h" -#include "lat/determinize-lattice-pruned.h" -#include "lat/kaldi-lattice.h" -#include "util/hash-list.h" - -namespace kaldi { - -struct LatticeFasterDecoderConfig { - BaseFloat beam; - int32 max_active; - int32 min_active; - BaseFloat lattice_beam; - int32 prune_interval; - bool determinize_lattice; // not inspected by this class... used in - // command-line program. - BaseFloat beam_delta; - BaseFloat hash_ratio; - // Note: we don't make prune_scale configurable on the command line, it's not - // a very important parameter. It affects the algorithm that prunes the - // tokens as we go. - BaseFloat prune_scale; - BaseFloat length_penalty; // for balancing the del/ins ratio, suggested -3.0 - - // Most of the options inside det_opts are not actually queried by the - // LatticeFasterDecoder class itself, but by the code that calls it, for - // example in the function DecodeUtteranceLatticeFaster. - fst::DeterminizeLatticePhonePrunedOptions det_opts; - - LatticeFasterDecoderConfig() - : beam(16.0), - max_active(std::numeric_limits::max()), - min_active(200), - lattice_beam(10.0), - prune_interval(25), - determinize_lattice(true), - beam_delta(0.5), - hash_ratio(2.0), - prune_scale(0.1), - length_penalty(0.0) {} - void Register(OptionsItf *opts) { - det_opts.Register(opts); - opts->Register("beam", &beam, - "Decoding beam. Larger->slower, more accurate."); - opts->Register("max-active", &max_active, - "Decoder max active states. Larger->slower; " - "more accurate"); - opts->Register("min-active", &min_active, - "Decoder minimum #active states."); - opts->Register("lattice-beam", &lattice_beam, - "Lattice generation beam. Larger->slower, " - "and deeper lattices"); - opts->Register("prune-interval", &prune_interval, - "Interval (in frames) at " - "which to prune tokens"); - opts->Register( - "determinize-lattice", &determinize_lattice, - "If true, " - "determinize the lattice (lattice-determinization, keeping only " - "best pdf-sequence for each word-sequence)."); - opts->Register( - "beam-delta", &beam_delta, - "Increment used in decoding-- this " - "parameter is obscure and relates to a speedup in the way the " - "max-active constraint is applied. Larger is more accurate."); - opts->Register("hash-ratio", &hash_ratio, - "Setting used in decoder to " - "control hash behavior"); - } - void Check() const { - KALDI_ASSERT(beam > 0.0 && max_active > 1 && lattice_beam > 0.0 && - min_active <= max_active && prune_interval > 0 && - beam_delta > 0.0 && hash_ratio >= 1.0 && prune_scale > 0.0 && - prune_scale < 1.0); - } -}; - -namespace decoder { -// We will template the decoder on the token type as well as the FST type; this -// is a mechanism so that we can use the same underlying decoder code for -// versions of the decoder that support quickly getting the best path -// (LatticeFasterOnlineDecoder, see lattice-faster-online-decoder.h) and also -// those that do not (LatticeFasterDecoder). - -// ForwardLinks are the links from a token to a token on the next frame. -// or sometimes on the current frame (for input-epsilon links). 
-template -struct ForwardLink { - using Label = fst::StdArc::Label; - - Token *next_tok; // the next token [or NULL if represents final-state] - Label ilabel; // ilabel on arc - Label olabel; // olabel on arc - BaseFloat graph_cost; // graph cost of traversing arc (contains LM, etc.) - BaseFloat acoustic_cost; // acoustic cost (pre-scaled) of traversing arc - bool is_start_boundary; - bool is_end_boundary; - float context_score; - ForwardLink *next; // next in singly-linked list of forward arcs (arcs - // in the state-level lattice) from a token. - inline ForwardLink(Token *next_tok, Label ilabel, Label olabel, - BaseFloat graph_cost, BaseFloat acoustic_cost, - bool is_start_boundary, bool is_end_boundary, - ForwardLink *next) - : next_tok(next_tok), - ilabel(ilabel), - olabel(olabel), - graph_cost(graph_cost), - acoustic_cost(acoustic_cost), - is_start_boundary(is_start_boundary), - is_end_boundary(is_end_boundary), - context_score(0), - next(next) {} -}; - -struct StdToken { - using ForwardLinkT = ForwardLink; - using Token = StdToken; - - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. After calling PruneForwardLinks, this equals the - // minimum difference between the cost of the best path that this link is a - // part of, and the cost of the absolute best path, under the assumption that - // any of the currently active states at the decoding front may eventually - // succeed (e.g. if you were to take the currently active states one by one - // and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - Token *next; - - // This function does nothing and should be optimized out; it's needed - // so we can share the regular LatticeFasterDecoderTpl code and the code - // for LatticeFasterOnlineDecoder that supports fast traceback. - inline void SetBackpointer(Token *backpointer) {} - - // This constructor just ignores the 'backpointer' argument. That argument is - // needed so that we can use the same decoder code for LatticeFasterDecoderTpl - // and LatticeFasterOnlineDecoderTpl (which needs backpointers to support a - // fast way to obtain the best path). - inline StdToken(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLinkT *links, - Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - context_state(0), - next(next) {} -}; - -struct BackpointerToken { - using ForwardLinkT = ForwardLink; - using Token = BackpointerToken; - - // BackpointerToken is like Token but also - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. 
After calling PruneForwardLinks, this equals - // the minimum difference between the cost of the best path, and the cost of - // this is on, and the cost of the absolute best path, under the assumption - // that any of the currently active states at the decoding front may - // eventually succeed (e.g. if you were to take the currently active states - // one by one and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - BackpointerToken *next; - - // Best preceding BackpointerToken (could be a on this frame, connected to - // this via an epsilon transition, or on a previous frame). This is only - // required for an efficient GetBestPath function in - // LatticeFasterOnlineDecoderTpl; it plays no part in the lattice generation - // (the "links" list is what stores the forward links, for that). - Token *backpointer; - - inline void SetBackpointer(Token *backpointer) { - this->backpointer = backpointer; - } - - inline BackpointerToken(BaseFloat tot_cost, BaseFloat extra_cost, - ForwardLinkT *links, Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - next(next), - backpointer(backpointer), - context_state(0) {} -}; - -} // namespace decoder - -/** This is the "normal" lattice-generating decoder. - See \ref lattices_generation \ref decoders_faster and \ref decoders_simple - for more information. - - The decoder is templated on the FST type and the token type. The token type - will normally be StdToken, but also may be BackpointerToken which is to - support quick lookup of the current best path (see - lattice-faster-online-decoder.h) - - The FST you invoke this decoder which is expected to equal - Fst::Fst, a.k.a. StdFst, or GrammarFst. If you invoke it with - FST == StdFst and it notices that the actual FST type is - fst::VectorFst or fst::ConstFst, the decoder object - will internally cast itself to one that is templated on those more specific - types; this is an optimization for speed. - */ -template -class LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph); - - // This version of the constructor takes ownership of the fst, and will delete - // it when this object is destroyed. - LatticeFasterDecoderTpl(const LatticeFasterDecoderConfig &config, FST *fst); - - void SetOptions(const LatticeFasterDecoderConfig &config) { - config_ = config; - } - - const LatticeFasterDecoderConfig &GetOptions() const { return config_; } - - ~LatticeFasterDecoderTpl(); - - /// Decodes until there are no more frames left in the "decodable" object.. - /// note, this may block waiting for input if the "decodable" object blocks. - /// Returns true if any kind of traceback is available (not necessarily from a - /// final state). - bool Decode(DecodableInterface *decodable); - - /// says whether a final-state was active on the last frame. 
If it was not, - /// the lattice (or traceback) will end with states that are not final-states. - bool ReachedFinal() const { - return FinalRelativeCost() != std::numeric_limits::infinity(); - } - - /// Outputs an FST corresponding to the single best path through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. Note: this just calls - /// GetRawLattice() and figures out the shortest path. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// Outputs an FST corresponding to the raw, state-level - /// tracebacks. Returns true if result is nonempty. - /// If "use_final_probs" is true AND we reached the final-state - /// of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - /// The raw lattice will be topologically sorted. - /// - /// See also GetRawLatticePruned in lattice-faster-online-decoder.h, - /// which also supports a pruning beam, in case for some reason - /// you want it pruned tighter than the regular lattice beam. - /// We could put that here in future needed. - bool GetRawLattice(Lattice *ofst, bool use_final_probs = true) const; - - /// [Deprecated, users should now use GetRawLattice and determinize it - /// themselves, e.g. using DeterminizeLatticePhonePrunedWrapper]. - /// Outputs an FST corresponding to the lattice-determinized - /// lattice (one path per word sequence). Returns true if result is - /// nonempty. If "use_final_probs" is true AND we reached the final-state of - /// the graph then it will include those as final-probs, else it will treat - /// all final-probs as one. - bool GetLattice(CompactLattice *ofst, bool use_final_probs = true) const; - - /// InitDecoding initializes the decoding, and should only be used if you - /// intend to call AdvanceDecoding(). If you call Decode(), you don't need to - /// call this. You can also call InitDecoding if you have already decoded an - /// utterance and want to start with a new utterance. - void InitDecoding(); - - /// This will decode until there are no more frames ready in the decodable - /// object. You can keep calling it each time more frames become available. - /// If max_num_frames is specified, it specifies the maximum number of frames - /// the function will decode before returning. - void AdvanceDecoding(DecodableInterface *decodable, - int32 max_num_frames = -1); - - /// This function may be optionally called after AdvanceDecoding(), when you - /// do not plan to decode any further. It does an extra pruning step that - /// will help to prune the lattices output by GetLattice and (particularly) - /// GetRawLattice more completely, particularly toward the end of the - /// utterance. If you call this, you cannot call AdvanceDecoding again (it - /// will fail), and you cannot call GetLattice() and related functions with - /// use_final_probs = false. Used to be called PruneActiveTokensFinal(). - void FinalizeDecoding(); - - /// FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives - /// more information. It returns the difference between the best (final-cost - /// plus cost) of any token on the final frame, and the best cost of any token - /// on the final frame. If it is infinity it means no final-states were - /// present on the final frame. It will usually be nonnegative. 
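// --- editorial aside (not part of the deleted file) -------------------------
// Pulling the public methods declared above together: a caller constructs the
// decoder over an HCLG-style FST, feeds it a DecodableInterface, and reads the
// best path back out. The sketch below is an illustrative usage outline, not
// code from the deleted sources; it assumes a Kaldi/WeNet build where this
// header is on the include path (the path may differ per build), where
// `decodable` wraps per-frame acoustic scores, and where passing a null
// context graph is acceptable (the decoder guards every context_graph_ use).
#include <memory>
#include "decoder/lattice-faster-decoder.h"  // path as laid out in this vendored tree

namespace editorial_sketch {

inline bool DecodeOneUtterance(const fst::Fst<fst::StdArc> &hclg,
                               kaldi::DecodableInterface *decodable,
                               kaldi::Lattice *best_path) {
  kaldi::LatticeFasterDecoderConfig config;  // defaults: beam 16, lattice-beam 10, ...
  kaldi::LatticeFasterDecoder decoder(hclg, config, /*context_graph=*/nullptr);
  decoder.InitDecoding();
  decoder.AdvanceDecoding(decodable, /*max_num_frames=*/-1);  // consume all ready frames
  decoder.FinalizeDecoding();  // optional extra pruning pass on the last frame
  return decoder.GetBestPath(best_path, /*use_final_probs=*/true);
}

}  // namespace editorial_sketch
// --- end editorial aside -----------------------------------------------------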
If it not - /// too positive (e.g. < 5 is my first guess, but this is not tested) you can - /// take it as a good indication that we reached the final-state with - /// reasonable likelihood. - BaseFloat FinalRelativeCost() const; - - // Returns the number of frames decoded so far. The value returned changes - // whenever we call ProcessEmitting(). - inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; } - - protected: - // we make things protected instead of private, as code in - // LatticeFasterOnlineDecoderTpl, which inherits from this, also uses the - // internals. - - // Deletes the elements of the singly linked list tok->links. - inline static void DeleteForwardLinks(Token *tok); - - // head of per-frame list of Tokens (list is in topological order), - // and something saying whether we ever pruned it using PruneForwardLinks. - struct TokenList { - Token *toks; - bool must_prune_forward_links; - bool must_prune_tokens; - TokenList() - : toks(NULL), must_prune_forward_links(true), must_prune_tokens(true) {} - }; - - using Elem = typename HashList::Elem; - // Equivalent to: - // struct Elem { - // StateId key; - // Token *val; - // Elem *tail; - // }; - - void PossiblyResizeHash(size_t num_toks); - - // FindOrAddToken either locates a token in hash of toks_, or if necessary - // inserts a new, empty token (i.e. with no forward links) for the current - // frame. [note: it's inserted if necessary into hash toks_ and also into the - // singly linked list of tokens active on this frame (whose head is at - // active_toks_[frame]). The frame_plus_one argument is the acoustic frame - // index plus one, which is used to index into the active_toks_ array. - // Returns the Token pointer. Sets "changed" (if non-NULL) to true if the - // token was newly created or the cost changed. - // If Token == StdToken, the 'backpointer' argument has no purpose (and will - // hopefully be optimized out). - inline Elem *FindOrAddToken(StateId state, int32 frame_plus_one, - BaseFloat tot_cost, Token *backpointer, - bool *changed); - - // prunes outgoing links for all tokens in active_toks_[frame] - // it's called by PruneActiveTokens - // all links, that have link_extra_cost > lattice_beam are pruned - // delta is the amount by which the extra_costs must change - // before we set *extra_costs_changed = true. - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - void PruneForwardLinks(int32 frame_plus_one, bool *extra_costs_changed, - bool *links_pruned, BaseFloat delta); - - // This function computes the final-costs for tokens active on the final - // frame. It outputs to final-costs, if non-NULL, a map from the Token* - // pointer to the final-prob of the corresponding state, for all Tokens - // that correspond to states that have final-probs. This map will be - // empty if there were no final-probs. It outputs to - // final_relative_cost, if non-NULL, the difference between the best - // forward-cost including the final-prob cost, and the best forward-cost - // without including the final-prob cost (this will usually be positive), or - // infinity if there were no final-probs. [c.f. FinalRelativeCost(), which - // outputs this quanitity]. 
It outputs to final_best_cost, if - // non-NULL, the lowest for any token t active on the final frame, of - // forward-cost[t] + final-cost[t], where final-cost[t] is the final-cost in - // the graph of the state corresponding to token t, or the best of - // forward-cost[t] if there were no final-probs active on the final frame. - // You cannot call this after FinalizeDecoding() has been called; in that - // case you should get the answer from class-member variables. - void ComputeFinalCosts(unordered_map *final_costs, - BaseFloat *final_relative_cost, - BaseFloat *final_best_cost) const; - - // PruneForwardLinksFinal is a version of PruneForwardLinks that we call - // on the final frame. If there are final tokens active, it uses - // the final-probs for pruning, otherwise it treats all tokens as final. - void PruneForwardLinksFinal(); - - // Prune away any tokens on this frame that have no forward links. - // [we don't do this in PruneForwardLinks because it would give us - // a problem with dangling pointers]. - // It's called by PruneActiveTokens if any forward links have been pruned - void PruneTokensForFrame(int32 frame_plus_one); - - // Go backwards through still-alive tokens, pruning them if the - // forward+backward cost is more than lat_beam away from the best path. It's - // possible to prove that this is "correct" in the sense that we won't lose - // anything outside of lat_beam, regardless of what happens in the future. - // delta controls when it considers a cost to have changed enough to continue - // going backward and propagating the change. larger delta -> will recurse - // less far. - void PruneActiveTokens(BaseFloat delta); - - /// Gets the weight cutoff. Also counts the active tokens. - BaseFloat GetCutoff(Elem *list_head, size_t *tok_count, - BaseFloat *adaptive_beam, Elem **best_elem); - - /// Processes emitting arcs for one frame. Propagates from prev_toks_ to - /// cur_toks_. Returns the cost cutoff for subsequent ProcessNonemitting() to - /// use. - BaseFloat ProcessEmitting(DecodableInterface *decodable); - - /// Processes nonemitting (epsilon) arcs for one frame. Called after - /// ProcessEmitting() on each frame. The cost cutoff is computed by the - /// preceding ProcessEmitting(). - void ProcessNonemitting(BaseFloat cost_cutoff); - - // HashList defined in ../util/hash-list.h. It actually allows us to maintain - // more than one list (e.g. for current and previous frames), but only one of - // them at a time can be indexed by StateId. It is indexed by frame-index - // plus one, where the frame-index is zero-based, as used in decodable object. - // That is, the emitting probs of frame t are accounted for in tokens at - // toks_[t+1]. The zeroth frame is for nonemitting transition at the start of - // the graph. - HashList toks_; - - std::vector active_toks_; // Lists of tokens, indexed by - // frame (members of TokenList are toks, must_prune_forward_links, - // must_prune_tokens). - std::vector - queue_; // temp variable used in ProcessNonemitting, - std::vector tmp_array_; // used in GetCutoff. - - // fst_ is a pointer to the FST we are decoding from. - const FST *fst_; - // delete_fst_ is true if the pointer fst_ needs to be deleted when this - // object is destroyed. - bool delete_fst_; - - std::vector cost_offsets_; // This contains, for each - // frame, an offset that was added to the acoustic log-likelihoods on that - // frame in order to keep everything in a nice dynamic range i.e. close to - // zero, to reduce roundoff errors. 
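// --- editorial aside (not part of the deleted file) -------------------------
// The cost_offsets_ member above records, per frame, the constant folded into
// the acoustic costs (ProcessEmitting uses the negated cost of the best token)
// so that accumulated path costs stay near zero instead of growing with the
// utterance length; the offset is undone again when acoustic costs are read
// back out during lattice/best-path extraction. A toy illustration with plain
// floats; the names and numbers are illustrative only.
#include <cassert>
#include <cmath>
#include <vector>

namespace editorial_sketch {

inline void CostOffsetExample() {
  // Raw per-frame acoustic costs along one path (negated log-likelihoods).
  std::vector<float> raw_ac = {123.7f, 118.2f, 131.9f};
  // Per-frame offsets, e.g. minus the best token cost on each frame.
  std::vector<float> offsets = {-123.0f, -118.0f, -131.0f};

  float shifted_total = 0.0f, recovered_total = 0.0f;
  for (size_t t = 0; t < raw_ac.size(); ++t) {
    float shifted = raw_ac[t] + offsets[t];    // what the decoder accumulates
    shifted_total += shifted;                  // stays close to zero
    recovered_total += shifted - offsets[t];   // what extraction re-adds
  }
  assert(std::fabs(recovered_total - (123.7f + 118.2f + 131.9f)) < 1e-3f);
  assert(std::fabs(shifted_total - 1.8f) < 1e-3f);  // 0.7 + 0.2 + 0.9
}

}  // namespace editorial_sketch
// --- end editorial aside -----------------------------------------------------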
- LatticeFasterDecoderConfig config_; - int32 num_toks_; // current total #toks allocated... - bool warned_; - - /// decoding_finalized_ is true if someone called FinalizeDecoding(). [note, - /// calling this is optional]. If true, it's forbidden to decode more. Also, - /// if this is set, then the output of ComputeFinalCosts() is in the next - /// three variables. The reason we need to do this is that after - /// FinalizeDecoding() calls PruneTokensForFrame() for the final frame, some - /// of the tokens on the last frame are freed, so we free the list from toks_ - /// to avoid having dangling pointers hanging around. - bool decoding_finalized_; - /// For the meaning of the next 3 variables, see the comment for - /// decoding_finalized_ above., and ComputeFinalCosts(). - unordered_map final_costs_; - BaseFloat final_relative_cost_; - BaseFloat final_best_cost_; - - std::shared_ptr context_graph_ = nullptr; - - // There are various cleanup tasks... the toks_ structure contains - // singly linked lists of Token pointers, where Elem is the list type. - // It also indexes them in a hash, indexed by state (this hash is only - // maintained for the most recent frame). toks_.Clear() - // deletes them from the hash and returns the list of Elems. The - // function DeleteElems calls toks_.Delete(elem) for each elem in - // the list, which returns ownership of the Elem to the toks_ structure - // for reuse, but does not delete the Token pointer. The Token pointers - // are reference-counted and are ultimately deleted in PruneTokensForFrame, - // but are also linked together on each frame by their own linked-list, - // using the "next" pointer. We delete them manually. - void DeleteElems(Elem *list); - - // This function takes a singly linked list of tokens for a single frame, and - // outputs a list of them in topological order (it will crash if no such order - // can be found, which will typically be due to decoding graphs with epsilon - // cycles, which are not allowed). Note: the output list may contain NULLs, - // which the caller should pass over; it just happens to be more efficient for - // the algorithm to output a list that contains NULLs. - static void TopSortTokens(Token *tok_list, - std::vector *topsorted_list); - - void ClearActiveTokens(); - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterDecoderTpl); -}; - -typedef LatticeFasterDecoderTpl - LatticeFasterDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-online-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-online-decoder.cc deleted file mode 100644 index 2345b4d129ff905784762e973bad279f2fb55d31..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-online-decoder.cc +++ /dev/null @@ -1,278 +0,0 @@ -// decoder/lattice-faster-online-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2014 IMSL, PKU-HKUST (author: Wei Shi) -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -// see note at the top of lattice-faster-decoder.cc, about how to maintain this -// file in sync with lattice-faster-decoder.cc - -#include -#include -#include -#include - -#include "decoder/lattice-faster-online-decoder.h" - -namespace kaldi { - -template -bool LatticeFasterOnlineDecoderTpl::TestGetBestPath( - bool use_final_probs) const { - Lattice lat1; - { - Lattice raw_lat; - this->GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, &lat1); - } - Lattice lat2; - GetBestPath(&lat2, use_final_probs); - BaseFloat delta = 0.1; - int32 num_paths = 1; - if (!fst::RandEquivalent(lat1, lat2, num_paths, delta, rand())) { - KALDI_WARN << "Best-path test failed"; - return false; - } else { - return true; - } -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterOnlineDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - olat->DeleteStates(); - BaseFloat final_graph_cost; - BestPathIterator iter = BestPathEnd(use_final_probs, &final_graph_cost); - if (iter.Done()) return false; // would have printed warning. - StateId state = olat->AddState(); - olat->SetFinal(state, LatticeWeight(final_graph_cost, 0.0)); - while (!iter.Done()) { - LatticeArc arc; - iter = TraceBackBestPath(iter, &arc); - arc.nextstate = state; - StateId new_state = olat->AddState(); - olat->AddArc(new_state, arc); - state = new_state; - } - olat->SetStart(state); - return true; -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::BestPathEnd( - bool use_final_probs, BaseFloat *final_cost_out) const { - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "BestPathEnd() with use_final_probs == false"; - KALDI_ASSERT(this->NumFramesDecoded() > 0 && - "You cannot call BestPathEnd if no frames were decoded."); - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - // Singly linked list of tokens on last frame (access list through "next" - // pointer). - BaseFloat best_cost = std::numeric_limits::infinity(); - BaseFloat best_final_cost = 0; - Token *best_tok = NULL; - for (Token *tok = this->active_toks_.back().toks; tok != NULL; - tok = tok->next) { - BaseFloat cost = tok->tot_cost, final_cost = 0.0; - if (use_final_probs && !final_costs.empty()) { - // if we are instructed to use final-probs, and any final tokens were - // active on final frame, include the final-prob in the cost of the token. 
- typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) { - final_cost = iter->second; - cost += final_cost; - } else { - cost = std::numeric_limits::infinity(); - } - } - if (cost < best_cost) { - best_cost = cost; - best_tok = tok; - best_final_cost = final_cost; - } - } - if (best_tok == - NULL) { // this should not happen, and is likely a code error or - // caused by infinities in likelihoods, but I'm not making - // it a fatal error for now. - KALDI_WARN << "No final token found."; - } - if (final_cost_out) *final_cost_out = best_final_cost; - return BestPathIterator(best_tok, this->NumFramesDecoded() - 1); -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::TraceBackBestPath(BestPathIterator iter, - LatticeArc *oarc) const { - KALDI_ASSERT(!iter.Done() && oarc != NULL); - Token *tok = static_cast(iter.tok); - int32 cur_t = iter.frame, step_t = 0; - if (tok->backpointer != NULL) { - // retrieve the correct forward link(with the best link cost) - BaseFloat best_cost = std::numeric_limits::infinity(); - ForwardLinkT *link; - for (link = tok->backpointer->links; link != NULL; link = link->next) { - if (link->next_tok == tok) { // this is a link to "tok" - BaseFloat graph_cost = link->graph_cost, - acoustic_cost = link->acoustic_cost; - BaseFloat cost = graph_cost + acoustic_cost; - if (cost < best_cost) { - oarc->ilabel = link->ilabel; - oarc->olabel = link->olabel; - if (link->ilabel != 0) { - KALDI_ASSERT(static_cast(cur_t) < - this->cost_offsets_.size()); - acoustic_cost -= this->cost_offsets_[cur_t]; - step_t = -1; - } else { - step_t = 0; - } - oarc->weight = LatticeWeight(graph_cost, acoustic_cost); - best_cost = cost; - } - } - } - if (link == NULL && - best_cost == - std::numeric_limits::infinity()) { // Did not find - // correct link. - KALDI_ERR << "Error tracing best-path back (likely " - << "bug in token-pruning algorithm)"; - } - } else { - oarc->ilabel = 0; - oarc->olabel = 0; - oarc->weight = LatticeWeight::One(); // zero costs. - } - return BestPathIterator(tok->backpointer, cur_t + step_t); -} - -template -bool LatticeFasterOnlineDecoderTpl::GetRawLatticePruned( - Lattice *ofst, bool use_final_probs, BaseFloat beam) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = this->active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - for (int32 f = 0; f <= num_frames; f++) { - if (this->active_toks_[f].toks == NULL) { - KALDI_WARN << "No tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - } - unordered_map tok_map; - std::queue > tok_queue; - // First initialize the queue and states. 
Put the initial state on the queue; - // this is the last token in the list active_toks_[0].toks. - for (Token *tok = this->active_toks_[0].toks; tok != NULL; tok = tok->next) { - if (tok->next == NULL) { - tok_map[tok] = ofst->AddState(); - ofst->SetStart(tok_map[tok]); - std::pair tok_pair(tok, 0); // #frame = 0 - tok_queue.push(tok_pair); - } - } - - // Next create states for "good" tokens - while (!tok_queue.empty()) { - std::pair cur_tok_pair = tok_queue.front(); - tok_queue.pop(); - Token *cur_tok = cur_tok_pair.first; - int32 cur_frame = cur_tok_pair.second; - KALDI_ASSERT(cur_frame >= 0 && cur_frame <= this->cost_offsets_.size()); - - typename unordered_map::const_iterator iter = - tok_map.find(cur_tok); - KALDI_ASSERT(iter != tok_map.end()); - StateId cur_state = iter->second; - - for (ForwardLinkT *l = cur_tok->links; l != NULL; l = l->next) { - Token *next_tok = l->next_tok; - if (next_tok->extra_cost < beam) { - // so both the current and the next token are good; create the arc - int32 next_frame = l->ilabel == 0 ? cur_frame : cur_frame + 1; - StateId nextstate; - if (tok_map.find(next_tok) == tok_map.end()) { - nextstate = tok_map[next_tok] = ofst->AddState(); - tok_queue.push(std::pair(next_tok, next_frame)); - } else { - nextstate = tok_map[next_tok]; - } - BaseFloat cost_offset = - (l->ilabel != 0 ? this->cost_offsets_[cur_frame] : 0); - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(cur_state, arc); - } - } - if (cur_frame == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(cur_tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - return (ofst->NumStates() != 0); -} - -// Instantiate the template for the FST types that we'll need. -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-online-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-online-decoder.h deleted file mode 100644 index dc50cfa73e6574e9625eda9045c47f674fcbc1e3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/decoder/lattice-faster-online-decoder.h +++ /dev/null @@ -1,131 +0,0 @@ -// decoder/lattice-faster-online-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -// see note at the top of lattice-faster-decoder.h, about how to maintain this -// file in sync with lattice-faster-decoder.h - -#ifndef KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ - -#include "decoder/lattice-faster-decoder.h" - -#include - -namespace kaldi { - -/** LatticeFasterOnlineDecoderTpl is as LatticeFasterDecoderTpl but also - supports an efficient way to get the best path (see the function - BestPathEnd()), which is useful in endpointing and in situations where you - might want to frequently access the best path. - - This is only templated on the FST type, since the Token type is required to - be BackpointerToken. Actually it only makes sense to instantiate - LatticeFasterDecoderTpl with Token == BackpointerToken if you do so - indirectly via this child class. - */ -template -class LatticeFasterOnlineDecoderTpl - : public LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using Token = decoder::BackpointerToken; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterOnlineDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : LatticeFasterDecoderTpl(fst, config, context_graph) {} - - // This version of the initializer takes ownership of 'fst', and will delete - // it when this object is destroyed. - LatticeFasterOnlineDecoderTpl(const LatticeFasterDecoderConfig &config, - FST *fst) - : LatticeFasterDecoderTpl(config, fst) {} - - struct BestPathIterator { - void *tok; - int32 frame; - // note, "frame" is the frame-index of the frame you'll get the - // transition-id for next time, if you call TraceBackBestPath on this - // iterator (assuming it's not an epsilon transition). Note that this - // is one less than you might reasonably expect, e.g. it's -1 for - // the nonemitting transitions before the first frame. - BestPathIterator(void *t, int32 f) : tok(t), frame(f) {} - bool Done() const { return tok == NULL; } - }; - - /// Outputs an FST corresponding to the single best path through the lattice. - /// This is quite efficient because it doesn't get the entire raw lattice and - /// find the best path through it; instead, it uses the BestPathEnd and - /// BestPathIterator so it basically traces it back through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// This function does a self-test of GetBestPath(). Returns true on - /// success; returns false and prints a warning on failure. - bool TestGetBestPath(bool use_final_probs = true) const; - - /// This function returns an iterator that can be used to trace back - /// the best path. 
If use_final_probs == true and at least one final state - /// survived till the end, it will use the final-probs in working out the best - /// final Token, and will output the final cost to *final_cost (if non-NULL), - /// else it will use only the forward likelihood, and will put zero in - /// *final_cost (if non-NULL). - /// Requires that NumFramesDecoded() > 0. - BestPathIterator BestPathEnd(bool use_final_probs, - BaseFloat *final_cost = NULL) const; - - /// This function can be used in conjunction with BestPathEnd() to trace back - /// the best path one link at a time (e.g. this can be useful in endpoint - /// detection). By "link" we mean a link in the graph; not all links cross - /// frame boundaries, but each time you see a nonzero ilabel you can interpret - /// that as a frame. The return value is the updated iterator. It outputs - /// the ilabel and olabel, and the (graph and acoustic) weight to the "arc" - /// pointer, while leaving its "nextstate" variable unchanged. - BestPathIterator TraceBackBestPath(BestPathIterator iter, - LatticeArc *arc) const; - - /// Behaves the same as GetRawLattice but only processes tokens whose - /// extra_cost is smaller than the best-cost plus the specified beam. - /// It is only worthwhile to call this function if beam is less than - /// the lattice_beam specified in the config; otherwise, it would - /// return essentially the same thing as GetRawLattice, but more slowly. - bool GetRawLatticePruned(Lattice *ofst, bool use_final_probs, - BaseFloat beam) const; - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterOnlineDecoderTpl); -}; - -typedef LatticeFasterOnlineDecoderTpl LatticeFasterOnlineDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstaddselfloops.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstaddselfloops.cc deleted file mode 100644 index 145bf006f2324136c5fea4a8d0012a7a4126c646..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstaddselfloops.cc +++ /dev/null @@ -1,100 +0,0 @@ -// fstbin/fstaddselfloops.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#include "util/simple-io-funcs.h" - -/* some test examples: - pushd ~/tmpdir - ( echo 3; echo 4) > in.list - ( echo 5; echo 6) > out.list - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstaddselfloops in.list out.list - | fstprint ( echo "0 1 0 1"; echo " 0 2 1 0"; echo "1 0"; echo "2 0"; ) | - fstcompile | fstaddselfloops in.list out.list | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Adds self-loops to states of an FST to propagate disambiguation " - "symbols through it\n" - "They are added on each final state and each state with non-epsilon " - "output symbols\n" - "on at least one arc out of the state. Useful in conjunction with " - "predeterminize\n" - "\n" - "Usage: fstaddselfloops in-disambig-list out-disambig-list [in.fst " - "[out.fst] ]\n" - "E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst\n" - "in.list and out.list are lists of integers, one per line, of the\n" - "same length.\n"; - - ParseOptions po(usage); - po.Read(argc, argv); - - if (po.NumArgs() < 2 || po.NumArgs() > 4) { - po.PrintUsage(); - exit(1); - } - - std::string disambig_in_rxfilename = po.GetArg(1), - disambig_out_rxfilename = po.GetArg(2), - fst_in_filename = po.GetOptArg(3), - fst_out_filename = po.GetOptArg(4); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - std::vector disambig_in; - if (!ReadIntegerVectorSimple(disambig_in_rxfilename, &disambig_in)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_in_rxfilename); - - std::vector disambig_out; - if (!ReadIntegerVectorSimple(disambig_out_rxfilename, &disambig_out)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_out_rxfilename); - - if (disambig_in.size() != disambig_out.size()) - KALDI_ERR - << "fstaddselfloops: mismatch in size of disambiguation symbols"; - - AddSelfLoops(fst, disambig_in, disambig_out); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstdeterminizestar.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstdeterminizestar.cc deleted file mode 100644 index e818143025c0fd5d389c28c77715d65711fe63f1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstdeterminizestar.cc +++ /dev/null @@ -1,114 +0,0 @@ -// fstbin/fstdeterminizestar.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#if !defined(_MSC_VER) && !defined(__APPLE__) -#include // Comment this line and the call to signal below if -// it causes compilation problems. It is only to enable a debugging procedure -// when determinization does not terminate. We are disabling this code if -// compiling on Windows because signal.h is not available there, and on -// MacOS due to a problem with in the initial release of Sierra. -#endif - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 1 1 0"; echo "0 0" ) | fstcompile | - fstdeterminizestar | fstprint # this last one fails [correctly]: ( echo "0 0 0 - 1"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - - cd ~/tmpdir - while true; do - fstrand > 1.fst - fstpredeterminize out.lst 1.fst | fstdeterminizestar | fstrmsymbols out.lst - > 2.fst fstequivalent --random=true 1.fst 2.fst || echo "Test failed" echo -n - "." done - - Test of debugging [with non-determinizable input]: - ( echo " 0 0 1 0 1.0"; echo "0 1 1 0"; echo "1 1 1 0 0"; echo "0 2 2 0"; echo - "2"; echo "1" ) | fstcompile | fstdeterminizestar kill -SIGUSR1 [the process-id - of fstdeterminizestar] # prints out a bunch of debugging output showing the - mess it got itself into. -*/ - -bool debug_location = false; -void signal_handler(int) { debug_location = true; } - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Removes epsilons and determinizes in one step\n" - "\n" - "Usage: fstdeterminizestar [in.fst [out.fst] ]\n" - "\n" - "See also: fstdeterminizelog, lattice-determinize\n"; - - float delta = kDelta; - int max_states = -1; - bool use_log = false; - ParseOptions po(usage); - po.Register("use-log", &use_log, "Determinize in log semiring."); - po.Register("delta", &delta, - "Delta value used to determine equivalence of weights."); - po.Register( - "max-states", &max_states, - "Maximum number of states in determinized FST before it will abort."); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_str = po.GetOptArg(1), fst_out_str = po.GetOptArg(2); - - // This enables us to get traceback info from determinization that is - // not seeming to terminate. -#if !defined(_MSC_VER) && !defined(__APPLE__) - signal(SIGUSR1, signal_handler); -#endif - // Normal case: just files. - VectorFst *fst = ReadFstKaldi(fst_in_str); - - ArcSort(fst, ILabelCompare()); // improves speed. 
- if (use_log) { - DeterminizeStarInLog(fst, delta, &debug_location, max_states); - } else { - VectorFst det_fst; - DeterminizeStar(*fst, &det_fst, delta, &debug_location, max_states); - *fst = det_fst; // will do shallow copy and then det_fst goes - // out of scope anyway. - } - WriteFstKaldi(*fst, fst_out_str); - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstisstochastic.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstisstochastic.cc deleted file mode 100644 index 468ed0daa7d37cb9a25cf25264f86e48e137b975..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstisstochastic.cc +++ /dev/null @@ -1,91 +0,0 @@ -// fstbin/fstisstochastic.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -// e.g. of test: -// echo " 0 0" | fstcompile | fstisstochastic -// should return 0 and print "0 0" [meaning, min and -// max weight are one = exp(0)] -// echo " 0 1" | fstcompile | fstisstochastic -// should return 1, not stochastic, and print 1 1 -// (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic should return 0, stochastic; it prints "0 -// -1.78e-07" for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo -// "1 0" ) | fstcompile | fstisstochastic --test-in-log=false should return 1, -// not stochastic in tropical; it prints "0 0.693147" for me (echo "0 0 0 0 0 "; -// echo "0 1 0 0 0 "; echo "1 0" ) | fstcompile | fstisstochastic -// --test-in-log=false should return 0, stochastic in tropical; it prints "0 0" -// for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic --test-in-log=false --delta=1 returns 0 even -// though not stochastic because we gave it an absurdly large delta. 
- -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Checks whether an FST is stochastic and exits with success if so.\n" - "Prints out maximum error (in log units).\n" - "\n" - "Usage: fstisstochastic [ in.fst ]\n"; - - float delta = 0.01; - bool test_in_log = true; - - ParseOptions po(usage); - po.Register("delta", &delta, "Maximum error to accept."); - po.Register("test-in-log", &test_in_log, - "Test stochasticity in log semiring."); - po.Read(argc, argv); - - if (po.NumArgs() > 1) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1); - - Fst *fst = ReadFstKaldiGeneric(fst_in_filename); - - bool ans; - StdArc::Weight min, max; - if (test_in_log) - ans = IsStochasticFstInLog(*fst, delta, &min, &max); - else - ans = IsStochasticFst(*fst, delta, &min, &max); - - std::cout << min.Value() << " " << max.Value() << '\n'; - delete fst; - if (ans) - return 0; // success; - else - return 1; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstminimizeencoded.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstminimizeencoded.cc deleted file mode 100644 index ae9ca6d75abe67d9a195572dd6d91ec3c7b44851..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fstminimizeencoded.cc +++ /dev/null @@ -1,74 +0,0 @@ -// fstbin/fstminimizeencoded.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstminimizeencoded | fstprint - ( echo "0 1 0 0"; echo " 0 2 0 0"; echo "1 0"; echo "2 0"; ) | fstcompile | - fstminimizeencoded | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Minimizes FST after encoding [similar to fstminimize, but no " - "weight-pushing]\n" - "\n" - "Usage: fstminimizeencoded [in.fst [out.fst] ]\n"; - - float delta = kDelta; - ParseOptions po(usage); - po.Register("delta", &delta, - "Delta likelihood used for quantization of weights"); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1), - fst_out_filename = po.GetOptArg(2); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - MinimizeEncoded(fst, delta); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fsttablecompose.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fsttablecompose.cc deleted file mode 100644 index bdd476da78b8cb8823c60abf33b5278e05bfd92c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstbin/fsttablecompose.cc +++ /dev/null @@ -1,133 +0,0 @@ -// fstbin/fsttablecompose.cc - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "fstext/table-matcher.h" -#include "util/parse-options.h" - -/* - cd ~/tmpdir - while true; do - fstrand | fstarcsort --sort_type=olabel > 1.fst; fstrand | fstarcsort - > 2.fst fstcompose 1.fst 2.fst > 3a.fst fsttablecompose 1.fst 2.fst > 3b.fst - fstequivalent --random=true 3a.fst 3b.fst || echo "Test failed" - echo -n "." - done - -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - /* - fsttablecompose should always give equivalent results to compose, - but it is more efficient for certain kinds of inputs. 
- In particular, it is useful when, say, the left FST has states - that typically either have epsilon olabels, or - one transition out for each of the possible symbols (as the - olabel). The same with the input symbols of the right-hand FST - is possible. - */ - - const char *usage = - "Composition algorithm [between two FSTs of standard type, in " - "tropical\n" - "semiring] that is more efficient for certain cases-- in particular,\n" - "where one of the FSTs (the left one, if --match-side=left) has large\n" - "out-degree\n" - "\n" - "Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) " - "(fst2-rxfilename|fst2-rspecifier) [(out-rxfilename|out-rspecifier)]\n"; - - ParseOptions po(usage); - - TableComposeOptions opts; - std::string match_side = "left"; - std::string compose_filter = "sequence"; - - po.Register("connect", &opts.connect, "If true, trim FST before output."); - po.Register("match-side", &match_side, - "Side of composition to do table " - "match, one of: \"left\" or \"right\"."); - po.Register("compose-filter", &compose_filter, - "Composition filter to use, " - "one of: \"alt_sequence\", \"auto\", \"match\", \"sequence\""); - - po.Read(argc, argv); - - if (match_side == "left") { - opts.table_match_type = MATCH_OUTPUT; - } else if (match_side == "right") { - opts.table_match_type = MATCH_INPUT; - } else { - KALDI_ERR << "Invalid match-side option: " << match_side; - } - - if (compose_filter == "alt_sequence") { - opts.filter_type = ALT_SEQUENCE_FILTER; - } else if (compose_filter == "auto") { - opts.filter_type = AUTO_FILTER; - } else if (compose_filter == "match") { - opts.filter_type = MATCH_FILTER; - } else if (compose_filter == "sequence") { - opts.filter_type = SEQUENCE_FILTER; - } else { - KALDI_ERR << "Invalid compose-filter option: " << compose_filter; - } - - if (po.NumArgs() < 2 || po.NumArgs() > 3) { - po.PrintUsage(); - exit(1); - } - - std::string fst1_in_str = po.GetArg(1), fst2_in_str = po.GetArg(2), - fst_out_str = po.GetOptArg(3); - - VectorFst *fst1 = ReadFstKaldi(fst1_in_str); - - VectorFst *fst2 = ReadFstKaldi(fst2_in_str); - - // Checks if is olabel sorted and is ilabel sorted. - if (fst1->Properties(fst::kOLabelSorted, true) == 0) { - KALDI_WARN << "The first FST is not olabel sorted."; - } - if (fst2->Properties(fst::kILabelSorted, true) == 0) { - KALDI_WARN << "The second FST is not ilabel sorted."; - } - - VectorFst composed_fst; - - TableCompose(*fst1, *fst2, &composed_fst, opts); - - delete fst1; - delete fst2; - - WriteFstKaldi(composed_fst, fst_out_str); - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstext/determinize-lattice-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstext/determinize-lattice-inl.h deleted file mode 100644 index 0bfbc8f41c7e439b1fac037f60490e04fdcbdd8b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/fstext/determinize-lattice-inl.h +++ /dev/null @@ -1,1357 +0,0 @@ -// fstext/determinize-lattice-inl.h - -// Copyright 2009-2012 Microsoft Corporation -// 2012-2013 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -#define KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -// Do not include this file directly. It is included by determinize-lattice.h - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fst { - -// This class maps back and forth from/to integer id's to sequences of strings. -// used in determinization algorithm. It is constructed in such a way that -// finding the string-id of the successor of (string, next-label) has constant -// time. - -// Note: class IntType, typically int32, is the type of the element in the -// string (typically a template argument of the CompactLatticeWeightTpl). - -template -class LatticeStringRepository { - public: - struct Entry { - const Entry *parent; // NULL for empty string. - IntType i; - inline bool operator==(const Entry &other) const { - return (parent == other.parent && i == other.i); - } - Entry() {} - Entry(const Entry &e) : parent(e.parent), i(e.i) {} - }; - // Note: all Entry* pointers returned in function calls are - // owned by the repository itself, not by the caller! - - // Interface guarantees empty string is NULL. - inline const Entry *EmptyString() { return NULL; } - - // Returns string of "parent" with i appended. Pointer - // owned by repository - const Entry *Successor(const Entry *parent, IntType i) { - new_entry_->parent = parent; - new_entry_->i = i; - - std::pair pr = set_.insert(new_entry_); - if (pr.second) { // Was successfully inserted (was not there). We need to - // replace the element we inserted, which resides on the - // stack, with one from the heap. - const Entry *ans = new_entry_; - new_entry_ = new Entry(); - return ans; - } else { // Was not inserted because an equivalent Entry already - // existed. - return *pr.first; - } - } - - const Entry *Concatenate(const Entry *a, const Entry *b) { - if (a == NULL) - return b; - else if (b == NULL) - return a; - std::vector v; - ConvertToVector(b, &v); - const Entry *ans = a; - for (size_t i = 0; i < v.size(); i++) ans = Successor(ans, v[i]); - return ans; - } - const Entry *CommonPrefix(const Entry *a, const Entry *b) { - std::vector a_vec, b_vec; - ConvertToVector(a, &a_vec); - ConvertToVector(b, &b_vec); - const Entry *ans = NULL; - for (size_t i = 0; - i < a_vec.size() && i < b_vec.size() && a_vec[i] == b_vec[i]; i++) - ans = Successor(ans, a_vec[i]); - return ans; - } - - // removes any elements from b that are not part of - // a common prefix with a. - void ReduceToCommonPrefix(const Entry *a, std::vector *b) { - size_t a_size = Size(a), b_size = b->size(); - while (a_size > b_size) { - a = a->parent; - a_size--; - } - if (b_size > a_size) b_size = a_size; - typename std::vector::iterator b_begin = b->begin(); - while (a_size != 0) { - if (a->i != *(b_begin + a_size - 1)) b_size = a_size - 1; - a = a->parent; - a_size--; - } - if (b_size != b->size()) b->resize(b_size); - } - - // removes the first n elements of a. 
- const Entry *RemovePrefix(const Entry *a, size_t n) { - if (n == 0) return a; - std::vector a_vec; - ConvertToVector(a, &a_vec); - assert(a_vec.size() >= n); - const Entry *ans = NULL; - for (size_t i = n; i < a_vec.size(); i++) ans = Successor(ans, a_vec[i]); - return ans; - } - - // Returns true if a is a prefix of b. If a is prefix of b, - // time taken is |b| - |a|. Else, time taken is |b|. - bool IsPrefixOf(const Entry *a, const Entry *b) const { - if (a == NULL) return true; // empty string prefix of all. - if (a == b) return true; - if (b == NULL) return false; - return IsPrefixOf(a, b->parent); - } - - inline size_t Size(const Entry *entry) const { - size_t ans = 0; - while (entry != NULL) { - ans++; - entry = entry->parent; - } - return ans; - } - - void ConvertToVector(const Entry *entry, std::vector *out) const { - size_t length = Size(entry); - out->resize(length); - if (entry != NULL) { - typename std::vector::reverse_iterator iter = out->rbegin(); - while (entry != NULL) { - *iter = entry->i; - entry = entry->parent; - ++iter; - } - } - } - - const Entry *ConvertFromVector(const std::vector &vec) { - const Entry *e = NULL; - for (size_t i = 0; i < vec.size(); i++) e = Successor(e, vec[i]); - return e; - } - - LatticeStringRepository() { new_entry_ = new Entry; } - - void Destroy() { - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) - delete *iter; - SetType tmp; - tmp.swap(set_); - if (new_entry_) { - delete new_entry_; - new_entry_ = NULL; - } - } - - // Rebuild will rebuild this object, guaranteeing only - // to preserve the Entry values that are in the vector pointed - // to (this list does not have to be unique). The point of - // this is to save memory. - void Rebuild(const std::vector &to_keep) { - SetType tmp_set; - for (typename std::vector::const_iterator iter = - to_keep.begin(); - iter != to_keep.end(); ++iter) - RebuildHelper(*iter, &tmp_set); - // Now delete all elems not in tmp_set. - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) { - if (tmp_set.count(*iter) == 0) - delete (*iter); // delete the Entry; not needed. - } - set_.swap(tmp_set); - } - - ~LatticeStringRepository() { Destroy(); } - int32 MemSize() const { - return set_.size() * sizeof(Entry) * 2; // this is a lower bound - // on the size this structure might take. - } - - private: - class EntryKey { // Hash function object. - public: - inline size_t operator()(const Entry *entry) const { - size_t prime = 49109; - return static_cast(entry->i) + - prime * reinterpret_cast(entry->parent); - } - }; - class EntryEqual { - public: - inline bool operator()(const Entry *e1, const Entry *e2) const { - return (*e1 == *e2); - } - }; - typedef std::unordered_set SetType; - - void RebuildHelper(const Entry *to_add, SetType *tmp_set) { - while (true) { - if (to_add == NULL) return; - typename SetType::iterator iter = tmp_set->find(to_add); - if (iter == tmp_set->end()) { // not in tmp_set. - tmp_set->insert(to_add); - to_add = to_add->parent; // and loop. - } else { - return; - } - } - } - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeStringRepository); - Entry *new_entry_; // We always have a pre-allocated Entry ready to use, - // to avoid unnecessary news and deletes. - SetType set_; -}; - -// class LatticeDeterminizer is templated on the same types that -// CompactLatticeWeight is templated on: the base weight (Weight), typically -// LatticeWeightTpl etc. but could also be e.g. 
TropicalWeight, and the -// IntType, typically int32, used for the output symbols in the compact -// representation of strings [note: the output symbols would usually be -// p.d.f. id's in the anticipated use of this code] It has a special requirement -// on the Weight type: that there should be a Compare function on the weights -// such that Compare(w1, w2) returns -1 if w1 < w2, 0 if w1 == w2, and +1 if w1 -// > w2. This requires that there be a total order on the weights. - -template -class LatticeDeterminizer { - public: - // Output to Gallic acceptor (so the strings go on weights, and there is a 1-1 - // correspondence between our states and the states in ofst. If destroy == - // true, release memory as we go (but we cannot output again). - - typedef CompactLatticeWeightTpl CompactWeight; - typedef ArcTpl - CompactArc; // arc in compact, acceptor form of lattice - typedef ArcTpl Arc; // arc in non-compact version of lattice - - // Output to standard FST with CompactWeightTpl as its weight type - // (the weight stores the original output-symbol strings). If destroy == - // true, release memory as we go (but we cannot output again). - void Output(MutableFst *ofst, bool destroy = true) { - assert(determinized_); - typedef typename Arc::StateId StateId; - StateId nStates = static_cast(output_arcs_.size()); - if (destroy) FreeMostMemory(); - ofst->DeleteStates(); - ofst->SetStart(kNoStateId); - if (nStates == 0) { - return; - } - for (StateId s = 0; s < nStates; s++) { - OutputStateId news = ofst->AddState(); - assert(news == s); - } - ofst->SetStart(0); - // now process transitions. - for (StateId this_state = 0; this_state < nStates; this_state++) { - std::vector &this_vec(output_arcs_[this_state]); - typename std::vector::const_iterator iter = this_vec.begin(), - end = this_vec.end(); - - for (; iter != end; ++iter) { - const TempArc &temp_arc(*iter); - CompactArc new_arc; - std::vector is not treated as epsilon, create a common end state for - // all transitions accepting the , since they do not back off. This small - // optimization saves about 2% states in an average grammar. - if (sub_eps_ == 0) { - eos_state_ = fst_->AddState(); - fst_->SetFinal(eos_state_, 0); - } -} - -template -void ArpaLmCompilerImpl::ConsumeNGram(const NGram& ngram, - bool is_highest) { - // Generally, we do the following. Suppose we are adding an n-gram "A B - // C". Then find the node for "A B", add a new node for "A B C", and connect - // them with the arc accepting "C" with the specified weight. Also, add a - // backoff arc from the new "A B C" node to its backoff state "B C". - // - // Two notable exceptions are the highest order n-grams, and final n-grams. - // - // When adding a highest order n-gram (e. g., our "A B C" is in a 3-gram LM), - // the following optimization is performed. There is no point adding a node - // for "A B C" with a "C" arc from "A B", since there will be no other - // arcs ingoing to this node, and an epsilon backoff arc into the backoff - // model "B C", with the weight of \bar{1}. To save a node, create an arc - // accepting "C" directly from "A B" to "B C". This saves as many nodes - // as there are the highest order n-grams, which is typically about half - // the size of a large 3-gram model. - // - // Indeed, this does not apply to n-grams ending in EOS, since they do not - // back off. These are special, as they do not have a back-off state, and - // the node for "(..anything..) " is always final. 
These are handled - // in one of the two possible ways, If symbols and are being - // replaced by epsilons, neither node nor arc is created, and the logprob - // of the n-gram is applied to its source node as final weight. If and - // are preserved, then a special final node for is allocated and - // used as the destination of the "" acceptor arc. - HistKey heads(ngram.words.begin(), ngram.words.end() - 1); - typename HistoryMap::iterator source_it = history_.find(heads); - if (source_it == history_.end()) { - // There was no "A B", therefore the probability of "A B C" is zero. - // Print a warning and discard current n-gram. - if (parent_->ShouldWarn()) - KALDI_WARN << parent_->LineReference() - << " skipped: no parent (n-1)-gram exists"; - return; - } - - StateId source = source_it->second; - StateId dest; - Symbol sym = ngram.words.back(); - float weight = -ngram.logprob; - if (sym == sub_eps_ || sym == 0) { - KALDI_ERR << " or disambiguation symbol " << sym - << "found in the ARPA file. "; - } - if (sym == eos_symbol_) { - if (sub_eps_ == 0) { - // Keep as a real symbol when not substituting. - dest = eos_state_; - } else { - // Treat as if it was epsilon: mark source final, with the weight - // of the n-gram. - fst_->SetFinal(source, weight); - return; - } - } else { - // For the highest order n-gram, this may find an existing state, for - // non-highest, will create one (unless there are duplicate n-grams - // in the grammar, which cannot be reliably detected if highest order, - // so we better do not do that at all). - dest = AddStateWithBackoff( - HistKey(ngram.words.begin() + (is_highest ? 1 : 0), ngram.words.end()), - -ngram.backoff); - } - - if (sym == bos_symbol_) { - weight = 0; // Accepting is always free. - if (sub_eps_ == 0) { - // is as a real symbol, only accepted in the start state. - source = fst_->AddState(); - fst_->SetStart(source); - } else { - // The new state for unigram history *is* the start state. - fst_->SetStart(dest); - return; - } - } - - // Add arc from source to dest, whichever way it was found. - fst_->AddArc(source, fst::StdArc(sym, sym, weight, dest)); - return; -} - -// Find or create a new state for n-gram defined by key, and ensure it has a -// backoff transition. The key is either the current n-gram for all but -// highest orders, or the tails of the n-gram for the highest order. The -// latter arises from the chain-collapsing optimization described above. -template -StateId ArpaLmCompilerImpl::AddStateWithBackoff(HistKey key, - float backoff) { - typename HistoryMap::iterator dest_it = history_.find(key); - if (dest_it != history_.end()) { - // Found an existing state in the history map. Invariant: if the state in - // the map, then its backoff arc is in the FST. We are done. - return dest_it->second; - } - // Otherwise create a new state and its backoff arc, and register in the map. - StateId dest = fst_->AddState(); - history_[key] = dest; - CreateBackoff(key.Tails(), dest, backoff); - return dest; -} - -// Create a backoff arc for a state. Key is a backoff destination that may or -// may not exist. When the destination is not found, naturally fall back to -// the lower order model, and all the way down until one is found (since the -// 0-gram model is always present, the search is guaranteed to terminate). 
-template -inline void ArpaLmCompilerImpl::CreateBackoff(HistKey key, - StateId state, - float weight) { - typename HistoryMap::iterator dest_it = history_.find(key); - while (dest_it == history_.end()) { - key = key.Tails(); - dest_it = history_.find(key); - } - - // The arc should transduce either or #0 to , depending on the - // epsilon substitution mode. This is the only case when input and output - // label may differ. - fst_->AddArc(state, fst::StdArc(sub_eps_, 0, weight, dest_it->second)); -} - -ArpaLmCompiler::~ArpaLmCompiler() { - if (impl_ != NULL) delete impl_; -} - -void ArpaLmCompiler::HeaderAvailable() { - KALDI_ASSERT(impl_ == NULL); - // Use optimized implementation if the grammar is 4-gram or less, and the - // maximum attained symbol id will fit into the optimized range. - int64 max_symbol = 0; - if (Symbols() != NULL) max_symbol = Symbols()->AvailableKey() - 1; - // If augmenting the symbol table, assume the worst case when all words in - // the model being read are novel. - if (Options().oov_handling == ArpaParseOptions::kAddToSymbols) - max_symbol += NgramCounts()[0]; - - if (NgramCounts().size() <= 4 && max_symbol < OptimizedHistKey::kMaxData) { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - } else { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - KALDI_LOG << "Reverting to slower state tracking because model is large: " - << NgramCounts().size() << "-gram with symbols up to " - << max_symbol; - } -} - -void ArpaLmCompiler::ConsumeNGram(const NGram& ngram) { - // is invalid in tails, in heads of an n-gram. - for (int i = 0; i < ngram.words.size(); ++i) { - if ((i > 0 && ngram.words[i] == Options().bos_symbol) || - (i + 1 < ngram.words.size() && - ngram.words[i] == Options().eos_symbol)) { - if (ShouldWarn()) - KALDI_WARN << LineReference() - << " skipped: n-gram has invalid BOS/EOS placement"; - return; - } - } - - bool is_highest = ngram.words.size() == NgramCounts().size(); - impl_->ConsumeNGram(ngram, is_highest); -} - -void ArpaLmCompiler::RemoveRedundantStates() { - fst::StdArc::Label backoff_symbol = sub_eps_; - if (backoff_symbol == 0) { - // The method of removing redundant states implemented in this function - // leads to slow determinization of L o G when people use the older style of - // usage of arpa2fst where the --disambig-symbol option was not specified. - // The issue seems to be that it creates a non-deterministic FST, while G is - // supposed to be deterministic. By 'return'ing below, we just disable this - // method if people were using an older script. This method isn't really - // that consequential anyway, and people will move to the newer-style - // scripts (see current utils/format_lm.sh), so this isn't much of a - // problem. - return; - } - - fst::StdArc::StateId num_states = fst_.NumStates(); - - // replace the #0 symbols on the input of arcs out of redundant states (states - // that are not final and have only a backoff arc leaving them), with . 
- for (fst::StdArc::StateId state = 0; state < num_states; state++) { - if (fst_.NumArcs(state) == 1 && - fst_.Final(state) == fst::TropicalWeight::Zero()) { - fst::MutableArcIterator iter(&fst_, state); - fst::StdArc arc = iter.Value(); - if (arc.ilabel == backoff_symbol) { - arc.ilabel = 0; - iter.SetValue(arc); - } - } - } - - // we could call fst::RemoveEps, and it would have the same effect in normal - // cases, where backoff_symbol != 0 and there are no epsilons in unexpected - // places, but RemoveEpsLocal is a bit safer in case something weird is going - // on; it guarantees not to blow up the FST. - fst::RemoveEpsLocal(&fst_); - KALDI_LOG << "Reduced num-states from " << num_states << " to " - << fst_.NumStates(); -} - -void ArpaLmCompiler::Check() const { - if (fst_.Start() == fst::kNoStateId) { - KALDI_ERR << "Arpa file did not contain the beginning-of-sentence symbol " - << Symbols()->Find(Options().bos_symbol) << "."; - } -} - -void ArpaLmCompiler::ReadComplete() { - fst_.SetInputSymbols(Symbols()); - fst_.SetOutputSymbols(Symbols()); - RemoveRedundantStates(); - Check(); -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/lm/arpa-lm-compiler.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/lm/arpa-lm-compiler.h deleted file mode 100644 index 069c71bd0e6f5acf0b9521ec1ef46796eb31fe4d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/lm/arpa-lm-compiler.h +++ /dev/null @@ -1,63 +0,0 @@ -// lm/arpa-lm-compiler.h - -// Copyright 2009-2011 Gilles Boulianne -// Copyright 2016 Smart Action LLC (kkm) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_LM_ARPA_LM_COMPILER_H_ -#define KALDI_LM_ARPA_LM_COMPILER_H_ - -#include - -#include "lm/arpa-file-parser.h" - -namespace kaldi { - -class ArpaLmCompilerImplInterface; - -class ArpaLmCompiler : public ArpaFileParser { - public: - ArpaLmCompiler(const ArpaParseOptions& options, int sub_eps, - fst::SymbolTable* symbols) - : ArpaFileParser(options, symbols), sub_eps_(sub_eps), impl_(NULL) {} - ~ArpaLmCompiler(); - - const fst::StdVectorFst& Fst() const { return fst_; } - fst::StdVectorFst* MutableFst() { return &fst_; } - - protected: - // ArpaFileParser overrides. - virtual void HeaderAvailable(); - virtual void ConsumeNGram(const NGram& ngram); - virtual void ReadComplete(); - - private: - // this function removes states that only have a backoff arc coming - // out of them. - void RemoveRedundantStates(); - void Check() const; - - int sub_eps_; - ArpaLmCompilerImplInterface* impl_; // Owned. 
- fst::StdVectorFst fst_; - template - friend class ArpaLmCompilerImpl; -}; - -} // namespace kaldi - -#endif // KALDI_LM_ARPA_LM_COMPILER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/lmbin/arpa2fst.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/lmbin/arpa2fst.cc deleted file mode 100644 index 881a45c5b37810247ea38dae56237f59b5554a9c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/lmbin/arpa2fst.cc +++ /dev/null @@ -1,145 +0,0 @@ -// bin/arpa2fst.cc -// -// Copyright 2009-2011 Gilles Boulianne. -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABILITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "lm/arpa-lm-compiler.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; // NOLINT - try { - const char *usage = - "Convert an ARPA format language model into an FST\n" - "Usage: arpa2fst [opts] \n" - " e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table=" - "data/lang/words.txt lm/input.arpa G.fst\n\n" - "Note: When called without switches, the output G.fst will contain\n" - "an embedded symbol table. This is compatible with the way a previous\n" - "version of arpa2fst worked.\n"; - - ParseOptions po(usage); - - ArpaParseOptions options; - options.Register(&po); - - // Option flags. - std::string bos_symbol = ""; - std::string eos_symbol = ""; - std::string disambig_symbol; - std::string read_syms_filename; - std::string write_syms_filename; - bool keep_symbols = false; - bool ilabel_sort = true; - - po.Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol"); - po.Register("eos-symbol", &eos_symbol, "End of sentence symbol"); - po.Register("disambig-symbol", &disambig_symbol, - "Disambiguator. If provided (e. g. #0), used on input side of " - "backoff links, and and are replaced with epsilons"); - po.Register("read-symbol-table", &read_syms_filename, - "Use existing symbol table"); - po.Register("write-symbol-table", &write_syms_filename, - "Write generated symbol table to a file"); - po.Register("keep-symbols", &keep_symbols, - "Store symbol table with FST. Symbols always saved to FST if " - "symbol tables are neither read or written (otherwise symbols " - "would be lost entirely)"); - po.Register("ilabel-sort", &ilabel_sort, "Ilabel-sort the output FST"); - - po.Read(argc, argv); - - if (po.NumArgs() != 1 && po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - std::string arpa_rxfilename = po.GetArg(1), - fst_wxfilename = po.GetOptArg(2); - - int64 disambig_symbol_id = 0; - - fst::SymbolTable *symbols; - if (!read_syms_filename.empty()) { - // Use existing symbols. Required symbols must be in the table. 
- kaldi::Input kisym(read_syms_filename); - symbols = fst::SymbolTable::ReadText( - kisym.Stream(), PrintableWxfilename(read_syms_filename)); - if (symbols == NULL) - KALDI_ERR << "Could not read symbol table from file " - << read_syms_filename; - - options.oov_handling = ArpaParseOptions::kSkipNGram; - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->Find(disambig_symbol); - if (disambig_symbol_id == -1) // fst::kNoSymbol - KALDI_ERR << "Symbol table " << read_syms_filename - << " has no symbol for " << disambig_symbol; - } - } else { - // Create a new symbol table and populate it from ARPA file. - symbols = new fst::SymbolTable(PrintableWxfilename(fst_wxfilename)); - options.oov_handling = ArpaParseOptions::kAddToSymbols; - symbols->AddSymbol("", 0); - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->AddSymbol(disambig_symbol); - } - } - - // Add or use existing BOS and EOS. - options.bos_symbol = symbols->AddSymbol(bos_symbol); - options.eos_symbol = symbols->AddSymbol(eos_symbol); - - // If producing new (not reading existing) symbols and not saving them, - // need to keep symbols with FST, otherwise they would be lost. - if (read_syms_filename.empty() && write_syms_filename.empty()) - keep_symbols = true; - - // Actually compile LM. - KALDI_ASSERT(symbols != NULL); - ArpaLmCompiler lm_compiler(options, disambig_symbol_id, symbols); - { - Input ki(arpa_rxfilename); - lm_compiler.Read(ki.Stream()); - } - - // Sort the FST in-place if requested by options. - if (ilabel_sort) { - fst::ArcSort(lm_compiler.MutableFst(), fst::StdILabelCompare()); - } - - // Write symbols if requested. - if (!write_syms_filename.empty()) { - kaldi::Output kosym(write_syms_filename, false); - symbols->WriteText(kosym.Stream()); - } - - // Write LM FST. - bool write_binary = true, write_header = false; - kaldi::Output kofst(fst_wxfilename, write_binary, write_header); - fst::FstWriteOptions wopts(PrintableWxfilename(fst_wxfilename)); - wopts.write_isymbols = wopts.write_osymbols = keep_symbols; - lm_compiler.Fst().Write(kofst.Stream(), wopts); - - delete symbols; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/basic-filebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/basic-filebuf.h deleted file mode 100644 index 22ec891064d5955c8b1d255e0d34781a9f505a38..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/basic-filebuf.h +++ /dev/null @@ -1,952 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// This is a modified version of the std::basic_filebuf from libc++ -// Copyright 20XX LLVM -// (http://libcxx.llvm.org/). -// It allows one to create basic_filebuf from an existing FILE* handle or file -// descriptor. -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source License licenses. See LICENSE.TXT for details (included at the -// bottom). 
-/////////////////////////////////////////////////////////////////////////////// -#ifndef KALDI_UTIL_BASIC_FILEBUF_H_ -#define KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include - -/////////////////////////////////////////////////////////////////////////////// -namespace kaldi { -/////////////////////////////////////////////////////////////////////////////// -template > -class basic_filebuf : public std::basic_streambuf { - public: - typedef CharT char_type; - typedef Traits traits_type; - typedef typename traits_type::int_type int_type; - typedef typename traits_type::pos_type pos_type; - typedef typename traits_type::off_type off_type; - typedef typename traits_type::state_type state_type; - - basic_filebuf(); - basic_filebuf(basic_filebuf&& rhs); - virtual ~basic_filebuf(); - - basic_filebuf& operator=(basic_filebuf&& rhs); - void swap(basic_filebuf& rhs); - - bool is_open() const; - basic_filebuf* open(const char* s, std::ios_base::openmode mode); - basic_filebuf* open(const std::string& s, std::ios_base::openmode mode); - basic_filebuf* open(int fd, std::ios_base::openmode mode); - basic_filebuf* open(FILE* f, std::ios_base::openmode mode); - basic_filebuf* close(); - - FILE* file() { return this->_M_file; } - int fd() { return fileno(this->_M_file); } - - protected: - int_type underflow() override; - int_type pbackfail(int_type c = traits_type::eof()) override; - int_type overflow(int_type c = traits_type::eof()) override; - std::basic_streambuf* setbuf( - char_type* s, std::streamsize n) override; - pos_type seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - pos_type seekpos(pos_type sp, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - int sync() override; - void imbue(const std::locale& loc) override; - - protected: - char* _M_extbuf; - const char* _M_extbufnext; - const char* _M_extbufend; - char _M_extbuf_min[8]; - size_t _M_ebs; - char_type* _M_intbuf; - size_t _M_ibs; - FILE* _M_file; - const std::codecvt* _M_cv; - state_type _M_st; - state_type _M_st_last; - std::ios_base::openmode _M_om; - std::ios_base::openmode _M_cm; - bool _M_owns_eb; - bool _M_owns_ib; - bool _M_always_noconv; - - const char* _M_get_mode(std::ios_base::openmode mode); - bool _M_read_mode(); - void _M_write_mode(); -}; - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf() - : _M_extbuf(nullptr), - _M_extbufnext(nullptr), - _M_extbufend(nullptr), - _M_ebs(0), - _M_intbuf(nullptr), - _M_ibs(0), - _M_file(nullptr), - _M_cv(nullptr), - _M_st(), - _M_st_last(), - _M_om(std::ios_base::openmode(0)), - _M_cm(std::ios_base::openmode(0)), - _M_owns_eb(false), - _M_owns_ib(false), - _M_always_noconv(false) { - if (std::has_facet >( - this->getloc())) { - _M_cv = &std::use_facet >( - this->getloc()); - _M_always_noconv = _M_cv->always_noconv(); - } - setbuf(0, 4096); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf(basic_filebuf&& rhs) - : std::basic_streambuf(rhs) { - if (rhs._M_extbuf == rhs._M_extbuf_min) { - _M_extbuf = _M_extbuf_min; - _M_extbufnext = _M_extbuf + (rhs._M_extbufnext - rhs._M_extbuf); - _M_extbufend = _M_extbuf + (rhs._M_extbufend - rhs._M_extbuf); - } else { - _M_extbuf = rhs._M_extbuf; - _M_extbufnext = 
rhs._M_extbufnext; - _M_extbufend = rhs._M_extbufend; - } - _M_ebs = rhs._M_ebs; - _M_intbuf = rhs._M_intbuf; - _M_ibs = rhs._M_ibs; - _M_file = rhs._M_file; - _M_cv = rhs._M_cv; - _M_st = rhs._M_st; - _M_st_last = rhs._M_st_last; - _M_om = rhs._M_om; - _M_cm = rhs._M_cm; - _M_owns_eb = rhs._M_owns_eb; - _M_owns_ib = rhs._M_owns_ib; - _M_always_noconv = rhs._M_always_noconv; - if (rhs.pbase()) { - if (rhs.pbase() == rhs._M_intbuf) - this->setp(_M_intbuf, _M_intbuf + (rhs.epptr() - rhs.pbase())); - else - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + - (rhs.epptr() - rhs.pbase())); - this->pbump(rhs.pptr() - rhs.pbase()); - } else if (rhs.eback()) { - if (rhs.eback() == rhs._M_intbuf) - this->setg(_M_intbuf, _M_intbuf + (rhs.gptr() - rhs.eback()), - _M_intbuf + (rhs.egptr() - rhs.eback())); - else - this->setg( - reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (rhs.gptr() - rhs.eback()), - reinterpret_cast(_M_extbuf) + - (rhs.egptr() - rhs.eback())); - } - rhs._M_extbuf = nullptr; - rhs._M_extbufnext = nullptr; - rhs._M_extbufend = nullptr; - rhs._M_ebs = 0; - rhs._M_intbuf = nullptr; - rhs._M_ibs = 0; - rhs._M_file = nullptr; - rhs._M_st = state_type(); - rhs._M_st_last = state_type(); - rhs._M_om = std::ios_base::openmode(0); - rhs._M_cm = std::ios_base::openmode(0); - rhs._M_owns_eb = false; - rhs._M_owns_ib = false; - rhs.setg(0, 0, 0); - rhs.setp(0, 0); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf& basic_filebuf::operator=( - basic_filebuf&& rhs) { - close(); - swap(rhs); - return *this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::~basic_filebuf() { - // try - // { - // close(); - // } - // catch (...) 
- // { - // } - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::swap(basic_filebuf& rhs) { - std::basic_streambuf::swap(rhs); - if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - std::swap(_M_extbuf, rhs._M_extbuf); - std::swap(_M_extbufnext, rhs._M_extbufnext); - std::swap(_M_extbufend, rhs._M_extbufend); - } else { - ptrdiff_t ln = _M_extbufnext - _M_extbuf; - ptrdiff_t le = _M_extbufend - _M_extbuf; - ptrdiff_t rn = rhs._M_extbufnext - rhs._M_extbuf; - ptrdiff_t re = rhs._M_extbufend - rhs._M_extbuf; - if (_M_extbuf == _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - _M_extbuf = rhs._M_extbuf; - rhs._M_extbuf = rhs._M_extbuf_min; - } else if (_M_extbuf != _M_extbuf_min && - rhs._M_extbuf == rhs._M_extbuf_min) { - rhs._M_extbuf = _M_extbuf; - _M_extbuf = _M_extbuf_min; - } - _M_extbufnext = _M_extbuf + rn; - _M_extbufend = _M_extbuf + re; - rhs._M_extbufnext = rhs._M_extbuf + ln; - rhs._M_extbufend = rhs._M_extbuf + le; - } - std::swap(_M_ebs, rhs._M_ebs); - std::swap(_M_intbuf, rhs._M_intbuf); - std::swap(_M_ibs, rhs._M_ibs); - std::swap(_M_file, rhs._M_file); - std::swap(_M_cv, rhs._M_cv); - std::swap(_M_st, rhs._M_st); - std::swap(_M_st_last, rhs._M_st_last); - std::swap(_M_om, rhs._M_om); - std::swap(_M_cm, rhs._M_cm); - std::swap(_M_owns_eb, rhs._M_owns_eb); - std::swap(_M_owns_ib, rhs._M_owns_ib); - std::swap(_M_always_noconv, rhs._M_always_noconv); - if (this->eback() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->gptr() - this->eback(); - ptrdiff_t e = this->egptr() - this->eback(); - this->setg(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + n, - reinterpret_cast(_M_extbuf_min) + e); - } else if (this->pbase() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->pptr() - this->pbase(); - ptrdiff_t e = this->epptr() - this->pbase(); - this->setp(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + e); - this->pbump(n); - } - if (rhs.eback() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.gptr() - rhs.eback(); - ptrdiff_t e = rhs.egptr() - rhs.eback(); - rhs.setg(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + n, - reinterpret_cast(rhs._M_extbuf_min) + e); - } else if (rhs.pbase() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.pptr() - rhs.pbase(); - ptrdiff_t e = rhs.epptr() - rhs.pbase(); - rhs.setp(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + e); - rhs.pbump(n); - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline void swap(basic_filebuf& x, - basic_filebuf& y) { - x.swap(y); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline bool basic_filebuf::is_open() const { - return _M_file != nullptr; -} - -/////////////////////////////////////////////////////////////////////////////// -template -const char* basic_filebuf::_M_get_mode( - std::ios_base::openmode mode) { - switch ((mode & ~std::ios_base::ate) | 0) { - case std::ios_base::out: - case std::ios_base::out | std::ios_base::trunc: - return "w"; - case std::ios_base::out | std::ios_base::app: - case std::ios_base::app: - return "a"; - break; - case std::ios_base::in: - return "r"; - case std::ios_base::in | std::ios_base::out: - return "r+"; - case std::ios_base::in | std::ios_base::out | 
std::ios_base::trunc: - return "w+"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app: - case std::ios_base::in | std::ios_base::app: - return "a+"; - case std::ios_base::out | std::ios_base::binary: - case std::ios_base::out | std::ios_base::trunc | std::ios_base::binary: - return "wb"; - case std::ios_base::out | std::ios_base::app | std::ios_base::binary: - case std::ios_base::app | std::ios_base::binary: - return "ab"; - case std::ios_base::in | std::ios_base::binary: - return "rb"; - case std::ios_base::in | std::ios_base::out | std::ios_base::binary: - return "r+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::trunc | - std::ios_base::binary: - return "w+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app | - std::ios_base::binary: - case std::ios_base::in | std::ios_base::app | std::ios_base::binary: - return "a+b"; - default: - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - const char* s, std::ios_base::openmode mode) { - basic_filebuf* rt = nullptr; - if (_M_file == nullptr) { - const char* md = _M_get_mode(mode); - if (md) { - _M_file = fopen(s, md); - if (_M_file) { - rt = this; - _M_om = mode; - if (mode & std::ios_base::ate) { - if (fseek(_M_file, 0, SEEK_END)) { - fclose(_M_file); - _M_file = nullptr; - rt = nullptr; - } - } - } - } - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf* basic_filebuf::open( - const std::string& s, std::ios_base::openmode mode) { - return open(s.c_str(), mode); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - int fd, std::ios_base::openmode mode) { - const char* md = this->_M_get_mode(mode); - if (md) { - this->_M_file = fdopen(fd, md); - this->_M_om = mode; - return this; - } else { - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - FILE* f, std::ios_base::openmode mode) { - this->_M_file = f; - this->_M_om = mode; - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::close() { - basic_filebuf* rt = nullptr; - if (_M_file) { - rt = this; - std::unique_ptr h(_M_file, fclose); - if (sync()) rt = nullptr; - if (fclose(h.release()) == 0) - _M_file = nullptr; - else - rt = nullptr; - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::underflow() { - if (_M_file == nullptr) return traits_type::eof(); - bool initial = _M_read_mode(); - char_type buf; - if (this->gptr() == nullptr) this->setg(&buf, &buf + 1, &buf + 1); - const size_t unget_sz = - initial ? 
0 : std::min((this->egptr() - this->eback()) / 2, 4); - int_type c = traits_type::eof(); - if (this->gptr() == this->egptr()) { - memmove(this->eback(), this->egptr() - unget_sz, - unget_sz * sizeof(char_type)); - if (_M_always_noconv) { - size_t nmemb = - static_cast(this->egptr() - this->eback() - unget_sz); - nmemb = fread(this->eback() + unget_sz, 1, nmemb, _M_file); - if (nmemb != 0) { - this->setg(this->eback(), this->eback() + unget_sz, - this->eback() + unget_sz + nmemb); - c = traits_type::to_int_type(*this->gptr()); - } - } else { - memmove(_M_extbuf, _M_extbufnext, _M_extbufend - _M_extbufnext); - _M_extbufnext = _M_extbuf + (_M_extbufend - _M_extbufnext); - _M_extbufend = - _M_extbuf + - (_M_extbuf == _M_extbuf_min ? sizeof(_M_extbuf_min) : _M_ebs); - size_t nmemb = - std::min(static_cast(_M_ibs - unget_sz), - static_cast(_M_extbufend - _M_extbufnext)); - std::codecvt_base::result r; - _M_st_last = _M_st; - size_t nr = - fread(reinterpret_cast(const_cast(_M_extbufnext)), - 1, nmemb, _M_file); - if (nr != 0) { - if (!_M_cv) throw std::bad_cast(); - _M_extbufend = _M_extbufnext + nr; - char_type* inext; - r = _M_cv->in(_M_st, _M_extbuf, _M_extbufend, _M_extbufnext, - this->eback() + unget_sz, this->eback() + _M_ibs, inext); - if (r == std::codecvt_base::noconv) { - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf), - const_cast(_M_extbufend)); - c = traits_type::to_int_type(*this->gptr()); - } else if (inext != this->eback() + unget_sz) { - this->setg(this->eback(), this->eback() + unget_sz, inext); - c = traits_type::to_int_type(*this->gptr()); - } - } - } - } else { - c = traits_type::to_int_type(*this->gptr()); - } - if (this->eback() == &buf) this->setg(0, 0, 0); - return c; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::pbackfail(int_type c) { - if (_M_file && this->eback() < this->gptr()) { - if (traits_type::eq_int_type(c, traits_type::eof())) { - this->gbump(-1); - return traits_type::not_eof(c); - } - if ((_M_om & std::ios_base::out) || - traits_type::eq(traits_type::to_char_type(c), this->gptr()[-1])) { - this->gbump(-1); - *this->gptr() = traits_type::to_char_type(c); - return c; - } - } - return traits_type::eof(); -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::overflow(int_type c) { - if (_M_file == nullptr) return traits_type::eof(); - _M_write_mode(); - char_type buf; - char_type* pb_save = this->pbase(); - char_type* epb_save = this->epptr(); - if (!traits_type::eq_int_type(c, traits_type::eof())) { - if (this->pptr() == nullptr) this->setp(&buf, &buf + 1); - *this->pptr() = traits_type::to_char_type(c); - this->pbump(1); - } - if (this->pptr() != this->pbase()) { - if (_M_always_noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), sizeof(char_type), nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else { - char* extbe = _M_extbuf; - std::codecvt_base::result r; - do { - if (!_M_cv) throw std::bad_cast(); - const char_type* e; - r = _M_cv->out(_M_st, this->pbase(), this->pptr(), e, _M_extbuf, - _M_extbuf + _M_ebs, extbe); - if (e == this->pbase()) return traits_type::eof(); - if (r == std::codecvt_base::noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else if (r == std::codecvt_base::ok 
|| - r == std::codecvt_base::partial) { - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - if (r == std::codecvt_base::partial) { - this->setp(const_cast(e), this->pptr()); - this->pbump(this->epptr() - this->pbase()); - } - } else { - return traits_type::eof(); - } - } while (r == std::codecvt_base::partial); - } - this->setp(pb_save, epb_save); - } - return traits_type::not_eof(c); -} - -/////////////////////////////////////////////////////////////////////////////// -template -std::basic_streambuf* basic_filebuf::setbuf( - char_type* s, std::streamsize n) { - this->setg(0, 0, 0); - this->setp(0, 0); - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; - _M_ebs = n; - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv && s) { - _M_extbuf = reinterpret_cast(s); - _M_owns_eb = false; - } else { - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } - } else { - _M_extbuf = _M_extbuf_min; - _M_ebs = sizeof(_M_extbuf_min); - _M_owns_eb = false; - } - if (!_M_always_noconv) { - _M_ibs = std::max(n, sizeof(_M_extbuf_min)); - if (s && _M_ibs >= sizeof(_M_extbuf_min)) { - _M_intbuf = s; - _M_owns_ib = false; - } else { - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } else { - _M_ibs = 0; - _M_intbuf = 0; - _M_owns_ib = false; - } - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode) { - if (!_M_cv) throw std::bad_cast(); - int width = _M_cv->encoding(); - if (_M_file == nullptr || (width <= 0 && off != 0) || sync()) - return pos_type(off_type(-1)); - // width > 0 || off == 0 - int whence; - switch (way) { - case std::ios_base::beg: - whence = SEEK_SET; - break; - case std::ios_base::cur: - whence = SEEK_CUR; - break; - case std::ios_base::end: - whence = SEEK_END; - break; - default: - return pos_type(off_type(-1)); - } -#if _WIN32 - if (fseek(_M_file, width > 0 ? width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftell(_M_file); -#else - if (fseeko(_M_file, width > 0 ? 
width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftello(_M_file); -#endif - r.state(_M_st); - return r; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekpos(pos_type sp, std::ios_base::openmode) { - if (_M_file == nullptr || sync()) return pos_type(off_type(-1)); -#if _WIN32 - if (fseek(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#else - if (fseeko(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#endif - _M_st = sp.state(); - return sp; -} - -/////////////////////////////////////////////////////////////////////////////// -template -int basic_filebuf::sync() { - if (_M_file == nullptr) return 0; - if (!_M_cv) throw std::bad_cast(); - if (_M_cm & std::ios_base::out) { - if (this->pptr() != this->pbase()) - if (overflow() == traits_type::eof()) return -1; - std::codecvt_base::result r; - do { - char* extbe; - r = _M_cv->unshift(_M_st, _M_extbuf, _M_extbuf + _M_ebs, extbe); - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) return -1; - } while (r == std::codecvt_base::partial); - if (r == std::codecvt_base::error) return -1; - if (fflush(_M_file)) return -1; - } else if (_M_cm & std::ios_base::in) { - off_type c; - state_type state = _M_st_last; - bool update_st = false; - if (_M_always_noconv) { - c = this->egptr() - this->gptr(); - } else { - int width = _M_cv->encoding(); - c = _M_extbufend - _M_extbufnext; - if (width > 0) { - c += width * (this->egptr() - this->gptr()); - } else { - if (this->gptr() != this->egptr()) { - const int off = _M_cv->length(state, _M_extbuf, _M_extbufnext, - this->gptr() - this->eback()); - c += _M_extbufnext - _M_extbuf - off; - update_st = true; - } - } - } -#if _WIN32 - if (fseek(_M_file_, -c, SEEK_CUR)) return -1; -#else - if (fseeko(_M_file, -c, SEEK_CUR)) return -1; -#endif - if (update_st) _M_st = state; - _M_extbufnext = _M_extbufend = _M_extbuf; - this->setg(0, 0, 0); - _M_cm = std::ios_base::openmode(0); - } - return 0; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::imbue(const std::locale& loc) { - sync(); - _M_cv = &std::use_facet >(loc); - bool old_anc = _M_always_noconv; - _M_always_noconv = _M_cv->always_noconv(); - if (old_anc != _M_always_noconv) { - this->setg(0, 0, 0); - this->setp(0, 0); - // invariant, char_type is char, else we couldn't get here - // need to dump _M_intbuf - if (_M_always_noconv) { - if (_M_owns_eb) delete[] _M_extbuf; - _M_owns_eb = _M_owns_ib; - _M_ebs = _M_ibs; - _M_extbuf = reinterpret_cast(_M_intbuf); - _M_ibs = 0; - _M_intbuf = nullptr; - _M_owns_ib = false; - } else { // need to obtain an _M_intbuf. 
- // If _M_extbuf is user-supplied, use it, else new _M_intbuf - if (!_M_owns_eb && _M_extbuf != _M_extbuf_min) { - _M_ibs = _M_ebs; - _M_intbuf = reinterpret_cast(_M_extbuf); - _M_owns_ib = false; - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } else { - _M_ibs = _M_ebs; - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -bool basic_filebuf::_M_read_mode() { - if (!(_M_cm & std::ios_base::in)) { - this->setp(0, 0); - if (_M_always_noconv) - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + _M_ebs, - reinterpret_cast(_M_extbuf) + _M_ebs); - else - this->setg(_M_intbuf, _M_intbuf + _M_ibs, _M_intbuf + _M_ibs); - _M_cm = std::ios_base::in; - return true; - } - return false; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::_M_write_mode() { - if (!(_M_cm & std::ios_base::out)) { - this->setg(0, 0, 0); - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv) - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (_M_ebs - 1)); - else - this->setp(_M_intbuf, _M_intbuf + (_M_ibs - 1)); - } else { - this->setp(0, 0); - } - _M_cm = std::ios_base::out; - } -} - -/////////////////////////////////////////////////////////////////////////////// -} // namespace kaldi - -/////////////////////////////////////////////////////////////////////////////// -#endif // KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// - -/* - * ============================================================================ - * libc++ License - * ============================================================================ - * - * The libc++ library is dual licensed under both the University of Illinois - * "BSD-Like" license and the MIT license. As a user of this code you may - * choose to use it under either license. As a contributor, you agree to allow - * your code to be used under both. - * - * Full text of the relevant licenses is included below. - * - * ============================================================================ - * - * University of Illinois/NCSA - * Open Source License - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * All rights reserved. - * - * Developed by: - * - * LLVM Team - * - * University of Illinois at Urbana-Champaign - * - * http://llvm.org - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * with the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimers in the - * documentation and/or other materials provided with the distribution. 
- * - * * Neither the names of the LLVM Team, University of Illinois at - * Urbana-Champaign, nor the names of its contributors may be used to - * endorse or promote products derived from this Software without specific - * prior written permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH - * THE SOFTWARE. - * - * ============================================================================== - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * ============================================================================== - * - * This file is a partial list of people who have contributed to the LLVM/libc++ - * project. If you have contributed a patch or made some other contribution to - * LLVM/libc++, please submit a patch to this file to add yourself, and it will - * be done! - * - * The list is sorted by surname and formatted to allow easy grepping and - * beautification by scripts. The fields are: name (N), email (E), web-address - * (W), PGP key ID and fingerprint (P), description (D), and snail-mail address - * (S). - * - * N: Saleem Abdulrasool - * E: compnerd@compnerd.org - * D: Minor patches and Linux fixes. - * - * N: Dimitry Andric - * E: dimitry@andric.com - * D: Visibility fixes, minor FreeBSD portability patches. - * - * N: Holger Arnold - * E: holgerar@gmail.com - * D: Minor fix. - * - * N: Ruben Van Boxem - * E: vanboxem dot ruben at gmail dot com - * D: Initial Windows patches. - * - * N: David Chisnall - * E: theraven at theravensnest dot org - * D: FreeBSD and Solaris ports, libcxxrt support, some atomics work. - * - * N: Marshall Clow - * E: mclow.lists@gmail.com - * E: marshall@idio.com - * D: C++14 support, patches and bug fixes. - * - * N: Bill Fisher - * E: william.w.fisher@gmail.com - * D: Regex bug fixes. - * - * N: Matthew Dempsky - * E: matthew@dempsky.org - * D: Minor patches and bug fixes. - * - * N: Google Inc. 
- * D: Copyright owner and contributor of the CityHash algorithm - * - * N: Howard Hinnant - * E: hhinnant@apple.com - * D: Architect and primary author of libc++ - * - * N: Hyeon-bin Jeong - * E: tuhertz@gmail.com - * D: Minor patches and bug fixes. - * - * N: Argyrios Kyrtzidis - * E: kyrtzidis@apple.com - * D: Bug fixes. - * - * N: Bruce Mitchener, Jr. - * E: bruce.mitchener@gmail.com - * D: Emscripten-related changes. - * - * N: Michel Morin - * E: mimomorin@gmail.com - * D: Minor patches to is_convertible. - * - * N: Andrew Morrow - * E: andrew.c.morrow@gmail.com - * D: Minor patches and Linux fixes. - * - * N: Arvid Picciani - * E: aep at exys dot org - * D: Minor patches and musl port. - * - * N: Bjorn Reese - * E: breese@users.sourceforge.net - * D: Initial regex prototype - * - * N: Nico Rieck - * E: nico.rieck@gmail.com - * D: Windows fixes - * - * N: Jonathan Sauer - * D: Minor patches, mostly related to constexpr - * - * N: Craig Silverstein - * E: csilvers@google.com - * D: Implemented Cityhash as the string hash function on 64-bit machines - * - * N: Richard Smith - * D: Minor patches. - * - * N: Joerg Sonnenberger - * E: joerg@NetBSD.org - * D: NetBSD port. - * - * N: Stephan Tolksdorf - * E: st@quanttec.com - * D: Minor fix - * - * N: Michael van der Westhuizen - * E: r1mikey at gmail dot com - * - * N: Klaas de Vries - * E: klaas at klaasgaaf dot nl - * D: Minor bug fix. - * - * N: Zhang Xiongpang - * E: zhangxiongpang@gmail.com - * D: Minor patches and bug fixes. - * - * N: Xing Xue - * E: xingxue@ca.ibm.com - * D: AIX port - * - * N: Zhihao Yuan - * E: lichray@gmail.com - * D: Standard compatibility fixes. - * - * N: Jeffrey Yasskin - * E: jyasskin@gmail.com - * E: jyasskin@google.com - * D: Linux fixes. - */ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/const-integer-set-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/const-integer-set-inl.h deleted file mode 100644 index b93846148a3e4595774507f638396ce13393ac0e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/const-integer-set-inl.h +++ /dev/null @@ -1,87 +0,0 @@ -// util/const-integer-set-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_INL_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_INL_H_ - -// Do not include this file directly. It is included by const-integer-set.h - -namespace kaldi { - -template -void ConstIntegerSet::InitInternal() { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - quick_set_.clear(); // just in case we previously had data. 
- if (slow_set_.size() == 0) { - lowest_member_ = (I)1; - highest_member_ = (I)0; - contiguous_ = false; - quick_ = false; - } else { - lowest_member_ = slow_set_.front(); - highest_member_ = slow_set_.back(); - size_t range = highest_member_ + 1 - lowest_member_; - if (range == slow_set_.size()) { - contiguous_ = true; - quick_ = false; - } else { - contiguous_ = false; - // If it would be more compact to store as bool - if (range < slow_set_.size() * 8 * sizeof(I)) { - // (assuming 1 bit per element)... - quick_set_.resize(range, false); - for (size_t i = 0; i < slow_set_.size(); i++) - quick_set_[slow_set_[i] - lowest_member_] = true; - quick_ = true; - } else { - quick_ = false; - } - } - } -} - -template -int ConstIntegerSet::count(I i) const { - if (i < lowest_member_ || i > highest_member_) { - return 0; - } else { - if (contiguous_) return true; - if (quick_) { - return (quick_set_[i - lowest_member_] ? 1 : 0); - } else { - bool ans = std::binary_search(slow_set_.begin(), slow_set_.end(), i); - return (ans ? 1 : 0); - } - } -} - -template -void ConstIntegerSet::Write(std::ostream &os, bool binary) const { - WriteIntegerVector(os, binary, slow_set_); -} - -template -void ConstIntegerSet::Read(std::istream &is, bool binary) { - ReadIntegerVector(is, binary, &slow_set_); - InitInternal(); -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_CONST_INTEGER_SET_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/const-integer-set.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/const-integer-set.h deleted file mode 100644 index 809a56a7c83804bfaa4badb5e28059734bfcad1e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/const-integer-set.h +++ /dev/null @@ -1,96 +0,0 @@ -// util/const-integer-set.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_H_ -#include -#include -#include -#include -#include -#include "util/stl-utils.h" - -/* ConstIntegerSet is a way to efficiently test whether something is in a - supplied set of integers. It can be initialized from a vector or set, but - never changed after that. It either uses a sorted vector or an array of - bool, depending on the input. It behaves like a const version of an STL set, - with only a subset of the functionality, except all the member functions are - upper-case. - - Note that we could get rid of the member slow_set_, but we'd have to - do more work to implement an iterator type. This would save memory. 
-*/ - -namespace kaldi { - -template -class ConstIntegerSet { - public: - ConstIntegerSet() : lowest_member_(1), highest_member_(0) {} - - void Init(const std::vector &input) { - slow_set_ = input; - SortAndUniq(&slow_set_); - InitInternal(); - } - - void Init(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - - explicit ConstIntegerSet(const std::vector &input) : slow_set_(input) { - SortAndUniq(&slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const ConstIntegerSet &other) - : slow_set_(other.slow_set_) { - InitInternal(); - } - - int count(I i) const; // returns 1 or 0. - - typedef typename std::vector::const_iterator iterator; - iterator begin() const { return slow_set_.begin(); } - iterator end() const { return slow_set_.end(); } - size_t size() const { return slow_set_.size(); } - bool empty() const { return slow_set_.empty(); } - - void Write(std::ostream &os, bool binary) const; - void Read(std::istream &is, bool binary); - - private: - I lowest_member_; - I highest_member_; - bool contiguous_; - bool quick_; - std::vector quick_set_; - std::vector slow_set_; - void InitInternal(); -}; - -} // end namespace kaldi - -#include "util/const-integer-set-inl.h" - -#endif // KALDI_UTIL_CONST_INTEGER_SET_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/hash-list-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/hash-list-inl.h deleted file mode 100644 index 063fa7131ec618f0aae9dc30f4edd26c9dcce7fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/hash-list-inl.h +++ /dev/null @@ -1,193 +0,0 @@ -// util/hash-list-inl.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_INL_H_ -#define KALDI_UTIL_HASH_LIST_INL_H_ - -// Do not include this file directly. It is included by fast-hash.h - -namespace kaldi { - -template -HashList::HashList() { - list_head_ = NULL; - bucket_list_tail_ = static_cast(-1); // invalid. - hash_size_ = 0; - freed_head_ = NULL; -} - -template -void HashList::SetSize(size_t size) { - hash_size_ = size; - KALDI_ASSERT(list_head_ == NULL && - bucket_list_tail_ == - static_cast(-1)); // make sure empty. - if (size > buckets_.size()) buckets_.resize(size, HashBucket(0, NULL)); -} - -template -typename HashList::Elem *HashList::Clear() { - // Clears the hashtable and gives ownership of the currently contained list - // to the user. 
- for (size_t cur_bucket = bucket_list_tail_; - cur_bucket != static_cast(-1); - cur_bucket = buckets_[cur_bucket].prev_bucket) { - buckets_[cur_bucket].last_elem = NULL; // this is how we indicate "empty". - } - bucket_list_tail_ = static_cast(-1); - Elem *ans = list_head_; - list_head_ = NULL; - return ans; -} - -template -const typename HashList::Elem *HashList::GetList() const { - return list_head_; -} - -template -inline void HashList::Delete(Elem *e) { - e->tail = freed_head_; - freed_head_ = e; -} - -template -inline typename HashList::Elem *HashList::Find(I key) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - if (bucket.last_elem == NULL) { - return NULL; // empty bucket. - } else { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - return NULL; // Not found. - } -} - -template -inline typename HashList::Elem *HashList::New() { - if (freed_head_) { - Elem *ans = freed_head_; - freed_head_ = freed_head_->tail; - return ans; - } else { - Elem *tmp = new Elem[allocate_block_size_]; - for (size_t i = 0; i + 1 < allocate_block_size_; i++) - tmp[i].tail = tmp + i + 1; - tmp[allocate_block_size_ - 1].tail = NULL; - freed_head_ = tmp; - allocated_.push_back(tmp); - return this->New(); - } -} - -template -HashList::~HashList() { - // First test whether we had any memory leak within the - // HashList, i.e. things for which the user did not call Delete(). - size_t num_in_list = 0, num_allocated = 0; - for (Elem *e = freed_head_; e != NULL; e = e->tail) num_in_list++; - for (size_t i = 0; i < allocated_.size(); i++) { - num_allocated += allocate_block_size_; - delete[] allocated_[i]; - } - if (num_in_list != num_allocated) { - KALDI_WARN << "Possible memory leak: " << num_in_list - << " != " << num_allocated - << ": you might have forgotten to call Delete on " - << "some Elems"; - } -} - -template -inline typename HashList::Elem *HashList::Insert(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - // Check the element is existing or not. - if (bucket.last_elem != NULL) { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - } - - // This is a new element. Insert it. - Elem *elem = New(); - elem->key = key; - elem->val = val; - if (bucket.last_elem == NULL) { // Unoccupied bucket. Insert at - // head of bucket list (which is tail of regular list, they go in - // opposite directions). - if (bucket_list_tail_ == static_cast(-1)) { - // list was empty so this is the first elem. - KALDI_ASSERT(list_head_ == NULL); - list_head_ = elem; - } else { - // link in to the chain of Elems - buckets_[bucket_list_tail_].last_elem->tail = elem; - } - elem->tail = NULL; - bucket.last_elem = elem; - bucket.prev_bucket = bucket_list_tail_; - bucket_list_tail_ = index; - } else { - // Already-occupied bucket. Insert at tail of list of elements within - // the bucket. 
- elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - } - return elem; -} - -template -void HashList::InsertMore(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - Elem *elem = New(); - elem->key = key; - elem->val = val; - - KALDI_ASSERT(bucket.last_elem != NULL); // assume one element is already here - if (bucket.last_elem->key == key) { // standard behavior: add as last element - elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - return; - } - Elem *e = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail); - // find place to insert in linked list - while (e != bucket.last_elem->tail && e->key != key) e = e->tail; - KALDI_ASSERT(e->key == key); // not found? - should not happen - elem->tail = e->tail; - e->tail = elem; -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_HASH_LIST_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/hash-list.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/hash-list.h deleted file mode 100644 index 31cc9bdc4870773475f8c5139539e320746bf5fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/hash-list.h +++ /dev/null @@ -1,146 +0,0 @@ -// util/hash-list.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_H_ -#define KALDI_UTIL_HASH_LIST_H_ - -#include -#include -#include -#include -#include - -#include "base/kaldi-error.h" - -/* This header provides utilities for a structure that's used in a decoder (but - is quite generic in nature so we implement and test it separately). - Basically it's a singly-linked list, but implemented in such a way that we - can quickly search for elements in the list. We give it a slightly richer - interface than just a hash and a list. The idea is that we want to separate - the hash part and the list part: basically, in the decoder, we want to have a - single hash for the current frame and the next frame, because by the time we - need to access the hash for the next frame we no longer need the hash for the - previous frame. So we have an operation that clears the hash but leaves the - list structure intact. We also control memory management inside this object, - to avoid repeated new's/deletes. - - See hash-list-test.cc for an example of how to use this object. -*/ - -namespace kaldi { - -template -class HashList { - public: - struct Elem { - I key; - T val; - Elem *tail; - }; - - /// Constructor takes no arguments. 
- /// Call SetSize to inform it of the likely size. - HashList(); - - /// Clears the hash and gives the head of the current list to the user; - /// ownership is transferred to the user (the user must call Delete() - /// for each element in the list, at his/her leisure). - Elem *Clear(); - - /// Gives the head of the current list to the user. Ownership retained in the - /// class. Caution: in December 2013 the return type was changed to const - /// Elem* and this function was made const. You may need to change some types - /// of local Elem* variables to const if this produces compilation errors. - const Elem *GetList() const; - - /// Think of this like delete(). It is to be called for each Elem in turn - /// after you "obtained ownership" by doing Clear(). This is not the opposite - /// of. Insert, it is the opposite of New. It's really a memory operation. - inline void Delete(Elem *e); - - /// This should probably not be needed to be called directly by the user. - /// Think of it as opposite - /// to Delete(); - inline Elem *New(); - - /// Find tries to find this element in the current list using the hashtable. - /// It returns NULL if not present. The Elem it returns is not owned by the - /// user, it is part of the internal list owned by this object, but the user - /// is free to modify the "val" element. - inline Elem *Find(I key); - - /// Insert inserts a new element into the hashtable/stored list. - /// Because element keys in a hashtable are unique, this operation checks - /// whether each inserted element has a key equivalent to the one of an - /// element already in the hashtable. If so, the element is not inserted, - /// returning an pointer to this existing element. - inline Elem *Insert(I key, T val); - - /// Insert inserts another element with same key into the hashtable/ - /// stored list. - /// By calling this, the user asserts that one element with that key is - /// already present. - /// We insert it that way, that all elements with the same key - /// follow each other. - /// Find() will return the first one of the elements with the same key. - inline void InsertMore(I key, T val); - - /// SetSize tells the object how many hash buckets to allocate (should - /// typically be at least twice the number of objects we expect to go in the - /// structure, for fastest performance). It must be called while the hash - /// is empty (e.g. after Clear() or after initializing the object, but before - /// adding anything to the hash. - void SetSize(size_t sz); - - /// Returns current number of hash buckets. - inline size_t Size() { return hash_size_; } - - ~HashList(); - - private: - struct HashBucket { - size_t prev_bucket; // index to next bucket (-1 if list tail). Note: - // list of buckets goes in opposite direction to list of Elems. - Elem *last_elem; // pointer to last element in this bucket (NULL if empty) - inline HashBucket(size_t i, Elem *e) : prev_bucket(i), last_elem(e) {} - }; - - Elem *list_head_; // head of currently stored list. - size_t bucket_list_tail_; // tail of list of active hash buckets. - - size_t hash_size_; // number of hash buckets. - - std::vector buckets_; - - Elem *freed_head_; // head of list of currently freed elements. [ready for - // allocation] - - std::vector allocated_; // list of allocated blocks. - - static const size_t allocate_block_size_ = 1024; // Number of Elements to - // allocate in one block. Must be largish so storing allocated_ doesn't - // become a problem. 
-}; - -} // end namespace kaldi - -#include "util/hash-list-inl.h" - -#endif // KALDI_UTIL_HASH_LIST_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-io-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-io-inl.h deleted file mode 100644 index 8b0c92131c4af2113eb33da6f3cfa9dc4dee83e1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-io-inl.h +++ /dev/null @@ -1,40 +0,0 @@ -// util/kaldi-io-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_KALDI_IO_INL_H_ -#define KALDI_UTIL_KALDI_IO_INL_H_ - -#include - -namespace kaldi { - -bool Input::Open(const std::string &rxfilename, bool *binary) { - return OpenInternal(rxfilename, true, binary); -} - -bool Input::OpenTextMode(const std::string &rxfilename) { - return OpenInternal(rxfilename, false, NULL); -} - -bool Input::IsOpen() { return impl_ != NULL; } - -bool Output::IsOpen() { return impl_ != NULL; } - -} // end namespace kaldi. - -#endif // KALDI_UTIL_KALDI_IO_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-io.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-io.cc deleted file mode 100644 index 5f8ec4870138df32f6aca9c12383cf3885411741..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-io.cc +++ /dev/null @@ -1,898 +0,0 @@ -// util/kaldi-io.cc - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
-#include "util/kaldi-io.h" - -#include -#include -#include - -#include - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" -#include "util/kaldi-pipebuf.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -#ifdef KALDI_CYGWIN_COMPAT -#include "util/kaldi-cygwin-io-inl.h" -#define MapOsPath(x) MapCygwinPath(x) -#else // KALDI_CYGWIN_COMPAT -#define MapOsPath(x) x -#endif // KALDI_CYGWIN_COMPAT - -#if defined(_MSC_VER) -static FILE *popen(const char *command, const char *mode) { -#ifdef KALDI_CYGWIN_COMPAT - return kaldi::CygwinCompatPopen(command, mode); -#else // KALDI_CYGWIN_COMPAT - return _popen(command, mode); -#endif // KALDI_CYGWIN_COMPAT -} -#endif // _MSC_VER - -namespace kaldi { - -#ifndef _MSC_VER // on VS, we don't need this type. -// could replace basic_pipebuf with stdio_filebuf on some platforms. -// Would mean we could use less of our own code. -typedef basic_pipebuf PipebufType; -#endif -} // namespace kaldi - -namespace kaldi { - -std::string PrintableRxfilename(const std::string &rxfilename) { - if (rxfilename == "" || rxfilename == "-") { - return "standard input"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return rxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(rxfilename); - } -} - -std::string PrintableWxfilename(const std::string &wxfilename) { - if (wxfilename == "" || wxfilename == "-") { - return "standard output"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return wxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(wxfilename); - } -} - -OutputType ClassifyWxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardOutput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardOutput; - } else if (first_char == '|') { - return kPipeOutput; // An output pipe like "|blah". - } else if (isspace(first_char) || isspace(last_char) || last_char == '|') { - return kNoOutput; // Leading or trailing space: can't interpret this. - // Final '|' would represent an input pipe, not an - // output pipe. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoOutput; - } else if (isdigit(last_char)) { - // This could be a file, but we have to see if it's an offset into a file - // (like foo.ark:4314328), which is not allowed for writing (but is - // allowed for reaching). 
This eliminates some things which would be - // valid UNIX filenames but are not allowed by Kaldi. (Even if we allowed - // such filenames for writing, we woudln't be able to correctly read them). - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') return kNoOutput; - // else it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but we - // check for internal '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify wxfilename with pipe symbol in the" - " wrong place (pipe without | at the beginning?): " - << filename; - return kNoOutput; - } - return kFileOutput; // It matched no other pattern: assume it's a filename. -} - -InputType ClassifyRxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardInput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardInput; - } else if (first_char == '|') { - return kNoInput; // An output pipe like "|blah": not - // valid for input. - } else if (last_char == '|') { - return kPipeInput; - } else if (isspace(first_char) || isspace(last_char)) { - return kNoInput; // We don't allow leading or trailing space in a filename. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoInput; - } else if (isdigit(last_char)) { - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') - return kOffsetFileInput; // Filename is like - // some_file:12345 - // otherwise it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but - // we check for '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified in this case. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify rxfilename with pipe symbol in the" - " wrong place (pipe without | at the end?): " - << filename; - return kNoInput; - } - return kFileInput; // It matched no other pattern: assume it's a filename. -} - -class OutputImplBase { - public: - // Open will open it as a file (no header), and return true - // on success. It cannot be called on an already open stream. 
- virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::ostream &Stream() = 0; - virtual bool Close() = 0; - virtual ~OutputImplBase() {} -}; - -class FileOutputImpl : public OutputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (os_.is_open()) - KALDI_ERR << "FileOutputImpl::Open(), " - << "open called on already open file."; - filename_ = filename; - os_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out); - return os_.is_open(); - } - - virtual std::ostream &Stream() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return os_; - } - - virtual bool Close() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - os_.close(); - return !(os_.fail()); - } - virtual ~FileOutputImpl() { - if (os_.is_open()) { - os_.close(); - if (os_.fail()) KALDI_ERR << "Error closing output file " << filename_; - } - } - - private: - std::string filename_; - std::ofstream os_; -}; - -class StandardOutputImpl : public OutputImplBase { - public: - StandardOutputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardOutputImpl::Open(), " - "open called on already open file."; -#ifdef _MSC_VER - _setmode(_fileno(stdout), binary ? _O_BINARY : _O_TEXT); -#endif - is_open_ = std::cout.good(); - return is_open_; - } - - virtual std::ostream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return std::cout; - } - - virtual bool Close() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Close(), file is not open."; - is_open_ = false; - std::cout << std::flush; - return !(std::cout.fail()); - } - virtual ~StandardOutputImpl() { - if (is_open_) { - std::cout << std::flush; - if (std::cout.fail()) KALDI_ERR << "Error writing to standard output"; - } - } - - private: - bool is_open_; -}; - -class PipeOutputImpl : public OutputImplBase { - public: - PipeOutputImpl() : f_(NULL), os_(NULL) {} - - virtual bool Open(const std::string &wxfilename, bool binary) { - filename_ = wxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(wxfilename.length() != 0 && wxfilename[0] == '|'); // should - // start with '|' - std::string cmd_name(wxfilename, 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "wb" : "w")); -#else - f_ = popen(cmd_name.c_str(), "w"); -#endif - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for writing, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't make the - // destructor try to close the stream when - // we're done. - (binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - os_ = new std::ostream(fb_); -#else - os_ = new std::ofstream(f_); -#endif - return os_->good(); - } - } - - virtual std::ostream &Stream() { - if (os_ == NULL) - KALDI_ERR << "PipeOutputImpl::Stream()," - " object not initialized."; - // I believe this error can only arise from coding error. 
- return *os_; - } - - virtual bool Close() { - if (os_ == NULL) KALDI_ERR << "PipeOutputImpl::Close(), file is not open."; - bool ok = true; - os_->flush(); - if (os_->fail()) ok = false; - delete os_; - os_ = NULL; - int status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return ok; - } - virtual ~PipeOutputImpl() { - if (os_) { - if (!Close()) - KALDI_ERR << "Error writing to pipe " << PrintableWxfilename(filename_); - } - } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::ostream *os_; -}; - -class InputImplBase { - public: - // Open will open it as a file, and return true on success. - // May be called twice only for kOffsetFileInput (otherwise, - // if called twice, we just create a new Input object, to avoid - // having to deal with the extra hassle of reopening with the - // same object. - // Note that we will to call Open with true (binary) for - // for text-mode Kaldi files; the only actual text-mode input - // is for non-Kaldi files. - virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::istream &Stream() = 0; - virtual int32 Close() = 0; // We only need to check failure in the case of - // kPipeInput. - // on close for input streams. - virtual InputType MyType() = 0; // Because if it's kOffsetFileInput, we may - // call Open twice - // (has efficiency benefits). - - virtual ~InputImplBase() {} -}; - -class FileInputImpl : public InputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (is_.is_open()) - KALDI_ERR << "FileInputImpl::Open(), " - << "open called on already open file."; - is_.open( - MapOsPath(filename).c_str(), - binary ? std::ios_base::in | std::ios_base::binary : std::ios_base::in); - return is_.is_open(); - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kFileInput; } - - virtual ~FileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::ifstream is_; -}; - -class StandardInputImpl : public InputImplBase { - public: - StandardInputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardInputImpl::Open(), " - "open called on already open file."; - is_open_ = true; -#ifdef _MSC_VER - _setmode(_fileno(stdin), binary ? _O_BINARY : _O_TEXT); -#endif - return true; // Don't check good() because would be false if - // eof, which may be valid input. - } - - virtual std::istream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. 
- return std::cin; - } - - virtual InputType MyType() { return kStandardInput; } - - virtual int32 Close() { - if (!is_open_) KALDI_ERR << "StandardInputImpl::Close(), file is not open."; - is_open_ = false; - return 0; - } - virtual ~StandardInputImpl() {} - - private: - bool is_open_; -}; - -class PipeInputImpl : public InputImplBase { - public: - PipeInputImpl() : f_(NULL), is_(NULL) {} - - virtual bool Open(const std::string &rxfilename, bool binary) { - filename_ = rxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(rxfilename.length() != 0 && - rxfilename[rxfilename.length() - 1] == - '|'); // should end with '|' - std::string cmd_name(rxfilename, 0, rxfilename.length() - 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "rb" : "r")); -#else - f_ = popen(cmd_name.c_str(), "r"); -#endif - - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for reading, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't lead the - // destructor to close the stream. - (binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - is_ = new std::istream(fb_); -#else - is_ = new std::ifstream(f_); -#endif - if (is_->fail() || is_->bad()) return false; - if (is_->eof()) { - KALDI_WARN << "Pipe opened with command " - << PrintableRxfilename(rxfilename) << " is empty."; - // don't return false: empty may be valid. - } - return true; - } - } - - virtual std::istream &Stream() { - if (is_ == NULL) - KALDI_ERR << "PipeInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return *is_; - } - - virtual int32 Close() { - if (is_ == NULL) KALDI_ERR << "PipeInputImpl::Close(), file is not open."; - delete is_; - is_ = NULL; - int32 status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return status; - } - virtual ~PipeInputImpl() { - if (is_) Close(); - } - virtual InputType MyType() { return kPipeInput; } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::istream *is_; -}; - -/* -#else - -// Just have an empty implementation of the pipe input that crashes if -// called. -class PipeInputImpl: public InputImplBase { - public: - PipeInputImpl() { KALDI_ASSERT(0 && "Pipe input not yet supported on this - platform."); } - virtual bool Open(const std::string, bool) { return 0; } - virtual std::istream &Stream() const { return NULL; } - virtual void Close() {} - virtual InputType MyType() { return kPipeInput; } -}; - -#endif -*/ - -class OffsetFileInputImpl : public InputImplBase { - // This class is a bit more complicated than the - - public: - // splits a filename like /my/file:123 into /my/file and the - // number 123. Crashes if not this format. - static void SplitFilename(const std::string &rxfilename, - std::string *filename, size_t *offset) { - size_t pos = rxfilename.find_last_of(':'); - KALDI_ASSERT(pos != std::string::npos); // would indicate error in calling - // code, as the filename is supposed to be of the correct form at this - // point. 
- *filename = std::string(rxfilename, 0, pos); - std::string number(rxfilename, pos + 1); - bool ans = ConvertStringToInteger(number, offset); - if (!ans) - KALDI_ERR << "Cannot get offset from filename " << rxfilename - << " (possibly you compiled in 32-bit and have a >32-bit" - << " byte offset into a file; you'll have to compile 64-bit."; - } - - bool Seek(size_t offset) { - size_t cur_pos = is_.tellg(); - if (cur_pos == offset) { - return true; - } else if (cur_pos < offset && cur_pos + 100 > offset) { - // We're close enough that it may be faster to just - // read that data, rather than seek. - for (size_t i = cur_pos; i < offset; i++) is_.get(); - return (is_.tellg() == std::streampos(offset)); - } - // Try to actually seek. - is_.seekg(offset, std::ios_base::beg); - if (is_.fail()) { // failbit or badbit is set [error happened] - is_.close(); - return false; // failure. - } else { - is_.clear(); // Clear any failure bits (e.g. eof). - return true; // success. - } - } - - // This Open routine is unusual in that it is designed to work even - // if it was already open. This for efficiency when seeking multiple - // times. - virtual bool Open(const std::string &rxfilename, bool binary) { - if (is_.is_open()) { - // We are opening when we have an already-open file. - // We may have to seek within this file, or else close it and - // open a different one. - std::string tmp_filename; - size_t offset; - SplitFilename(rxfilename, &tmp_filename, &offset); - if (tmp_filename == filename_ && binary == binary_) { // Just seek - is_.clear(); // clear fail bit, etc. - return Seek(offset); - } else { - is_.close(); // don't bother checking error status of is_. - filename_ = tmp_filename; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } else { - size_t offset; - SplitFilename(rxfilename, &filename_, &offset); - binary_ = binary; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kOffsetFileInput; } - - virtual ~OffsetFileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::string filename_; // the actual filename - bool binary_; // true if was opened in binary mode. - std::ifstream is_; -}; - -Output::Output(const std::string &wxfilename, bool binary, bool write_header) - : impl_(NULL) { - if (!Open(wxfilename, binary, write_header)) { - if (impl_) { - delete impl_; - impl_ = NULL; - } - KALDI_ERR << "Error opening output stream " - << PrintableWxfilename(wxfilename); - } -} - -bool Output::Close() { - if (!impl_) { - return false; // error to call Close if not open. 
- } else { - bool ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } -} - -Output::~Output() { - if (impl_) { - bool ok = impl_->Close(); - delete impl_; - impl_ = NULL; - if (!ok) - KALDI_ERR << "Error closing output file " - << PrintableWxfilename(filename_) - << (ClassifyWxfilename(filename_) == kFileOutput - ? " (disk full?)" - : ""); - } -} - -std::ostream &Output::Stream() { // will throw if not open; else returns - // stream. - if (!impl_) KALDI_ERR << "Output::Stream() called but not open."; - return impl_->Stream(); -} - -bool Output::Open(const std::string &wxfn, bool binary, bool header) { - if (IsOpen()) { - if (!Close()) { // Throw here rather than return status, as it's an error - // about something else: if the user wanted to avoid the exception he/she - // could have called Close(). - KALDI_ERR << "Output::Open(), failed to close output stream: " - << PrintableWxfilename(filename_); - } - } - - filename_ = wxfn; - - OutputType type = ClassifyWxfilename(wxfn); - KALDI_ASSERT(impl_ == NULL); - - if (type == kFileOutput) { - impl_ = new FileOutputImpl(); - } else if (type == kStandardOutput) { - impl_ = new StandardOutputImpl(); - } else if (type == kPipeOutput) { - impl_ = new PipeOutputImpl(); - } else { // type == kNoOutput - KALDI_WARN << "Invalid output filename format " - << PrintableWxfilename(wxfn); - return false; - } - if (!impl_->Open(wxfn, binary)) { - delete impl_; - impl_ = NULL; - return false; // failed to open. - } else { // successfully opened it. - if (header) { - InitKaldiOutputStream(impl_->Stream(), binary); - bool ok = impl_->Stream().good(); // still OK? - if (!ok) { - delete impl_; - impl_ = NULL; - return false; - } - return true; - } else { - return true; - } - } -} - -Input::Input(const std::string &rxfilename, bool *binary) : impl_(NULL) { - if (!Open(rxfilename, binary)) { - KALDI_ERR << "Error opening input stream " - << PrintableRxfilename(rxfilename); - } -} - -int32 Input::Close() { - if (impl_) { - int32 ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } else { - return 0; - } -} - -bool Input::OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary) { - InputType type = ClassifyRxfilename(rxfilename); - if (IsOpen()) { - // May have to close the stream first. - if (type == kOffsetFileInput && impl_->MyType() == kOffsetFileInput) { - // We want to use the same object to Open... this is in case - // the files are the same, so we can just seek. - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always open in binary. - delete impl_; - impl_ = NULL; - return false; - } - // read the binary header, if requested. - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; - } else { - Close(); - // and fall through to code below which actually opens the file. - } - } - if (type == kFileInput) { - impl_ = new FileInputImpl(); - } else if (type == kStandardInput) { - impl_ = new StandardInputImpl(); - } else if (type == kPipeInput) { - impl_ = new PipeInputImpl(); - } else if (type == kOffsetFileInput) { - impl_ = new OffsetFileInputImpl(); - } else { // type == kNoInput - KALDI_WARN << "Invalid input filename format " - << PrintableRxfilename(rxfilename); - return false; - } - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always read in binary. 
- delete impl_; - impl_ = NULL; - return false; - } - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; -} - -Input::~Input() { - if (impl_) Close(); -} - -std::istream &Input::Stream() { - if (!IsOpen()) KALDI_ERR << "Input::Stream(), not open."; - return impl_->Stream(); -} - -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-io.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-io.h deleted file mode 100644 index 2175ca8f89ed5f3e3bade26528e924208df692c6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-io.h +++ /dev/null @@ -1,266 +0,0 @@ -// util/kaldi-io.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
-#ifndef KALDI_UTIL_KALDI_IO_H_ -#define KALDI_UTIL_KALDI_IO_H_ - -#ifdef _MSC_VER -#include -#include -#endif -#include // For isspace. -#include -#include -#include "base/kaldi-common.h" -// #include "matrix/kaldi-matrix.h" - -namespace kaldi { - -class OutputImplBase; // Forward decl; defined in a .cc file -class InputImplBase; // Forward decl; defined in a .cc file - -/// \addtogroup io_group -/// @{ - -// The Output and Input classes handle stream-opening for "extended" filenames -// that include actual files, standard-input/standard-output, pipes, and -// offsets into actual files. They also handle reading and writing the -// binary-mode headers for Kaldi files, where applicable. The classes have -// versions of the Open routines that throw and do not throw, depending whether -// the calling code wants to catch the errors or not; there are also versions -// that write (or do not write) the Kaldi binary-mode header that says if it's -// binary mode. Generally files that contain Kaldi objects will have the header -// on, so we know upon reading them whether they have the header. So you would -// use the OpenWithHeader routines for these (or the constructor); but other -// types of objects (e.g. FSTs) would have files without a header so you would -// use OpenNoHeader. - -// We now document the types of extended filenames that we use. -// -// A "wxfilename" is an extended filename for writing. It can take three forms: -// (1) Filename: e.g. "/some/filename", "./a/b/c", "c:\Users\dpovey\My -// Documents\\boo" -// (whatever the actual file-system interprets) -// (2) Standard output: "" or "-" -// (3) A pipe: e.g. "| gzip -c > /tmp/abc.gz" -// -// -// A "rxfilename" is an extended filename for reading. It can take four forms: -// (1) An actual filename, whatever the file-system can read, e.g. "/my/file". -// (2) Standard input: "" or "-" -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) An offset into a file, e.g.: "/mnt/blah/data/1.ark:24871" -// [these are created by the Table and TableWriter classes; I may also write -// a program that creates them for arbitrary files] -// - -// Typical usage: -// ... -// bool binary; -// MyObject.Write(Output(some_filename, binary).Stream(), binary); -// -// ... more extensive example: -// { -// Output ko(some_filename, binary); -// MyObject1.Write(ko.Stream(), binary); -// MyObject2.Write(ko.Stream(), binary); -// } - -enum OutputType { kNoOutput, kFileOutput, kStandardOutput, kPipeOutput }; - -/// ClassifyWxfilename interprets filenames as follows: -/// - kNoOutput: invalid filenames (leading or trailing space, things that look -/// like wspecifiers and rspecifiers or like pipes to read from with leading -/// |. -/// - kFileOutput: Normal filenames -/// - kStandardOutput: The empty string or "-", interpreted as standard output -/// - kPipeOutput: pipes, e.g. "| gzip -c > /tmp/abc.gz" -OutputType ClassifyWxfilename(const std::string &wxfilename); - -enum InputType { - kNoInput, - kFileInput, - kStandardInput, - kOffsetFileInput, - kPipeInput -}; - -/// ClassifyRxfilenames interprets filenames for reading as follows: -/// - kNoInput: invalid filenames (leading or trailing space, things that -/// look like wspecifiers and rspecifiers or pipes to write to -/// with trailing |. -/// - kFileInput: normal filenames -/// - kStandardInput: the empty string or "-" -/// - kPipeInput: e.g. "gunzip -c /tmp/abc.gz |" -/// - kOffsetFileInput: offsets into files, e.g. 
/some/filename:12970 -InputType ClassifyRxfilename(const std::string &rxfilename); - -class Output { - public: - // The normal constructor, provided for convenience. - // Equivalent to calling with default constructor then Open() - // with these arguments. - Output(const std::string &filename, bool binary, bool write_header = true); - - Output() : impl_(NULL) {} - - /// This opens the stream, with the given mode (binary or text). It returns - /// true on success and false on failure. However, it will throw if something - /// was already open and could not be closed (to avoid this, call Close() - /// first. if write_header == true and binary == true, it writes the Kaldi - /// binary-mode header ('\0' then 'B'). You may call Open even if it is - /// already open; it will close the existing stream and reopen (however if - /// closing the old stream failed it will throw). - bool Open(const std::string &wxfilename, bool binary, bool write_header); - - inline bool IsOpen(); // return true if we have an open stream. Does not - // imply stream is good for writing. - - std::ostream &Stream(); // will throw if not open; else returns stream. - - // Close closes the stream. Calling Close is never necessary unless you - // want to avoid exceptions being thrown. There are times when calling - // Close will hurt efficiency (basically, when using offsets into files, - // and using the same Input object), - // but most of the time the user won't be doing this directly, it will - // be done in kaldi-table.{h, cc}, so you don't have to worry about it. - bool Close(); - - // This will throw if stream could not be closed (to check error status, - // call Close()). - ~Output(); - - private: - OutputImplBase *impl_; // non-NULL if open. - std::string filename_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Output); -}; - -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject.Read(ki.Stream(), binary_in); -// -// ... more extensive example: -// -// { -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject1.Read(ki.Stream(), &binary_in); -// MyObject2.Write(ki.Stream(), &binary_in); -// } -// Note that to catch errors you need to use try.. catch. -// Input communicates errors by throwing exceptions. - -// Input interprets four kinds of filenames: -// (1) Normal filenames -// (2) The empty string or "-", interpreted as standard output -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) Offsets into [real] files, e.g. "/my/filename:12049" -// The last one has no correspondence in Output. - -class Input { - public: - /// The normal constructor. Opens the stream in binary mode. - /// Equivalent to calling the default constructor followed by Open(); then, if - /// binary != NULL, it calls ReadHeader(), putting the output in "binary"; it - /// throws on error. - explicit Input(const std::string &rxfilename, bool *contents_binary = NULL); - - Input() : impl_(NULL) {} - - // Open opens the stream for reading (the mode, where relevant, is binary; use - // OpenTextMode for text-mode, we made this a separate function rather than a - // boolean argument, to avoid confusion with Kaldi's text/binary distinction, - // since reading in the file system's text mode is unusual.) If - // contents_binary != NULL, it reads the binary-mode header and puts it in the - // "binary" variable. Returns true on success. If it returns false it will - // not be open. 
You may call Open even if it is already open; it will close - // the existing stream and reopen (however if closing the old stream failed it - // will throw). - inline bool Open(const std::string &rxfilename, bool *contents_binary = NULL); - - // As Open but (if the file system has text/binary modes) opens in text mode; - // you shouldn't ever have to use this as in Kaldi we read even text files in - // binary mode (and ignore the \r). - inline bool OpenTextMode(const std::string &rxfilename); - - // Return true if currently open for reading and Stream() will - // succeed. Does not guarantee that the stream is good. - inline bool IsOpen(); - - // It is never necessary or helpful to call Close, except if - // you are concerned about to many filehandles being open. - // Close does not throw. It returns the exit code as int32 - // in the case of a pipe [kPipeInput], and always zero otherwise. - int32 Close(); - - // Returns the underlying stream. Throws if !IsOpen() - std::istream &Stream(); - - // Destructor does not throw: input streams may legitimately fail so we - // don't worry about the status when we close them. - ~Input(); - - private: - bool OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary); - InputImplBase *impl_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Input); -}; - -template -void ReadKaldiObject(const std::string &filename, C *c) { - bool binary_in; - Input ki(filename, &binary_in); - c->Read(ki.Stream(), binary_in); -} - -// Specialize the template for reading matrices, because we want to be able to -// support reading 'ranges' (row and column ranges), like foo.mat[10:20]. -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); -// -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); - -template -inline void WriteKaldiObject(const C &c, const std::string &filename, - bool binary) { - Output ko(filename, binary); - c.Write(ko.Stream(), binary); -} - -/// PrintableRxfilename turns the rxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard input". -std::string PrintableRxfilename(const std::string &rxfilename); - -/// PrintableWxfilename turns the wxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard output". -std::string PrintableWxfilename(const std::string &wxfilename); - -/// @} - -} // end namespace kaldi. - -#include "util/kaldi-io-inl.h" - -#endif // KALDI_UTIL_KALDI_IO_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-pipebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-pipebuf.h deleted file mode 100644 index bcee80ccb1a6fa8ce3195483ac144c5ff66d2f89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/kaldi-pipebuf.h +++ /dev/null @@ -1,86 +0,0 @@ -// util/kaldi-pipebuf.h - -// Copyright 2009-2011 Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -/** @file kaldi-pipebuf.h - * This is an Kaldi C++ Library header. - */ - -#ifndef KALDI_UTIL_KALDI_PIPEBUF_H_ -#define KALDI_UTIL_KALDI_PIPEBUF_H_ - -#include -#if !defined(_LIBCPP_VERSION) // libc++ -#include -#else -#include "util/basic-filebuf.h" -#endif - -namespace kaldi { -// This class provides a way to initialize a filebuf with a FILE* pointer -// directly; it will not close the file pointer when it is deleted. -// The C++ standard does not allow implementations of C++ to provide -// this constructor within basic_filebuf, which makes it hard to deal -// with pipes using completely native C++. This is a workaround - -#ifdef _MSC_VER -#elif defined(_LIBCPP_VERSION) // libc++ -template > -class basic_pipebuf : public basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : basic_filebuf() { - this->open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - } -}; // class basic_pipebuf -#else -template > -class basic_pipebuf : public std::basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : std::basic_filebuf() { - this->_M_file.sys_open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - this->_M_mode = mode; - this->_M_buf_size = BUFSIZ; - this->_M_allocate_internal_buffer(); - this->_M_reading = false; - this->_M_writing = false; - this->_M_set_buffer(-1); - } -}; // class basic_pipebuf -#endif // _MSC_VER - -} // namespace kaldi - -#endif // KALDI_UTIL_KALDI_PIPEBUF_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/parse-options.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/parse-options.cc deleted file mode 100644 index 1f2ef844d28d67ed58d2e0c9d7c7b674e8209df8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/parse-options.cc +++ /dev/null @@ -1,636 +0,0 @@ -// util/parse-options.cc - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey); -// Frantisek Skala; Arnab Ghoshal -// Copyright 2013 Tanel Alumae -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -namespace kaldi { - -ParseOptions::ParseOptions(const std::string &prefix, OptionsItf *other) - : print_args_(false), help_(false), usage_(""), argc_(0), argv_(NULL) { - ParseOptions *po = dynamic_cast(other); - if (po != NULL && po->other_parser_ != NULL) { - // we get here if this constructor is used twice, recursively. - other_parser_ = po->other_parser_; - } else { - other_parser_ = other; - } - if (po != NULL && po->prefix_ != "") { - prefix_ = po->prefix_ + std::string(".") + prefix; - } else { - prefix_ = prefix; - } -} - -void ParseOptions::Register(const std::string &name, bool *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, int32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, uint32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, float *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, double *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, std::string *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -// old-style, used for registering application-specific parameters -template -void ParseOptions::RegisterTmpl(const std::string &name, T *ptr, - const std::string &doc) { - if (other_parser_ == NULL) { - this->RegisterCommon(name, ptr, doc, false); - } else { - KALDI_ASSERT(prefix_ != "" && - "Cannot use empty prefix when registering with prefix."); - std::string new_name = prefix_ + '.' + name; // name becomes prefix.name - other_parser_->Register(new_name, ptr, doc); - } -} - -// does the common part of the job of registering a parameter -template -void ParseOptions::RegisterCommon(const std::string &name, T *ptr, - const std::string &doc, bool is_standard) { - KALDI_ASSERT(ptr != NULL); - std::string idx = name; - NormalizeArgName(&idx); - if (doc_map_.find(idx) != doc_map_.end()) - KALDI_WARN << "Registering option twice, ignoring second time: " << name; - this->RegisterSpecific(name, idx, ptr, doc, is_standard); -} - -// used to register standard parameters (those that are present in all of the -// applications) -template -void ParseOptions::RegisterStandard(const std::string &name, T *ptr, - const std::string &doc) { - this->RegisterCommon(name, ptr, doc, true); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, bool *b, - const std::string &doc, bool is_standard) { - bool_map_[idx] = b; - doc_map_[idx] = - DocInfo(name, doc + " (bool, default = " + ((*b) ? 
"true)" : "false)"), - is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, int32 *i, - const std::string &doc, bool is_standard) { - int_map_[idx] = i; - std::ostringstream ss; - ss << doc << " (int, default = " << *i << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, uint32 *u, - const std::string &doc, bool is_standard) { - uint_map_[idx] = u; - std::ostringstream ss; - ss << doc << " (uint, default = " << *u << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, float *f, - const std::string &doc, bool is_standard) { - float_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (float, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, double *f, - const std::string &doc, bool is_standard) { - double_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (double, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, std::string *s, - const std::string &doc, bool is_standard) { - string_map_[idx] = s; - doc_map_[idx] = - DocInfo(name, doc + " (string, default = \"" + *s + "\")", is_standard); -} -void ParseOptions::DisableOption(const std::string &name) { - if (argv_ != NULL) - KALDI_ERR << "DisableOption must not be called after calling Read()."; - if (doc_map_.erase(name) == 0) - KALDI_ERR << "Option " << name - << " was not registered so cannot be disabled: "; - bool_map_.erase(name); - int_map_.erase(name); - uint_map_.erase(name); - float_map_.erase(name); - double_map_.erase(name); - string_map_.erase(name); -} - -int ParseOptions::NumArgs() const { return positional_args_.size(); } - -std::string ParseOptions::GetArg(int i) const { - // use KALDI_ERR if code error - if (i < 1 || i > static_cast(positional_args_.size())) - KALDI_ERR << "ParseOptions::GetArg, invalid index " << i; - return positional_args_[i - 1]; -} - -// We currently do not support any other options. -enum ShellType { kBash = 0 }; - -// This can be changed in the code if it ever does need to be changed (as it's -// unlikely that one compilation of this tool-set would use both shells). -static ShellType kShellType = kBash; - -// Returns true if we need to escape a string before putting it into -// a shell (mainly thinking of bash shell, but should work for others) -// This is for the convenience of the user so command-lines that are -// printed out by ParseOptions::Read (with --print-args=true) are -// paste-able into the shell and will run. If you use a different type of -// shell, it might be necessary to change this function. -// But it's mostly a cosmetic issue as it basically affects how -// the program echoes its command-line arguments to the screen. -static bool MustBeQuoted(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - const char *c = str.c_str(); - if (*c == '\0') { - return true; // Must quote empty string - } else { - const char *ok_chars[2]; - - // These seem not to be interpreted as long as there are no other "bad" - // characters involved (e.g. 
"," would be interpreted as part of something - // like a{b,c}, but not on its own. - ok_chars[kBash] = "[]~#^_-+=:.,/"; - - // Just want to make sure that a space character doesn't get automatically - // inserted here via an automated style-checking script, like it did before. - KALDI_ASSERT(!strchr(ok_chars[kBash], ' ')); - - for (; *c != '\0'; c++) { - // For non-alphanumeric characters we have a list of characters which - // are OK. All others are forbidden (this is easier since the shell - // interprets most non-alphanumeric characters). - if (!isalnum(*c)) { - const char *d; - for (d = ok_chars[st]; *d != '\0'; d++) - if (*c == *d) break; - // If not alphanumeric or one of the "ok_chars", it must be escaped. - if (*d == '\0') return true; - } - } - return false; // The string was OK. No quoting or escaping. - } -} - -// Returns a quoted and escaped version of "str" -// which has previously been determined to need escaping. -// Our aim is to print out the command line in such a way that if it's -// pasted into a shell of ShellType "st" (only bash for now), it -// will get passed to the program in the same way. -static std::string QuoteAndEscape(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - // For now we use the following rules: - // In the normal case, we quote with single-quote "'", and to escape - // a single-quote we use the string: '\'' (interpreted as closing the - // single-quote, putting an escaped single-quote from the shell, and - // then reopening the single quote). - char quote_char = '\''; - const char *escape_str = "'\\''"; // e.g. echo 'a'\''b' returns a'b - - // If the string contains single-quotes that would need escaping this - // way, and we determine that the string could be safely double-quoted - // without requiring any escaping, then we double-quote the string. - // This is the case if the characters "`$\ do not appear in the string. - // e.g. see http://www.redhat.com/mirrors/LDP/LDP/abs/html/quotingvar.html - const char *c_str = str.c_str(); - if (strchr(c_str, '\'') && !strpbrk(c_str, "\"`$\\")) { - quote_char = '"'; - escape_str = "\\\""; // should never be accessed. - } - - char buf[2]; - buf[1] = '\0'; - - buf[0] = quote_char; - std::string ans = buf; - const char *c = str.c_str(); - for (; *c != '\0'; c++) { - if (*c == quote_char) { - ans += escape_str; - } else { - buf[0] = *c; - ans += buf; - } - } - buf[0] = quote_char; - ans += buf; - return ans; -} - -// static function -std::string ParseOptions::Escape(const std::string &str) { - return MustBeQuoted(str, kShellType) ? QuoteAndEscape(str, kShellType) : str; -} - -int ParseOptions::Read(int argc, const char *const argv[]) { - argc_ = argc; - argv_ = argv; - std::string key, value; - int i; - if (argc > 0) { - // set global "const char*" g_program_name (name of the program) - // so it can be printed out in error messages; - // it's useful because often the stderr of different programs will - // be mixed together in the same log file. -#ifdef _MSC_VER - const char *c = strrchr(argv[0], '\\'); -#else - const char *c = strrchr(argv[0], '/'); -#endif - SetProgramName(c == NULL ? 
argv[0] : c + 1); - } - // first pass: look for config parameter, look for priority - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // a lone "--" marks the end of named options - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (key.compare("config") == 0) { - ReadConfigFile(value); - } - if (key.compare("help") == 0) { - PrintUsage(); - exit(0); - } - } - } - bool double_dash_seen = false; - // second pass: add the command line options - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // A lone "--" marks the end of named options. - // Skip that option and break the processing of named options - i += 1; - double_dash_seen = true; - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << argv[i]; - } - } else { - break; - } - } - - // process remaining arguments as positional - for (; i < argc; i++) { - if ((std::strcmp(argv[i], "--") == 0) && !double_dash_seen) { - double_dash_seen = true; - } else { - positional_args_.push_back(std::string(argv[i])); - } - } - - // if the user did not suppress this with --print-args = false.... - if (print_args_) { - std::ostringstream strm; - for (int j = 0; j < argc; j++) strm << Escape(argv[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } - return i; -} - -void ParseOptions::PrintUsage(bool print_command_line) { - std::cerr << '\n' << usage_ << '\n'; - DocMapType::iterator it; - // first we print application-specific options - bool app_specific_header_printed = false; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == false) { // application-specific option - if (app_specific_header_printed == false) { // header was not yet printed - std::cerr << "Options:" << '\n'; - app_specific_header_printed = true; - } - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - if (app_specific_header_printed == true) { - std::cerr << '\n'; - } - - // then the standard options - std::cerr << "Standard options:" << '\n'; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == true) { // we have standard option - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - std::cerr << '\n'; - if (print_command_line) { - std::ostringstream strm; - strm << "Command line was: "; - for (int j = 0; j < argc_; j++) strm << Escape(argv_[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } -} - -void ParseOptions::PrintConfig(std::ostream &os) { - os << '\n' << "[[ Configuration of UI-Registered options ]]" << '\n'; - std::string key; - DocMapType::iterator it; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - key = it->first; - os << it->second.name_ << " = "; - if (bool_map_.end() != bool_map_.find(key)) { - os << (*bool_map_[key] ? 
"true" : "false"); - } else if (int_map_.end() != int_map_.find(key)) { - os << (*int_map_[key]); - } else if (uint_map_.end() != uint_map_.find(key)) { - os << (*uint_map_[key]); - } else if (float_map_.end() != float_map_.find(key)) { - os << (*float_map_[key]); - } else if (double_map_.end() != double_map_.find(key)) { - os << (*double_map_[key]); - } else if (string_map_.end() != string_map_.find(key)) { - os << "'" << *string_map_[key] << "'"; - } else { - KALDI_ERR << "PrintConfig: unrecognized option " << key << "[code error]"; - } - os << '\n'; - } - os << '\n'; -} - -void ParseOptions::ReadConfigFile(const std::string &filename) { - std::ifstream is(filename.c_str(), std::ifstream::in); - if (!is.good()) { - KALDI_ERR << "Cannot open config file: " << filename; - } - - std::string line, key, value; - int32 line_number = 0; - while (std::getline(is, line)) { - line_number++; - // trim out the comments - size_t pos; - if ((pos = line.find_first_of('#')) != std::string::npos) { - line.erase(pos); - } - // skip empty lines - Trim(&line); - if (line.length() == 0) continue; - - if (line.substr(0, 2) != "--") { - KALDI_ERR << "Reading config file " << filename << ": line " - << line_number << " does not look like a line " - << "from a Kaldi command-line program's config file: should " - << "be of the form --x=y. Note: config files intended to " - << "be sourced by shell scripts lack the '--'."; - } - - // parse option - bool has_equal_sign; - SplitLongArg(line, &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << line << " in config file " << filename; - } - } -} - -void ParseOptions::SplitLongArg(const std::string &in, std::string *key, - std::string *value, bool *has_equal_sign) { - KALDI_ASSERT(in.substr(0, 2) == "--"); // precondition. - size_t pos = in.find_first_of('=', 0); - if (pos == std::string::npos) { // we allow --option for bools - // defaults to empty. We handle this differently in different cases. - *key = in.substr(2, in.size() - 2); // 2 because starts with --. - *value = ""; - *has_equal_sign = false; - } else if (pos == 2) { // we also don't allow empty keys: --=value - PrintUsage(true); - KALDI_ERR << "Invalid option (no key): " << in; - } else { // normal case: --option=value - *key = in.substr(2, pos - 2); // 2 because starts with --. 
- *value = in.substr(pos + 1); - *has_equal_sign = true; - } -} - -void ParseOptions::NormalizeArgName(std::string *str) { - std::string out; - std::string::iterator it; - - for (it = str->begin(); it != str->end(); ++it) { - if (*it == '_') - out += '-'; // convert _ to - - else - out += std::tolower(*it); - } - *str = out; - - KALDI_ASSERT(str->length() > 0); -} - -bool ParseOptions::SetOption(const std::string &key, const std::string &value, - bool has_equal_sign) { - if (bool_map_.end() != bool_map_.find(key)) { - if (has_equal_sign && value == "") - KALDI_ERR << "Invalid option --" << key << "="; - *(bool_map_[key]) = ToBool(value); - } else if (int_map_.end() != int_map_.find(key)) { - *(int_map_[key]) = ToInt(value); - } else if (uint_map_.end() != uint_map_.find(key)) { - *(uint_map_[key]) = ToUint(value); - } else if (float_map_.end() != float_map_.find(key)) { - *(float_map_[key]) = ToFloat(value); - } else if (double_map_.end() != double_map_.find(key)) { - *(double_map_[key]) = ToDouble(value); - } else if (string_map_.end() != string_map_.find(key)) { - if (!has_equal_sign) - KALDI_ERR << "Invalid option --" << key << " (option format is --x=y)."; - *(string_map_[key]) = value; - } else { - return false; - } - return true; -} - -bool ParseOptions::ToBool(std::string str) { - std::transform(str.begin(), str.end(), str.begin(), ::tolower); - - // allow "" as a valid option for "true", so that --x is the same as --x=true - if ((str.compare("true") == 0) || (str.compare("t") == 0) || - (str.compare("1") == 0) || (str.compare("") == 0)) { - return true; - } - if ((str.compare("false") == 0) || (str.compare("f") == 0) || - (str.compare("0") == 0)) { - return false; - } - // if it is neither true nor false: - PrintUsage(true); - KALDI_ERR << "Invalid format for boolean argument [expected true or false]: " - << str; - return false; // never reached -} - -int32 ParseOptions::ToInt(const std::string &str) { - int32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -uint32 ParseOptions::ToUint(const std::string &str) { - uint32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -float ParseOptions::ToFloat(const std::string &str) { - float ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -double ParseOptions::ToDouble(const std::string &str) { - double ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -// instantiate templates -template void ParseOptions::RegisterTmpl(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, int32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, float *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, double *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterStandard(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - int32 *ptr, - const std::string &doc); 
-template void ParseOptions::RegisterStandard(const std::string &name, - uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - float *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - double *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterCommon(const std::string &name, bool *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, int32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, uint32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, float *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, double *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, - std::string *ptr, - const std::string &doc, - bool is_standard); - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/parse-options.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/parse-options.h deleted file mode 100644 index 93a060f4a411dfd63298a91bb313e0b66d337a75..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/parse-options.h +++ /dev/null @@ -1,265 +0,0 @@ -// util/parse-options.h - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Frantisek Skala; Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_PARSE_OPTIONS_H_ -#define KALDI_UTIL_PARSE_OPTIONS_H_ - -#include -#include -#include - -#include "base/kaldi-common.h" -#include "itf/options-itf.h" - -namespace kaldi { - -/// The class ParseOptions is for parsing command-line options; see -/// \ref parse_options for more documentation. -class ParseOptions : public OptionsItf { - public: - explicit ParseOptions(const char *usage) - : print_args_(true), - help_(false), - usage_(usage), - argc_(0), - argv_(NULL), - prefix_(""), - other_parser_(NULL) { -#if !defined(_MSC_VER) && \ - !defined(__CYGWIN__) // This is just a convenient place to set the stderr - // to line - setlinebuf(stderr); // buffering mode, since it's called at program start. -#endif // This helps ensure different programs' output is not mixed up. 
- RegisterStandard("config", &config_, - "Configuration file to read (this " - "option may be repeated)"); - RegisterStandard("print-args", &print_args_, - "Print the command line arguments (to stderr)"); - RegisterStandard("help", &help_, "Print out usage message"); - RegisterStandard("verbose", &g_kaldi_verbose_level, - "Verbose level (higher->more logging)"); - } - - /** - This is a constructor for the special case where some options are - registered with a prefix to avoid conflicts. The object thus created will - only be used temporarily to register an options class with the original - options parser (which is passed as the *other pointer) using the given - prefix. It should not be used for any other purpose, and the prefix must - not be the empty string. It seems to be the least bad way of implementing - options with prefixes at this point. - Example of usage is: - ParseOptions po; // original ParseOptions object - ParseOptions po_mfcc("mfcc", &po); // object with prefix. - MfccOptions mfcc_opts; - mfcc_opts.Register(&po_mfcc); - The options will now get registered as, e.g., --mfcc.frame-shift=10.0 - instead of just --frame-shift=10.0 - */ - ParseOptions(const std::string &prefix, OptionsItf *other); - - ~ParseOptions() {} - - // Methods from the interface - void Register(const std::string &name, bool *ptr, const std::string &doc); - void Register(const std::string &name, int32 *ptr, const std::string &doc); - void Register(const std::string &name, uint32 *ptr, const std::string &doc); - void Register(const std::string &name, float *ptr, const std::string &doc); - void Register(const std::string &name, double *ptr, const std::string &doc); - void Register(const std::string &name, std::string *ptr, - const std::string &doc); - - /// If called after registering an option and before calling - /// Read(), disables that option from being used. Will crash - /// at runtime if that option had not been registered. - void DisableOption(const std::string &name); - - /// This one is used for registering standard parameters of all the programs - template - void RegisterStandard(const std::string &name, T *ptr, - const std::string &doc); - - /** - Parses the command line options and fills the ParseOptions-registered - variables. This must be called after all the variables were registered!!! - - Initially the variables have implicit values, - then the config file values are set-up, - finally the command line values given. - Returns the first position in argv that was not used. - [typically not useful: use NumParams() and GetParam(). ] - */ - int Read(int argc, const char *const *argv); - - /// Prints the usage documentation [provided in the constructor]. - void PrintUsage(bool print_command_line = false); - /// Prints the actual configuration of all the registered variables - void PrintConfig(std::ostream &os); - - /// Reads the options values from a config file. Must be called after - /// registering all options. This is usually used internally after the - /// standard --config option is used, but it may also be called from a - /// program. - void ReadConfigFile(const std::string &filename); - - /// Number of positional parameters (c.f. argc-1). - int NumArgs() const; - - /// Returns one of the positional parameters; 1-based indexing for argc/argv - /// compatibility. Will crash if param is not >=1 and <=NumArgs(). - std::string GetArg(int param) const; - - std::string GetOptArg(int param) const { - return (param <= NumArgs() ? 
GetArg(param) : ""); - } - - /// The following function will return a possibly quoted and escaped - /// version of "str", according to the current shell. Currently - /// this is just hardwired to bash. It's useful for debug output. - static std::string Escape(const std::string &str); - - private: - /// Template to register various variable types, - /// used for program-specific parameters - template - void RegisterTmpl(const std::string &name, T *ptr, const std::string &doc); - - // Following functions do just the datatype-specific part of the job - /// Register boolean variable - void RegisterSpecific(const std::string &name, const std::string &idx, - bool *b, const std::string &doc, bool is_standard); - /// Register int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - int32 *i, const std::string &doc, bool is_standard); - /// Register unsinged int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - uint32 *u, const std::string &doc, bool is_standard); - /// Register float variable - void RegisterSpecific(const std::string &name, const std::string &idx, - float *f, const std::string &doc, bool is_standard); - /// Register double variable [useful as we change BaseFloat type]. - void RegisterSpecific(const std::string &name, const std::string &idx, - double *f, const std::string &doc, bool is_standard); - /// Register string variable - void RegisterSpecific(const std::string &name, const std::string &idx, - std::string *s, const std::string &doc, - bool is_standard); - - /// Does the actual job for both kinds of parameters - /// Does the common part of the job for all datatypes, - /// then calls RegisterSpecific - template - void RegisterCommon(const std::string &name, T *ptr, const std::string &doc, - bool is_standard); - - /// Set option with name "key" to "value"; will crash if can't do it. - /// "has_equal_sign" is used to allow --x for a boolean option x, - /// and --y=, for a string option y. - bool SetOption(const std::string &key, const std::string &value, - bool has_equal_sign); - - bool ToBool(std::string str); - int32 ToInt(const std::string &str); - uint32 ToUint(const std::string &str); - float ToFloat(const std::string &str); - double ToDouble(const std::string &str); - - // maps for option variables - std::map bool_map_; - std::map int_map_; - std::map uint_map_; - std::map float_map_; - std::map double_map_; - std::map string_map_; - - /** - Structure for options' documentation - */ - struct DocInfo { - DocInfo() {} - DocInfo(const std::string &name, const std::string &usemsg) - : name_(name), use_msg_(usemsg), is_standard_(false) {} - DocInfo(const std::string &name, const std::string &usemsg, - bool is_standard) - : name_(name), use_msg_(usemsg), is_standard_(is_standard) {} - - std::string name_; - std::string use_msg_; - bool is_standard_; - }; - typedef std::map DocMapType; - DocMapType doc_map_; ///< map for the documentation - - bool print_args_; ///< variable for the implicit --print-args parameter - bool help_; ///< variable for the implicit --help parameter - std::string config_; ///< variable for the implicit --config parameter - std::vector positional_args_; - const char *usage_; - int argc_; - const char *const *argv_; - - /// These members are not normally used. 
They are only used when the object - /// is constructed with a prefix - std::string prefix_; - OptionsItf *other_parser_; - - protected: - /// SplitLongArg parses an argument of the form --a=b, --a=, or --a, - /// and sets "has_equal_sign" to true if an equals-sign was parsed.. - /// this is needed in order to correctly allow --x for a boolean option - /// x, and --y= for a string option y, and to disallow --x= and --y. - void SplitLongArg(const std::string &in, std::string *key, std::string *value, - bool *has_equal_sign); - - void NormalizeArgName(std::string *str); -}; - -/// This template is provided for convenience in reading config classes from -/// files; this is not the standard way to read configuration options, but may -/// occasionally be needed. This function assumes the config has a function -/// "void Register(OptionsItf *opts)" which it can call to register the -/// ParseOptions object. -template -void ReadConfigFromFile(const std::string &config_filename, C *c) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << config_filename << "'"; - ParseOptions po(usage_str.str().c_str()); - c->Register(&po); - po.ReadConfigFile(config_filename); -} - -/// This variant of the template ReadConfigFromFile is for if you need to read -/// two config classes from the same file. -template -void ReadConfigsFromFile(const std::string &conf, C1 *c1, C2 *c2) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << conf << "'"; - ParseOptions po(usage_str.str().c_str()); - c1->Register(&po); - c2->Register(&po); - po.ReadConfigFile(conf); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_PARSE_OPTIONS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/simple-io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/simple-io-funcs.cc deleted file mode 100644 index 5ace601b6a2bb186dec78b0b25cb5a3227c48bc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/simple-io-funcs.cc +++ /dev/null @@ -1,80 +0,0 @@ -// util/simple-io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#include "util/simple-io-funcs.h" -#include "util/text-utils.h" - -namespace kaldi { - -bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. 
- if (!ko.Open(wxfilename, false, false)) return false; - for (size_t i = 0; i < list.size(); i++) ko.Stream() << list[i] << '\n'; - return ko.Close(); -} - -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - int32 i; - list->clear(); - while (!(is >> i).fail()) list->push_back(i); - is >> std::ws; - return is.eof(); // should be eof, or junk at end of file. -} - -bool WriteIntegerVectorVectorSimple( - const std::string &wxfilename, - const std::vector > &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. - if (!ko.Open(wxfilename, false, false)) return false; - std::ostream &os = ko.Stream(); - for (size_t i = 0; i < list.size(); i++) { - for (size_t j = 0; j < list[i].size(); j++) { - os << list[i][j]; - if (j + 1 < list[i].size()) os << ' '; - } - os << '\n'; - } - return ko.Close(); -} - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - list->clear(); - std::string line; - while (std::getline(is, line)) { - std::vector v; - if (!SplitStringToIntegers(line, " \t\r", true, &v)) { - list->clear(); - return false; - } - list->push_back(v); - } - return is.eof(); // if we're not at EOF, something weird happened. -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/simple-io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/simple-io-funcs.h deleted file mode 100644 index 1ead12790ba9bd6a44ccdff855918270191b8ebd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/simple-io-funcs.h +++ /dev/null @@ -1,61 +0,0 @@ -// util/simple-io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_SIMPLE_IO_FUNCS_H_ -#define KALDI_UTIL_SIMPLE_IO_FUNCS_H_ - -#include -#include -#include "util/kaldi-io.h" - -// This header contains some utilities for reading some common, simple text -// formats:integers in files, one per line, and integers in files, possibly -// multiple per line. these are not really fully native Kaldi formats; they are -// mostly for small files that might be generated by scripts, and can be read -// all at one time. for longer files of this type, we would probably use the -// Table code. - -namespace kaldi { - -/// WriteToList attempts to write this list of integers, one per line, -/// to the given file, in text format. -/// returns true if succeeded. 
-bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &v); - -/// ReadFromList attempts to read this list of integers, one per line, -/// from the given file, in text format. -/// returns true if succeeded. -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *v); - -// This is a file format like: -// 1 2 -// 3 -// -// 4 5 6 -// etc. -bool WriteIntegerVectorVectorSimple(const std::string &wxfilename, - const std::vector > &v); - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *v); - -} // end namespace kaldi. - -#endif // KALDI_UTIL_SIMPLE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/stl-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/stl-utils.h deleted file mode 100644 index 8a29cd582c77b3078277aa9713b8676032bbc5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/stl-utils.h +++ /dev/null @@ -1,310 +0,0 @@ -// util/stl-utils.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_STL_UTILS_H_ -#define KALDI_UTIL_STL_UTILS_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -using std::unordered_map; -using std::unordered_set; - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Sorts and uniq's (removes duplicates) from a vector. -template -inline void SortAndUniq(std::vector *vec) { - std::sort(vec->begin(), vec->end()); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Returns true if the vector is sorted. -template -inline bool IsSorted(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter < *iter) return false; - iter = next_iter; - } -} - -/// Returns true if the vector is sorted and contains each element -/// only once. -template -inline bool IsSortedAndUniq(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter <= *iter) return false; - iter = next_iter; - } -} - -/// Removes duplicate elements from a sorted list. -template -inline void Uniq(std::vector *vec) { // must be already sorted. 
- KALDI_PARANOID_ASSERT(IsSorted(*vec)); - KALDI_ASSERT(vec); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Copies the elements of a set to a vector. -template -void CopySetToVector(const std::set &s, std::vector *v) { - // copies members of s into v, in sorted order from lowest to highest - // (because the set was in sorted order). - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename std::set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -template -void CopySetToVector(const unordered_set &s, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename unordered_set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -/// Copies the (key, value) pairs in a map to a vector of pairs. -template -void CopyMapToVector(const std::map &m, - std::vector > *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector >::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = std::make_pair(miter->first, miter->second); - // do it like this because of const casting. - } -} - -/// Copies the keys in a map to a vector. -template -void CopyMapKeysToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->first; - } -} - -/// Copies the values in a map to a vector. -template -void CopyMapValuesToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->second; - } -} - -/// Copies the keys in a map to a set. -template -void CopyMapKeysToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) { - s->insert(s->end(), miter->first); - } -} - -/// Copies the values in a map to a set. -template -void CopyMapValuesToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) s->insert(s->end(), miter->second); -} - -/// Copies the contents of a vector to a set. -template -void CopyVectorToSet(const std::vector &v, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) s->insert(s->end(), *iter); - // s->end() is a hint in case v was sorted. will work regardless. -} - -/// Deletes any non-NULL pointers in the vector v, and sets -/// the corresponding entries of v to NULL -template -void DeletePointers(std::vector *v) { - KALDI_ASSERT(v != NULL); - typename std::vector::iterator iter = v->begin(), end = v->end(); - for (; iter != end; ++iter) { - if (*iter != NULL) { - delete *iter; - *iter = NULL; // set to NULL for extra safety. - } - } -} - -/// Returns true if the vector of pointers contains NULL pointers. 
-template <class A>
-bool ContainsNullPointers(const std::vector<A*> &v) {
-  typename std::vector<A*>::const_iterator iter = v.begin(), end = v.end();
-  for (; iter != end; ++iter)
-    if (*iter == static_cast<void*>(NULL)) return true;
-  return false;
-}
-
-/// Copies the contents a vector of one type to a vector
-/// of another type.
-template <typename A, typename B>
-void CopyVectorToVector(const std::vector<A> &vec_in, std::vector<B> *vec_out) {
-  KALDI_ASSERT(vec_out != NULL);
-  vec_out->resize(vec_in.size());
-  for (size_t i = 0; i < vec_in.size(); i++)
-    (*vec_out)[i] = static_cast<B>(vec_in[i]);
-}
-
-/// A hashing function-object for vectors.
-template <typename Int>
-struct VectorHasher {  // hashing function for vector<Int>.
-  size_t operator()(const std::vector<Int> &x) const noexcept {
-    size_t ans = 0;
-    typename std::vector<Int>::const_iterator iter = x.begin(), end = x.end();
-    for (; iter != end; ++iter) {
-      ans *= kPrime;
-      ans += *iter;
-    }
-    return ans;
-  }
-  VectorHasher() {  // Check we're instantiated with an integer type.
-    KALDI_ASSERT_IS_INTEGER_TYPE(Int);
-  }
-
- private:
-  static const int kPrime = 7853;
-};
-
-/// A hashing function-object for pairs of ints
-template <typename Int1, typename Int2 = Int1>
-struct PairHasher {  // hashing function for pair<int>
-  size_t operator()(const std::pair<Int1, Int2> &x) const noexcept {
-    // 7853 was chosen at random from a list of primes.
-    return x.first + x.second * 7853;
-  }
-  PairHasher() {  // Check we're instantiated with an integer type.
-    KALDI_ASSERT_IS_INTEGER_TYPE(Int1);
-    KALDI_ASSERT_IS_INTEGER_TYPE(Int2);
-  }
-};
-
-/// A hashing function object for strings.
-struct StringHasher {  // hashing function for std::string
-  size_t operator()(const std::string &str) const noexcept {
-    size_t ans = 0, len = str.length();
-    const char *c = str.c_str(), *end = c + len;
-    for (; c != end; c++) {
-      ans *= kPrime;
-      ans += *c;
-    }
-    return ans;
-  }
-
- private:
-  static const int kPrime = 7853;
-};
-
-/// Reverses the contents of a vector.
-template <typename T>
-inline void ReverseVector(std::vector<T> *vec) {
-  KALDI_ASSERT(vec != NULL);
-  size_t sz = vec->size();
-  for (size_t i = 0; i < sz / 2; i++) std::swap((*vec)[i], (*vec)[sz - 1 - i]);
-}
-
-/// Comparator object for pairs that compares only the first pair.
-template <class A, class B>
-struct CompareFirstMemberOfPair {
-  inline bool operator()(const std::pair<A, B> &p1, const std::pair<A, B> &p2) {
-    return p1.first < p2.first;
-  }
-};
-
-/// For a vector of pair<I, F> where I is an integer and F a floating-point or
-/// integer type, this function sorts a vector of type vector<pair<I, F> > on
-/// the I value and then merges elements with equal I values, summing these over
-/// the F component and then removing any F component with zero value. This
-/// is for where the vector of pairs represents a map from the integer to float
-/// component, with an "adding" type of semantics for combining the elements.
-template <typename I, typename F>
-inline void MergePairVectorSumming(std::vector<std::pair<I, F> > *vec) {
-  KALDI_ASSERT_IS_INTEGER_TYPE(I);
-  CompareFirstMemberOfPair<I, F> c;
-  std::sort(vec->begin(), vec->end(), c);  // sort on 1st element.
-  typename std::vector<std::pair<I, F> >::iterator out = vec->begin(),
-                                                   in = vec->begin(),
-                                                   end = vec->end();
-  // special case: while there is nothing to be changed, skip over
-  // initial input (avoids unnecessary copying).
-  while (in + 1 < end && in[0].first != in[1].first && in[0].second != 0.0) {
-    in++;
-    out++;
-  }
-  while (in < end) {
-    // We reach this point only at the first element of
-    // each stretch of identical .first elements.
-    *out = *in;
-    ++in;
-    while (in < end && in->first == out->first) {
-      out->second += in->second;  // this is the merge operation.
- ++in; - } - if (out->second != static_cast(0)) // Don't keep zero elements. - out++; - } - vec->erase(out, end); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_STL_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/text-utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/text-utils.cc deleted file mode 100644 index fd70889644f6b4e14793ddd4f5b0d71a66768699..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/text-utils.cc +++ /dev/null @@ -1,580 +0,0 @@ -// util/text-utils.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "util/text-utils.h" - -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out) { - KALDI_ASSERT(out != NULL); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - F f = 0; - if (!ConvertStringToReal(split[i], &f)) return false; - (*out)[i] = f; - } - return true; -} - -// Instantiate the template above for float and double. 
-template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); -template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out) { - std::string tmp_str; - for (size_t i = 0; i < vec_in.size(); i++) { - if (!omit_empty_strings || !vec_in[i].empty()) { - tmp_str.append(vec_in[i]); - if (i < vec_in.size() - 1) - if (!omit_empty_strings || !vec_in[i + 1].empty()) - tmp_str.append(delim); - } - } - str_out->swap(tmp_str); -} - -void Trim(std::string *str) { - const char *white_chars = " \t\n\r\f\v"; - - std::string::size_type pos = str->find_last_not_of(white_chars); - if (pos != std::string::npos) { - str->erase(pos + 1); - pos = str->find_first_not_of(white_chars); - if (pos != std::string::npos) str->erase(0, pos); - } else { - str->erase(str->begin(), str->end()); - } -} - -bool IsToken(const std::string &token) { - size_t l = token.length(); - if (l == 0) return false; - for (size_t i = 0; i < l; i++) { - unsigned char c = token[i]; - if ((!isprint(c) || isspace(c)) && (isascii(c) || c == (unsigned char)255)) - return false; - // The "&& (isascii(c) || c == 255)" was added so that we won't reject - // non-ASCII characters such as French characters with accents [except for - // 255 which is "nbsp", a form of space]. - } - return true; -} - -void SplitStringOnFirstSpace(const std::string &str, std::string *first, - std::string *rest) { - const char *white_chars = " \t\n\r\f\v"; - typedef std::string::size_type I; - const I npos = std::string::npos; - I first_nonwhite = str.find_first_not_of(white_chars); - if (first_nonwhite == npos) { - first->clear(); - rest->clear(); - return; - } - // next_white is first whitespace after first nonwhitespace. - I next_white = str.find_first_of(white_chars, first_nonwhite); - - if (next_white == npos) { // no more whitespace... - *first = std::string(str, first_nonwhite); - rest->clear(); - return; - } - I next_nonwhite = str.find_first_not_of(white_chars, next_white); - if (next_nonwhite == npos) { - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - rest->clear(); - return; - } - - I last_nonwhite = str.find_last_not_of(white_chars); - KALDI_ASSERT(last_nonwhite != npos); // or coding error. 
- - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - *rest = std::string(str, next_nonwhite, last_nonwhite + 1 - next_nonwhite); -} - -bool IsLine(const std::string &line) { - if (line.find('\n') != std::string::npos) return false; - if (line.empty()) return true; - if (isspace(*(line.begin()))) return false; - if (isspace(*(line.rbegin()))) return false; - std::string::const_iterator iter = line.begin(), end = line.end(); - for (; iter != end; iter++) - if (!isprint(*iter)) return false; - return true; -} - -template -class NumberIstream { - public: - explicit NumberIstream(std::istream &i) : in_(i) {} - - NumberIstream &operator>>(T &x) { - if (!in_.good()) return *this; - in_ >> x; - if (!in_.fail() && RemainderIsOnlySpaces()) return *this; - return ParseOnFail(&x); - } - - private: - std::istream &in_; - - bool RemainderIsOnlySpaces() { - if (in_.tellg() != std::istream::pos_type(-1)) { - std::string rem; - in_ >> rem; - - if (rem.find_first_not_of(' ') != std::string::npos) { - // there is not only spaces - return false; - } - } - - in_.clear(); - return true; - } - - NumberIstream &ParseOnFail(T *x) { - std::string str; - in_.clear(); - in_.seekg(0); - // If the stream is broken even before trying - // to read from it or if there are many tokens, - // it's pointless to try. - if (!(in_ >> str) || !RemainderIsOnlySpaces()) { - in_.setstate(std::ios_base::failbit); - return *this; - } - - std::map inf_nan_map; - // we'll keep just uppercase values. - inf_nan_map["INF"] = std::numeric_limits::infinity(); - inf_nan_map["+INF"] = std::numeric_limits::infinity(); - inf_nan_map["-INF"] = -std::numeric_limits::infinity(); - inf_nan_map["INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["+INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["-INFINITY"] = -std::numeric_limits::infinity(); - inf_nan_map["NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["+NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-NAN"] = -std::numeric_limits::quiet_NaN(); - // MSVC - inf_nan_map["1.#INF"] = std::numeric_limits::infinity(); - inf_nan_map["-1.#INF"] = -std::numeric_limits::infinity(); - inf_nan_map["1.#QNAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-1.#QNAN"] = -std::numeric_limits::quiet_NaN(); - - std::transform(str.begin(), str.end(), str.begin(), ::toupper); - - if (inf_nan_map.find(str) != inf_nan_map.end()) { - *x = inf_nan_map[str]; - } else { - in_.setstate(std::ios_base::failbit); - } - - return *this; - } -}; - -template -bool ConvertStringToReal(const std::string &str, T *out) { - std::istringstream iss(str); - - NumberIstream i(iss); - - i >> *out; - - if (iss.fail()) { - // Number conversion failed. - return false; - } - - return true; -} - -template bool ConvertStringToReal(const std::string &str, float *out); -template bool ConvertStringToReal(const std::string &str, double *out); - -/* - This function is a helper function of StringsApproxEqual. It should be - thought of as a recursive function-- it was designed that way-- but rather - than actually recursing (which would cause problems with stack overflow), we - just set the args and return to the start. - - The 'decimal_places_tolerance' argument is just passed in from outside, - see the documentation for StringsApproxEqual in text-utils.h to see an - explanation. The argument 'places_into_number' provides some information - about the strings 'a' and 'b' that precedes the current pointers. 
- For purposes of this comment, let's define the 'decimal' of a number - as the part that comes after the decimal point, e.g. in '99.123', - '123' would be the decimal. If 'places_into_number' is -1, it means - we're not currently inside some place like that (i.e. it's not the - case that we're pointing to the '1' or the '2' or the '3'). - If it's 0, then we'd be pointing to the first place after the decimal, - '1' in this case. Note if one of the numbers is shorter than the - other, like '99.123' versus '99.1234' and 'a' points to the first '3' - while 'b' points to the second '4', 'places_into_number' referes to the - shorter of the two, i.e. it would be 2 in this example. - - - */ -bool StringsApproxEqualInternal(const char *a, const char *b, - int32 decimal_places_tolerance, - int32 places_into_number) { -start: - char ca = *a, cb = *b; - if (ca == cb) { - if (ca == '\0') { - return true; - } else { - if (places_into_number >= 0) { - if (isdigit(ca)) { - places_into_number++; - } else { - places_into_number = -1; - } - } else { - if (ca == '.') { - places_into_number = 0; - } - } - a++; - b++; - goto start; - } - } else { - if (places_into_number >= decimal_places_tolerance && - (isdigit(ca) || isdigit(cb))) { - // we're potentially willing to accept this difference between the - // strings. - if (isdigit(ca)) a++; - if (isdigit(cb)) b++; - // we'll have advanced at least one of the two strings. - goto start; - } else if (places_into_number >= 0 && - ((ca == '0' && !isdigit(cb)) || (cb == '0' && !isdigit(ca)))) { - // this clause is designed to ensure that, for example, - // "0.1" would count the same as "0.100001". - if (ca == '0') - a++; - else - b++; - places_into_number++; - goto start; - } else { - return false; - } - } -} - -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_tolerance) { - return StringsApproxEqualInternal(a.c_str(), b.c_str(), - decimal_places_tolerance, -1); -} - -bool ConfigLine::ParseLine(const std::string &line) { - data_.clear(); - whole_line_ = line; - if (line.size() == 0) return false; // Empty line - size_t pos = 0, size = line.size(); - while (isspace(line[pos]) && pos < size) pos++; - if (pos == size) return false; // whitespace-only line - size_t first_token_start_pos = pos; - // first get first_token_. - while (!isspace(line[pos]) && pos < size) { - if (line[pos] == '=') { - // If the first block of non-whitespace looks like "foo-bar=...", - // then we ignore it: there is no initial token, and FirstToken() - // is empty. - pos = first_token_start_pos; - break; - } - pos++; - } - first_token_ = - std::string(line, first_token_start_pos, pos - first_token_start_pos); - // first_token_ is expected to be either empty or something like - // "component-node", which actually is a slightly more restrictive set of - // strings than IsValidName() checks for this is a convenient way to check it. - if (!first_token_.empty() && !IsValidName(first_token_)) return false; - - while (pos < size) { - if (isspace(line[pos])) { - pos++; - continue; - } - - // OK, at this point we know that we are pointing at nonspace. - size_t next_equals_sign = line.find_first_of("=", pos); - if (next_equals_sign == pos || next_equals_sign == std::string::npos) { - // we're looking for something like 'key=value'. If there is no equals - // sign, or it's not preceded by something, it's a parsing failure. - return false; - } - std::string key(line, pos, next_equals_sign - pos); - if (!IsValidName(key)) return false; - - // handle any quotes. 
we support key='blah blah' or key="foo bar". - // no escaping is supported. - if (line[next_equals_sign + 1] == '\'' || - line[next_equals_sign + 1] == '"') { - char my_quote = line[next_equals_sign + 1]; - size_t next_quote = line.find_first_of(my_quote, next_equals_sign + 2); - if (next_quote == std::string::npos) { // no matching quote was found. - KALDI_WARN << "No matching quote for " << my_quote - << " in config line '" << line << "'"; - return false; - } else { - std::string value(line, next_equals_sign + 2, - next_quote - next_equals_sign - 2); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = next_quote + 1; - continue; - } - } else { - // we want to be able to parse something like "... input=Offset(a, -1) - // foo=bar": in general, config values with spaces in them, even without - // quoting. - - size_t next_next_equals_sign = - line.find_first_of("=", next_equals_sign + 1), - terminating_space = size; - - if (next_next_equals_sign != - std::string::npos) { // found a later equals sign. - size_t preceding_space = - line.find_last_of(" \t", next_next_equals_sign); - if (preceding_space != std::string::npos && - preceding_space > next_equals_sign) - terminating_space = preceding_space; - } - while (isspace(line[terminating_space - 1]) && terminating_space > 0) - terminating_space--; - - std::string value(line, next_equals_sign + 1, - terminating_space - (next_equals_sign + 1)); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = terminating_space; - } - } - return true; -} - -bool ConfigLine::GetValue(const std::string &key, std::string *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - *value = (it->second).first; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, BaseFloat *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToReal((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, int32 *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToInteger((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, std::vector *value) { - KALDI_ASSERT(value != NULL); - value->clear(); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!SplitStringToIntegers((it->second).first, ":,", true, value)) { - // KALDI_WARN << "Bad option " << (it->second).first; - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, bool *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if ((it->second).first.size() == 0) return false; - switch (((it->second).first)[0]) { - case 'F': - case 'f': - *value = false; - break; - case 'T': - case 't': - *value = true; - break; - default: - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool 
ConfigLine::HasUnusedValues() const { - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) return true; - } - return false; -} - -std::string ConfigLine::UnusedValues() const { - std::string unused_str; - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) { - if (unused_str == "") - unused_str = it->first + "=" + (it->second).first; - else - unused_str += " " + it->first + "=" + (it->second).first; - } - } - return unused_str; -} - -// This is like ExpectToken but for two tokens, and it -// will either accept token1 and then token2, or just token2. -// This is useful in Read functions where the first token -// may already have been consumed. -// void ExpectOneOrTwoTokens(std::istream &is, bool binary, -// const std::string &token1, -// const std::string &token2) { -// KALDI_ASSERT(token1 != token2); -// std::string temp; -// ReadToken(is, binary, &temp); -// if (temp == token1) { -// ExpectToken(is, binary, token2); -// } else { -// if (temp != token2) { -// KALDI_ERR << "Expecting token " << token1 << " or " << token2 -// << " but got " << temp; -// } -// } -// } - -bool IsValidName(const std::string &name) { - if (name.size() == 0) return false; - for (size_t i = 0; i < name.size(); i++) { - if (i == 0 && !isalpha(name[i]) && name[i] != '_') return false; - if (!isalnum(name[i]) && name[i] != '_' && name[i] != '-' && name[i] != '.') - return false; - } - return true; -} - -void ReadConfigLines(std::istream &is, std::vector *lines) { - KALDI_ASSERT(lines != NULL); - std::string line; - while (std::getline(is, line)) { - if (line.size() == 0) continue; - size_t start = line.find_first_not_of(" \t"); - size_t end = line.find_first_of('#'); - if (start == std::string::npos || start == end) continue; - end = line.find_last_not_of(" \t", end - 1); - KALDI_ASSERT(end >= start); - lines->push_back(line.substr(start, end - start + 1)); - } -} - -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines) { - config_lines->resize(lines.size()); - for (size_t i = 0; i < lines.size(); i++) { - bool ret = (*config_lines)[i].ParseLine(lines[i]); - if (!ret) { - KALDI_ERR << "Error parsing config line: " << lines[i]; - } - } -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/text-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/text-utils.h deleted file mode 100644 index bc7763c4aff38214d97cbeda3b29c8717dd65318..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/kaldi/util/text-utils.h +++ /dev/null @@ -1,264 +0,0 @@ -// util/text-utils.h - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_TEXT_UTILS_H_ -#define KALDI_UTIL_TEXT_UTILS_H_ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Split a string using any of the single character delimiters. -/// If omit_empty_strings == true, the output will contain any -/// nonempty strings after splitting on any of the -/// characters in the delimiter. If omit_empty_strings == false, -/// the output will contain n+1 strings if there are n characters -/// in the set "delim" within the input string. In this case -/// the empty string is split to a single empty string. -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -/// Joins the elements of a vector of strings into a single string using -/// "delim" as the delimiter. If omit_empty_strings == true, any empty strings -/// in the vector are skipped. A vector of empty strings results in an empty -/// string on the output. -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out); - -/** - \brief Split a string (e.g. 1:2:3) into a vector of integers. - - \param [in] delim String containing a list of characters, any of which - is allowed as a delimiter. - \param [in] omit_empty_strings If true, empty strings between delimiters are - allowed and will not produce an output integer; if false, - instances of characters in 'delim' that are consecutive or - at the start or end of the string would be an error. - You'll normally want this to be true if 'delim' consists - of spaces, and false otherwise. - \param [out] out The output list of integers. -*/ -template -bool SplitStringToIntegers(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false [but - // should probably be true - // if "delim" is spaces]. - std::vector *out) { - KALDI_ASSERT(out != NULL); - KALDI_ASSERT_IS_INTEGER_TYPE(I); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - const char *this_str = split[i].c_str(); - char *end = NULL; - int64 j = 0; - j = KALDI_STRTOLL(this_str, &end); - if (end == this_str || *end != '\0') { - out->clear(); - return false; - } else { - I jI = static_cast(j); - if (static_cast(jI) != j) { - // output type cannot fit this integer. - out->clear(); - return false; - } - (*out)[i] = jI; - } - } - return true; -} - -// This is defined for F = float and double. -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out); - -/// Converts a string into an integer via strtoll and returns false if there was -/// any kind of problem (i.e. the string was not an integer or contained extra -/// non-whitespace junk, or the integer was too large to fit into the type it is -/// being converted into). Only sets *out if everything was OK and it returns -/// true. 
-template -bool ConvertStringToInteger(const std::string &str, Int *out) { - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - const char *this_str = str.c_str(); - char *end = NULL; - errno = 0; - int64 i = KALDI_STRTOLL(this_str, &end); - if (end != this_str) - while (isspace(*end)) end++; - if (end == this_str || *end != '\0' || errno != 0) return false; - Int iInt = static_cast(i); - if (static_cast(iInt) != i || - (i < 0 && !std::numeric_limits::is_signed)) { - return false; - } - *out = iInt; - return true; -} - -/// ConvertStringToReal converts a string into either float or double -/// and returns false if there was any kind of problem (i.e. the string -/// was not a floating point number or contained extra non-whitespace junk). -/// Be careful- this function will successfully read inf's or nan's. -template -bool ConvertStringToReal(const std::string &str, T *out); - -/// Removes the beginning and trailing whitespaces from a string -void Trim(std::string *str); - -/// Removes leading and trailing white space from the string, then splits on the -/// first section of whitespace found (if present), putting the part before the -/// whitespace in "first" and the rest in "rest". If there is no such space, -/// everything that remains after removing leading and trailing whitespace goes -/// in "first". -void SplitStringOnFirstSpace(const std::string &line, std::string *first, - std::string *rest); - -/// Returns true if "token" is nonempty, and all characters are -/// printable and whitespace-free. -bool IsToken(const std::string &token); - -/// Returns true if "line" is free of \n characters and unprintable -/// characters, and does not contain leading or trailing whitespace. -bool IsLine(const std::string &line); - -/** - This function returns true when two text strings are approximately equal, and - false when they are not. The definition of 'equal' is normal string - equality, except that two substrings like "0.31134" and "0.311341" would be - considered equal. 'decimal_places_tolerance' controls how many digits after - the '.' have to match up. - E.g. StringsApproxEqual("hello 0.23 there", "hello 0.24 there", 2) would - return false because there is a difference in the 2nd decimal, but with - an argument of 1 it would return true. - */ -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_check = 2); - -/** - This class is responsible for parsing input like - hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' - baz="a b c d='a b' e" and giving you access to the fields, in this case - - FirstToken() == "hi-there", and key->value pairs: - - xx->yyy, a->"b c", empty->"", f-oo->"Append(bar, sss)", ba_z->"123", - bing->"a b c", baz->"a b c d='a b' e" - - The first token is optional, if the line started with a key-value pair then - FirstValue() will be empty. - - Note: it can parse value fields with space inside them only if they are free - of the '=' character. If values are going to contain the '=' character, you - need to quote them with either single or double quotes. - - Key values may contain -_a-zA-Z0-9, but must begin with a-zA-Z_. - */ -class ConfigLine { - public: - // Tries to parse the line as a config-file line. Returns false - // if it could not for some reason, e.g. parsing failure. In most cases - // prints no warnings; the user should do this. Does not expect comments. - bool ParseLine(const std::string &line); - - // the GetValue functions are overloaded for various types. 
They return true - // if the key exists with value that can be converted to that type, and false - // otherwise. They also mark the key-value pair as having been read. It is - // not an error to read values twice. - bool GetValue(const std::string &key, std::string *value); - bool GetValue(const std::string &key, BaseFloat *value); - bool GetValue(const std::string &key, int32 *value); - // Values may be separated by ":" or by ",". - bool GetValue(const std::string &key, std::vector *value); - bool GetValue(const std::string &key, bool *value); - - bool HasUnusedValues() const; - /// returns e.g. foo=bar xxx=yyy if foo and xxx were not consumed by one - /// of the GetValue() functions. - std::string UnusedValues() const; - - const std::string &FirstToken() const { return first_token_; } - - const std::string WholeLine() { return whole_line_; } - // use default assignment operator and copy constructor. - private: - std::string whole_line_; - // the first token of the line, e.g. if line is - // foo-bar baz=bing - // then first_token_ would be "foo-bar". - std::string first_token_; - - // data_ maps from key to (value, is-this-value-consumed?). - std::map > data_; -}; - -/// This function is like ExpectToken but for two tokens, and it will either -/// accept token1 and then token2, or just token2. This is useful in Read -/// functions where the first token may already have been consumed. -void ExpectOneOrTwoTokens(std::istream &is, bool binary, - const std::string &token1, const std::string &token2); - -/** - This function reads in a config file and *appends* its contents to a vector - of lines; it is responsible for removing comments (anything after '#') and - stripping out any lines that contain only whitespace after comment removal. - */ -void ReadConfigLines(std::istream &is, std::vector *lines); - -/** - This function converts config-lines from a simple sequence of strings - as output by ReadConfigLines(), into a sequence of first-tokens and - name-value pairs. The general format is: - "command-type bar=baz xx=yyy" - etc., although there are subtleties as to what exactly is allowed, see - documentation for class ConfigLine for details. - This function will die if there was a parsing failure. - */ -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines); - -/// Returns true if 'name' would be a valid name for a component or node in a -/// nnet3Nnet. This is a nonempty string beginning with A-Za-z_, and containing -/// only -/// '-', '_', '.', A-Z, a-z, or 0-9. 
-bool IsValidName(const std::string &name); - -} // namespace kaldi - -#endif // KALDI_UTIL_TEXT_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/CPPLINT.cfg b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/CPPLINT.cfg deleted file mode 100644 index 51ff339c18435a6c3a3be03131080d7b8ab8de86..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/CPPLINT.cfg +++ /dev/null @@ -1 +0,0 @@ -exclude_files=.* diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/CMakeLists.txt deleted file mode 100644 index 04051ef5ae46c04a40c1ffccc98c37fa594ad13e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ - -#-DHAVE_CONFIG_H -I./../include -fno-exceptions -funsigned-char -std=c++11 -MT symbol-table.lo -MD -MP -MF .deps/symbol-table.Tpo -c symbol-table.cc -fno-common -DPIC -o .libs/symbol-table.o - -include_directories(./include/) -install(DIRECTORY include/ DESTINATION include/ - FILES_MATCHING PATTERN "*.h") - -add_subdirectory(lib) - -if(HAVE_SCRIPT) - add_subdirectory(script) -endif(HAVE_SCRIPT) - -if(HAVE_BIN) - add_subdirectory(bin) -endif(HAVE_BIN) - -add_subdirectory(extensions) - -if(BUILD_TESTING) - enable_testing() - add_subdirectory(test) -endif(BUILD_TESTING) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/extensions/special/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/extensions/special/CMakeLists.txt deleted file mode 100644 index 9c71b750a72ffe3c2dafde657273361c3dbae409..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/extensions/special/CMakeLists.txt +++ /dev/null @@ -1,67 +0,0 @@ -file(GLOB HEADER_FILES ../../include/fst/extensions/special/*.h) -message(STATUS "${HEADER_FILES}") - -if(HAVE_BIN) - add_executable(fstspecial-bin - ../../bin/fstconvert.cc - ../../bin/fstconvert-main.cc - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ) - - set_target_properties(fstspecial-bin PROPERTIES - FOLDER special/bin - OUTPUT_NAME fstspecial - ) - - target_link_libraries(fstspecial-bin - fstscript - fst - ${CMAKE_DL_LIBS} - ) -endif(HAVE_BIN) - - -add_library(fstspecial - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ${HEADER_FILES} -) - -set_target_properties(fstspecial PROPERTIES - SOVERSION "${SOVERSION}" - FOLDER special -) -target_link_libraries(fstspecial - fst -) - -set(FST_SPECIAL_INSTALL_TARGETS fstspecial) -if(HAVE_BIN) - list(APPEND FST_SPECIAL_INSTALL_TARGETS fstspecial-bin) -endif() - -install(TARGETS ${FST_SPECIAL_INSTALL_TARGETS} - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib -) - -function (add_module _name) - add_library(${ARGV}) - if (TARGET ${_name}) - target_link_libraries(${_name} fst) - set_target_properties(${_name} - PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true - FOLDER special/modules - ) - endif() - - install(TARGETS ${_name} LIBRARY DESTINATION lib/fst) -endfunction() - -add_module(phi-fst MODULE 
phi-fst.cc) -add_module(rho-fst MODULE rho-fst.cc) -add_module(sigma-fst MODULE sigma-fst.cc) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/include/fst/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/include/fst/flags.h deleted file mode 100644 index b5ec8ff7416774a0612ae0fe7e008a630b289dd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/include/fst/flags.h +++ /dev/null @@ -1,228 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style flag handling declarations and inline definitions. - -#ifndef FST_LIB_FLAGS_H_ -#define FST_LIB_FLAGS_H_ - -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include "gflags/gflags.h" -#include "glog/logging.h" - -using std::string; - -// FLAGS USAGE: -// -// Definition example: -// -// DEFINE_int32(length, 0, "length"); -// -// This defines variable FLAGS_length, initialized to 0. -// -// Declaration example: -// -// DECLARE_int32(length); -// -// SET_FLAGS() can be used to set flags from the command line -// using, for example, '--length=2'. -// -// ShowUsage() can be used to print out command and flag usage. - -// #define DECLARE_bool(name) extern bool FLAGS_ ## name -// #define DECLARE_string(name) extern string FLAGS_ ## name -// #define DECLARE_int32(name) extern int32 FLAGS_ ## name -// #define DECLARE_int64(name) extern int64 FLAGS_ ## name -// #define DECLARE_double(name) extern double FLAGS_ ## name - -template -struct FlagDescription { - FlagDescription(T *addr, const char *doc, const char *type, - const char *file, const T val) - : address(addr), - doc_string(doc), - type_name(type), - file_name(file), - default_value(val) {} - - T *address; - const char *doc_string; - const char *type_name; - const char *file_name; - const T default_value; -}; - -template -class FlagRegister { - public: - static FlagRegister *GetRegister() { - static auto reg = new FlagRegister; - return reg; - } - - const FlagDescription &GetFlagDescription(const string &name) const { - fst::MutexLock l(&flag_lock_); - auto it = flag_table_.find(name); - return it != flag_table_.end() ? 
it->second : 0; - } - - void SetDescription(const string &name, - const FlagDescription &desc) { - fst::MutexLock l(&flag_lock_); - flag_table_.insert(make_pair(name, desc)); - } - - bool SetFlag(const string &val, bool *address) const { - if (val == "true" || val == "1" || val.empty()) { - *address = true; - return true; - } else if (val == "false" || val == "0") { - *address = false; - return true; - } - else { - return false; - } - } - - bool SetFlag(const string &val, string *address) const { - *address = val; - return true; - } - - bool SetFlag(const string &val, int32 *address) const { - char *p = 0; - *address = strtol(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, int64 *address) const { - char *p = 0; - *address = strtoll(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, double *address) const { - char *p = 0; - *address = strtod(val.c_str(), &p); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &arg, const string &val) const { - for (typename std::map< string, FlagDescription >::const_iterator it = - flag_table_.begin(); - it != flag_table_.end(); - ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - if (arg == name) - return SetFlag(val, desc.address); - } - return false; - } - - void GetUsage(std::set> *usage_set) const { - for (auto it = flag_table_.begin(); it != flag_table_.end(); ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - string usage = " --" + name; - usage += ": type = "; - usage += desc.type_name; - usage += ", default = "; - usage += GetDefault(desc.default_value) + "\n "; - usage += desc.doc_string; - usage_set->insert(make_pair(desc.file_name, usage)); - } - } - - private: - string GetDefault(bool default_value) const { - return default_value ? "true" : "false"; - } - - string GetDefault(const string &default_value) const { - return "\"" + default_value + "\""; - } - - template - string GetDefault(const V &default_value) const { - std::ostringstream strm; - strm << default_value; - return strm.str(); - } - - mutable fst::Mutex flag_lock_; // Multithreading lock. - std::map> flag_table_; -}; - -template -class FlagRegisterer { - public: - FlagRegisterer(const string &name, const FlagDescription &desc) { - auto registr = FlagRegister::GetRegister(); - registr->SetDescription(name, desc); - } - - private: - FlagRegisterer(const FlagRegisterer &) = delete; - FlagRegisterer &operator=(const FlagRegisterer &) = delete; -}; - - -#define DEFINE_VAR(type, name, value, doc) \ - type FLAGS_ ## name = value; \ - static FlagRegisterer \ - name ## _flags_registerer(#name, FlagDescription(&FLAGS_ ## name, \ - doc, \ - #type, \ - __FILE__, \ - value)) - -// #define DEFINE_bool(name, value, doc) DEFINE_VAR(bool, name, value, doc) -// #define DEFINE_string(name, value, doc) \ -// DEFINE_VAR(string, name, value, doc) -// #define DEFINE_int32(name, value, doc) DEFINE_VAR(int32, name, value, doc) -// #define DEFINE_int64(name, value, doc) DEFINE_VAR(int64, name, value, doc) -// #define DEFINE_double(name, value, doc) DEFINE_VAR(double, name, value, doc) - - -// Temporary directory. 
-DECLARE_string(tmpdir); - -void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags, - const char *src = ""); - -#define SET_FLAGS(usage, argc, argv, rmflags) \ -gflags::ParseCommandLineFlags(argc, argv, true) -// SetFlags(usage, argc, argv, rmflags, __FILE__) - -// Deprecated; for backward compatibility. -inline void InitFst(const char *usage, int *argc, char ***argv, bool rmflags) { - return SetFlags(usage, argc, argv, rmflags); -} - -void ShowUsage(bool long_usage = true); - -#endif // FST_LIB_FLAGS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/include/fst/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/include/fst/log.h deleted file mode 100644 index bf041c58ebfab73d03bb14adf28c7c7916a2217d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/patch/openfst/src/include/fst/log.h +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style logging declarations and inline definitions. 
- -#ifndef FST_LIB_LOG_H_ -#define FST_LIB_LOG_H_ - -#include -#include -#include - -#include -#include - -using std::string; - -DECLARE_int32(v); - -class LogMessage { - public: - LogMessage(const string &type) : fatal_(type == "FATAL") { - std::cerr << type << ": "; - } - ~LogMessage() { - std::cerr << std::endl; - if(fatal_) - exit(1); - } - std::ostream &stream() { return std::cerr; } - - private: - bool fatal_; -}; - -// #define LOG(type) LogMessage(#type).stream() -// #define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO) - -// Checks -inline void FstCheck(bool x, const char* expr, - const char *file, int line) { - if (!x) { - LOG(FATAL) << "Check failed: \"" << expr - << "\" file: " << file - << " line: " << line; - } -} - -// #define CHECK(x) FstCheck(static_cast(x), #x, __FILE__, __LINE__) -// #define CHECK_EQ(x, y) CHECK((x) == (y)) -// #define CHECK_LT(x, y) CHECK((x) < (y)) -// #define CHECK_GT(x, y) CHECK((x) > (y)) -// #define CHECK_LE(x, y) CHECK((x) <= (y)) -// #define CHECK_GE(x, y) CHECK((x) >= (y)) -// #define CHECK_NE(x, y) CHECK((x) != (y)) - -// Debug checks -// #define DCHECK(x) assert(x) -// #define DCHECK_EQ(x, y) DCHECK((x) == (y)) -// #define DCHECK_LT(x, y) DCHECK((x) < (y)) -// #define DCHECK_GT(x, y) DCHECK((x) > (y)) -// #define DCHECK_LE(x, y) DCHECK((x) <= (y)) -// #define DCHECK_GE(x, y) DCHECK((x) >= (y)) -// #define DCHECK_NE(x, y) DCHECK((x) != (y)) - - -// Ports -#define ATTRIBUTE_DEPRECATED __attribute__((deprecated)) - -#endif // FST_LIB_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/post_processor/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/post_processor/CMakeLists.txt deleted file mode 100644 index 6113bbc26eb8fe35e4e17ffd1cab382f0fb0f1f8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/post_processor/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_library(post_processor STATIC - post_processor.cc -) -target_link_libraries(post_processor PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/post_processor/post_processor.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/post_processor/post_processor.cc deleted file mode 100644 index 315f62d34cbc441ecbaf7c07667eb35ee61c2c8d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/post_processor/post_processor.cc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#include "post_processor/post_processor.h" - -#include -#include - -#include "utils/string.h" - -namespace wenet { - -std::string PostProcessor::ProcessSpace(const std::string& str) { - std::string result = str; - // 1. 
remove ' ' if needed - // only spaces between mandarin words need to be removed, please note that - // if str contains '_', we assume that the decoding type must be - // `CtcPrefixBeamSearch` and this branch will do nothing since str must be - // obtained via "".join() (in function `AsrDecoder::UpdateResult()`) - if (opts_.language_type == kMandarinEnglish && !str.empty()) { - result.clear(); - // split str by ' ' - std::vector words; - std::stringstream ss(str); - std::string tmp; - while (ss >> tmp) { - words.push_back(tmp); - } - // check english word - bool is_englishword_prev = false; - bool is_englishword_now = false; - for (std::string& w : words) { - is_englishword_now = CheckEnglishWord(w); - if (is_englishword_prev && is_englishword_now) { - result += (' ' + w); - } else { - result += (w); - } - is_englishword_prev = is_englishword_now; - } - } - // 2. replace '_' with ' ' - // this should be done for all cases (both kMandarinEnglish and kIndoEuropean) - result = ProcessBlank(result, opts_.lowercase); - return result; -} - -std::string PostProcessor::Process(const std::string& str, bool finish) { - std::string result; - result = ProcessSpace(str); - // TODO(xcsong): do itn/punctuation if finish == true - return result; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/post_processor/post_processor.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/post_processor/post_processor.h deleted file mode 100644 index 54597845ebc88ad22e1244d2e693e2088cff6d21..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/post_processor/post_processor.h +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#ifndef POST_PROCESSOR_POST_PROCESSOR_H_ -#define POST_PROCESSOR_POST_PROCESSOR_H_ - -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -enum LanguageType { - // spaces between **mandarin words** should be removed. - // cases of processing spaces with mandarin-only, english-only - // and mandarin-english code-switch can be found in post_processor_test.cc - kMandarinEnglish = 0x00, - // spaces should be kept for most of the - // Indo-European languages (i.e., deutsch or english-deutsch code-switch). - // cases of those languages can be found in post_processor_test.cc - kIndoEuropean = 0x01 -}; - -struct PostProcessOptions { - // space options - // The decoded result may contain spaces (' ' or '_'), - // we will process those spaces according to language_type. 
More details can - // be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - LanguageType language_type = kMandarinEnglish; - // whether lowercase letters are required - bool lowercase = true; -}; - -// TODO(xcsong): add itn/punctuation related resource -struct PostProcessResource {}; - -// Post Processor -class PostProcessor { - public: - explicit PostProcessor(PostProcessOptions&& opts) : opts_(std::move(opts)) {} - explicit PostProcessor(const PostProcessOptions& opts) : opts_(opts) {} - // call other functions to do post processing - std::string Process(const std::string& str, bool finish); - // process spaces according to configurations - std::string ProcessSpace(const std::string& str); - // TODO(xcsong): add itn/punctuation - // void InverseTN(const std::string& str); - // void Punctuate(const std::string& str); - - private: - const PostProcessOptions opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(PostProcessor); -}; - -} // namespace wenet - -#endif // POST_PROCESSOR_POST_PROCESSOR_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/CMakeLists.txt deleted file mode 100644 index 686362688c050d48224ca0a01e0d24b03d94758a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_library(utils STATIC - string.cc - utils.cc -) - -if(NOT ANDROID) - if(MSVC) - target_link_libraries(utils PUBLIC fst) - else() - target_link_libraries(utils PUBLIC fst dl) - endif() -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/blocking_queue.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/blocking_queue.h deleted file mode 100644 index 9bf0127d9298fbfae2eeebb9431c680fc5dd7647..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/blocking_queue.h +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef UTILS_BLOCKING_QUEUE_H_ -#define UTILS_BLOCKING_QUEUE_H_ - -#include -#include -#include -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -template -class BlockingQueue { - public: - explicit BlockingQueue(size_t capacity = std::numeric_limits::max()) - : capacity_(capacity) {} - - void Push(const T& value) { - { - std::unique_lock lock(mutex_); - while (queue_.size() >= capacity_) { - not_full_condition_.wait(lock); - } - queue_.push(value); - } - not_empty_condition_.notify_one(); - } - - void Push(T&& value) { - { - std::unique_lock lock(mutex_); - while (queue_.size() >= capacity_) { - not_full_condition_.wait(lock); - } - queue_.push(std::move(value)); - } - not_empty_condition_.notify_one(); - } - - void Push(const std::vector& values) { - { - std::unique_lock lock(mutex_); - for (auto& value : values) { - while (queue_.size() >= capacity_) { - not_empty_condition_.notify_one(); - not_full_condition_.wait(lock); - } - queue_.push(value); - } - } - not_empty_condition_.notify_one(); - } - - void Push(std::vector&& values) { - std::unique_lock lock(mutex_); - for (auto& value : values) { - while (queue_.size() >= capacity_) { - not_empty_condition_.notify_one(); - not_full_condition_.wait(lock); - } - queue_.push(std::move(value)); - } - not_empty_condition_.notify_one(); - } - - T Pop() { - std::unique_lock lock(mutex_); - while (queue_.empty()) { - not_empty_condition_.wait(lock); - } - T t(std::move(queue_.front())); - queue_.pop(); - not_full_condition_.notify_one(); - return t; - } - - // num can be greater than capacity,but it needs to be used with care - std::vector Pop(size_t num) { - std::unique_lock lock(mutex_); - std::vector block_data; - while (block_data.size() < num) { - while (queue_.empty()) { - not_full_condition_.notify_one(); - not_empty_condition_.wait(lock); - } - block_data.push_back(std::move(queue_.front())); - queue_.pop(); - } - not_full_condition_.notify_one(); - return block_data; - } - - bool Empty() const { - std::lock_guard lock(mutex_); - return queue_.empty(); - } - - size_t Size() const { - std::lock_guard lock(mutex_); - return queue_.size(); - } - - void Clear() { - while (!Empty()) { - Pop(); - } - } - - private: - size_t capacity_; - mutable std::mutex mutex_; - std::condition_variable not_full_condition_; - std::condition_variable not_empty_condition_; - std::queue queue_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(BlockingQueue); -}; - -} // namespace wenet - -#endif // UTILS_BLOCKING_QUEUE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/file.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/file.h deleted file mode 100644 index 83ad9c8c52fecd334b3549285bf39cd4f59b9f2b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/file.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_FILE_H_ -#define UTILS_FILE_H_ - -#include -#include - -namespace wenet { - -inline bool FileExists(const std::string& path) { - std::ifstream f(path.c_str()); - return f.good(); -} - -} // namespace wenet - -#endif // UTILS_FILE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/flags.h deleted file mode 100644 index 3432aa78847322edec8d6d2aec59ed7ca5352fcd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/flags.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_FLAGS_H_ -#define UTILS_FLAGS_H_ - -// Because openfst is a dynamic library compiled with gflags/glog, we must use -// the gflags/glog from openfst to avoid them linked both statically and -// dynamically into the executable. -#include "fst/flags.h" - -#endif // UTILS_FLAGS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/json.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/json.h deleted file mode 100644 index bf8d94a3e42504139b10daa39b8f8e7a8b2d93cc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/json.h +++ /dev/null @@ -1,754 +0,0 @@ -// Copyright (c) From https://github.com/nbsdx/SimpleJSON -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef UTILS_JSON_H_ -#define UTILS_JSON_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace json { - -using std::deque; -using std::enable_if; -using std::initializer_list; -using std::is_convertible; -using std::is_floating_point; -using std::is_integral; -using std::is_same; -using std::map; -using std::string; - -namespace { // NOLINT -string json_escape(const string& str) { - string output; - for (unsigned i = 0; i < str.length(); ++i) switch (str[i]) { - case '\"': - output += "\\\""; - break; - case '\\': - output += "\\\\"; - break; - case '\b': - output += "\\b"; - break; - case '\f': - output += "\\f"; - break; - case '\n': - output += "\\n"; - break; - case '\r': - output += "\\r"; - break; - case '\t': - output += "\\t"; - break; - default: - output += str[i]; - break; - } - return std::move(output); -} -} // namespace - -class JSON { - union BackingData { - BackingData(double d) : Float(d) {} - BackingData(int l) : Int(l) {} - BackingData(bool b) : Bool(b) {} - BackingData(string s) : String(new string(s)) {} - BackingData() : Int(0) {} - - deque* List; - map* Map; - string* String; - double Float; - int Int; - bool Bool; - } Internal; - - public: - enum class Class { Null, Object, Array, String, Floating, Integral, Boolean }; - - template - class JSONWrapper { - Container* object; - - public: - explicit JSONWrapper(Container* val) : object(val) {} - explicit JSONWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::iterator begin() { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::iterator end() { - return object ? object->end() : typename Container::iterator(); - } - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::const_iterator end() const { - return object ? object->end() : typename Container::iterator(); - } - }; - - template - class JSONConstWrapper { - const Container* object; - - public: - explicit JSONConstWrapper(const Container* val) : object(val) {} - explicit JSONConstWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::const_iterator(); - } - typename Container::const_iterator end() const { - return object ? 
object->end() : typename Container::const_iterator(); - } - }; - - JSON() : Internal(), Type(Class::Null) {} - - explicit JSON(initializer_list list) : JSON() { - SetType(Class::Object); - for (auto i = list.begin(), e = list.end(); i != e; ++i, ++i) - operator[](i->ToString()) = *std::next(i); - } - - JSON(JSON&& other) : Internal(other.Internal), Type(other.Type) { - other.Type = Class::Null; - other.Internal.Map = nullptr; - } - - JSON& operator=(JSON&& other) { - ClearInternal(); - Internal = other.Internal; - Type = other.Type; - other.Internal.Map = nullptr; - other.Type = Class::Null; - return *this; - } - - JSON(const JSON& other) { - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - } - - JSON& operator=(const JSON& other) { - ClearInternal(); - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - return *this; - } - - ~JSON() { - switch (Type) { - case Class::Array: - delete Internal.List; - break; - case Class::Object: - delete Internal.Map; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - template - explicit JSON(T b, typename enable_if::value>::type* = 0) - : Internal(b), Type(Class::Boolean) {} - - template - explicit JSON(T i, typename enable_if::value && - !is_same::value>::type* = 0) - : Internal(static_cast(i)), Type(Class::Integral) {} - - template - explicit JSON(T f, typename enable_if::value>::type* = 0) - : Internal(static_cast(f)), Type(Class::Floating) {} - - template - explicit JSON(T s, - typename enable_if::value>::type* = 0) - : Internal(string(s)), Type(Class::String) {} - - explicit JSON(std::nullptr_t) : Internal(), Type(Class::Null) {} - - static JSON Make(Class type) { - JSON ret; - ret.SetType(type); - return ret; - } - - static JSON Load(const string&); - - template - void append(T arg) { - SetType(Class::Array); - Internal.List->emplace_back(arg); - } - - template - void append(T arg, U... 
args) { - append(arg); - append(args...); - } - - template - typename enable_if::value, JSON&>::type operator=(T b) { - SetType(Class::Boolean); - Internal.Bool = b; - return *this; - } - - template - typename enable_if::value && !is_same::value, - JSON&>::type - operator=(T i) { - SetType(Class::Integral); - Internal.Int = i; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=(T f) { - SetType(Class::Floating); - Internal.Float = f; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=( - T s) { - SetType(Class::String); - *Internal.String = string(s); - return *this; - } - - JSON& operator[](const string& key) { - SetType(Class::Object); - return Internal.Map->operator[](key); - } - - JSON& operator[](unsigned index) { - SetType(Class::Array); - if (index >= Internal.List->size()) Internal.List->resize(index + 1); - return Internal.List->operator[](index); - } - - JSON& at(const string& key) { return operator[](key); } - - const JSON& at(const string& key) const { return Internal.Map->at(key); } - - JSON& at(unsigned index) { return operator[](index); } - - const JSON& at(unsigned index) const { return Internal.List->at(index); } - - int length() const { - if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - bool hasKey(const string& key) const { - if (Type == Class::Object) - return Internal.Map->find(key) != Internal.Map->end(); - return false; - } - - int size() const { - if (Type == Class::Object) - return Internal.Map->size(); - else if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - Class JSONType() const { return Type; } - - /// Functions for getting primitives from the JSON object. - bool IsNull() const { return Type == Class::Null; } - - string ToString() const { - bool b; - return std::move(ToString(&b)); - } - string ToString(bool* ok) const { - *ok = (Type == Class::String); - return *ok ? std::move(json_escape(*Internal.String)) : string(""); - } - - double ToFloat() const { - bool b; - return ToFloat(&b); - } - double ToFloat(bool* ok) const { - *ok = (Type == Class::Floating); - return *ok ? Internal.Float : 0.0; - } - - int ToInt() const { - bool b; - return ToInt(&b); - } - int ToInt(bool* ok) const { - *ok = (Type == Class::Integral); - return *ok ? Internal.Int : 0; - } - - bool ToBool() const { - bool b; - return ToBool(&b); - } - bool ToBool(bool* ok) const { - *ok = (Type == Class::Boolean); - return *ok ? 
Internal.Bool : false; - } - - JSONWrapper> ObjectRange() { - if (Type == Class::Object) - return JSONWrapper>(Internal.Map); - return JSONWrapper>(nullptr); - } - - JSONWrapper> ArrayRange() { - if (Type == Class::Array) return JSONWrapper>(Internal.List); - return JSONWrapper>(nullptr); - } - - JSONConstWrapper> ObjectRange() const { - if (Type == Class::Object) - return JSONConstWrapper>(Internal.Map); - return JSONConstWrapper>(nullptr); - } - - JSONConstWrapper> ArrayRange() const { - if (Type == Class::Array) - return JSONConstWrapper>(Internal.List); - return JSONConstWrapper>(nullptr); - } - - string dump(int depth = 1, string tab = " ") const { - string pad = ""; - for (int i = 0; i < depth; ++i, pad += tab) { - } - - switch (Type) { - case Class::Null: - return "null"; - case Class::Object: { - string s = "{\n"; - bool skip = true; - for (auto& p : *Internal.Map) { - if (!skip) s += ",\n"; - s += (pad + "\"" + p.first + "\" : " + p.second.dump(depth + 1, tab)); - skip = false; - } - s += ("\n" + pad.erase(0, 2) + "}"); - return s; - } - case Class::Array: { - string s = "["; - bool skip = true; - for (auto& p : *Internal.List) { - if (!skip) s += ", "; - s += p.dump(depth + 1, tab); - skip = false; - } - s += "]"; - return s; - } - case Class::String: - return "\"" + json_escape(*Internal.String) + "\""; - case Class::Floating: - return std::to_string(Internal.Float); - case Class::Integral: - return std::to_string(Internal.Int); - case Class::Boolean: - return Internal.Bool ? "true" : "false"; - default: - return ""; - } - return ""; - } - - friend std::ostream& operator<<(std::ostream&, const JSON&); - - private: - void SetType(Class type) { - if (type == Type) return; - - ClearInternal(); - - switch (type) { - case Class::Null: - Internal.Map = nullptr; - break; - case Class::Object: - Internal.Map = new map(); - break; - case Class::Array: - Internal.List = new deque(); - break; - case Class::String: - Internal.String = new string(); - break; - case Class::Floating: - Internal.Float = 0.0; - break; - case Class::Integral: - Internal.Int = 0; - break; - case Class::Boolean: - Internal.Bool = false; - break; - } - - Type = type; - } - - private: - /* beware: only call if YOU know that Internal is allocated. No checks - performed here. This function should be called in a constructed JSON just - before you are going to overwrite Internal... -*/ - void ClearInternal() { - switch (Type) { - case Class::Object: - delete Internal.Map; - break; - case Class::Array: - delete Internal.List; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - private: - Class Type = Class::Null; -}; - -JSON Array() { return std::move(JSON::Make(JSON::Class::Array)); } - -template -JSON Array(T... 
args) { - JSON arr = JSON::Make(JSON::Class::Array); - arr.append(args...); - return std::move(arr); -} - -JSON Object() { return std::move(JSON::Make(JSON::Class::Object)); } - -std::ostream& operator<<(std::ostream& os, const JSON& json) { - os << json.dump(); - return os; -} - -namespace { // NOLINT -JSON parse_next(const string&, size_t&); - -void consume_ws(const string& str, size_t& offset) { // NOLINT - while (isspace(str[offset])) ++offset; -} - -JSON parse_object(const string& str, size_t& offset) { // NOLINT - JSON Object = JSON::Make(JSON::Class::Object); - - ++offset; - consume_ws(str, offset); - if (str[offset] == '}') { - ++offset; - return std::move(Object); - } - - while (true) { - JSON Key = parse_next(str, offset); - consume_ws(str, offset); - if (str[offset] != ':') { - std::cerr << "Error: Object: Expected colon, found '" << str[offset] - << "'\n"; - break; - } - consume_ws(str, ++offset); - JSON Value = parse_next(str, offset); - Object[Key.ToString()] = Value; - - consume_ws(str, offset); - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == '}') { - ++offset; - break; - } else { - std::cerr << "ERROR: Object: Expected comma, found '" << str[offset] - << "'\n"; - break; - } - } - - return std::move(Object); -} - -JSON parse_array(const string& str, size_t& offset) { // NOLINT - JSON Array = JSON::Make(JSON::Class::Array); - unsigned index = 0; - - ++offset; - consume_ws(str, offset); - if (str[offset] == ']') { - ++offset; - return std::move(Array); - } - - while (true) { - Array[index++] = parse_next(str, offset); - consume_ws(str, offset); - - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == ']') { - ++offset; - break; - } else { - std::cerr << "ERROR: Array: Expected ',' or ']', found '" << str[offset] - << "'\n"; - return std::move(JSON::Make(JSON::Class::Array)); - } - } - - return std::move(Array); -} - -JSON parse_string(const string& str, size_t& offset) { // NOLINT - JSON String; - string val; - for (char c = str[++offset]; c != '\"'; c = str[++offset]) { - if (c == '\\') { - switch (str[++offset]) { - case '\"': - val += '\"'; - break; - case '\\': - val += '\\'; - break; - case '/': - val += '/'; - break; - case 'b': - val += '\b'; - break; - case 'f': - val += '\f'; - break; - case 'n': - val += '\n'; - break; - case 'r': - val += '\r'; - break; - case 't': - val += '\t'; - break; - case 'u': { - val += "\\u"; - for (unsigned i = 1; i <= 4; ++i) { - c = str[offset + i]; - if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || - (c >= 'A' && c <= 'F')) { - val += c; - } else { - std::cerr << "ERROR: String: Expected hex character in unicode " - "escape, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::String)); - } - } - offset += 4; - } break; - default: - val += '\\'; - break; - } - } else { - val += c; - } - } - ++offset; - String = val; - return std::move(String); -} - -JSON parse_number(const string& str, size_t& offset) { // NOLINT - JSON Number; - string val, exp_str; - char c; - bool isDouble = false; - int exp = 0; - while (true) { - c = str[offset++]; - if ((c == '-') || (c >= '0' && c <= '9')) { - val += c; - } else if (c == '.') { - val += c; - isDouble = true; - } else { - break; - } - } - if (c == 'E' || c == 'e') { - c = str[offset++]; - if (c == '-') { - ++offset; - exp_str += '-'; - } - while (true) { - c = str[offset++]; - if (c >= '0' && c <= '9') { - exp_str += c; - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: 
Expected a number for exponent, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } else { - break; - } - } - exp = std::stol(exp_str); - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: unexpected character '" << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - --offset; - - if (isDouble) { - Number = std::stod(val) * std::pow(10, exp); - } else { - if (!exp_str.empty()) - Number = std::stol(val) * std::pow(10, exp); - else - Number = std::stol(val); - } - return std::move(Number); -} - -JSON parse_bool(const string& str, size_t& offset) { // NOLINT - JSON Bool; - if (str.substr(offset, 4) == "true") { - Bool = true; - } else if (str.substr(offset, 5) == "false") { - Bool = false; - } else { - std::cerr << "ERROR: Bool: Expected 'true' or 'false', found '" - << str.substr(offset, 5) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += (Bool.ToBool() ? 4 : 5); - return std::move(Bool); -} - -JSON parse_null(const string& str, size_t& offset) { // NOLINT - JSON Null; - if (str.substr(offset, 4) != "null") { - std::cerr << "ERROR: Null: Expected 'null', found '" - << str.substr(offset, 4) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += 4; - return std::move(Null); -} - -JSON parse_next(const string& str, size_t& offset) { // NOLINT - char value; - consume_ws(str, offset); - value = str[offset]; - switch (value) { - case '[': - return std::move(parse_array(str, offset)); - case '{': - return std::move(parse_object(str, offset)); - case '\"': - return std::move(parse_string(str, offset)); - case 't': - case 'f': - return std::move(parse_bool(str, offset)); - case 'n': - return std::move(parse_null(str, offset)); - default: - if ((value <= '9' && value >= '0') || value == '-') - return std::move(parse_number(str, offset)); - } - std::cerr << "ERROR: Parse: Unknown starting character '" << value << "'\n"; - return JSON(); -} -} // namespace - -JSON JSON::Load(const string& str) { - size_t offset = 0; - return std::move(parse_next(str, offset)); -} - -} // namespace json - -#endif // UTILS_JSON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/log.h deleted file mode 100644 index c2bf03f261a8711f74da819d80d68e8eb9fb124a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/log.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_LOG_H_ -#define UTILS_LOG_H_ - -// Because openfst is a dynamic library compiled with gflags/glog, we must use -// the gflags/glog from openfst to avoid them linked both statically and -// dynamically into the executable. 
-#include "fst/log.h" - -#endif // UTILS_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/string.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/string.cc deleted file mode 100644 index 1ab93adf3cac1bc5a42c0b8c6cadbde399678fef..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/string.cc +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "utils/string.h" - -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -void SplitString(const std::string& str, std::vector* strs) { - SplitStringToVector(Trim(str), " \t", true, strs); -} - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars) { - chars->clear(); - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - assert((str[i] & 0xF8) <= 0xF0); - if ((str[i] & 0x80) == 0x00) { - // The first 128 characters (US-ASCII) in UTF-8 format only need one byte. - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - // The next 1,920 characters need two bytes to encode, - // which covers the remainder of almost all Latin-script alphabets. - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - // Three bytes are needed for characters in the rest of - // the Basic Multilingual Plane, which contains virtually all characters - // in common use, including most Chinese, Japanese and Korean characters. - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - // Four bytes are needed for characters in the other planes of Unicode, - // which include less common CJK characters, various historic scripts, - // mathematical symbols, and emoji (pictographic symbols). 
- bytes = 4; - } - chars->push_back(str.substr(i, bytes)); - } -} - -int UTF8StringLength(const std::string& str) { - int len = 0; - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - if ((str[i] & 0x80) == 0x00) { - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - bytes = 4; - } - ++len; - } - return len; -} - -bool CheckEnglishChar(const std::string& ch) { - // all english characters should be encoded in one byte - if (ch.size() != 1) return false; - // english words may contain apostrophe, i.e., "He's" - return isalpha(ch[0]) || ch[0] == '\''; -} - -bool CheckEnglishWord(const std::string& word) { - std::vector chars; - SplitUTF8StringToChars(word, &chars); - for (size_t k = 0; k < chars.size(); k++) { - if (!CheckEnglishChar(chars[k])) { - return false; - } - } - return true; -} - -std::string JoinString(const std::string& c, - const std::vector& strs) { - std::string result; - if (strs.size() > 0) { - for (int i = 0; i < strs.size() - 1; i++) { - result += (strs[i] + c); - } - result += strs.back(); - } - return result; -} - -bool IsAlpha(const std::string& str) { - for (size_t i = 0; i < str.size(); i++) { - if (!isalpha(str[i])) { - return false; - } - } - return true; -} - -std::string ProcessBlank(const std::string& str, bool lowercase) { - std::string result; - if (!str.empty()) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - for (std::string& ch : chars) { - if (ch != kSpaceSymbol) { - result.append(ch); - } else { - // Ignore consecutive space or located in head - if (!result.empty() && result.back() != ' ') { - result.push_back(' '); - } - } - } - // Ignore tailing space - if (!result.empty() && result.back() == ' ') { - result.pop_back(); - } - // NOTE: convert string to wstring - // see issue 745: https://github.com/wenet-e2e/wenet/issues/745 - std::locale loc(""); - std::wstring_convert, wchar_t> converter; - std::wstring wsresult = converter.from_bytes(result); - for (auto& c : wsresult) { - c = lowercase ? tolower(c, loc) : toupper(c, loc); - } - result = converter.to_bytes(wsresult); - } - return result; -} - -std::string Ltrim(const std::string& str) { - size_t start = str.find_first_not_of(WHITESPACE); - return (start == std::string::npos) ? "" : str.substr(start); -} - -std::string Rtrim(const std::string& str) { - size_t end = str.find_last_not_of(WHITESPACE); - return (end == std::string::npos) ? 
"" : str.substr(0, end + 1); -} - -std::string Trim(const std::string& str) { return Rtrim(Ltrim(str)); } - -std::string JoinPath(const std::string& left, const std::string& right) { - std::string path(left); - if (path.size() && path.back() != '/') { - path.push_back('/'); - } - path.append(right); - return path; -} - -#ifdef _MSC_VER -std::wstring ToWString(const std::string& str) { - unsigned len = str.size() * 2; - setlocale(LC_CTYPE, ""); - wchar_t* p = new wchar_t[len]; - mbstowcs(p, str.c_str(), len); - std::wstring wstr(p); - delete[] p; - return wstr; -} -#endif - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/string.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/string.h deleted file mode 100644 index bf7a52ae09bce45ab7e34a5277652d7ae91bae1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/string.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_STRING_H_ -#define UTILS_STRING_H_ - -#include -#include -#include -#include -#include - -#include "fst/symbol-table.h" - -namespace wenet { - -const char WHITESPACE[] = " \n\r\t\f\v"; - -// Split the string with space or tab. -void SplitString(const std::string& str, std::vector* strs); - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out); - -// NOTE(Xingchen Song): we add this function to make it possible to -// support multilingual recipe in the future, in which characters of -// different languages are all encoded in UTF-8 format. -// UTF-8 REF: https://en.wikipedia.org/wiki/UTF-8#Encoding -// Split the UTF-8 string into chars. -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars); - -int UTF8StringLength(const std::string& str); - -// Check whether the UTF-8 char is alphabet or '. -bool CheckEnglishChar(const std::string& ch); - -// Check whether the UTF-8 word is only contains alphabet or '. -bool CheckEnglishWord(const std::string& word); - -std::string JoinString(const std::string& c, - const std::vector& strs); - -bool IsAlpha(const std::string& str); - -// Split the UTF-8 string into words by symbol table. -// Return whether not contains oov. -bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - -// Replace ▁ with space, then remove head, tail and consecutive space. 
-std::string ProcessBlank(const std::string& str, bool lowercase); - -std::string Ltrim(const std::string& str); - -std::string Rtrim(const std::string& str); - -std::string Trim(const std::string& str); - -std::string JoinPath(const std::string& left, const std::string& right); - -#ifdef _MSC_VER -std::wstring ToWString(const std::string& str); -#endif - -} // namespace wenet - -#endif // UTILS_STRING_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/thread_pool.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/thread_pool.h deleted file mode 100644 index a78162995d90bf079ad091cf14cb9f2cd4476d05..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/thread_pool.h +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2012 Jakob Progsch, Václav Zeman - -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. - -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: - -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. - -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. - -// 3. This notice may not be removed or altered from any source -// distribution. - -#ifndef UTILS_THREAD_POOL_H_ -#define UTILS_THREAD_POOL_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -class ThreadPool { - public: - explicit ThreadPool(size_t); - template - auto enqueue(F&& f, Args&&... args) - -> std::future::type>; - ~ThreadPool(); - - private: - // need to keep track of threads so we can join them - std::vector workers; - // the task queue - std::queue > tasks; - - // synchronization - std::mutex queue_mutex; - std::condition_variable condition; - bool stop; -}; - -// the constructor just launches some amount of workers -inline ThreadPool::ThreadPool(size_t threads) : stop(false) { - for (size_t i = 0; i < threads; ++i) - workers.emplace_back([this] { - for (;;) { - std::function task; - - { - std::unique_lock lock(this->queue_mutex); - this->condition.wait( - lock, [this] { return this->stop || !this->tasks.empty(); }); - if (this->stop && this->tasks.empty()) return; - task = std::move(this->tasks.front()); - this->tasks.pop(); - } - - task(); - } - }); -} - -// add new work item to the pool -template -auto ThreadPool::enqueue(F&& f, Args&&... 
args) - -> std::future::type> { - using return_type = typename std::result_of::type; - - auto task = std::make_shared >( - std::bind(std::forward(f), std::forward(args)...)); - - std::future res = task->get_future(); - { - std::unique_lock lock(queue_mutex); - - // don't allow enqueueing after stopping the pool - if (stop) { - throw std::runtime_error("enqueue on stopped ThreadPool"); - } - - tasks.emplace([task]() { (*task)(); }); - } - condition.notify_one(); - return res; -} - -// the destructor joins all threads -inline ThreadPool::~ThreadPool() { - { - std::unique_lock lock(queue_mutex); - stop = true; - } - condition.notify_all(); - for (std::thread& worker : workers) { - worker.join(); - } -} - -#endif // UTILS_THREAD_POOL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/timer.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/timer.h deleted file mode 100644 index 068519f98d140ba0eef68babe2ad2fdcb798c074..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/timer.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_TIMER_H_ -#define UTILS_TIMER_H_ - -#include - -namespace wenet { - -class Timer { - public: - Timer() : time_start_(std::chrono::steady_clock::now()) {} - void Reset() { time_start_ = std::chrono::steady_clock::now(); } - // return int in milliseconds - int Elapsed() const { - auto time_now = std::chrono::steady_clock::now(); - return std::chrono::duration_cast(time_now - - time_start_) - .count(); - } - - private: - std::chrono::time_point time_start_; -}; -} // namespace wenet - -#endif // UTILS_TIMER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/utils.cc deleted file mode 100644 index c37e36c6e9f629e0a4b11cf21a791aefd58b659f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/utils.cc +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "utils/utils.h" - -#include -#include -#include -#include -#include -#include - -#include "utils/log.h" - -namespace wenet { - -float LogAdd(float x, float y) { - static float num_min = -std::numeric_limits::max(); - if (x <= num_min) return y; - if (y <= num_min) return x; - float xmax = std::max(x, y); - return std::log(std::exp(x - xmax) + std::exp(y - xmax)) + xmax; -} - -template -struct ValueComp { - bool operator()(const std::pair& lhs, - const std::pair& rhs) const { - return lhs.first > rhs.first || - (lhs.first == rhs.first && lhs.second < rhs.second); - } -}; - -// We refer the pytorch topk implementation -// https://github.com/pytorch/pytorch/blob/master/caffe2/operators/top_k.cc -template -void TopK(const std::vector& data, int32_t k, std::vector* values, - std::vector* indices) { - std::vector> heap_data; - int n = data.size(); - for (int32_t i = 0; i < k && i < n; ++i) { - heap_data.emplace_back(data[i], i); - } - std::priority_queue, std::vector>, - ValueComp> - pq(ValueComp(), std::move(heap_data)); - for (int32_t i = k; i < n; ++i) { - if (pq.top().first < data[i]) { - pq.pop(); - pq.emplace(data[i], i); - } - } - - values->resize(std::min(k, n)); - indices->resize(std::min(k, n)); - int32_t cur = values->size() - 1; - while (!pq.empty()) { - const auto& item = pq.top(); - (*values)[cur] = item.first; - (*indices)[cur] = item.second; - pq.pop(); - cur -= 1; - } -} - -template void TopK(const std::vector& data, int32_t k, - std::vector* values, - std::vector* indices); - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/utils.h deleted file mode 100644 index f9957c0b6e8ae27d9260e75cf55e786055827801..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/utils/utils.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef UTILS_UTILS_H_ -#define UTILS_UTILS_H_ - -#include -#include -#include - -namespace wenet { - -#define WENET_DISALLOW_COPY_AND_ASSIGN(Type) \ - Type(const Type&) = delete; \ - Type& operator=(const Type&) = delete; - -const float kFloatMax = std::numeric_limits::max(); -// kSpaceSymbol in UTF-8 is: ▁ -const char kSpaceSymbol[] = "\xe2\x96\x81"; - -// Return the sum of two probabilities in log scale -float LogAdd(float x, float y); - -template -void TopK(const std::vector& data, int32_t k, std::vector* values, - std::vector* indices); - -} // namespace wenet - -#endif // UTILS_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/wenet.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/wenet.cc deleted file mode 100644 index 7c8e92a37336cd0bd647b213784a86f8366f9b60..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/cpp/wenet.cc +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (authors: Xiaoyu Chen) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include - -#include "torch/script.h" -#include "torch/torch.h" - -#include "decoder/asr_decoder.h" -#include "decoder/torch_asr_model.h" -#include "frontend/feature_pipeline.h" -#include "frontend/wav.h" -#include "post_processor/post_processor.h" -#include "utils/log.h" -#include "utils/string.h" - -namespace wenet { - -std::shared_ptr decode_config; -std::shared_ptr feature_config; -std::shared_ptr feature_pipeline; -std::shared_ptr decoder; -std::shared_ptr resource; -DecodeState state = kEndBatch; -std::string total_result; // NOLINT - -void init(JNIEnv* env, jobject, jstring jModelDir) { - const char* pModelDir = env->GetStringUTFChars(jModelDir, nullptr); - - std::string modelPath = std::string(pModelDir) + "/final.zip"; - std::string dictPath = std::string(pModelDir) + "/units.txt"; - auto model = std::make_shared(); - model->Read(modelPath); - LOG(INFO) << "model path: " << modelPath; - - resource = std::make_shared(); - resource->model = model; - resource->symbol_table = std::shared_ptr( - fst::SymbolTable::ReadText(dictPath)); - LOG(INFO) << "dict path: " << dictPath; - - PostProcessOptions post_process_opts; - resource->post_processor = - std::make_shared(post_process_opts); - - feature_config = std::make_shared(80, 16000); - feature_pipeline = std::make_shared(*feature_config); - - decode_config = std::make_shared(); - decode_config->chunk_size = 16; - decoder = std::make_shared(feature_pipeline, resource, - *decode_config); -} - -void reset(JNIEnv *env, jobject) { - LOG(INFO) << "wenet reset"; - decoder->Reset(); - state = kEndBatch; - total_result = ""; -} - -void accept_waveform(JNIEnv *env, jobject, jshortArray jWaveform) { - jsize size = env->GetArrayLength(jWaveform); - int16_t* waveform = env->GetShortArrayElements(jWaveform, 0); - feature_pipeline->AcceptWaveform(waveform, size); - LOG(INFO) << "wenet accept waveform in 
ms: " << int(size / 16); -} - -void set_input_finished() { - LOG(INFO) << "wenet input finished"; - feature_pipeline->set_input_finished(); -} - -void decode_thread_func() { - while (true) { - state = decoder->Decode(); - if (state == kEndFeats || state == kEndpoint) { - decoder->Rescoring(); - } - - std::string result; - if (decoder->DecodedSomething()) { - result = decoder->result()[0].sentence; - } - - if (state == kEndFeats) { - LOG(INFO) << "wenet endfeats final result: " << result; - total_result += result; - break; - } else if (state == kEndpoint) { - LOG(INFO) << "wenet endpoint final result: " << result; - total_result += result + ","; - decoder->ResetContinuousDecoding(); - } else { - if (decoder->DecodedSomething()) { - LOG(INFO) << "wenet partial result: " << result; - } - } - } -} - -void start_decode() { - std::thread decode_thread(decode_thread_func); - decode_thread.detach(); -} - -jboolean get_finished(JNIEnv *env, jobject) { - if (state == kEndFeats) { - LOG(INFO) << "wenet recognize finished"; - return JNI_TRUE; - } - return JNI_FALSE; -} - -jstring get_result(JNIEnv *env, jobject) { - std::string result; - if (decoder->DecodedSomething()) { - result = decoder->result()[0].sentence; - } - LOG(INFO) << "wenet ui result: " << total_result + result; - return env->NewStringUTF((total_result + result).c_str()); -} -} // namespace wenet - -JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void *) { - JNIEnv *env; - if (vm->GetEnv(reinterpret_cast(&env), JNI_VERSION_1_6) != JNI_OK) { - return JNI_ERR; - } - - jclass c = env->FindClass("com/mobvoi/wenet/Recognize"); - if (c == nullptr) { - return JNI_ERR; - } - - static const JNINativeMethod methods[] = { - {"init", "(Ljava/lang/String;)V", reinterpret_cast(wenet::init)}, - {"reset", "()V", reinterpret_cast(wenet::reset)}, - {"acceptWaveform", "([S)V", - reinterpret_cast(wenet::accept_waveform)}, - {"setInputFinished", "()V", - reinterpret_cast(wenet::set_input_finished)}, - {"getFinished", "()Z", reinterpret_cast(wenet::get_finished)}, - {"startDecode", "()V", reinterpret_cast(wenet::start_decode)}, - {"getResult", "()Ljava/lang/String;", - reinterpret_cast(wenet::get_result)}, - }; - int rc = env->RegisterNatives(c, methods, - sizeof(methods) / sizeof(JNINativeMethod)); - - if (rc != JNI_OK) { - return rc; - } - - return JNI_VERSION_1_6; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/java/com/mobvoi/wenet/MainActivity.java b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/java/com/mobvoi/wenet/MainActivity.java deleted file mode 100644 index 715170326149c614ab518343535adc1c180b96d5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/java/com/mobvoi/wenet/MainActivity.java +++ /dev/null @@ -1,220 +0,0 @@ -package com.mobvoi.wenet; - -import android.Manifest; -import android.content.Context; -import android.content.pm.PackageManager; -import android.content.res.AssetManager; -import android.media.AudioFormat; -import android.media.AudioRecord; -import android.media.MediaRecorder; -import android.os.Bundle; -import android.os.Process; -import android.util.Log; -import android.widget.Button; -import android.widget.TextView; -import android.widget.Toast; -import androidx.appcompat.app.AppCompatActivity; -import androidx.core.app.ActivityCompat; -import androidx.core.content.ContextCompat; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import 
java.io.InputStream; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.BlockingQueue; - -public class MainActivity extends AppCompatActivity { - - private final int MY_PERMISSIONS_RECORD_AUDIO = 1; - private static final String LOG_TAG = "WENET"; - private static final int SAMPLE_RATE = 16000; // The sampling rate - private static final int MAX_QUEUE_SIZE = 2500; // 100 seconds audio, 1 / 0.04 * 100 - private static final List resource = Arrays.asList( - "final.zip", "units.txt", "ctc.ort", "decoder.ort", "encoder.ort" - ); - - private boolean startRecord = false; - private AudioRecord record = null; - private int miniBufferSize = 0; // 1280 bytes 648 byte 40ms, 0.04s - private final BlockingQueue bufferQueue = new ArrayBlockingQueue<>(MAX_QUEUE_SIZE); - - public static void assetsInit(Context context) throws IOException { - AssetManager assetMgr = context.getAssets(); - // Unzip all files in resource from assets to context. - // Note: Uninstall the APP will remove the resource files in the context. - for (String file : assetMgr.list("")) { - if (resource.contains(file)) { - File dst = new File(context.getFilesDir(), file); - if (!dst.exists() || dst.length() == 0) { - Log.i(LOG_TAG, "Unzipping " + file + " to " + dst.getAbsolutePath()); - InputStream is = assetMgr.open(file); - OutputStream os = new FileOutputStream(dst); - byte[] buffer = new byte[4 * 1024]; - int read; - while ((read = is.read(buffer)) != -1) { - os.write(buffer, 0, read); - } - os.flush(); - } - } - } - } - - @Override - public void onRequestPermissionsResult(int requestCode, - String[] permissions, int[] grantResults) { - if (requestCode == MY_PERMISSIONS_RECORD_AUDIO) { - if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) { - Log.i(LOG_TAG, "record permission is granted"); - initRecorder(); - } else { - Toast.makeText(this, "Permissions denied to record audio", Toast.LENGTH_LONG).show(); - Button button = findViewById(R.id.button); - button.setEnabled(false); - } - } - } - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.activity_main); - requestAudioPermissions(); - try { - assetsInit(this); - } catch (IOException e) { - Log.e(LOG_TAG, "Error process asset files to file path"); - } - - TextView textView = findViewById(R.id.textView); - textView.setText(""); - Recognize.init(getFilesDir().getPath()); - - Button button = findViewById(R.id.button); - button.setText("Start Record"); - button.setOnClickListener(view -> { - if (!startRecord) { - startRecord = true; - Recognize.reset(); - startRecordThread(); - startAsrThread(); - Recognize.startDecode(); - button.setText("Stop Record"); - } else { - startRecord = false; - Recognize.setInputFinished(); - button.setText("Start Record"); - } - button.setEnabled(false); - }); - } - - private void requestAudioPermissions() { - if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) - != PackageManager.PERMISSION_GRANTED) { - ActivityCompat.requestPermissions(this, - new String[]{Manifest.permission.RECORD_AUDIO}, - MY_PERMISSIONS_RECORD_AUDIO); - } else { - initRecorder(); - } - } - - private void initRecorder() { - // buffer size in bytes 1280 - miniBufferSize = AudioRecord.getMinBufferSize(SAMPLE_RATE, - AudioFormat.CHANNEL_IN_MONO, - AudioFormat.ENCODING_PCM_16BIT); - if (miniBufferSize == AudioRecord.ERROR || miniBufferSize == 
AudioRecord.ERROR_BAD_VALUE) { - Log.e(LOG_TAG, "Audio buffer can't initialize!"); - return; - } - record = new AudioRecord(MediaRecorder.AudioSource.DEFAULT, - SAMPLE_RATE, - AudioFormat.CHANNEL_IN_MONO, - AudioFormat.ENCODING_PCM_16BIT, - miniBufferSize); - if (record.getState() != AudioRecord.STATE_INITIALIZED) { - Log.e(LOG_TAG, "Audio Record can't initialize!"); - return; - } - Log.i(LOG_TAG, "Record init okay"); - } - - private void startRecordThread() { - new Thread(() -> { - VoiceRectView voiceView = findViewById(R.id.voiceRectView); - record.startRecording(); - Process.setThreadPriority(Process.THREAD_PRIORITY_AUDIO); - while (startRecord) { - short[] buffer = new short[miniBufferSize / 2]; - int read = record.read(buffer, 0, buffer.length); - voiceView.add(calculateDb(buffer)); - try { - if (AudioRecord.ERROR_INVALID_OPERATION != read) { - bufferQueue.put(buffer); - } - } catch (InterruptedException e) { - Log.e(LOG_TAG, e.getMessage()); - } - Button button = findViewById(R.id.button); - if (!button.isEnabled() && startRecord) { - runOnUiThread(() -> button.setEnabled(true)); - } - } - record.stop(); - voiceView.zero(); - }).start(); - } - - private double calculateDb(short[] buffer) { - double energy = 0.0; - for (short value : buffer) { - energy += value * value; - } - energy /= buffer.length; - energy = (10 * Math.log10(1 + energy)) / 100; - energy = Math.min(energy, 1.0); - return energy; - } - - private void startAsrThread() { - new Thread(() -> { - // Send all data - while (startRecord || bufferQueue.size() > 0) { - try { - short[] data = bufferQueue.take(); - // 1. add data to C++ interface - Recognize.acceptWaveform(data); - // 2. get partial result - runOnUiThread(() -> { - TextView textView = findViewById(R.id.textView); - textView.setText(Recognize.getResult()); - }); - } catch (InterruptedException e) { - Log.e(LOG_TAG, e.getMessage()); - } - } - - // Wait for final result - while (true) { - // get result - if (!Recognize.getFinished()) { - runOnUiThread(() -> { - TextView textView = findViewById(R.id.textView); - textView.setText(Recognize.getResult()); - }); - } else { - runOnUiThread(() -> { - Button button = findViewById(R.id.button); - button.setEnabled(true); - }); - break; - } - } - }).start(); - } -} \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/java/com/mobvoi/wenet/Recognize.java b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/java/com/mobvoi/wenet/Recognize.java deleted file mode 100644 index 31cafcf8a31685216e1510b9b7b43812624b5ea8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/java/com/mobvoi/wenet/Recognize.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.mobvoi.wenet; - -public class Recognize { - - static { - System.loadLibrary("wenet"); - } - - public static native void init(String modelDir); - public static native void reset(); - public static native void acceptWaveform(short[] waveform); - public static native void setInputFinished(); - public static native boolean getFinished(); - public static native void startDecode(); - public static native String getResult(); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/java/com/mobvoi/wenet/VoiceRectView.java b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/java/com/mobvoi/wenet/VoiceRectView.java deleted file mode 100644 index 
fd1c832b28536b918f26969ce987898870ad584f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/java/com/mobvoi/wenet/VoiceRectView.java +++ /dev/null @@ -1,134 +0,0 @@ -package com.mobvoi.wenet; - -import android.content.Context; -import android.content.res.TypedArray; -import android.graphics.Canvas; -import android.graphics.LinearGradient; -import android.graphics.Paint; -import android.graphics.Shader; -import android.util.AttributeSet; -import android.view.View; -import androidx.core.content.ContextCompat; -import java.util.Arrays; - -/** - * 自定义的音频模拟条形图 Created by shize on 2016/9/5. - */ -public class VoiceRectView extends View { - - // 音频矩形的数量 - private int mRectCount; - // 音频矩形的画笔 - private Paint mRectPaint; - // 渐变颜色的两种 - private int topColor, downColor; - // 音频矩形的宽和高 - private int mRectWidth, mRectHeight; - // 偏移量 - private int offset; - // 频率速度 - private int mSpeed; - - private double[] mEnergyBuffer = null; - - public VoiceRectView(Context context) { - this(context, null); - } - - public VoiceRectView(Context context, AttributeSet attrs) { - this(context, attrs, 0); - } - - public VoiceRectView(Context context, AttributeSet attrs, int defStyleAttr) { - super(context, attrs, defStyleAttr); - setPaint(context, attrs); - } - - public void setPaint(Context context, AttributeSet attrs) { - // 将属性存储到TypedArray中 - TypedArray ta = context.obtainStyledAttributes(attrs, R.styleable.VoiceRect); - mRectPaint = new Paint(); - // 添加矩形画笔的基础颜色 - mRectPaint.setColor(ta.getColor(R.styleable.VoiceRect_RectTopColor, - ContextCompat.getColor(context, R.color.top_color))); - // 添加矩形渐变色的上面部分 - topColor = ta.getColor(R.styleable.VoiceRect_RectTopColor, - ContextCompat.getColor(context, R.color.top_color)); - // 添加矩形渐变色的下面部分 - downColor = ta.getColor(R.styleable.VoiceRect_RectDownColor, - ContextCompat.getColor(context, R.color.down_color)); - // 设置矩形的数量 - mRectCount = ta.getInt(R.styleable.VoiceRect_RectCount, 10); - mEnergyBuffer = new double[mRectCount]; - - // 设置重绘的时间间隔,也就是变化速度 - mSpeed = ta.getInt(R.styleable.VoiceRect_RectSpeed, 300); - // 每个矩形的间隔 - offset = ta.getInt(R.styleable.VoiceRect_RectOffset, 0); - // 回收TypeArray - ta.recycle(); - } - - @Override - protected void onSizeChanged(int w, int h, int oldW, int oldH) { - super.onSizeChanged(w, h, oldW, oldH); - // 渐变效果 - LinearGradient mLinearGradient; - // 画布的宽 - int mWidth; - // 获取画布的宽 - mWidth = getWidth(); - // 获取矩形的最大高度 - mRectHeight = getHeight(); - // 获取单个矩形的宽度(减去的部分为到右边界的间距) - mRectWidth = (mWidth - offset) / mRectCount; - // 实例化一个线性渐变 - mLinearGradient = new LinearGradient( - 0, - 0, - mRectWidth, - mRectHeight, - topColor, - downColor, - Shader.TileMode.CLAMP - ); - // 添加进画笔的着色器 - mRectPaint.setShader(mLinearGradient); - } - - public void add(double energy) { - if (mEnergyBuffer.length - 1 >= 0) { - System.arraycopy(mEnergyBuffer, 1, mEnergyBuffer, 0, mEnergyBuffer.length - 1); - } - mEnergyBuffer[mEnergyBuffer.length - 1] = energy; - } - - public void zero() { - Arrays.fill(mEnergyBuffer, 0); - } - - @Override - protected void onDraw(Canvas canvas) { - super.onDraw(canvas); - double mRandom; - float currentHeight; - for (int i = 0; i < mRectCount; i++) { - // 由于只是简单的案例就不监听音频输入,随机模拟一些数字即可 - mRandom = Math.random(); - - //if (i < 1 || i > mRectCount - 2) mRandom = 0; - currentHeight = (float) (mRectHeight * mEnergyBuffer[i]); - - // 矩形的绘制是从左边开始到上、右、下边(左右边距离左边画布边界的距离,上下边距离上边画布边界的距离) - canvas.drawRect( - (float) (mRectWidth * i + offset), - (mRectHeight - 
currentHeight) / 2, - (float) (mRectWidth * (i + 1)), - mRectHeight / 2 + currentHeight / 2, - mRectPaint - ); - } - // 使得view延迟重绘 - postInvalidateDelayed(mSpeed); - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/res/drawable-v24/ic_launcher_foreground.xml deleted file mode 100644 index 2b068d11462a4b96669193de13a711a3a36220a0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/res/drawable-v24/ic_launcher_foreground.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/res/drawable/ic_launcher_background.xml b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/res/drawable/ic_launcher_background.xml deleted file mode 100644 index 07d5da9cbf141911847041df5d7b87f0dd5ef9d4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/res/drawable/ic_launcher_background.xml +++ /dev/null @@ -1,170 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/res/layout/activity_main.xml b/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/res/layout/activity_main.xml deleted file mode 100644 index 3ef1e0d3b27b4c19aa30a4c79ad2bf557d1ecb65..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/android/app/src/main/res/layout/activity_main.xml +++ /dev/null @@ -1,50 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/Info.plist b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/Info.plist deleted file mode 100644 index 16ad1537f36c818128d96ec61f63df0c2f45498c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/Info.plist +++ /dev/null @@ -1,27 +0,0 @@ - - - - - UIApplicationSceneManifest - - UIApplicationSupportsMultipleScenes - - UISceneConfigurations - - UIWindowSceneSessionRoleApplication - - - UISceneConfigurationName - Default Configuration - UISceneDelegateClassName - $(PRODUCT_MODULE_NAME).SceneDelegate - UISceneStoryboardFile - Main - - - - - NSMicrophoneUsageDescription - Need microphone access for recording speech - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/SceneDelegate.swift b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/SceneDelegate.swift deleted file mode 100644 index 1c61b7853fce34cb4bd1cea152fb3013ef47015c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/SceneDelegate.swift +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2022 Dan Ma (1067837450@qq.com) -// -// SceneDelegate.swift -// WenetDemo -// - -import UIKit - -class SceneDelegate: UIResponder, UIWindowSceneDelegate { - - var window: UIWindow? 
- - - func scene(_ scene: UIScene, willConnectTo session: UISceneSession, - options connectionOptions: UIScene.ConnectionOptions) { - // Use this method to optionally configure and attach the UIWindow - // `window` to the provided UIWindowScene `scene`. - // If using a storyboard, the `window` property will - // automatically be initialized and attached to the scene. - // This delegate does not imply the connecting scene or session - // are new (see - // `application:configurationForConnectingSceneSession` instead). - guard let _ = (scene as? UIWindowScene) else { return } - } - - func sceneDidDisconnect(_ scene: UIScene) { - // Called as the scene is being released by the system. - // This occurs shortly after the scene enters the background, or when - // its session is discarded. - // Release any resources associated with this scene that can be - // re-created the next time the scene connects. - // The scene may re-connect later, as its session was not necessarily - // discarded (see `application:didDiscardSceneSessions` instead). - } - - func sceneDidBecomeActive(_ scene: UIScene) { - // Called when the scene has moved from an inactive state - // to an active state. - // Use this method to restart any tasks that were - // paused (or not yet started) when the scene was inactive. - } - - func sceneWillResignActive(_ scene: UIScene) { - // Called when the scene will move from an active state to - // an inactive state. - // This may occur due to temporary interruptions - // (ex. an incoming phone call). - } - - func sceneWillEnterForeground(_ scene: UIScene) { - // Called as the scene transitions from the background - // to the foreground. - // Use this method to undo the changes made on - // entering the background. - } - - func sceneDidEnterBackground(_ scene: UIScene) { - // Called as the scene transitions from the foreground to - // the background. - // Use this method to save data, release shared resources, - // and store enough scene-specific state information - // to restore the scene back to its current state. - } - - -} - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/ViewController.swift b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/ViewController.swift deleted file mode 100644 index 707073a2d271c22faa6e4aa604148079e9b458e9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/ViewController.swift +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) 2022 Dan Ma (1067837450@qq.com) -// -// ViewController.swift -// WenetDemo -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import UIKit -import AVFoundation - -class ViewController: UIViewController { - - @IBOutlet weak var label: UILabel! - @IBOutlet weak var button: UIButton! - - var wenetModel: Wenet? - var audioEngine: AVAudioEngine? - var startRecord: Bool? - private var workItem: DispatchWorkItem? 
- - override func viewDidLoad() { - super.viewDidLoad() - // Do any additional setup after loading the view. - - initModel() - - initRecorder() - } - - func initModel() { - let modelPath = Bundle.main.path(forResource: "final", ofType: "zip") - let dictPath = Bundle.main.path(forResource: "units", ofType: "txt") - wenetModel = Wenet(modelPath:modelPath, dictPath:dictPath)! - - wenetModel?.reset() - } - - func initRecorder() { - startRecord = false - - audioEngine = AVAudioEngine() - let inputNode = self.audioEngine?.inputNode - let bus = 0 - let inputFormat = inputNode?.outputFormat(forBus: bus) - let outputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, - sampleRate: 16000, channels: 1, - interleaved: false)! - let converter = AVAudioConverter(from: inputFormat!, to: outputFormat)! - inputNode!.installTap(onBus: bus, - bufferSize: 1024, - format: inputFormat) { - (buffer: AVAudioPCMBuffer, when: AVAudioTime) in - var newBufferAvailable = true - - let inputCallback: AVAudioConverterInputBlock = { - inNumPackets, outStatus in - if newBufferAvailable { - outStatus.pointee = .haveData - newBufferAvailable = false - - return buffer - } else { - outStatus.pointee = .noDataNow - return nil - } - } - - let convertedBuffer = AVAudioPCMBuffer( - pcmFormat: outputFormat, - frameCapacity: - AVAudioFrameCount(outputFormat.sampleRate) - * buffer.frameLength - / AVAudioFrameCount(buffer.format.sampleRate))! - - var error: NSError? - let status = converter.convert( - to: convertedBuffer, - error: &error, withInputFrom: inputCallback) - - // 16000 Hz buffer - let actualSampleCount = Int(convertedBuffer.frameLength) - guard let floatChannelData = convertedBuffer.floatChannelData - else { return } - - self.wenetModel?.acceptWaveForm(floatChannelData[0], - Int32(actualSampleCount)) - } - } - - @IBAction func btnClicked(_ sender: Any) { - if(!startRecord!) { - //Clear result - self.setResult(text: "") - - //Reset model - self.wenetModel?.reset() - - //Start record - do { - try self.audioEngine?.start() - } catch let error as NSError { - print("Got an error starting audioEngine: \(error.domain), \(error)") - } - - //Start decode thread - workItem = DispatchWorkItem { - while(!self.workItem!.isCancelled) { - self.wenetModel?.decode() - DispatchQueue.main.sync { - self.setResult(text: (self.wenetModel?.get_result())!) - } - } - } - DispatchQueue.global().async(execute: workItem!) 
- - startRecord = true - button.setTitle("Stop Record", for: UIControl.State.normal) - } else { - //Stop record - self.audioEngine?.stop() - - //Stop decode thread - workItem!.cancel() - - startRecord = false - button.setTitle("Start Record", for: UIControl.State.normal) - } - } - - @objc func setResult(text: String) { - label.text = text - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/model/.gitkeep b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/model/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/wenet/WenetDemo-Bridging-Header.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/wenet/WenetDemo-Bridging-Header.h deleted file mode 100644 index 5cec9898b6b7826f988892d28b22daf6080e96f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/wenet/WenetDemo-Bridging-Header.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2022 Dan Ma (1067837450@qq.com) -// -// Use this file to import your target's public headers -// that you would like to expose to Swift. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENETDEMO_BRIDGING_HEADER_H_ -#define RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENETDEMO_BRIDGING_HEADER_H_ - -#import "wenet.h" - -#endif // RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENETDEMO_BRIDGING_HEADER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/wenet/wenet.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/wenet/wenet.h deleted file mode 100644 index 0d430e3577e2d5fa56dfab0925c9131b9197aefa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/wenet/wenet.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 Dan Ma (1067837450@qq.com) -// -// wenet.h -// WenetDemo -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENET_H_ -#define RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENET_H_ - -#include - -#import - -@interface Wenet : NSObject - -- (nullable instancetype)initWithModelPath: -(NSString*)modelPath DictPath:(NSString*)dictPath; // NOLINT - -- (void)reset; - -- (void)acceptWaveForm: (float*)pcm: (int)size; // NOLINT - -- (void)decode; - -- (NSString*)get_result; // NOLINT - -@end - -#endif // RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENET_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/wenet/wenet.mm b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/wenet/wenet.mm deleted file mode 100644 index bab9a085ca47fa8004ec8995b144fd224d5c926d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/WenetDemo/WenetDemo/wenet/wenet.mm +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (c) 2022 Dan Ma (1067837450@qq.com) -// -// wenet.mm -// WenetDemo -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "wenet.h" - -#define IOS - -#include "decoder/asr_decoder.h" -#include "decoder/torch_asr_model.h" -#include "frontend/feature_pipeline.h" -#include "frontend/wav.h" -#include "post_processor/post_processor.h" -#include "utils/log.h" -#include "utils/string.h" - -using namespace wenet; - -@implementation Wenet { -@protected - std::shared_ptr decode_config; - std::shared_ptr feature_config; - std::shared_ptr feature_pipeline; - std::shared_ptr decoder; - std::shared_ptr resource; - DecodeState state; - std::string total_result; -} - -- (nullable instancetype)initWithModelPath: -(NSString*)modelPath DictPath:(NSString*)dictPath { - self = [super init]; - if (self) { - try { - auto qengines = at::globalContext().supportedQEngines(); - if (std::find(qengines.begin(), qengines.end(), at::QEngine::QNNPACK) - != qengines.end()) { - at::globalContext().setQEngine(at::QEngine::QNNPACK); - } - auto model = std::make_shared(); - model->Read(modelPath.UTF8String); - resource = std::make_shared(); - resource->model = model; - resource->symbol_table = std::shared_ptr - (fst::SymbolTable::ReadText(dictPath.UTF8String)); - - PostProcessOptions post_process_opts; - resource->post_processor = - std::make_shared(post_process_opts); - - feature_config = std::make_shared(80, 16000); - feature_pipeline = std::make_shared(*feature_config); - - decode_config = std::make_shared(); - decode_config->chunk_size = 16; - decoder = std::make_shared(feature_pipeline, - resource, - *decode_config); - - state = kEndBatch; - } catch (const std::exception& exception) { - NSLog(@"%s", exception.what()); - return nil; - } - } - - return self; -} - -- (void)reset { - decoder->Reset(); - state = kEndBatch; - total_result = ""; -} - -- (void)acceptWaveForm: (float*)pcm: (int)size { - auto* float_pcm = new float[size]; - for (size_t i = 0; i < size; i++) { - float_pcm[i] = pcm[i] * 65535; - } - feature_pipeline->AcceptWaveform(float_pcm, size); -} - -- 
(void)decode { - state = decoder->Decode(); - if (state == kEndFeats || state == kEndpoint) { - decoder->Rescoring(); - } - - std::string result; - if (decoder->DecodedSomething()) { - result = decoder->result()[0].sentence; - } - - if (state == kEndFeats) { - LOG(INFO) << "wenet endfeats final result: " << result; - NSLog(@"wenet endfeats final result: %s", result.c_str()); - total_result += result; - } else if (state == kEndpoint) { - LOG(INFO) << "wenet endpoint final result: " << result; - NSLog(@"wenet endpoint final result: %s", result.c_str()); - total_result += result + ","; - decoder->ResetContinuousDecoding(); - } else { - if (decoder->DecodedSomething()) { - LOG(INFO) << "wenet partial result: " << result; - NSLog(@"wenet partial result: %s", result.c_str()); - } - } -} - -- (NSString*)get_result { - std::string result; - if (decoder->DecodedSomething()) { - result = decoder->result()[0].sentence; - } - LOG(INFO) << "wenet ui result: " << total_result + result; - NSLog(@"wenet ui result: %s", (total_result + result).c_str()); - return [NSString stringWithUTF8String:(total_result + result).c_str()]; -} - -@end diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/boost.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/boost.cmake deleted file mode 100644 index 8684c0ec43960da213da923dc57416f04301ea2b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/boost.cmake +++ /dev/null @@ -1,10 +0,0 @@ -FetchContent_Declare(boost - URL https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.gz - URL_HASH SHA256=aeb26f80e80945e82ee93e5939baebdca47b9dee80a07d3144be1e1a6a66dd6a -) -FetchContent_MakeAvailable(boost) -include_directories(${boost_SOURCE_DIR}) - -if(MSVC) - add_definitions(-DBOOST_ALL_DYN_LINK -DBOOST_ALL_NO_LIB) -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/bpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/bpu.cmake deleted file mode 100644 index 350d76c19d6f656fb130de09877d649cf49972a4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/bpu.cmake +++ /dev/null @@ -1,30 +0,0 @@ -if(BPU) - if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(EASY_DNN_URL "https://github.com/xingchensong/toolchain_pkg/releases/download/easy_dnn/easy_dnn.0.4.11.tar.gz") - set(URL_HASH "SHA256=a1a6f77d1baae7181d75ec5d37a2ee529ac4e1c4400babd6ceb1c007392a4904") - else() - message(FATAL_ERROR "Unsupported CMake System Processor '${CMAKE_SYSTEM_PROCESSOR}' (expected 'aarch64')") - endif() - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Linux')") - endif() - - FetchContent_Declare(easy_dnn - URL ${EASY_DNN_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(easy_dnn) - include_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/include) - link_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/lib) - 
link_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/lib) - link_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/lib) - - add_definitions(-DUSE_BPU) - # NOTE(xcsong): Reasons for adding flag `-fuse-ld=gold`: - # https://stackoverflow.com/questions/59915966/unknown-gcc-linker-error-but-builds-sucessfully/59916438#59916438 - # https://github.com/tensorflow/tensorflow/issues/47849 - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold") -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/gflags.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/gflags.cmake deleted file mode 100644 index 53ae5763b5a8c860b7e64d35b380eee5429f539d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/gflags.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(gflags - URL https://github.com/gflags/gflags/archive/v2.2.2.zip - URL_HASH SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5 -) -FetchContent_MakeAvailable(gflags) -include_directories(${gflags_BINARY_DIR}/include) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/glog.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/glog.cmake deleted file mode 100644 index 447ab4132f669ee2c3a52c37959dd684a39ff21b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/glog.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(glog - URL https://github.com/google/glog/archive/v0.4.0.zip - URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc -) -FetchContent_MakeAvailable(glog) -include_directories(${glog_SOURCE_DIR}/src ${glog_BINARY_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/grpc.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/grpc.cmake deleted file mode 100644 index 644093a4bf8191f3a45b0df0a72c000981c48f58..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/grpc.cmake +++ /dev/null @@ -1,9 +0,0 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/grpc) -# third_party: grpc -# On how to build grpc, you may refer to https://github.com/grpc/grpc -# We recommend manually recursive clone the repo to avoid internet connection problem -FetchContent_Declare(gRPC - GIT_REPOSITORY https://github.com/grpc/grpc - GIT_TAG v1.37.1 -) -FetchContent_MakeAvailable(gRPC) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/gtest.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/gtest.cmake deleted file mode 100644 index 30dc7c1a31d8b83991841a4dc33f61ed078b532a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/gtest.cmake +++ /dev/null @@ -1,8 +0,0 @@ -FetchContent_Declare(googletest - URL https://github.com/google/googletest/archive/release-1.11.0.zip - URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a -) -if(MSVC) - set(gtest_force_shared_crt ON CACHE BOOL "Always use msvcrt.dll" FORCE) -endif() -FetchContent_MakeAvailable(googletest) \ No newline at end of file diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/libtorch.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/libtorch.cmake deleted file mode 100644 index 3cd9245b2da52f8be206d27164de5f411bff171b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/libtorch.cmake +++ /dev/null @@ -1,79 +0,0 @@ -if(TORCH) - add_definitions(-DUSE_TORCH) - if(NOT ANDROID) - if(GPU) - if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - message(FATAL_ERROR "GPU is supported only Linux, you can use CPU version") - else() - add_definitions(-DUSE_GPU) - endif() - endif() - - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - if(${CMAKE_BUILD_TYPE} MATCHES "Release") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bece54d36377990257e9d028c687c5b6759c5cfec0a0153da83cf6f0f71f648f") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-debug-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=3cc7ba3c3865d86f03d78c2f0878fdbed8b764359476397a5c95cf3bba0d665a") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CXX11_ABI) - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=d52f63577a07adb0bfd6d77c90f7da21896e94f71eb7dcd55ed7835ccb3b2b59") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.12.0%2Bcu113.zip") - set(URL_HASH "SHA256=80f089939de20e68e3fcad4dfa72a26c8bf91b5e77b11042f671f39ebac35865") - endif() - else() - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bee1b7be308792aa60fc95a4f5274d9658cb7248002d0e333d49eb81ec88430c") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.11.0%2Bcu113.zip") - set(URL_HASH "SHA256=90159ecce3ff451f3ef3f657493b6c7c96759c3b74bbd70c1695f2ea2f81e1ad") - endif() - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-macos-1.13.0.zip") - set(URL_HASH "SHA256=a8f80050b95489b4e002547910410c2c230e9f590ffab2482e19e809afe4f7aa") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") - add_definitions(-DIOS) - else() - message(FATAL_ERROR "Unsupported System '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux', 'Darwin' or 'iOS')") - endif() - - # iOS use LibTorch from pod install - if(NOT IOS) - FetchContent_Declare(libtorch - URL ${LIBTORCH_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(libtorch) - find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS} -DC10_USE_GLOG") - endif() - - if(MSVC) - file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") - file(COPY ${TORCH_DLLS} DESTINATION ${CMAKE_BINARY_DIR}) - endif() - else() - # Change version in runtime/android/app/build.gradle. 
- file(GLOB PYTORCH_INCLUDE_DIRS "${build_DIR}/pytorch_android*.aar/headers") - file(GLOB PYTORCH_LINK_DIRS "${build_DIR}/pytorch_android*.aar/jni/${ANDROID_ABI}") - find_library(PYTORCH_LIBRARY pytorch_jni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - find_library(FBJNI_LIBRARY fbjni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - include_directories( - ${PYTORCH_INCLUDE_DIRS} - ${PYTORCH_INCLUDE_DIRS}/torch/csrc/api/include - ) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/onnx.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/onnx.cmake deleted file mode 100644 index bd55402cb2a6024620fa6ff8b5c413207041adfa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/onnx.cmake +++ /dev/null @@ -1,35 +0,0 @@ -if(ONNX) - set(ONNX_VERSION "1.12.0") - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-win-x64-${ONNX_VERSION}.zip") - set(URL_HASH "SHA256=8b5d61204989350b7904ac277f5fbccd3e6736ddbb6ec001e412723d71c9c176") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-aarch64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5820d9f343df73c63b6b2b174a1ff62575032e171c9564bcf92060f46827d0ac") - else() - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-x64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5d503ce8540358b59be26c675e42081be14a3e833a5301926f555451046929c5") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-x86_64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=09b17f712f8c6f19bb63da35d508815b443cbb473e16c6192abfaa297c02f600") - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux' or 'Darwin')") - endif() - - FetchContent_Declare(onnxruntime - URL ${ONNX_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(onnxruntime) - include_directories(${onnxruntime_SOURCE_DIR}/include) - link_directories(${onnxruntime_SOURCE_DIR}/lib) - - if(MSVC) - file(GLOB ONNX_DLLS "${onnxruntime_SOURCE_DIR}/lib/*.dll") - file(COPY ${ONNX_DLLS} DESTINATION ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}) - endif() - - add_definitions(-DUSE_ONNX) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/openfst.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/openfst.cmake deleted file mode 100644 index 490a3da6b571ec228114167fb9c0d9e9b4043bd2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/openfst.cmake +++ /dev/null @@ -1,45 +0,0 @@ -if(NOT ANDROID) - include(gflags) - # We can't build glog with gflags, unless gflags is pre-installed. - # If build glog with pre-installed gflags, there will be conflict. 
- set(WITH_GFLAGS OFF CACHE BOOL "whether build glog with gflags" FORCE) - include(glog) - - if(NOT GRAPH_TOOLS) - set(HAVE_BIN OFF CACHE BOOL "Build the fst binaries" FORCE) - set(HAVE_SCRIPT OFF CACHE BOOL "Build the fstscript" FORCE) - endif() - set(HAVE_COMPACT OFF CACHE BOOL "Build compact" FORCE) - set(HAVE_CONST OFF CACHE BOOL "Build const" FORCE) - set(HAVE_GRM OFF CACHE BOOL "Build grm" FORCE) - set(HAVE_FAR OFF CACHE BOOL "Build far" FORCE) - set(HAVE_PDT OFF CACHE BOOL "Build pdt" FORCE) - set(HAVE_MPDT OFF CACHE BOOL "Build mpdt" FORCE) - set(HAVE_LINEAR OFF CACHE BOOL "Build linear" FORCE) - set(HAVE_LOOKAHEAD OFF CACHE BOOL "Build lookahead" FORCE) - set(HAVE_NGRAM OFF CACHE BOOL "Build ngram" FORCE) - set(HAVE_SPECIAL OFF CACHE BOOL "Build special" FORCE) - - if(MSVC) - add_compile_options(/W0 /wd4244 /wd4267) - endif() - - # "OpenFST port for Windows" builds openfst with cmake for multiple platforms. - # Openfst is compiled with glog/gflags to avoid log and flag conflicts with log and flags in wenet/libtorch. - # To build openfst with gflags and glog, we comment out some vars of {flags, log}.h and flags.cc. - set(openfst_SOURCE_DIR ${fc_base}/openfst-src CACHE PATH "OpenFST source directory") - FetchContent_Declare(openfst - URL https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz - URL_HASH SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e - PATCH_COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/patch/openfst ${openfst_SOURCE_DIR} - ) - FetchContent_MakeAvailable(openfst) - add_dependencies(fst gflags glog) - target_link_libraries(fst PUBLIC gflags_nothreads_static glog) - include_directories(${openfst_SOURCE_DIR}/src/include) -else() - set(openfst_BINARY_DIR ${build_DIR}/wenet-openfst-android-1.0.2.aar/jni) - include_directories(${openfst_BINARY_DIR}/include) - link_directories(${openfst_BINARY_DIR}/${ANDROID_ABI}) - link_libraries(log gflags_nothreads glog fst) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/pybind11.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/pybind11.cmake deleted file mode 100644 index 6bdae202c1c4d94228e5f92dab051c118dba7d3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/pybind11.cmake +++ /dev/null @@ -1,7 +0,0 @@ -FetchContent_Declare(pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.zip - URL_HASH SHA256=d1646e6f70d8a3acb2ddd85ce1ed543b5dd579c68b8fb8e9638282af20edead8 -) -FetchContent_MakeAvailable(pybind11) - -add_subdirectory(${pybind11_SOURCE_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/xpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/xpu.cmake deleted file mode 100644 index 38418671b0237550cd01d4d95e8743067e113e56..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/cmake/xpu.cmake +++ /dev/null @@ -1,37 +0,0 @@ -if(NOT WIN32) - string(ASCII 27 Esc) - set(ColourReset "${Esc}[m") - set(ColourBold "${Esc}[1m") - set(Red "${Esc}[31m") - set(Green "${Esc}[32m") - set(Yellow "${Esc}[33m") - set(Blue "${Esc}[34m") - set(Magenta "${Esc}[35m") - set(Cyan "${Esc}[36m") - set(White "${Esc}[37m") - set(BoldRed "${Esc}[1;31m") - set(BoldGreen "${Esc}[1;32m") - set(BoldYellow "${Esc}[1;33m") - set(BoldBlue "${Esc}[1;34m") - set(BoldMagenta "${Esc}[1;35m") - 
set(BoldCyan "${Esc}[1;36m") - set(BoldWhite "${Esc}[1;37m") -endif() - -if(XPU) - set(RUNTIME_KUNLUN_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - message(STATUS "RUNTIME_KUNLUN_PATH is ${RUNTIME_KUNLUN_PATH} .\n") - set(KUNLUN_XPU_PATH ${RUNTIME_KUNLUN_PATH}/xpu) - if(NOT DEFINED ENV{XPU_API_PATH}) - message(FATAL_ERROR "${BoldRed}NO ENV{XPU_API_PATH} in your env. Please set XPU_API_PATH.${ColourReset}\n") - else() - set(XPU_API_PATH $ENV{XPU_API_PATH}) - message("set XPU_API_PATH from env_var. Val is $ENV{XPU_API_PATH}.") - endif() - - include_directories(${RUNTIME_KUNLUN_PATH} ${KUNLUN_XPU_PATH}/ - ${XPU_API_PATH}/output/include ${XPU_API_PATH}/../runtime/include) - link_directories(${XPU_API_PATH}/output/so/ ${XPU_API_PATH}/../runtime/output/so/) - - add_definitions(-DUSE_XPU) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/CMakeLists.txt deleted file mode 100644 index fe03efb288eb1c7ae3d05e896e95855e5865472f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -set(decoder_srcs - asr_decoder.cc - asr_model.cc - context_graph.cc - ctc_prefix_beam_search.cc - ctc_wfst_beam_search.cc - ctc_endpoint.cc -) - -if(NOT TORCH AND NOT ONNX AND NOT XPU AND NOT IOS AND NOT BPU) - message(FATAL_ERROR "Please build with TORCH or ONNX or XPU or IOS or BPU!!!") -endif() -if(TORCH OR IOS) - list(APPEND decoder_srcs torch_asr_model.cc) -endif() -if(ONNX) - list(APPEND decoder_srcs onnx_asr_model.cc) -endif() - -add_library(decoder STATIC ${decoder_srcs}) -target_link_libraries(decoder PUBLIC kaldi-decoder frontend - post_processor utils) - -if(ANDROID) - target_link_libraries(decoder PUBLIC ${PYTORCH_LIBRARY} ${FBJNI_LIBRARY}) -else() - if(TORCH) - target_link_libraries(decoder PUBLIC ${TORCH_LIBRARIES}) - endif() - if(ONNX) - target_link_libraries(decoder PUBLIC onnxruntime) - endif() - if(BPU) - target_link_libraries(decoder PUBLIC bpu_asr_model) - endif() - if(XPU) - target_link_libraries(decoder PUBLIC xpu_conformer) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_decoder.cc deleted file mode 100644 index 34de7550ea287b37d2cb707e148f5d6853b3d804..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_decoder.cc +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/asr_decoder.h" - -#include - -#include -#include -#include - -#include "utils/timer.h" - -namespace wenet { - -AsrDecoder::AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts) - : feature_pipeline_(std::move(feature_pipeline)), - // Make a copy of the model ASR model since we will change the inner - // status of the model - model_(resource->model->Copy()), - post_processor_(resource->post_processor), - symbol_table_(resource->symbol_table), - fst_(resource->fst), - unit_table_(resource->unit_table), - opts_(opts), - ctc_endpointer_(new CtcEndpoint(opts.ctc_endpoint_config)) { - if (opts_.reverse_weight > 0) { - // Check if model has a right to left decoder - CHECK(model_->is_bidirectional_decoder()); - } - if (nullptr == fst_) { - searcher_.reset(new CtcPrefixBeamSearch(opts.ctc_prefix_search_opts, - resource->context_graph)); - } else { - searcher_.reset(new CtcWfstBeamSearch(*fst_, opts.ctc_wfst_search_opts, - resource->context_graph)); - } - ctc_endpointer_->frame_shift_in_ms(frame_shift_in_ms()); -} - -void AsrDecoder::Reset() { - start_ = false; - result_.clear(); - num_frames_ = 0; - global_frame_offset_ = 0; - model_->Reset(); - searcher_->Reset(); - feature_pipeline_->Reset(); - ctc_endpointer_->Reset(); -} - -void AsrDecoder::ResetContinuousDecoding() { - global_frame_offset_ = num_frames_; - start_ = false; - result_.clear(); - model_->Reset(); - searcher_->Reset(); - ctc_endpointer_->Reset(); -} - -DecodeState AsrDecoder::Decode(bool block) { - return this->AdvanceDecoding(block); -} - -void AsrDecoder::Rescoring() { - // Do attention rescoring - Timer timer; - AttentionRescoring(); - VLOG(2) << "Rescoring cost latency: " << timer.Elapsed() << "ms."; -} - -DecodeState AsrDecoder::AdvanceDecoding(bool block) { - DecodeState state = DecodeState::kEndBatch; - model_->set_chunk_size(opts_.chunk_size); - model_->set_num_left_chunks(opts_.num_left_chunks); - int num_required_frames = model_->num_frames_for_chunk(start_); - std::vector> chunk_feats; - // Return immediately if we do not want to block - if (!block && !feature_pipeline_->input_finished() && - feature_pipeline_->NumQueuedFrames() < num_required_frames) { - return DecodeState::kWaitFeats; - } - // If not okay, that means we reach the end of the input - if (!feature_pipeline_->Read(num_required_frames, &chunk_feats)) { - state = DecodeState::kEndFeats; - } - - num_frames_ += chunk_feats.size(); - VLOG(2) << "Required " << num_required_frames << " get " - << chunk_feats.size(); - Timer timer; - std::vector> ctc_log_probs; - model_->ForwardEncoder(chunk_feats, &ctc_log_probs); - int forward_time = timer.Elapsed(); - if (opts_.ctc_wfst_search_opts.blank_scale != 1.0) { - for (int i = 0; i < ctc_log_probs.size(); i++) { - ctc_log_probs[i][0] = ctc_log_probs[i][0] - + std::log(opts_.ctc_wfst_search_opts.blank_scale); - } - } - timer.Reset(); - searcher_->Search(ctc_log_probs); - int search_time = timer.Elapsed(); - VLOG(3) << "forward takes " << forward_time << " ms, search takes " - << search_time << " ms"; - UpdateResult(); - - if (state != DecodeState::kEndFeats) { - if (ctc_endpointer_->IsEndpoint(ctc_log_probs, DecodedSomething())) { - VLOG(1) << "Endpoint is detected at " << num_frames_; - state = DecodeState::kEndpoint; - } - } - - start_ = true; - return state; -} - -void AsrDecoder::UpdateResult(bool finish) { - const auto& hypotheses = searcher_->Outputs(); - const auto& inputs = searcher_->Inputs(); - const auto& likelihood = 
searcher_->Likelihood(); - const auto& times = searcher_->Times(); - result_.clear(); - - CHECK_EQ(hypotheses.size(), likelihood.size()); - for (size_t i = 0; i < hypotheses.size(); i++) { - const std::vector& hypothesis = hypotheses[i]; - - DecodeResult path; - path.score = likelihood[i]; - int offset = global_frame_offset_ * feature_frame_shift_in_ms(); - for (size_t j = 0; j < hypothesis.size(); j++) { - std::string word = symbol_table_->Find(hypothesis[j]); - // A detailed explanation of this if-else branch can be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - if (searcher_->Type() == kWfstBeamSearch) { - path.sentence += (' ' + word); - } else { - path.sentence += (word); - } - } - - // TimeStamp is only supported in final result - // TimeStamp of the output of CtcWfstBeamSearch may be inaccurate due to - // various FST operations when building the decoding graph. So here we use - // time stamp of the input(e2e model unit), which is more accurate, and it - // requires the symbol table of the e2e model used in training. - if (unit_table_ != nullptr && finish) { - const std::vector& input = inputs[i]; - const std::vector& time_stamp = times[i]; - CHECK_EQ(input.size(), time_stamp.size()); - for (size_t j = 0; j < input.size(); j++) { - std::string word = unit_table_->Find(input[j]); - int start = time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ > 0 - ? time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ - : 0; - if (j > 0) { - start = (time_stamp[j] - time_stamp[j - 1]) * frame_shift_in_ms() < - time_stamp_gap_ - ? (time_stamp[j - 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : start; - } - int end = time_stamp[j] * frame_shift_in_ms(); - if (j < input.size() - 1) { - end = (time_stamp[j + 1] - time_stamp[j]) * frame_shift_in_ms() < - time_stamp_gap_ - ? 
(time_stamp[j + 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : end; - } - WordPiece word_piece(word, offset + start, offset + end); - path.word_pieces.emplace_back(word_piece); - } - } - - if (post_processor_ != nullptr) { - path.sentence = post_processor_->Process(path.sentence, finish); - } - result_.emplace_back(path); - } - - if (DecodedSomething()) { - VLOG(1) << "Partial CTC result " << result_[0].sentence; - } -} - -void AsrDecoder::AttentionRescoring() { - searcher_->FinalizeSearch(); - UpdateResult(true); - // No need to do rescoring - if (0.0 == opts_.rescoring_weight) { - return; - } - // Inputs() returns N-best input ids, which is the basic unit for rescoring - // In CtcPrefixBeamSearch, inputs are the same to outputs - const auto& hypotheses = searcher_->Inputs(); - int num_hyps = hypotheses.size(); - if (num_hyps <= 0) { - return; - } - - std::vector rescoring_score; - model_->AttentionRescoring(hypotheses, opts_.reverse_weight, - &rescoring_score); - - // Combine ctc score and rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - result_[i].score = opts_.rescoring_weight * rescoring_score[i] + - opts_.ctc_weight * result_[i].score; - } - std::sort(result_.begin(), result_.end(), DecodeResult::CompareFunc); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_decoder.h deleted file mode 100644 index df71f5b7bad7b2ffdc69bbd7ab11f576bed464d2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_decoder.h +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_ASR_DECODER_H_ -#define DECODER_ASR_DECODER_H_ - -#include -#include -#include -#include - -#include "fst/fstlib.h" -#include "fst/symbol-table.h" - -#include "decoder/asr_model.h" -#include "decoder/context_graph.h" -#include "decoder/ctc_endpoint.h" -#include "decoder/ctc_prefix_beam_search.h" -#include "decoder/ctc_wfst_beam_search.h" -#include "decoder/search_interface.h" -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/utils.h" - -namespace wenet { - -struct DecodeOptions { - // chunk_size is the frame number of one chunk after subsampling. - // e.g. if subsample rate is 4 and chunk_size = 16, the frames in - // one chunk are 64 = 16*4 - int chunk_size = 16; - int num_left_chunks = -1; - - // final_score = rescoring_weight * rescoring_score + ctc_weight * ctc_score; - // rescoring_score = left_to_right_score * (1 - reverse_weight) + - // right_to_left_score * reverse_weight - // Please note the concept of ctc_scores in the following two search - // methods are different. 
- // For CtcPrefixBeamSearch, it's a sum(prefix) score + context score - // For CtcWfstBeamSearch, it's a max(viterbi) path score + context score - // So we should carefully set ctc_weight according to the search methods. - float ctc_weight = 0.5; - float rescoring_weight = 1.0; - float reverse_weight = 0.0; - CtcEndpointConfig ctc_endpoint_config; - CtcPrefixBeamSearchOptions ctc_prefix_search_opts; - CtcWfstBeamSearchOptions ctc_wfst_search_opts; -}; - -struct WordPiece { - std::string word; - int start = -1; - int end = -1; - - WordPiece(std::string word, int start, int end) - : word(std::move(word)), start(start), end(end) {} -}; - -struct DecodeResult { - float score = -kFloatMax; - std::string sentence; - std::vector word_pieces; - - static bool CompareFunc(const DecodeResult& a, const DecodeResult& b) { - return a.score > b.score; - } -}; - -enum DecodeState { - kEndBatch = 0x00, // End of current decoding batch, normal case - kEndpoint = 0x01, // Endpoint is detected - kEndFeats = 0x02, // All feature is decoded - kWaitFeats = 0x03 // Feat is not enough for one chunk inference, wait -}; - -// DecodeResource is thread safe, which can be shared for multiple -// decoding threads -struct DecodeResource { - std::shared_ptr model = nullptr; - std::shared_ptr symbol_table = nullptr; - std::shared_ptr> fst = nullptr; - std::shared_ptr unit_table = nullptr; - std::shared_ptr context_graph = nullptr; - std::shared_ptr post_processor = nullptr; -}; - -// Torch ASR decoder -class AsrDecoder { - public: - AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts); - // @param block: if true, block when feature is not enough for one chunk - // inference. Otherwise, return kWaitFeats. - DecodeState Decode(bool block = true); - void Rescoring(); - void Reset(); - void ResetContinuousDecoding(); - bool DecodedSomething() const { - return !result_.empty() && !result_[0].sentence.empty(); - } - - // This method is used for time benchmark - int num_frames_in_current_chunk() const { - return num_frames_in_current_chunk_; - } - int frame_shift_in_ms() const { - return model_->subsampling_rate() * - feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - int feature_frame_shift_in_ms() const { - return feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - const std::vector& result() const { return result_; } - - private: - DecodeState AdvanceDecoding(bool block = true); - void AttentionRescoring(); - - void UpdateResult(bool finish = false); - - std::shared_ptr feature_pipeline_; - std::shared_ptr model_; - std::shared_ptr post_processor_; - - std::shared_ptr> fst_ = nullptr; - // output symbol table - std::shared_ptr symbol_table_; - // e2e unit symbol table - std::shared_ptr unit_table_ = nullptr; - const DecodeOptions& opts_; - // cache feature - bool start_ = false; - // For continuous decoding - int num_frames_ = 0; - int global_frame_offset_ = 0; - const int time_stamp_gap_ = 100; // timestamp gap between words in a sentence - - std::unique_ptr searcher_; - std::unique_ptr ctc_endpointer_; - - int num_frames_in_current_chunk_ = 0; - std::vector result_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(AsrDecoder); -}; - -} // namespace wenet - -#endif // DECODER_ASR_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_model.cc deleted file 
mode 100644 index 8c7b0fb1195cf07bac6c3ff1bb8cb0e187e977da..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_model.cc +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#include "decoder/asr_model.h" - -#include -#include - -namespace wenet { - -int AsrModel::num_frames_for_chunk(bool start) const { - int num_required_frames = 0; - if (chunk_size_ > 0) { - if (!start) { // First batch - int context = right_context_ + 1; // Add current frame - num_required_frames = (chunk_size_ - 1) * subsampling_rate_ + context; - } else { - num_required_frames = chunk_size_ * subsampling_rate_; - } - } else { - num_required_frames = std::numeric_limits::max(); - } - return num_required_frames; -} - -void AsrModel::CacheFeature( - const std::vector>& chunk_feats) { - // Cache feature for next chunk - const int cached_feature_size = 1 + right_context_ - subsampling_rate_; - if (chunk_feats.size() >= cached_feature_size) { - // TODO(Binbin Zhang): Only deal the case when - // chunk_feats.size() > cached_feature_size here, and it's consistent - // with our current model, refine it later if we have new model or - // new requirements - cached_feature_.resize(cached_feature_size); - for (int i = 0; i < cached_feature_size; ++i) { - cached_feature_[i] = - chunk_feats[chunk_feats.size() - cached_feature_size + i]; - } - } -} - -void AsrModel::ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) { - ctc_prob->clear(); - int num_frames = cached_feature_.size() + chunk_feats.size(); - if (num_frames >= right_context_ + 1) { - this->ForwardEncoderFunc(chunk_feats, ctc_prob); - this->CacheFeature(chunk_feats); - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_model.h deleted file mode 100644 index d100dd818551014fa4769c1766bc3b1b626e8453..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/asr_model.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#ifndef DECODER_ASR_MODEL_H_ -#define DECODER_ASR_MODEL_H_ - -#include -#include -#include -#include - -#include "utils/timer.h" -#include "utils/utils.h" - -namespace wenet { - -class AsrModel { - public: - virtual int right_context() const { return right_context_; } - virtual int subsampling_rate() const { return subsampling_rate_; } - virtual int sos() const { return sos_; } - virtual int eos() const { return eos_; } - virtual bool is_bidirectional_decoder() const { - return is_bidirectional_decoder_; - } - virtual int offset() const { return offset_; } - - // If chunk_size > 0, streaming case. 
Otherwise, none streaming case - virtual void set_chunk_size(int chunk_size) { chunk_size_ = chunk_size; } - virtual void set_num_left_chunks(int num_left_chunks) { - num_left_chunks_ = num_left_chunks; - } - // start: if it is the start chunk of one sentence - virtual int num_frames_for_chunk(bool start) const; - - virtual void Reset() = 0; - - virtual void ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob); - - virtual void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) = 0; - - virtual std::shared_ptr Copy() const = 0; - - protected: - virtual void ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) = 0; - virtual void CacheFeature(const std::vector>& chunk_feats); - - int right_context_ = 1; - int subsampling_rate_ = 1; - int sos_ = 0; - int eos_ = 0; - bool is_bidirectional_decoder_ = false; - int chunk_size_ = 16; - int num_left_chunks_ = -1; // -1 means all left chunks - int offset_ = 0; - - std::vector> cached_feature_; -}; - -} // namespace wenet - -#endif // DECODER_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/context_graph.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/context_graph.cc deleted file mode 100644 index adc59c506de2afa7087815887295e4d8735d2a35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/context_graph.cc +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/context_graph.h" - -#include - -#include "fst/determinize.h" - -#include "utils/string.h" -#include "utils/utils.h" - -namespace wenet { - -ContextGraph::ContextGraph(ContextConfig config) : config_(config) {} - -void ContextGraph::BuildContextGraph( - const std::vector& query_contexts, - const std::shared_ptr& symbol_table) { - CHECK(symbol_table != nullptr) << "Symbols table should not be nullptr!"; - start_tag_id_ = symbol_table->AddSymbol(""); - end_tag_id_ = symbol_table->AddSymbol(""); - symbol_table_ = symbol_table; - if (query_contexts.empty()) { - if (graph_ != nullptr) graph_.reset(); - return; - } - - std::unique_ptr ofst(new fst::StdVectorFst()); - // State 0 is the start state and the final state. - int start_state = ofst->AddState(); - ofst->SetStart(start_state); - ofst->SetFinal(start_state, fst::StdArc::Weight::One()); - - LOG(INFO) << "Contexts count size: " << query_contexts.size(); - int count = 0; - for (const auto& context : query_contexts) { - if (context.size() > config_.max_context_length) { - LOG(INFO) << "Skip long context: " << context; - continue; - } - if (++count > config_.max_contexts) break; - - std::vector words; - // Split context to words by symbol table, and build the context graph. 
- bool no_oov = SplitUTF8StringToWords(Trim(context), symbol_table, &words); - if (!no_oov) { - LOG(WARNING) << "Ignore unknown word found during compilation."; - continue; - } - - int prev_state = start_state; - int next_state = start_state; - float escape_score = 0; - for (size_t i = 0; i < words.size(); ++i) { - int word_id = symbol_table_->Find(words[i]); - float score = (i * config_.incremental_context_score - + config_.context_score) * UTF8StringLength(words[i]); - next_state = (i < words.size() - 1) ? ofst->AddState() : start_state; - ofst->AddArc(prev_state, - fst::StdArc(word_id, word_id, score, next_state)); - // Add escape arc to clean the previous context score. - if (i > 0) { - // ilabel and olabel of the escape arc is 0 (). - ofst->AddArc(prev_state, fst::StdArc(0, 0, -escape_score, start_state)); - } - prev_state = next_state; - escape_score += score; - } - } - std::unique_ptr det_fst(new fst::StdVectorFst()); - fst::Determinize(*ofst, det_fst.get()); - graph_ = std::move(det_fst); -} - -int ContextGraph::GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary) { - int next_state = 0; - for (fst::ArcIterator aiter(*graph_, cur_state); !aiter.Done(); - aiter.Next()) { - const fst::StdArc& arc = aiter.Value(); - if (arc.ilabel == 0) { - // escape score, will be overwritten when ilabel equals to word id. - *score = arc.weight.Value(); - } else if (arc.ilabel == word_id) { - next_state = arc.nextstate; - *score = arc.weight.Value(); - if (cur_state == 0) { - *is_start_boundary = true; - } - if (graph_->Final(arc.nextstate) == fst::StdArc::Weight::One()) { - *is_end_boundary = true; - } - break; - } - } - return next_state; -} - -bool ContextGraph::SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - bool no_oov = true; - for (size_t start = 0; start < chars.size();) { - for (size_t end = chars.size(); end > start; --end) { - std::string word; - for (size_t i = start; i < end; i++) { - word += chars[i]; - } - // Skip space. - if (word == " ") { - start = end; - continue; - } - // Add '▁' at the beginning of English word. - if (IsAlpha(word)) { - word = kSpaceSymbol + word; - } - - if (symbol_table->Find(word) != -1) { - words->emplace_back(word); - start = end; - continue; - } - if (end == start + 1) { - ++start; - no_oov = false; - LOG(WARNING) << word << " is oov."; - } - } - } - return no_oov; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/context_graph.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/context_graph.h deleted file mode 100644 index 41b59206987cfe22d421f40506057830b6311f8e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/context_graph.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CONTEXT_GRAPH_H_ -#define DECODER_CONTEXT_GRAPH_H_ - -#include -#include -#include - -#include "fst/compose.h" -#include "fst/fst.h" -#include "fst/vector-fst.h" - -namespace wenet { - -using StateId = fst::StdArc::StateId; - -struct ContextConfig { - int max_contexts = 5000; - int max_context_length = 100; - float context_score = 3.0; - float incremental_context_score = 0.0; -}; - -class ContextGraph { - public: - explicit ContextGraph(ContextConfig config); - void BuildContextGraph(const std::vector& query_context, - const std::shared_ptr& symbol_table); - int GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary); - - int start_tag_id() { return start_tag_id_; } - int end_tag_id() { return end_tag_id_; } - - private: - bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - - int start_tag_id_ = -1; - int end_tag_id_ = -1; - ContextConfig config_; - std::shared_ptr symbol_table_ = nullptr; - std::unique_ptr graph_ = nullptr; - DISALLOW_COPY_AND_ASSIGN(ContextGraph); -}; - -} // namespace wenet - -#endif // DECODER_CONTEXT_GRAPH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_endpoint.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_endpoint.cc deleted file mode 100644 index 4a64dd048f32401ab0dca468836cfac8be943d26..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_endpoint.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_endpoint.h" - -#include - -#include -#include - -#include "utils/log.h" - -namespace wenet { - -CtcEndpoint::CtcEndpoint(const CtcEndpointConfig& config) : config_(config) { - Reset(); -} - -void CtcEndpoint::Reset() { - num_frames_decoded_ = 0; - num_frames_trailing_blank_ = 0; -} - -static bool RuleActivated(const CtcEndpointRule& rule, - const std::string& rule_name, bool decoded_sth, - int trailing_silence, int utterance_length) { - bool ans = (decoded_sth || !rule.must_decoded_sth) && - trailing_silence >= rule.min_trailing_silence && - utterance_length >= rule.min_utterance_length; - if (ans) { - VLOG(2) << "Endpointing rule " << rule_name - << " activated: " << (decoded_sth ? 
"true" : "false") << ',' - << trailing_silence << ',' << utterance_length; - } - return ans; -} - -bool CtcEndpoint::IsEndpoint( - const std::vector>& ctc_log_probs, - bool decoded_something) { - for (int t = 0; t < ctc_log_probs.size(); ++t) { - const auto& logp_t = ctc_log_probs[t]; - float blank_prob = expf(logp_t[config_.blank]); - - num_frames_decoded_++; - if (blank_prob > config_.blank_threshold) { - num_frames_trailing_blank_++; - } else { - num_frames_trailing_blank_ = 0; - } - } - CHECK_GE(num_frames_decoded_, num_frames_trailing_blank_); - CHECK_GT(frame_shift_in_ms_, 0); - int utterance_length = num_frames_decoded_ * frame_shift_in_ms_; - int trailing_silence = num_frames_trailing_blank_ * frame_shift_in_ms_; - if (RuleActivated(config_.rule1, "rule1", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule2, "rule2", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule3, "rule3", decoded_something, trailing_silence, - utterance_length)) - return true; - return false; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_endpoint.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_endpoint.h deleted file mode 100644 index 56d9e08e7d3fab5562028e956f7b1d6ebac7b9e4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_endpoint.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_ENDPOINT_H_ -#define DECODER_CTC_ENDPOINT_H_ - -#include - -namespace wenet { - -struct CtcEndpointRule { - bool must_decoded_sth; - int min_trailing_silence; - int min_utterance_length; - - CtcEndpointRule(bool must_decoded_sth = true, int min_trailing_silence = 1000, - int min_utterance_length = 0) - : must_decoded_sth(must_decoded_sth), - min_trailing_silence(min_trailing_silence), - min_utterance_length(min_utterance_length) {} -}; - -struct CtcEndpointConfig { - /// We consider blank as silence for purposes of endpointing. - int blank = 0; // blank id - float blank_threshold = 0.8; // blank threshold to be silence - /// We support three rules. We terminate decoding if ANY of these rules - /// evaluates to "true". If you want to add more rules, do it by changing this - /// code. If you want to disable a rule, you can set the silence-timeout for - /// that rule to a very large number. - - /// rule1 times out after 5000 ms of silence, even if we decoded nothing. - CtcEndpointRule rule1; - /// rule2 times out after 1000 ms of silence after decoding something. - CtcEndpointRule rule2; - /// rule3 times out after the utterance is 20000 ms long, regardless of - /// anything else. 
- CtcEndpointRule rule3; - - CtcEndpointConfig() - : rule1(false, 5000, 0), rule2(true, 1000, 0), rule3(false, 0, 20000) {} -}; - -class CtcEndpoint { - public: - explicit CtcEndpoint(const CtcEndpointConfig& config); - - void Reset(); - /// This function returns true if this set of endpointing rules thinks we - /// should terminate decoding. - bool IsEndpoint(const std::vector>& ctc_log_probs, - bool decoded_something); - - void frame_shift_in_ms(int frame_shift_in_ms) { - frame_shift_in_ms_ = frame_shift_in_ms; - } - - private: - CtcEndpointConfig config_; - int frame_shift_in_ms_ = -1; - int num_frames_decoded_ = 0; - int num_frames_trailing_blank_ = 0; -}; - -} // namespace wenet - -#endif // DECODER_CTC_ENDPOINT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_prefix_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_prefix_beam_search.cc deleted file mode 100644 index 154c8864ba98255528a33a80a35b18eee8fa5dc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_prefix_beam_search.cc +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
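
> Aside (not part of the patch above): the three default endpoint rules documented in the deleted `ctc_endpoint.h` all share the activation condition used by `RuleActivated`. A minimal, self-contained sketch of how they interact is below; the 40 ms frame shift and the frame counts are assumed example values, not taken from the source.

```cpp
// Sketch of the default CTC endpoint rules and their shared activation check.
#include <iostream>

struct Rule {
  bool must_decoded_sth;
  int min_trailing_silence_ms;
  int min_utterance_length_ms;
};

// Same condition as RuleActivated in the deleted ctc_endpoint.cc.
bool Activated(const Rule& r, bool decoded_sth, int trailing_silence_ms,
               int utterance_length_ms) {
  return (decoded_sth || !r.must_decoded_sth) &&
         trailing_silence_ms >= r.min_trailing_silence_ms &&
         utterance_length_ms >= r.min_utterance_length_ms;
}

int main() {
  Rule rule1{false, 5000, 0};   // long silence, even if nothing was decoded
  Rule rule2{true, 1000, 0};    // shorter silence once something was decoded
  Rule rule3{false, 0, 20000};  // hard cap on utterance length

  // Assumed example: 120 decoded frames, 30 trailing blank frames, 40 ms/frame.
  const int frame_shift_ms = 40;
  const int utterance_ms = 120 * frame_shift_ms;        // 4800 ms
  const int trailing_silence_ms = 30 * frame_shift_ms;  // 1200 ms
  const bool decoded_something = true;

  // Decoding terminates if ANY rule fires; here rule2 does.
  bool endpoint =
      Activated(rule1, decoded_something, trailing_silence_ms, utterance_ms) ||
      Activated(rule2, decoded_something, trailing_silence_ms, utterance_ms) ||
      Activated(rule3, decoded_something, trailing_silence_ms, utterance_ms);
  std::cout << "endpoint detected: " << std::boolalpha << endpoint << "\n";
  return 0;
}
```
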
- - -#include "decoder/ctc_prefix_beam_search.h" - -#include -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -CtcPrefixBeamSearch::CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : opts_(opts), context_graph_(context_graph) { - Reset(); -} - -void CtcPrefixBeamSearch::Reset() { - hypotheses_.clear(); - likelihood_.clear(); - cur_hyps_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - outputs_.clear(); - abs_time_step_ = 0; - PrefixScore prefix_score; - prefix_score.s = 0.0; - prefix_score.ns = -kFloatMax; - prefix_score.v_s = 0.0; - prefix_score.v_ns = 0.0; - std::vector empty; - cur_hyps_[empty] = prefix_score; - outputs_.emplace_back(empty); - hypotheses_.emplace_back(empty); - likelihood_.emplace_back(prefix_score.total_score()); - times_.emplace_back(empty); -} - -static bool PrefixScoreCompare( - const std::pair, PrefixScore>& a, - const std::pair, PrefixScore>& b) { - return a.second.total_score() > b.second.total_score(); -} - -void CtcPrefixBeamSearch::UpdateOutputs( - const std::pair, PrefixScore>& prefix) { - const std::vector& input = prefix.first; - const std::vector& start_boundaries = prefix.second.start_boundaries; - const std::vector& end_boundaries = prefix.second.end_boundaries; - - std::vector output; - int s = 0; - int e = 0; - for (int i = 0; i < input.size(); ++i) { - if (s < start_boundaries.size() && i == start_boundaries[s]) { - output.emplace_back(context_graph_->start_tag_id()); - ++s; - } - output.emplace_back(input[i]); - if (e < end_boundaries.size() && i == end_boundaries[e]) { - output.emplace_back(context_graph_->end_tag_id()); - ++e; - } - } - outputs_.emplace_back(output); -} - -void CtcPrefixBeamSearch::UpdateHypotheses( - const std::vector, PrefixScore>>& hpys) { - cur_hyps_.clear(); - outputs_.clear(); - hypotheses_.clear(); - likelihood_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - for (auto& item : hpys) { - cur_hyps_[item.first] = item.second; - UpdateOutputs(item); - hypotheses_.emplace_back(std::move(item.first)); - likelihood_.emplace_back(item.second.total_score()); - viterbi_likelihood_.emplace_back(item.second.viterbi_score()); - times_.emplace_back(item.second.times()); - } -} - -// Please refer https://robin1001.github.io/2020/12/11/ctc-search -// for how CTC prefix beam search works, and there is a simple graph demo in -// it. -void CtcPrefixBeamSearch::Search(const std::vector>& logp) { - if (logp.size() == 0) return; - int first_beam_size = - std::min(static_cast(logp[0].size()), opts_.first_beam_size); - for (int t = 0; t < logp.size(); ++t, ++abs_time_step_) { - const std::vector& logp_t = logp[t]; - std::unordered_map, PrefixScore, PrefixHash> next_hyps; - // 1. First beam prune, only select topk candidates - std::vector topk_score; - std::vector topk_index; - TopK(logp_t, first_beam_size, &topk_score, &topk_index); - - // 2. Token passing - for (int i = 0; i < topk_index.size(); ++i) { - int id = topk_index[i]; - auto prob = topk_score[i]; - for (const auto& it : cur_hyps_) { - const std::vector& prefix = it.first; - const PrefixScore& prefix_score = it.second; - // If prefix doesn't exist in next_hyps, next_hyps[prefix] will insert - // PrefixScore(-inf, -inf) by default, since the default constructor - // of PrefixScore will set fields s(blank ending score) and - // ns(none blank ending score) to -inf, respectively. 
- if (id == opts_.blank) { - // Case 0: *a + ε => *a - PrefixScore& next_score = next_hyps[prefix]; - next_score.s = LogAdd(next_score.s, prefix_score.score() + prob); - next_score.v_s = prefix_score.viterbi_score() + prob; - next_score.times_s = prefix_score.times(); - // Prefix not changed, copy the context from prefix. - if (context_graph_ && !next_score.has_context) { - next_score.CopyContext(prefix_score); - next_score.has_context = true; - } - } else if (!prefix.empty() && id == prefix.back()) { - // Case 1: *a + a => *a - PrefixScore& next_score1 = next_hyps[prefix]; - next_score1.ns = LogAdd(next_score1.ns, prefix_score.ns + prob); - if (next_score1.v_ns < prefix_score.v_ns + prob) { - next_score1.v_ns = prefix_score.v_ns + prob; - if (next_score1.cur_token_prob < prob) { - next_score1.cur_token_prob = prob; - next_score1.times_ns = prefix_score.times_ns; - CHECK_GT(next_score1.times_ns.size(), 0); - next_score1.times_ns.back() = abs_time_step_; - } - } - if (context_graph_ && !next_score1.has_context) { - next_score1.CopyContext(prefix_score); - next_score1.has_context = true; - } - - // Case 2: *aε + a => *aa - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score2 = next_hyps[new_prefix]; - next_score2.ns = LogAdd(next_score2.ns, prefix_score.s + prob); - if (next_score2.v_ns < prefix_score.v_s + prob) { - next_score2.v_ns = prefix_score.v_s + prob; - next_score2.cur_token_prob = prob; - next_score2.times_ns = prefix_score.times_s; - next_score2.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score2.has_context) { - // Prefix changed, calculate the context score. - next_score2.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score2.has_context = true; - } - } else { - // Case 3: *a + b => *ab, *aε + b => *ab - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score = next_hyps[new_prefix]; - next_score.ns = LogAdd(next_score.ns, prefix_score.score() + prob); - if (next_score.v_ns < prefix_score.viterbi_score() + prob) { - next_score.v_ns = prefix_score.viterbi_score() + prob; - next_score.cur_token_prob = prob; - next_score.times_ns = prefix_score.times(); - next_score.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score.has_context) { - // Calculate the context score. - next_score.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score.has_context = true; - } - } - } - } - - // 3. Second beam prune, only keep top n best paths - std::vector, PrefixScore>> arr(next_hyps.begin(), - next_hyps.end()); - int second_beam_size = - std::min(static_cast(arr.size()), opts_.second_beam_size); - std::nth_element(arr.begin(), arr.begin() + second_beam_size, arr.end(), - PrefixScoreCompare); - arr.resize(second_beam_size); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // 4. Update cur_hyps_ and get new result - UpdateHypotheses(arr); - } -} - -void CtcPrefixBeamSearch::FinalizeSearch() { UpdateFinalContext(); } - -void CtcPrefixBeamSearch::UpdateFinalContext() { - if (context_graph_ == nullptr) return; - CHECK_EQ(hypotheses_.size(), cur_hyps_.size()); - CHECK_EQ(hypotheses_.size(), likelihood_.size()); - // We should backoff the context score/state when the context is - // not fully matched at the last time. 
- for (const auto& prefix : hypotheses_) { - PrefixScore& prefix_score = cur_hyps_[prefix]; - if (prefix_score.context_state != 0) { - prefix_score.UpdateContext(context_graph_, prefix_score, 0, - prefix.size()); - } - } - std::vector, PrefixScore>> arr(cur_hyps_.begin(), - cur_hyps_.end()); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // Update cur_hyps_ and get new result - UpdateHypotheses(arr); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_prefix_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_prefix_beam_search.h deleted file mode 100644 index f44ec23c37af517c9e45140f89ef7346768f5d35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_prefix_beam_search.h +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_PREFIX_BEAM_SEARCH_H_ -#define DECODER_CTC_PREFIX_BEAM_SEARCH_H_ - -#include -#include -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "utils/utils.h" - -namespace wenet { - -struct CtcPrefixBeamSearchOptions { - int blank = 0; // blank id - int first_beam_size = 10; - int second_beam_size = 10; -}; - -struct PrefixScore { - float s = -kFloatMax; // blank ending score - float ns = -kFloatMax; // none blank ending score - float v_s = -kFloatMax; // viterbi blank ending score - float v_ns = -kFloatMax; // viterbi none blank ending score - float cur_token_prob = -kFloatMax; // prob of current token - std::vector times_s; // times of viterbi blank path - std::vector times_ns; // times of viterbi none blank path - - float score() const { return LogAdd(s, ns); } - float viterbi_score() const { return v_s > v_ns ? v_s : v_ns; } - const std::vector& times() const { - return v_s > v_ns ? 
times_s : times_ns; - } - - bool has_context = false; - int context_state = 0; - float context_score = 0; - std::vector start_boundaries; - std::vector end_boundaries; - - void CopyContext(const PrefixScore& prefix_score) { - context_state = prefix_score.context_state; - context_score = prefix_score.context_score; - start_boundaries = prefix_score.start_boundaries; - end_boundaries = prefix_score.end_boundaries; - } - - void UpdateContext(const std::shared_ptr& context_graph, - const PrefixScore& prefix_score, int word_id, - int prefix_len) { - this->CopyContext(prefix_score); - - float score = 0; - bool is_start_boundary = false; - bool is_end_boundary = false; - - context_state = - context_graph->GetNextState(prefix_score.context_state, word_id, &score, - &is_start_boundary, &is_end_boundary); - context_score += score; - if (is_start_boundary) start_boundaries.emplace_back(prefix_len); - if (is_end_boundary) end_boundaries.emplace_back(prefix_len); - } - - float total_score() const { return score() + context_score; } -}; - -struct PrefixHash { - size_t operator()(const std::vector& prefix) const { - size_t hash_code = 0; - // here we use KB&DR hash code - for (int id : prefix) { - hash_code = id + 31 * hash_code; - } - return hash_code; - } -}; - -class CtcPrefixBeamSearch : public SearchInterface { - public: - explicit CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph = nullptr); - - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kPrefixBeamSearch; } - void UpdateOutputs(const std::pair, PrefixScore>& prefix); - void UpdateHypotheses( - const std::vector, PrefixScore>>& hpys); - void UpdateFinalContext(); - - const std::vector& viterbi_likelihood() const { - return viterbi_likelihood_; - } - const std::vector>& Inputs() const override { - return hypotheses_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - int abs_time_step_ = 0; - - // N-best list and corresponding likelihood_, in sorted order - std::vector> hypotheses_; - std::vector likelihood_; - std::vector viterbi_likelihood_; - std::vector> times_; - - std::unordered_map, PrefixScore, PrefixHash> cur_hyps_; - std::shared_ptr context_graph_ = nullptr; - // Outputs contain the hypotheses_ and tags like: and - std::vector> outputs_; - const CtcPrefixBeamSearchOptions& opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(CtcPrefixBeamSearch); -}; - -} // namespace wenet - -#endif // DECODER_CTC_PREFIX_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_wfst_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_wfst_beam_search.cc deleted file mode 100644 index 10e93f387e87b5f16fb7784d7060c50f227bf58e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_wfst_beam_search.cc +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_wfst_beam_search.h" - -#include - -namespace wenet { - -void DecodableTensorScaled::Reset() { - num_frames_ready_ = 0; - done_ = false; - // Give an empty initialization, will throw error when - // AcceptLoglikes is not called - logp_.clear(); -} - -void DecodableTensorScaled::AcceptLoglikes(const std::vector& logp) { - ++num_frames_ready_; - // TODO(Binbin Zhang): Avoid copy here - logp_ = logp; -} - -float DecodableTensorScaled::LogLikelihood(int32 frame, int32 index) { - CHECK_GT(index, 0); - CHECK_LT(frame, num_frames_ready_); - return scale_ * logp_[index - 1]; -} - -bool DecodableTensorScaled::IsLastFrame(int32 frame) const { - CHECK_LT(frame, num_frames_ready_); - return done_ && (frame == num_frames_ready_ - 1); -} - -int32 DecodableTensorScaled::NumIndices() const { - LOG(FATAL) << "Not implement"; - return 0; -} - -CtcWfstBeamSearch::CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : decodable_(opts.acoustic_scale), - decoder_(fst, opts, context_graph), - context_graph_(context_graph), - opts_(opts) { - Reset(); -} - -void CtcWfstBeamSearch::Reset() { - num_frames_ = 0; - decoded_frames_mapping_.clear(); - is_last_frame_blank_ = false; - last_best_ = 0; - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - decodable_.Reset(); - decoder_.InitDecoding(); -} - -void CtcWfstBeamSearch::Search(const std::vector>& logp) { - if (0 == logp.size()) { - return; - } - // Every time we get the log posterior, we decode it all before return - for (int i = 0; i < logp.size(); i++) { - float blank_score = std::exp(logp[i][0]); - if (blank_score > opts_.blank_skip_thresh * opts_.blank_scale) { - VLOG(3) << "skipping frame " << num_frames_ << " score " << blank_score; - is_last_frame_blank_ = true; - last_frame_prob_ = logp[i]; - } else { - // Get the best symbol - int cur_best = - std::max_element(logp[i].begin(), logp[i].end()) - logp[i].begin(); - // Optional, adding one blank frame if we has skipped it in two same - // symbols - if (cur_best != 0 && is_last_frame_blank_ && cur_best == last_best_) { - decodable_.AcceptLoglikes(last_frame_prob_); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_ - 1); - VLOG(2) << "Adding blank frame at symbol " << cur_best; - } - last_best_ = cur_best; - - decodable_.AcceptLoglikes(logp[i]); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_); - is_last_frame_blank_ = false; - } - num_frames_++; - } - // Get the best path - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - if (decoded_frames_mapping_.size() > 0) { - inputs_.resize(1); - outputs_.resize(1); - likelihood_.resize(1); - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, false); - std::vector alignment; - kaldi::LatticeWeight weight; - fst::GetLinearSymbolSequence(lat, &alignment, &outputs_[0], &weight); - ConvertToInputs(alignment, &inputs_[0]); - RemoveContinuousTags(&outputs_[0]); - VLOG(3) << weight.Value1() << " " << weight.Value2(); - likelihood_[0] = 
-(weight.Value1() + weight.Value2()); - } -} - -void CtcWfstBeamSearch::FinalizeSearch() { - decodable_.SetFinish(); - decoder_.FinalizeDecoding(); - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - if (decoded_frames_mapping_.size() > 0) { - std::vector nbest_lats; - if (opts_.nbest == 1) { - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, true); - nbest_lats.push_back(std::move(lat)); - } else { - // Get N-best path by lattice(CompactLattice) - kaldi::CompactLattice clat; - decoder_.GetLattice(&clat, true); - kaldi::Lattice lat, nbest_lat; - fst::ConvertLattice(clat, &lat); - // TODO(Binbin Zhang): it's n-best word lists here, not character n-best - fst::ShortestPath(lat, &nbest_lat, opts_.nbest); - fst::ConvertNbestToVector(nbest_lat, &nbest_lats); - } - int nbest = nbest_lats.size(); - inputs_.resize(nbest); - outputs_.resize(nbest); - likelihood_.resize(nbest); - times_.resize(nbest); - for (int i = 0; i < nbest; i++) { - kaldi::LatticeWeight weight; - std::vector alignment; - fst::GetLinearSymbolSequence(nbest_lats[i], &alignment, &outputs_[i], - &weight); - ConvertToInputs(alignment, &inputs_[i], ×_[i]); - RemoveContinuousTags(&outputs_[i]); - likelihood_[i] = -(weight.Value1() + weight.Value2()); - } - } -} - -void CtcWfstBeamSearch::ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time) { - input->clear(); - if (time != nullptr) time->clear(); - for (int cur = 0; cur < alignment.size(); ++cur) { - // ignore blank - if (alignment[cur] - 1 == 0) continue; - // merge continuous same label - if (cur > 0 && alignment[cur] == alignment[cur - 1]) continue; - - input->push_back(alignment[cur] - 1); - if (time != nullptr) { - time->push_back(decoded_frames_mapping_[cur]); - } - } -} - -void CtcWfstBeamSearch::RemoveContinuousTags(std::vector* output) { - if (context_graph_) { - for (auto it = output->begin(); it != output->end();) { - if (*it == context_graph_->start_tag_id() || - *it == context_graph_->end_tag_id()) { - if (it + 1 != output->end() && *it == *(it + 1)) { - it = output->erase(it); - continue; - } - } - ++it; - } - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_wfst_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_wfst_beam_search.h deleted file mode 100644 index 204a0c8db1254035b7e3bd4a6e02b65d66b756f3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/ctc_wfst_beam_search.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
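
> Aside (not part of the patch above): `CtcWfstBeamSearch::ConvertToInputs` in the deleted `ctc_wfst_beam_search.cc` collapses a frame-level WFST alignment back to CTC tokens by subtracting one from each label (the WFST side reserves 0 for epsilon, so label 1 is the CTC blank), dropping blanks, and merging consecutive repeats. A standalone sketch of that collapsing, with an assumed example alignment:

```cpp
// Sketch of the alignment-to-token collapsing done by ConvertToInputs.
#include <iostream>
#include <vector>

std::vector<int> ConvertToInputs(const std::vector<int>& alignment) {
  std::vector<int> input;
  for (size_t cur = 0; cur < alignment.size(); ++cur) {
    if (alignment[cur] - 1 == 0) continue;  // ignore blank
    if (cur > 0 && alignment[cur] == alignment[cur - 1]) continue;  // merge repeats
    input.push_back(alignment[cur] - 1);    // shift back to CTC token ids
  }
  return input;
}

int main() {
  // Assumed example: blanks (label 1) and adjacent repeats collapse away,
  // but a blank between two identical symbols keeps both of them.
  std::vector<int> alignment = {1, 6, 6, 1, 6, 8, 8, 1};
  for (int id : ConvertToInputs(alignment)) std::cout << id << ' ';  // 5 5 7
  std::cout << '\n';
  return 0;
}
```
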
- - -#ifndef DECODER_CTC_WFST_BEAM_SEARCH_H_ -#define DECODER_CTC_WFST_BEAM_SEARCH_H_ - -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "kaldi/decoder/lattice-faster-online-decoder.h" -#include "utils/utils.h" - -namespace wenet { - -class DecodableTensorScaled : public kaldi::DecodableInterface { - public: - explicit DecodableTensorScaled(float scale = 1.0) : scale_(scale) { Reset(); } - - void Reset(); - int32 NumFramesReady() const override { return num_frames_ready_; } - bool IsLastFrame(int32 frame) const override; - float LogLikelihood(int32 frame, int32 index) override; - int32 NumIndices() const override; - void AcceptLoglikes(const std::vector& logp); - void SetFinish() { done_ = true; } - - private: - int num_frames_ready_ = 0; - float scale_ = 1.0; - bool done_ = false; - std::vector logp_; -}; - -// LatticeFasterDecoderConfig has the following key members -// beam: decoding beam -// max_active: Decoder max active states -// lattice_beam: Lattice generation beam -struct CtcWfstBeamSearchOptions : public kaldi::LatticeFasterDecoderConfig { - float acoustic_scale = 1.0; - float nbest = 10; - // When blank score is greater than this thresh, skip the frame in viterbi - // search - float blank_skip_thresh = 0.98; - float blank_scale = 1.0; -}; - -class CtcWfstBeamSearch : public SearchInterface { - public: - explicit CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph); - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kWfstBeamSearch; } - // For CTC prefix beam search, both inputs and outputs are hypotheses_ - const std::vector>& Inputs() const override { - return inputs_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - // Sub one and remove - void ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time = nullptr); - void RemoveContinuousTags(std::vector* output); - - int num_frames_ = 0; - std::vector decoded_frames_mapping_; - - int last_best_ = 0; // last none blank best id - std::vector last_frame_prob_; - bool is_last_frame_blank_ = false; - std::vector> inputs_, outputs_; - std::vector likelihood_; - std::vector> times_; - DecodableTensorScaled decodable_; - kaldi::LatticeFasterOnlineDecoder decoder_; - std::shared_ptr context_graph_; - const CtcWfstBeamSearchOptions& opts_; -}; - -} // namespace wenet - -#endif // DECODER_CTC_WFST_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/onnx_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/onnx_asr_model.cc deleted file mode 100644 index fc7afc704febbde3b7e350e392dc46763c453e74..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/onnx_asr_model.cc +++ /dev/null @@ -1,430 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/onnx_asr_model.h" - -#include -#include -#include - -#include "utils/string.h" - -namespace wenet { - -Ort::Env OnnxAsrModel::env_ = Ort::Env(ORT_LOGGING_LEVEL_WARNING, ""); -Ort::SessionOptions OnnxAsrModel::session_options_ = Ort::SessionOptions(); - -void OnnxAsrModel::InitEngineThreads(int num_threads) { - session_options_.SetIntraOpNumThreads(num_threads); -} - -void OnnxAsrModel::GetInputOutputInfo( - const std::shared_ptr& session, - std::vector* in_names, std::vector* out_names) { - Ort::AllocatorWithDefaultOptions allocator; - // Input info - int num_nodes = session->GetInputCount(); - in_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetInputName(i, allocator); - Ort::TypeInfo type_info = session->GetInputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tInput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*in_names)[i] = name; - } - // Output info - num_nodes = session->GetOutputCount(); - out_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetOutputName(i, allocator); - Ort::TypeInfo type_info = session->GetOutputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tOutput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*out_names)[i] = name; - } -} - -void OnnxAsrModel::Read(const std::string& model_dir) { - std::string encoder_onnx_path = model_dir + "/encoder.onnx"; - std::string rescore_onnx_path = model_dir + "/decoder.onnx"; - std::string ctc_onnx_path = model_dir + "/ctc.onnx"; - - // 1. Load sessions - try { -#ifdef _MSC_VER - encoder_session_ = std::make_shared( - env_, ToWString(encoder_onnx_path).c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, ToWString(rescore_onnx_path).c_str(), session_options_); - ctc_session_ = std::make_shared( - env_, ToWString(ctc_onnx_path).c_str(), session_options_); -#else - encoder_session_ = std::make_shared( - env_, encoder_onnx_path.c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, rescore_onnx_path.c_str(), session_options_); - ctc_session_ = std::make_shared(env_, ctc_onnx_path.c_str(), - session_options_); -#endif - } catch (std::exception const& e) { - LOG(ERROR) << "error when load onnx model: " << e.what(); - exit(0); - } - - // 2. 
Read metadata - auto model_metadata = encoder_session_->GetModelMetadata(); - - Ort::AllocatorWithDefaultOptions allocator; - encoder_output_size_ = - atoi(model_metadata.LookupCustomMetadataMap("output_size", allocator)); - num_blocks_ = - atoi(model_metadata.LookupCustomMetadataMap("num_blocks", allocator)); - head_ = atoi(model_metadata.LookupCustomMetadataMap("head", allocator)); - cnn_module_kernel_ = atoi( - model_metadata.LookupCustomMetadataMap("cnn_module_kernel", allocator)); - subsampling_rate_ = atoi( - model_metadata.LookupCustomMetadataMap("subsampling_rate", allocator)); - right_context_ = - atoi(model_metadata.LookupCustomMetadataMap("right_context", allocator)); - sos_ = atoi(model_metadata.LookupCustomMetadataMap("sos_symbol", allocator)); - eos_ = atoi(model_metadata.LookupCustomMetadataMap("eos_symbol", allocator)); - is_bidirectional_decoder_ = atoi(model_metadata.LookupCustomMetadataMap( - "is_bidirectional_decoder", allocator)); - chunk_size_ = - atoi(model_metadata.LookupCustomMetadataMap("chunk_size", allocator)); - num_left_chunks_ = - atoi(model_metadata.LookupCustomMetadataMap("left_chunks", allocator)); - - LOG(INFO) << "Onnx Model Info:"; - LOG(INFO) << "\tencoder_output_size " << encoder_output_size_; - LOG(INFO) << "\tnum_blocks " << num_blocks_; - LOG(INFO) << "\thead " << head_; - LOG(INFO) << "\tcnn_module_kernel " << cnn_module_kernel_; - LOG(INFO) << "\tsubsampling_rate " << subsampling_rate_; - LOG(INFO) << "\tright_context " << right_context_; - LOG(INFO) << "\tsos " << sos_; - LOG(INFO) << "\teos " << eos_; - LOG(INFO) << "\tis bidirectional decoder " << is_bidirectional_decoder_; - LOG(INFO) << "\tchunk_size " << chunk_size_; - LOG(INFO) << "\tnum_left_chunks " << num_left_chunks_; - - // 3. Read model nodes - LOG(INFO) << "Onnx Encoder:"; - GetInputOutputInfo(encoder_session_, &encoder_in_names_, &encoder_out_names_); - LOG(INFO) << "Onnx CTC:"; - GetInputOutputInfo(ctc_session_, &ctc_in_names_, &ctc_out_names_); - LOG(INFO) << "Onnx Rescore:"; - GetInputOutputInfo(rescore_session_, &rescore_in_names_, &rescore_out_names_); -} - -OnnxAsrModel::OnnxAsrModel(const OnnxAsrModel& other) { - // metadatas - encoder_output_size_ = other.encoder_output_size_; - num_blocks_ = other.num_blocks_; - head_ = other.head_; - cnn_module_kernel_ = other.cnn_module_kernel_; - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - - // sessions - encoder_session_ = other.encoder_session_; - ctc_session_ = other.ctc_session_; - rescore_session_ = other.rescore_session_; - - // node names - encoder_in_names_ = other.encoder_in_names_; - encoder_out_names_ = other.encoder_out_names_; - ctc_in_names_ = other.ctc_in_names_; - ctc_out_names_ = other.ctc_out_names_; - rescore_in_names_ = other.rescore_in_names_; - rescore_out_names_ = other.rescore_out_names_; -} - -std::shared_ptr OnnxAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void OnnxAsrModel::Reset() { - offset_ = 0; - encoder_outs_.clear(); - cached_feature_.clear(); - // Reset att_cache - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - if (num_left_chunks_ > 0) { - int required_cache_size = chunk_size_ * 
num_left_chunks_; - offset_ = required_cache_size; - att_cache_.resize(num_blocks_ * head_ * required_cache_size * - encoder_output_size_ / head_ * 2, - 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, required_cache_size, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } else { - att_cache_.resize(0, 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, 0, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } - - // Reset cnn_cache - cnn_cache_.resize( - num_blocks_ * encoder_output_size_ * (cnn_module_kernel_ - 1), 0.0); - const int64_t cnn_cache_shape[] = {num_blocks_, 1, encoder_output_size_, - cnn_module_kernel_ - 1}; - cnn_cache_ort_ = Ort::Value::CreateTensor( - memory_info, cnn_cache_.data(), cnn_cache_.size(), cnn_cache_shape, 4); -} - -void OnnxAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - // 1. Prepare onnx required data, splice cached_feature_ and chunk_feats - // chunk - int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - std::vector feats; - for (size_t i = 0; i < cached_feature_.size(); ++i) { - feats.insert(feats.end(), cached_feature_[i].begin(), - cached_feature_[i].end()); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - feats.insert(feats.end(), chunk_feats[i].begin(), chunk_feats[i].end()); - } - const int64_t feats_shape[3] = {1, num_frames, feature_dim}; - Ort::Value feats_ort = Ort::Value::CreateTensor( - memory_info, feats.data(), feats.size(), feats_shape, 3); - // offset - int64_t offset_int64 = static_cast(offset_); - Ort::Value offset_ort = Ort::Value::CreateTensor( - memory_info, &offset_int64, 1, std::vector{}.data(), 0); - // required_cache_size - int64_t required_cache_size = chunk_size_ * num_left_chunks_; - Ort::Value required_cache_size_ort = Ort::Value::CreateTensor( - memory_info, &required_cache_size, 1, std::vector{}.data(), 0); - // att_mask - Ort::Value att_mask_ort{nullptr}; - std::vector att_mask(required_cache_size + chunk_size_, 1); - if (num_left_chunks_ > 0) { - int chunk_idx = offset_ / chunk_size_ - num_left_chunks_; - if (chunk_idx < num_left_chunks_) { - for (int i = 0; i < (num_left_chunks_ - chunk_idx) * chunk_size_; ++i) { - att_mask[i] = 0; - } - } - const int64_t att_mask_shape[] = {1, 1, required_cache_size + chunk_size_}; - att_mask_ort = Ort::Value::CreateTensor( - memory_info, reinterpret_cast(att_mask.data()), att_mask.size(), - att_mask_shape, 3); - } - - // 2. 
Encoder chunk forward - std::vector inputs; - for (auto name : encoder_in_names_) { - if (!strcmp(name, "chunk")) { - inputs.emplace_back(std::move(feats_ort)); - } else if (!strcmp(name, "offset")) { - inputs.emplace_back(std::move(offset_ort)); - } else if (!strcmp(name, "required_cache_size")) { - inputs.emplace_back(std::move(required_cache_size_ort)); - } else if (!strcmp(name, "att_cache")) { - inputs.emplace_back(std::move(att_cache_ort_)); - } else if (!strcmp(name, "cnn_cache")) { - inputs.emplace_back(std::move(cnn_cache_ort_)); - } else if (!strcmp(name, "att_mask")) { - inputs.emplace_back(std::move(att_mask_ort)); - } - } - - std::vector ort_outputs = encoder_session_->Run( - Ort::RunOptions{nullptr}, encoder_in_names_.data(), inputs.data(), - inputs.size(), encoder_out_names_.data(), encoder_out_names_.size()); - - offset_ += static_cast( - ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[1]); - att_cache_ort_ = std::move(ort_outputs[1]); - cnn_cache_ort_ = std::move(ort_outputs[2]); - - std::vector ctc_inputs; - ctc_inputs.emplace_back(std::move(ort_outputs[0])); - - std::vector ctc_ort_outputs = ctc_session_->Run( - Ort::RunOptions{nullptr}, ctc_in_names_.data(), ctc_inputs.data(), - ctc_inputs.size(), ctc_out_names_.data(), ctc_out_names_.size()); - encoder_outs_.push_back(std::move(ctc_inputs[0])); - - float* logp_data = ctc_ort_outputs[0].GetTensorMutableData(); - auto type_info = ctc_ort_outputs[0].GetTensorTypeAndShapeInfo(); - - int num_outputs = type_info.GetShape()[1]; - int output_dim = type_info.GetShape()[2]; - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), logp_data + i * output_dim, - sizeof(float) * output_dim); - } -} - -float OnnxAsrModel::ComputeAttentionScore(const float* prob, - const std::vector& hyp, int eos, - int decode_out_len) { - float score = 0.0f; - for (size_t j = 0; j < hyp.size(); ++j) { - score += *(prob + j * decode_out_len + hyp[j]); - } - score += *(prob + hyp.size() * decode_out_len + eos); - return score; -} - -void OnnxAsrModel::AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - std::vector hyps_lens; - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_lens.emplace_back(static_cast(length)); - } - - std::vector rescore_input; - int encoder_len = 0; - for (int i = 0; i < encoder_outs_.size(); i++) { - float* encoder_outs_data = encoder_outs_[i].GetTensorMutableData(); - auto type_info = encoder_outs_[i].GetTensorTypeAndShapeInfo(); - for (int j = 0; j < type_info.GetElementCount(); j++) { - rescore_input.emplace_back(encoder_outs_data[j]); - } - encoder_len += type_info.GetShape()[1]; - } - - const int64_t decode_input_shape[] = {1, encoder_len, encoder_output_size_}; - - std::vector hyps_pad; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_pad.emplace_back(sos_); - size_t j = 0; - for (; j < hyp.size(); ++j) { - hyps_pad.emplace_back(hyp[j]); - } - if (j == max_hyps_len - 1) { - continue; - } - for (; j < max_hyps_len - 1; ++j) { 
- hyps_pad.emplace_back(0); - } - } - - const int64_t hyps_pad_shape[] = {num_hyps, max_hyps_len}; - - const int64_t hyps_lens_shape[] = {num_hyps}; - - Ort::Value decode_input_tensor_ = Ort::Value::CreateTensor( - memory_info, rescore_input.data(), rescore_input.size(), - decode_input_shape, 3); - Ort::Value hyps_pad_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_pad.data(), hyps_pad.size(), hyps_pad_shape, 2); - Ort::Value hyps_lens_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_lens.data(), hyps_lens.size(), hyps_lens_shape, 1); - - std::vector rescore_inputs; - - rescore_inputs.emplace_back(std::move(hyps_pad_tensor_)); - rescore_inputs.emplace_back(std::move(hyps_lens_tensor_)); - rescore_inputs.emplace_back(std::move(decode_input_tensor_)); - - std::vector rescore_outputs = rescore_session_->Run( - Ort::RunOptions{nullptr}, rescore_in_names_.data(), rescore_inputs.data(), - rescore_inputs.size(), rescore_out_names_.data(), - rescore_out_names_.size()); - - float* decoder_outs_data = rescore_outputs[0].GetTensorMutableData(); - float* r_decoder_outs_data = rescore_outputs[1].GetTensorMutableData(); - - auto type_info = rescore_outputs[0].GetTensorTypeAndShapeInfo(); - int decode_out_len = type_info.GetShape()[2]; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left to right decoder score - score = ComputeAttentionScore( - decoder_outs_data + max_hyps_len * decode_out_len * i, hyp, eos_, - decode_out_len); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore( - r_decoder_outs_data + max_hyps_len * decode_out_len * i, r_hyp, eos_, - decode_out_len); - } - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/onnx_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/onnx_asr_model.h deleted file mode 100644 index f5d9e9a0c61d728f2fb6d45d1428234abae98c90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/onnx_asr_model.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
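
> Aside (not part of the patch above): the last loop of `OnnxAsrModel::AttentionRescoring` mixes the left-to-right and (optional) right-to-left decoder scores with `reverse_weight`. A small arithmetic sketch of that combination; the hypothesis scores and `reverse_weight = 0.3` are assumed example values and presuppose a bidirectional decoder.

```cpp
// Sketch of the reverse_weight score mixing in AttentionRescoring.
#include <cstdio>

int main() {
  const float reverse_weight = 0.3f;
  const float left_to_right = -4.2f;  // attention score of the hypothesis
  const float right_to_left = -4.8f;  // attention score of the reversed hypothesis

  // Same combination as in the deleted code:
  // rescoring_score = score * (1 - reverse_weight) + r_score * reverse_weight
  const float rescoring_score =
      left_to_right * (1.0f - reverse_weight) + right_to_left * reverse_weight;
  std::printf("rescoring_score = %.3f\n", rescoring_score);  // -4.380
  return 0;
}
```
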
- -#ifndef DECODER_ONNX_ASR_MODEL_H_ -#define DECODER_ONNX_ASR_MODEL_H_ - -#include -#include -#include - -#include "onnxruntime_cxx_api.h" // NOLINT - -#include "decoder/asr_model.h" -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -class OnnxAsrModel : public AsrModel { - public: - static void InitEngineThreads(int num_threads = 1); - - public: - OnnxAsrModel() = default; - OnnxAsrModel(const OnnxAsrModel& other); - void Read(const std::string& model_dir); - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - void GetInputOutputInfo(const std::shared_ptr& session, - std::vector* in_names, - std::vector* out_names); - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const float* prob, const std::vector& hyp, - int eos, int decode_out_len); - - private: - int encoder_output_size_ = 0; - int num_blocks_ = 0; - int cnn_module_kernel_ = 0; - int head_ = 0; - - // sessions - // NOTE(Mddct): The Env holds the logging state used by all other objects. - // One Env must be created before using any other Onnxruntime functionality. - static Ort::Env env_; // shared environment across threads. - static Ort::SessionOptions session_options_; - std::shared_ptr encoder_session_ = nullptr; - std::shared_ptr rescore_session_ = nullptr; - std::shared_ptr ctc_session_ = nullptr; - - // node names - std::vector encoder_in_names_, encoder_out_names_; - std::vector ctc_in_names_, ctc_out_names_; - std::vector rescore_in_names_, rescore_out_names_; - - // caches - Ort::Value att_cache_ort_{nullptr}; - Ort::Value cnn_cache_ort_{nullptr}; - std::vector encoder_outs_; - // NOTE: Instead of making a copy of the xx_cache, ONNX only maintains - // its data pointer when initializing xx_cache_ort (see https://github.com/ - // microsoft/onnxruntime/blob/master/onnxruntime/core/framework - // /tensor.cc#L102-L129), so we need the following variables to keep - // our data "alive" during the lifetime of decoder. - std::vector att_cache_; - std::vector cnn_cache_; -}; - -} // namespace wenet - -#endif // DECODER_ONNX_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/params.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/params.h deleted file mode 100644 index 3edc877f1bb6d876ca087cab8e4ed00d42e97e63..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/params.h +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_PARAMS_H_ -#define DECODER_PARAMS_H_ - -#include -#include -#include -#include - -#include "decoder/asr_decoder.h" -#ifdef USE_ONNX -#include "decoder/onnx_asr_model.h" -#endif -#ifdef USE_TORCH -#include "decoder/torch_asr_model.h" -#endif -#ifdef USE_XPU -#include "xpu/xpu_asr_model.h" -#endif -#ifdef USE_BPU -#include "bpu/bpu_asr_model.h" -#endif -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/flags.h" -#include "utils/string.h" - -DEFINE_int32(device_id, 0, "set XPU DeviceID for ASR model"); - -// TorchAsrModel flags -DEFINE_string(model_path, "", "pytorch exported model path"); -// OnnxAsrModel flags -DEFINE_string(onnx_dir, "", "directory where the onnx model is saved"); -// XPUAsrModel flags -DEFINE_string(xpu_model_dir, "", - "directory where the XPU model and weights is saved"); -// BPUAsrModel flags -DEFINE_string(bpu_model_dir, "", - "directory where the HORIZON BPU model is saved"); - -// FeaturePipelineConfig flags -DEFINE_int32(num_bins, 80, "num mel bins for fbank feature"); -DEFINE_int32(sample_rate, 16000, "sample rate for audio"); - -// TLG fst -DEFINE_string(fst_path, "", "TLG fst path"); - -// DecodeOptions flags -DEFINE_int32(chunk_size, 16, "decoding chunk size"); -DEFINE_int32(num_left_chunks, -1, "left chunks in decoding"); -DEFINE_double(ctc_weight, 0.5, - "ctc weight when combining ctc score and rescoring score"); -DEFINE_double(rescoring_weight, 1.0, - "rescoring weight when combining ctc score and rescoring score"); -DEFINE_double(reverse_weight, 0.0, - "used for bitransformer rescoring. it must be 0.0 if decoder is" - "conventional transformer decoder, and only reverse_weight > 0.0" - "dose the right to left decoder will be calculated and used"); -DEFINE_int32(max_active, 7000, "max active states in ctc wfst search"); -DEFINE_int32(min_active, 200, "min active states in ctc wfst search"); -DEFINE_double(beam, 16.0, "beam in ctc wfst search"); -DEFINE_double(lattice_beam, 10.0, "lattice beam in ctc wfst search"); -DEFINE_double(acoustic_scale, 1.0, "acoustic scale for ctc wfst search"); -DEFINE_double(blank_skip_thresh, 1.0, - "blank skip thresh for ctc wfst search, 1.0 means no skip"); -DEFINE_double(blank_scale, 1.0, "blank scale for ctc wfst search"); -DEFINE_double(length_penalty, 0.0, - "length penalty ctc wfst search, will not" - "apply on self-loop arc, for balancing the del/ins ratio, " - "suggest set to -3.0"); -DEFINE_int32(nbest, 10, "nbest for ctc wfst or prefix search"); - -// SymbolTable flags -DEFINE_string(dict_path, "", - "dict symbol table path, required when LM is enabled"); -DEFINE_string(unit_path, "", - "e2e model unit symbol table, it is used in both " - "with/without LM scenarios for context/timestamp"); - -// Context flags -DEFINE_string(context_path, "", "context path, is used to build context graph"); -DEFINE_double(context_score, 3.0, "is used to rescore the decoded result"); - -// PostProcessOptions flags -DEFINE_int32(language_type, 0, - "remove spaces according to language type" - "0x00 = kMandarinEnglish, " - "0x01 = kIndoEuropean"); -DEFINE_bool(lowercase, true, "lowercase final result if needed"); - -namespace wenet { -std::shared_ptr InitFeaturePipelineConfigFromFlags() { - auto feature_config = std::make_shared( - FLAGS_num_bins, FLAGS_sample_rate); - return feature_config; -} - -std::shared_ptr InitDecodeOptionsFromFlags() { - auto decode_config = std::make_shared(); - decode_config->chunk_size = FLAGS_chunk_size; - decode_config->num_left_chunks = 
FLAGS_num_left_chunks; - decode_config->ctc_weight = FLAGS_ctc_weight; - decode_config->reverse_weight = FLAGS_reverse_weight; - decode_config->rescoring_weight = FLAGS_rescoring_weight; - decode_config->ctc_wfst_search_opts.max_active = FLAGS_max_active; - decode_config->ctc_wfst_search_opts.min_active = FLAGS_min_active; - decode_config->ctc_wfst_search_opts.beam = FLAGS_beam; - decode_config->ctc_wfst_search_opts.lattice_beam = FLAGS_lattice_beam; - decode_config->ctc_wfst_search_opts.acoustic_scale = FLAGS_acoustic_scale; - decode_config->ctc_wfst_search_opts.blank_skip_thresh = - FLAGS_blank_skip_thresh; - decode_config->ctc_wfst_search_opts.blank_scale = FLAGS_blank_scale; - decode_config->ctc_wfst_search_opts.length_penalty = FLAGS_length_penalty; - decode_config->ctc_wfst_search_opts.nbest = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.first_beam_size = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.second_beam_size = FLAGS_nbest; - return decode_config; -} - -std::shared_ptr InitDecodeResourceFromFlags() { - auto resource = std::make_shared(); - const int kNumGemmThreads = 1; - if (!FLAGS_onnx_dir.empty()) { -#ifdef USE_ONNX - LOG(INFO) << "Reading onnx model "; - OnnxAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_onnx_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DONNX=ON'."; -#endif - } else if (!FLAGS_model_path.empty()) { -#ifdef USE_TORCH - LOG(INFO) << "Reading torch model " << FLAGS_model_path; - TorchAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_model_path); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DTORCH=ON'."; -#endif - } else if (!FLAGS_xpu_model_dir.empty()) { -#ifdef USE_XPU - LOG(INFO) << "Reading XPU WeNet model weight from " << FLAGS_xpu_model_dir; - auto model = std::make_shared(); - model->SetEngineThreads(kNumGemmThreads); - model->SetDeviceId(FLAGS_device_id); - model->Read(FLAGS_xpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DXPU=ON'."; -#endif - } else if (!FLAGS_bpu_model_dir.empty()) { -#ifdef USE_BPU - LOG(INFO) << "Reading Horizon BPU model from " << FLAGS_bpu_model_dir; - auto model = std::make_shared(); - model->Read(FLAGS_bpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DBPU=ON'."; -#endif - } else { - LOG(FATAL) << "Please set ONNX, TORCH, XPU or BPU model path!!!"; - } - - LOG(INFO) << "Reading unit table " << FLAGS_unit_path; - auto unit_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_unit_path)); - CHECK(unit_table != nullptr); - resource->unit_table = unit_table; - - if (!FLAGS_fst_path.empty()) { // With LM - CHECK(!FLAGS_dict_path.empty()); - LOG(INFO) << "Reading fst " << FLAGS_fst_path; - auto fst = std::shared_ptr>( - fst::Fst::Read(FLAGS_fst_path)); - CHECK(fst != nullptr); - resource->fst = fst; - - LOG(INFO) << "Reading symbol table " << FLAGS_dict_path; - auto symbol_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_dict_path)); - CHECK(symbol_table != nullptr); - resource->symbol_table = symbol_table; - } else { // Without LM, symbol_table is the same as unit_table - resource->symbol_table = unit_table; - } - - if (!FLAGS_context_path.empty()) { - LOG(INFO) << "Reading context " << FLAGS_context_path; - std::vector contexts; - std::ifstream infile(FLAGS_context_path); - std::string context; - 
while (getline(infile, context)) { - contexts.emplace_back(Trim(context)); - } - ContextConfig config; - config.context_score = FLAGS_context_score; - resource->context_graph = std::make_shared(config); - resource->context_graph->BuildContextGraph(contexts, - resource->symbol_table); - } - - PostProcessOptions post_process_opts; - post_process_opts.language_type = - FLAGS_language_type == 0 ? kMandarinEnglish : kIndoEuropean; - post_process_opts.lowercase = FLAGS_lowercase; - resource->post_processor = - std::make_shared(std::move(post_process_opts)); - return resource; -} - -} // namespace wenet - -#endif // DECODER_PARAMS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/search_interface.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/search_interface.h deleted file mode 100644 index 25bad26705f8be44561d2c686f50a63035b14bbf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/search_interface.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_SEARCH_INTERFACE_H_ -#define DECODER_SEARCH_INTERFACE_H_ - -namespace wenet { - -#include - -enum SearchType { - kPrefixBeamSearch = 0x00, - kWfstBeamSearch = 0x01, -}; - -class SearchInterface { - public: - virtual ~SearchInterface() {} - virtual void Search(const std::vector>& logp) = 0; - virtual void Reset() = 0; - virtual void FinalizeSearch() = 0; - - virtual SearchType Type() const = 0; - // N-best inputs id - virtual const std::vector>& Inputs() const = 0; - // N-best outputs id - virtual const std::vector>& Outputs() const = 0; - // N-best likelihood - virtual const std::vector& Likelihood() const = 0; - // N-best timestamp - virtual const std::vector>& Times() const = 0; -}; - -} // namespace wenet - -#endif // DECODER_SEARCH_INTERFACE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/torch_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/torch_asr_model.cc deleted file mode 100644 index 3abca283e12f5c173c9511707229ea82b31f26d8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/torch_asr_model.cc +++ /dev/null @@ -1,278 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/torch_asr_model.h" - -#include -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -namespace wenet { - -#ifndef IOS -void TorchAsrModel::InitEngineThreads(int num_threads) { - // For multi-thread performance - at::set_num_threads(num_threads); - VLOG(1) << "Num intra-op threads: " << at::get_num_threads(); -} -#endif - -void TorchAsrModel::Read(const std::string& model_path) { - torch::DeviceType device = at::kCPU; -#ifdef USE_GPU - if (!torch::cuda::is_available()) { - VLOG(1) << "CUDA is not available! Please check your GPU settings"; - throw std::runtime_error("CUDA is not available!"); - } else { - VLOG(1) << "CUDA available! Running on GPU"; - device = at::kCUDA; - } -#endif - torch::jit::script::Module model = torch::jit::load(model_path, device); - model_ = std::make_shared(std::move(model)); - torch::NoGradGuard no_grad; - model_->eval(); - torch::jit::IValue o1 = model_->run_method("subsampling_rate"); - CHECK_EQ(o1.isInt(), true); - subsampling_rate_ = o1.toInt(); - torch::jit::IValue o2 = model_->run_method("right_context"); - CHECK_EQ(o2.isInt(), true); - right_context_ = o2.toInt(); - torch::jit::IValue o3 = model_->run_method("sos_symbol"); - CHECK_EQ(o3.isInt(), true); - sos_ = o3.toInt(); - torch::jit::IValue o4 = model_->run_method("eos_symbol"); - CHECK_EQ(o4.isInt(), true); - eos_ = o4.toInt(); - torch::jit::IValue o5 = model_->run_method("is_bidirectional_decoder"); - CHECK_EQ(o5.isBool(), true); - is_bidirectional_decoder_ = o5.toBool(); - - VLOG(1) << "Torch Model Info:"; - VLOG(1) << "\tsubsampling_rate " << subsampling_rate_; - VLOG(1) << "\tright context " << right_context_; - VLOG(1) << "\tsos " << sos_; - VLOG(1) << "\teos " << eos_; - VLOG(1) << "\tis bidirectional decoder " << is_bidirectional_decoder_; -} - -TorchAsrModel::TorchAsrModel(const TorchAsrModel& other) { - // 1. Init the model info - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - // 2. Model copy, just copy the model ptr since: - // PyTorch allows using multiple CPU threads during TorchScript model - // inference, please see https://pytorch.org/docs/stable/notes/cpu_ - // threading_torchscript_inference.html - model_ = other.model_; - - // NOTE(Binbin Zhang): - // inner states for forward are not copied here. -} - -std::shared_ptr TorchAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void TorchAsrModel::Reset() { - offset_ = 0; - att_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - cnn_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - encoder_outs_.clear(); - cached_feature_.clear(); -} - -void TorchAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - // 1. Prepare libtorch required data, splice cached_feature_ and chunk_feats - // The first dimension is for batchsize, which is 1. 
- int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - torch::Tensor feats = - torch::zeros({1, num_frames, feature_dim}, torch::kFloat); - for (size_t i = 0; i < cached_feature_.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(cached_feature_[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][i] = std::move(row); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(chunk_feats[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][cached_feature_.size() + i] = std::move(row); - } - - // 2. Encoder chunk forward -#ifdef USE_GPU - feats = feats.to(at::kCUDA); - att_cache_ = att_cache_.to(at::kCUDA); - cnn_cache_ = cnn_cache_.to(at::kCUDA); -#endif - int required_cache_size = chunk_size_ * num_left_chunks_; - torch::NoGradGuard no_grad; - std::vector inputs = {feats, offset_, required_cache_size, - att_cache_, cnn_cache_}; - - // Refer interfaces in wenet/transformer/asr_model.py - auto outputs = - model_->get_method("forward_encoder_chunk")(inputs).toTuple()->elements(); - CHECK_EQ(outputs.size(), 3); -#ifdef USE_GPU - torch::Tensor chunk_out = outputs[0].toTensor().to(at::kCPU); - att_cache_ = outputs[1].toTensor().to(at::kCPU); - cnn_cache_ = outputs[2].toTensor().to(at::kCPU); -#else - torch::Tensor chunk_out = outputs[0].toTensor(); - att_cache_ = outputs[1].toTensor(); - cnn_cache_ = outputs[2].toTensor(); -#endif - offset_ += chunk_out.size(1); - - // The first dimension of returned value is for batchsize, which is 1 -#ifdef USE_GPU - chunk_out = chunk_out.to(at::kCUDA); - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor(); - ctc_log_probs = ctc_log_probs.to(at::kCPU)[0]; - encoder_outs_.push_back(std::move(chunk_out.to(at::kCPU))); -#else - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor()[0]; - encoder_outs_.push_back(std::move(chunk_out)); -#endif - - // Copy to output - int num_outputs = ctc_log_probs.size(0); - int output_dim = ctc_log_probs.size(1); - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), ctc_log_probs[i].data_ptr(), - sizeof(float) * output_dim); - } -} - -float TorchAsrModel::ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, - int eos) { - float score = 0.0f; - auto accessor = prob.accessor(); - for (size_t j = 0; j < hyp.size(); ++j) { - score += accessor[j][hyp[j]]; - } - score += accessor[hyp.size()][eos]; - return score; -} - -void TorchAsrModel::AttentionRescoring( - const std::vector>& hyps, float reverse_weight, - std::vector* rescoring_score) { - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - torch::NoGradGuard no_grad; - // Step 1: Prepare input for libtorch - torch::Tensor hyps_length = torch::zeros({num_hyps}, torch::kLong); - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_length[i] = static_cast(length); - } - torch::Tensor hyps_tensor = - torch::zeros({num_hyps, max_hyps_len}, torch::kLong); - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_tensor[i][0] = sos_; - for (size_t j = 0; j < 
hyp.size(); ++j) { - hyps_tensor[i][j + 1] = hyp[j]; - } - } - - // Step 2: Forward attention decoder by hyps and corresponding encoder_outs_ - torch::Tensor encoder_out = torch::cat(encoder_outs_, 1); -#ifdef USE_GPU - hyps_tensor = hyps_tensor.to(at::kCUDA); - hyps_length = hyps_length.to(at::kCUDA); - encoder_out = encoder_out.to(at::kCUDA); -#endif - auto outputs = model_ - ->run_method("forward_attention_decoder", hyps_tensor, - hyps_length, encoder_out, reverse_weight) - .toTuple() - ->elements(); -#ifdef USE_GPU - auto probs = outputs[0].toTensor().to(at::kCPU); - auto r_probs = outputs[1].toTensor().to(at::kCPU); -#else - auto probs = outputs[0].toTensor(); - auto r_probs = outputs[1].toTensor(); -#endif - CHECK_EQ(probs.size(0), num_hyps); - CHECK_EQ(probs.size(1), max_hyps_len); - - // Step 3: Compute rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left-to-right decoder score - score = ComputeAttentionScore(probs[i], hyp, eos_); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - // right-to-left score - CHECK_EQ(r_probs.size(0), num_hyps); - CHECK_EQ(r_probs.size(1), max_hyps_len); - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore(r_probs[i], r_hyp, eos_); - } - - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/torch_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/torch_asr_model.h deleted file mode 100644 index a3cebe08798f1cad60ca4cd73c7b2488173b6114..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/decoder/torch_asr_model.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
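The `torch_asr_model.cc` file deleted above loads a TorchScript export, queries scalar attributes such as `sos_symbol` via `run_method`, and runs inference under a `NoGradGuard`. A minimal, self-contained LibTorch sketch of that loading pattern; `model.pt` is a placeholder path and the sketch assumes the exported module has a plain `forward` taking one tensor:

```cpp
#include <iostream>
#include <vector>

#include "torch/script.h"

int main() {
  // Load a TorchScript module exported with torch.jit.script/trace.
  torch::jit::script::Module module = torch::jit::load("model.pt");
  module.eval();

  // Disable autograd bookkeeping for inference, as the deleted code does.
  torch::NoGradGuard no_grad;

  // Run an exported method; here a plain forward on a dummy [1, 80] input.
  std::vector<torch::jit::IValue> inputs;
  inputs.push_back(torch::randn({1, 80}));
  torch::jit::IValue out = module.forward(inputs);

  if (out.isTensor()) {
    std::cout << "output shape: " << out.toTensor().sizes() << std::endl;
  }
  return 0;
}
```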
- -#ifndef DECODER_TORCH_ASR_MODEL_H_ -#define DECODER_TORCH_ASR_MODEL_H_ - -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -#include "decoder/asr_model.h" -#include "utils/utils.h" - -namespace wenet { - -class TorchAsrModel : public AsrModel { - public: -#ifndef IOS - static void InitEngineThreads(int num_threads = 1); -#endif - - public: - using TorchModule = torch::jit::script::Module; - TorchAsrModel() = default; - TorchAsrModel(const TorchAsrModel& other); - void Read(const std::string& model_path); - std::shared_ptr torch_model() const { return model_; } - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, int eos); - - private: - std::shared_ptr model_ = nullptr; - std::vector encoder_outs_; - // transformer/conformer attention cache - torch::Tensor att_cache_ = torch::zeros({0, 0, 0, 0}); - // conformer-only conv_module cache - torch::Tensor cnn_cache_ = torch::zeros({0, 0, 0, 0}); -}; - -} // namespace wenet - -#endif // DECODER_TORCH_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/CMakeLists.txt deleted file mode 100644 index 78872257e43bb9a6ffcedaae977bf0173817ae50..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library(frontend STATIC - feature_pipeline.cc - fft.cc -) -target_link_libraries(frontend PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/fbank.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/fbank.h deleted file mode 100644 index 5a650dc035b8e244388cc1f2e0b9512654de7fda..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/fbank.h +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
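The `fbank.h` header deleted just below builds Kaldi-style mel filterbanks: frequencies are mapped with MelScale(f) = 1127·ln(1 + f/700), triangular filters are spaced evenly on that scale, and each frame is weighted with a "povey" window, (0.5 − 0.5·cos(2πi/(N−1)))^0.85. A small standalone check of those formulas, using a 25 ms frame at 16 kHz as in the deleted code:

```cpp
#include <cmath>
#include <cstdio>

const double kPi = 3.14159265358979323846;

// Same mel mapping as the deleted Fbank class.
float MelScale(float freq) { return 1127.0f * std::log(1.0f + freq / 700.0f); }
float InverseMelScale(float mel) { return 700.0f * (std::exp(mel / 1127.0f) - 1.0f); }

int main() {
  // Round-trip a few frequencies through the mel scale.
  const float freqs[] = {100.0f, 1000.0f, 4000.0f, 8000.0f};
  for (float f : freqs) {
    float mel = MelScale(f);
    std::printf("%.0f Hz -> %.2f mel -> %.2f Hz\n", f, mel, InverseMelScale(mel));
  }

  // Povey window for a 25 ms frame at 16 kHz (400 samples), as in fbank.h.
  const int frame_length = 400;
  const double a = 2.0 * kPi / (frame_length - 1);
  double w_mid = std::pow(0.5 - 0.5 * std::cos(a * (frame_length / 2)), 0.85);
  std::printf("povey window value at frame centre: %.4f\n", w_mid);
  return 0;
}
```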
- -#ifndef FRONTEND_FBANK_H_ -#define FRONTEND_FBANK_H_ - -#include -#include -#include -#include -#include - -#include "frontend/fft.h" -#include "utils/log.h" - -namespace wenet { - -// This code is based on kaldi Fbank implementation, please see -// https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.cc -class Fbank { - public: - Fbank(int num_bins, int sample_rate, int frame_length, int frame_shift) - : num_bins_(num_bins), - sample_rate_(sample_rate), - frame_length_(frame_length), - frame_shift_(frame_shift), - use_log_(true), - remove_dc_offset_(true), - generator_(0), - distribution_(0, 1.0), - dither_(0.0) { - fft_points_ = UpperPowerOfTwo(frame_length_); - // generate bit reversal table and trigonometric function table - const int fft_points_4 = fft_points_ / 4; - bitrev_.resize(fft_points_); - sintbl_.resize(fft_points_ + fft_points_4); - make_sintbl(fft_points_, sintbl_.data()); - make_bitrev(fft_points_, bitrev_.data()); - - int num_fft_bins = fft_points_ / 2; - float fft_bin_width = static_cast(sample_rate_) / fft_points_; - int low_freq = 20, high_freq = sample_rate_ / 2; - float mel_low_freq = MelScale(low_freq); - float mel_high_freq = MelScale(high_freq); - float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1); - bins_.resize(num_bins_); - center_freqs_.resize(num_bins_); - for (int bin = 0; bin < num_bins; ++bin) { - float left_mel = mel_low_freq + bin * mel_freq_delta, - center_mel = mel_low_freq + (bin + 1) * mel_freq_delta, - right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; - center_freqs_[bin] = InverseMelScale(center_mel); - std::vector this_bin(num_fft_bins); - int first_index = -1, last_index = -1; - for (int i = 0; i < num_fft_bins; ++i) { - float freq = (fft_bin_width * i); // Center frequency of this fft - // bin. 
- float mel = MelScale(freq); - if (mel > left_mel && mel < right_mel) { - float weight; - if (mel <= center_mel) - weight = (mel - left_mel) / (center_mel - left_mel); - else - weight = (right_mel - mel) / (right_mel - center_mel); - this_bin[i] = weight; - if (first_index == -1) first_index = i; - last_index = i; - } - } - CHECK(first_index != -1 && last_index >= first_index); - bins_[bin].first = first_index; - int size = last_index + 1 - first_index; - bins_[bin].second.resize(size); - for (int i = 0; i < size; ++i) { - bins_[bin].second[i] = this_bin[first_index + i]; - } - } - - // povey window - povey_window_.resize(frame_length_); - double a = M_2PI / (frame_length - 1); - for (int i = 0; i < frame_length; ++i) { - povey_window_[i] = pow(0.5 - 0.5 * cos(a * i), 0.85); - } - } - - void set_use_log(bool use_log) { use_log_ = use_log; } - - void set_remove_dc_offset(bool remove_dc_offset) { - remove_dc_offset_ = remove_dc_offset; - } - - void set_dither(float dither) { dither_ = dither; } - - int num_bins() const { return num_bins_; } - - static inline float InverseMelScale(float mel_freq) { - return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f); - } - - static inline float MelScale(float freq) { - return 1127.0f * logf(1.0f + freq / 700.0f); - } - - static int UpperPowerOfTwo(int n) { - return static_cast(pow(2, ceil(log(n) / log(2)))); - } - - // pre emphasis - void PreEmphasis(float coeff, std::vector* data) const { - if (coeff == 0.0) return; - for (int i = data->size() - 1; i > 0; i--) - (*data)[i] -= coeff * (*data)[i - 1]; - (*data)[0] -= coeff * (*data)[0]; - } - - // Apply povey window on data in place - void Povey(std::vector* data) const { - CHECK_GE(data->size(), povey_window_.size()); - for (size_t i = 0; i < povey_window_.size(); ++i) { - (*data)[i] *= povey_window_[i]; - } - } - - // Compute fbank feat, return num frames - int Compute(const std::vector& wave, - std::vector>* feat) { - int num_samples = wave.size(); - if (num_samples < frame_length_) return 0; - int num_frames = 1 + ((num_samples - frame_length_) / frame_shift_); - feat->resize(num_frames); - std::vector fft_real(fft_points_, 0), fft_img(fft_points_, 0); - std::vector power(fft_points_ / 2); - for (int i = 0; i < num_frames; ++i) { - std::vector data(wave.data() + i * frame_shift_, - wave.data() + i * frame_shift_ + frame_length_); - // optional add noise - if (dither_ != 0.0) { - for (size_t j = 0; j < data.size(); ++j) - data[j] += dither_ * distribution_(generator_); - } - // optinal remove dc offset - if (remove_dc_offset_) { - float mean = 0.0; - for (size_t j = 0; j < data.size(); ++j) mean += data[j]; - mean /= data.size(); - for (size_t j = 0; j < data.size(); ++j) data[j] -= mean; - } - - PreEmphasis(0.97, &data); - Povey(&data); - // copy data to fft_real - memset(fft_img.data(), 0, sizeof(float) * fft_points_); - memset(fft_real.data() + frame_length_, 0, - sizeof(float) * (fft_points_ - frame_length_)); - memcpy(fft_real.data(), data.data(), sizeof(float) * frame_length_); - fft(bitrev_.data(), sintbl_.data(), fft_real.data(), fft_img.data(), - fft_points_); - // power - for (int j = 0; j < fft_points_ / 2; ++j) { - power[j] = fft_real[j] * fft_real[j] + fft_img[j] * fft_img[j]; - } - - (*feat)[i].resize(num_bins_); - // cepstral coefficients, triangle filter array - for (int j = 0; j < num_bins_; ++j) { - float mel_energy = 0.0; - int s = bins_[j].first; - for (size_t k = 0; k < bins_[j].second.size(); ++k) { - mel_energy += bins_[j].second[k] * power[s + k]; - } - // optional use log - if 
(use_log_) { - if (mel_energy < std::numeric_limits::epsilon()) - mel_energy = std::numeric_limits::epsilon(); - mel_energy = logf(mel_energy); - } - - (*feat)[i][j] = mel_energy; - } - } - return num_frames; - } - - private: - int num_bins_; - int sample_rate_; - int frame_length_, frame_shift_; - int fft_points_; - bool use_log_; - bool remove_dc_offset_; - std::vector center_freqs_; - std::vector>> bins_; - std::vector povey_window_; - std::default_random_engine generator_; - std::normal_distribution distribution_; - float dither_; - - // bit reversal table - std::vector bitrev_; - // trigonometric function table - std::vector sintbl_; -}; - -} // namespace wenet - -#endif // FRONTEND_FBANK_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/feature_pipeline.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/feature_pipeline.cc deleted file mode 100644 index ab450b15cd35ebd8101a3bcdec4f963a73bed10c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/feature_pipeline.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "frontend/feature_pipeline.h" - -#include -#include - -namespace wenet { - -FeaturePipeline::FeaturePipeline(const FeaturePipelineConfig& config) - : config_(config), - feature_dim_(config.num_bins), - fbank_(config.num_bins, config.sample_rate, config.frame_length, - config.frame_shift), - num_frames_(0), - input_finished_(false) {} - -void FeaturePipeline::AcceptWaveform(const float* pcm, const int size) { - std::vector> feats; - std::vector waves; - waves.insert(waves.end(), remained_wav_.begin(), remained_wav_.end()); - waves.insert(waves.end(), pcm, pcm + size); - int num_frames = fbank_.Compute(waves, &feats); - feature_queue_.Push(std::move(feats)); - num_frames_ += num_frames; - - int left_samples = waves.size() - config_.frame_shift * num_frames; - remained_wav_.resize(left_samples); - std::copy(waves.begin() + config_.frame_shift * num_frames, waves.end(), - remained_wav_.begin()); - // We are still adding wave, notify input is not finished - finish_condition_.notify_one(); -} - -void FeaturePipeline::AcceptWaveform(const int16_t* pcm, const int size) { - auto* float_pcm = new float[size]; - for (size_t i = 0; i < size; i++) { - float_pcm[i] = static_cast(pcm[i]); - } - this->AcceptWaveform(float_pcm, size); - delete[] float_pcm; -} - -void FeaturePipeline::set_input_finished() { - CHECK(!input_finished_); - { - std::lock_guard lock(mutex_); - input_finished_ = true; - } - finish_condition_.notify_one(); -} - -bool FeaturePipeline::ReadOne(std::vector* feat) { - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or set_input_finished() - 
finish_condition_.wait(lock); - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - return false; - } - } -} - -bool FeaturePipeline::Read(int num_frames, - std::vector>* feats) { - feats->clear(); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or set_input_finished() - finish_condition_.wait(lock); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - *feats = std::move(feature_queue_.Pop(feature_queue_.Size())); - return false; - } - } -} - -void FeaturePipeline::Reset() { - input_finished_ = false; - num_frames_ = 0; - remained_wav_.clear(); - feature_queue_.Clear(); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/feature_pipeline.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/feature_pipeline.h deleted file mode 100644 index 9918d6b573255795e0e665f0a9598c44be625c19..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/feature_pipeline.h +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef FRONTEND_FEATURE_PIPELINE_H_ -#define FRONTEND_FEATURE_PIPELINE_H_ - -#include -#include -#include -#include - -#include "frontend/fbank.h" -#include "utils/blocking_queue.h" -#include "utils/log.h" - -namespace wenet { - -struct FeaturePipelineConfig { - int num_bins; - int sample_rate; - int frame_length; - int frame_shift; - FeaturePipelineConfig(int num_bins, int sample_rate) - : num_bins(num_bins), // 80 dim fbank - sample_rate(sample_rate) { // 16k sample rate - frame_length = sample_rate / 1000 * 25; // frame length 25ms - frame_shift = sample_rate / 1000 * 10; // frame shift 10ms - } - - void Info() const { - LOG(INFO) << "feature pipeline config" - << " num_bins " << num_bins << " frame_length " << frame_length - << " frame_shift " << frame_shift; - } -}; - -// Typically, FeaturePipeline is used in two threads: one thread A calls -// AcceptWaveform() to add raw wav data and set_input_finished() to notice -// the end of input wav, another thread B (decoder thread) calls Read() to -// consume features.So a BlockingQueue is used to make this class thread safe. 
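The `FeaturePipelineConfig` and `AcceptWaveform()` code above fix the frame geometry at 25 ms frames with a 10 ms shift and carry any leftover samples into the next call. A tiny self-contained sketch of that arithmetic (synthetic sample count, no audio involved):

```cpp
#include <cstdio>

int main() {
  // Frame geometry from FeaturePipelineConfig: 25 ms frames, 10 ms shift at 16 kHz.
  const int sample_rate = 16000;
  const int frame_length = sample_rate / 1000 * 25;  // 400 samples
  const int frame_shift  = sample_rate / 1000 * 10;  // 160 samples

  // Same frame-count arithmetic as Fbank::Compute() and AcceptWaveform().
  int num_samples = 3200;  // 200 ms of synthetic audio
  int num_frames = num_samples < frame_length
                       ? 0
                       : 1 + (num_samples - frame_length) / frame_shift;
  int leftover = num_samples - frame_shift * num_frames;  // kept for the next call

  std::printf("frames: %d, leftover samples: %d\n", num_frames, leftover);
  return 0;
}
```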
- -// The Read() is designed as a blocking method when there is no feature -// in feature_queue_ and the input is not finished. - -// See bin/decoder_main.cc, websocket/websocket_server.cc and -// decoder/torch_asr_decoder.cc for usage - -class FeaturePipeline { - public: - explicit FeaturePipeline(const FeaturePipelineConfig& config); - - // The feature extraction is done in AcceptWaveform(). - void AcceptWaveform(const float* pcm, const int size); - void AcceptWaveform(const int16_t* pcm, const int size); - - // Current extracted frames number. - int num_frames() const { return num_frames_; } - int feature_dim() const { return feature_dim_; } - const FeaturePipelineConfig& config() const { return config_; } - - // The caller should call this method when speech input is end. - // Never call AcceptWaveform() after calling set_input_finished() ! - void set_input_finished(); - bool input_finished() const { return input_finished_; } - - // Return False if input is finished and no feature could be read. - // Return True if a feature is read. - // This function is a blocking method. It will block the thread when - // there is no feature in feature_queue_ and the input is not finished. - bool ReadOne(std::vector* feat); - - // Read #num_frames frame features. - // Return False if less than #num_frames features are read and the - // input is finished. - // Return True if #num_frames features are read. - // This function is a blocking method when there is no feature - // in feature_queue_ and the input is not finished. - bool Read(int num_frames, std::vector>* feats); - - void Reset(); - bool IsLastFrame(int frame) const { - return input_finished_ && (frame == num_frames_ - 1); - } - - int NumQueuedFrames() const { return feature_queue_.Size(); } - - private: - const FeaturePipelineConfig& config_; - int feature_dim_; - Fbank fbank_; - - BlockingQueue> feature_queue_; - int num_frames_; - bool input_finished_; - - // The feature extraction is done in AcceptWaveform(). - // This waveform sample points are consumed by frame size. - // The residual waveform sample points after framing are - // kept to be used in next AcceptWaveform() calling. - std::vector remained_wav_; - - // Used to block the Read when there is no feature in feature_queue_ - // and the input is not finished. - mutable std::mutex mutex_; - std::condition_variable finish_condition_; -}; - -} // namespace wenet - -#endif // FRONTEND_FEATURE_PIPELINE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/fft.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/fft.cc deleted file mode 100644 index 9e05f854e79ea733d0411045385e924c2670b7f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/fft.cc +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
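The comment block at the top of the deleted `feature_pipeline.h` describes the intended two-thread usage: one thread feeds audio via `AcceptWaveform()` and `set_input_finished()`, while the decoder thread blocks in `Read()`/`ReadOne()` until features arrive or input ends. A generic condition-variable sketch of that producer/consumer shape, with a plain `std::queue<int>` standing in for the feature queue:

```cpp
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>

std::queue<int> queue_;  // stands in for the feature queue
std::mutex mutex_;
std::condition_variable cond_;
bool finished_ = false;

void Producer() {
  for (int i = 0; i < 5; ++i) {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      queue_.push(i);  // "AcceptWaveform" adds features
    }
    cond_.notify_one();
  }
  {
    std::lock_guard<std::mutex> lock(mutex_);
    finished_ = true;  // "set_input_finished"
  }
  cond_.notify_one();
}

bool ReadOne(int* out) {
  std::unique_lock<std::mutex> lock(mutex_);
  // Block until a feature is available or the input is finished.
  cond_.wait(lock, [] { return !queue_.empty() || finished_; });
  if (queue_.empty()) return false;  // finished and fully drained
  *out = queue_.front();
  queue_.pop();
  return true;
}

int main() {
  std::thread producer(Producer);
  int value;
  while (ReadOne(&value)) {
    std::cout << "consumed feature frame " << value << std::endl;
  }
  producer.join();
  return 0;
}
```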
- - -#include -#include -#include - -#include "frontend/fft.h" - -namespace wenet { - -void make_sintbl(int n, float* sintbl) { - int i, n2, n4, n8; - float c, s, dc, ds, t; - - n2 = n / 2; - n4 = n / 4; - n8 = n / 8; - t = sin(M_PI / n); - dc = 2 * t * t; - ds = sqrt(dc * (2 - dc)); - t = 2 * dc; - c = sintbl[n4] = 1; - s = sintbl[0] = 0; - for (i = 1; i < n8; ++i) { - c -= dc; - dc += t * c; - s += ds; - ds -= t * s; - sintbl[i] = s; - sintbl[n4 - i] = c; - } - if (n8 != 0) sintbl[n8] = sqrt(0.5); - for (i = 0; i < n4; ++i) sintbl[n2 - i] = sintbl[i]; - for (i = 0; i < n2 + n4; ++i) sintbl[i + n2] = -sintbl[i]; -} - -void make_bitrev(int n, int* bitrev) { - int i, j, k, n2; - - n2 = n / 2; - i = j = 0; - for (;;) { - bitrev[i] = j; - if (++i >= n) break; - k = n2; - while (k <= j) { - j -= k; - k /= 2; - } - j += k; - } -} - -// bitrev: bit reversal table -// sintbl: trigonometric function table -// x:real part -// y:image part -// n: fft length -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n) { - int i, j, k, ik, h, d, k2, n4, inverse; - float t, s, c, dx, dy; - - /* preparation */ - if (n < 0) { - n = -n; - inverse = 1; /* inverse transform */ - } else { - inverse = 0; - } - n4 = n / 4; - if (n == 0) { - return 0; - } - - /* bit reversal */ - for (i = 0; i < n; ++i) { - j = bitrev[i]; - if (i < j) { - t = x[i]; - x[i] = x[j]; - x[j] = t; - t = y[i]; - y[i] = y[j]; - y[j] = t; - } - } - - /* transformation */ - for (k = 1; k < n; k = k2) { - h = 0; - k2 = k + k; - d = n / k2; - for (j = 0; j < k; ++j) { - c = sintbl[h + n4]; - if (inverse) - s = -sintbl[h]; - else - s = sintbl[h]; - for (i = j; i < n; i += k2) { - ik = i + k; - dx = s * y[ik] + c * x[ik]; - dy = c * y[ik] - s * x[ik]; - x[ik] = x[i] - dx; - x[i] += dx; - y[ik] = y[i] - dy; - y[i] += dy; - } - h += d; - } - } - if (inverse) { - /* divide by n in case of the inverse transformation */ - for (i = 0; i < n; ++i) { - x[i] /= n; - y[i] /= n; - } - } - return 0; /* finished successfully */ -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/fft.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/fft.h deleted file mode 100644 index 6b92e406c44b4768eaee6e734f55bb39cd9af28b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/fft.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
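The deleted `fft.cc` above exposes a table-driven radix-2 FFT: `make_bitrev()` and `make_sintbl()` precompute the bit-reversal and sine tables (sized `n` and `n + n/4`, as `fbank.h` does), and `fft()` then transforms the real/imaginary buffers in place, with a negative `n` requesting the inverse transform. A short driver against those signatures; note it only builds inside the wenet runtime tree that this change removes:

```cpp
#include <cstdio>
#include <vector>

#include "frontend/fft.h"  // deleted by this change; shown for illustration only

int main() {
  const int n = 8;  // FFT length must be a power of two
  std::vector<int> bitrev(n);
  std::vector<float> sintbl(n + n / 4);
  wenet::make_bitrev(n, bitrev.data());
  wenet::make_sintbl(n, sintbl.data());

  // Transform a unit impulse: its spectrum is flat (all ones, zero phase).
  std::vector<float> real(n, 0.0f), imag(n, 0.0f);
  real[0] = 1.0f;
  wenet::fft(bitrev.data(), sintbl.data(), real.data(), imag.data(), n);

  for (int i = 0; i < n; ++i) {
    std::printf("bin %d: %.3f %+.3fi\n", i, real[i], imag[i]);
  }
  return 0;
}
```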
- - -#ifndef FRONTEND_FFT_H_ -#define FRONTEND_FFT_H_ - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -namespace wenet { - -// Fast Fourier Transform - -void make_sintbl(int n, float* sintbl); - -void make_bitrev(int n, int* bitrev); - -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n); - -} // namespace wenet - -#endif // FRONTEND_FFT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/wav.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/wav.h deleted file mode 100644 index 688a049a940ebbdc83f24e59134fff22b7b09bfd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/frontend/wav.h +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright (c) 2016 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef FRONTEND_WAV_H_ -#define FRONTEND_WAV_H_ - -#include -#include -#include -#include -#include - -#include - -#include "utils/log.h" - -namespace wenet { - -struct WavHeader { - char riff[4] = {'R', 'I', 'F', 'F'}; - unsigned int size = 0; - char wav[4] = {'W', 'A', 'V', 'E'}; - char fmt[4] = {'f', 'm', 't', ' '}; - unsigned int fmt_size = 16; - uint16_t format = 1; - uint16_t channels = 0; - unsigned int sample_rate = 0; - unsigned int bytes_per_second = 0; - uint16_t block_size = 0; - uint16_t bit = 0; - char data[4] = {'d', 'a', 't', 'a'}; - unsigned int data_size = 0; - - WavHeader() {} - - WavHeader(int num_samples, int num_channel, int sample_rate, - int bits_per_sample) { - data_size = num_samples * num_channel * (bits_per_sample / 8); - size = sizeof(WavHeader) - 8 + data_size; - channels = num_channel; - this->sample_rate = sample_rate; - bytes_per_second = sample_rate * num_channel * (bits_per_sample / 8); - block_size = num_channel * (bits_per_sample / 8); - bit = bits_per_sample; - } -}; - -class WavReader { - public: - WavReader() : data_(nullptr) {} - explicit WavReader(const std::string& filename) { Open(filename); } - - bool Open(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "rb"); - if (NULL == fp) { - LOG(WARNING) << "Error in read " << filename; - return false; - } - - WavHeader header; - fread(&header, 1, sizeof(header), fp); - if (header.fmt_size < 16) { - fprintf(stderr, - "WaveData: expect PCM format data " - "to have fmt chunk of at least size 16.\n"); - return false; - } else if (header.fmt_size > 16) { - int offset = 44 - 8 + header.fmt_size - 16; - fseek(fp, offset, SEEK_SET); - fread(header.data, 8, sizeof(char), fp); - } - // check "RIFF" "WAVE" "fmt " "data" - - // Skip any sub-chunks between "fmt" and "data". Usually there will - // be a single "fact" sub chunk, but on Windows there can also be a - // "list" sub chunk. - while (0 != strncmp(header.data, "data", 4)) { - // We will just ignore the data in these chunks. 
- fseek(fp, header.data_size, SEEK_CUR); - // read next sub chunk - fread(header.data, 8, sizeof(char), fp); - } - - num_channel_ = header.channels; - sample_rate_ = header.sample_rate; - bits_per_sample_ = header.bit; - int num_data = header.data_size / (bits_per_sample_ / 8); - data_ = new float[num_data]; - num_samples_ = num_data / num_channel_; - - for (int i = 0; i < num_data; ++i) { - switch (bits_per_sample_) { - case 8: { - char sample; - fread(&sample, 1, sizeof(char), fp); - data_[i] = static_cast(sample); - break; - } - case 16: { - int16_t sample; - fread(&sample, 1, sizeof(int16_t), fp); - data_[i] = static_cast(sample); - break; - } - case 32: { - int sample; - fread(&sample, 1, sizeof(int), fp); - data_[i] = static_cast(sample); - break; - } - default: - fprintf(stderr, "unsupported quantization bits"); - exit(1); - } - } - fclose(fp); - return true; - } - - int num_channel() const { return num_channel_; } - int sample_rate() const { return sample_rate_; } - int bits_per_sample() const { return bits_per_sample_; } - int num_samples() const { return num_samples_; } - - ~WavReader() { - delete[] data_; - } - - const float* data() const { return data_; } - - private: - int num_channel_; - int sample_rate_; - int bits_per_sample_; - int num_samples_; // sample points per channel - float* data_; -}; - -class WavWriter { - public: - WavWriter(const float* data, int num_samples, int num_channel, - int sample_rate, int bits_per_sample) - : data_(data), - num_samples_(num_samples), - num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample) {} - - void Write(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "wb"); - WavHeader header(num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fwrite(&header, 1, sizeof(header), fp); - - for (int i = 0; i < num_samples_; ++i) { - for (int j = 0; j < num_channel_; ++j) { - switch (bits_per_sample_) { - case 8: { - char sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 16: { - int16_t sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 32: { - int sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - } - } - } - fclose(fp); - } - - private: - const float* data_; - int num_samples_; // total float points in data_ - int num_channel_; - int sample_rate_; - int bits_per_sample_; -}; - -class StreamWavWriter { - public: - StreamWavWriter(int num_channel, int sample_rate, int bits_per_sample) - : num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample), - total_num_samples_(0) {} - - StreamWavWriter(const std::string& filename, int num_channel, - int sample_rate, int bits_per_sample) - : StreamWavWriter(num_channel, sample_rate, bits_per_sample) { - Open(filename); - } - - void Open(const std::string& filename) { - fp_ = fopen(filename.c_str(), "wb"); - fseek(fp_, sizeof(WavHeader), SEEK_SET); - } - - void Write(const int16_t* sample_data, size_t num_samples) { - fwrite(sample_data, sizeof(int16_t), num_samples, fp_); - total_num_samples_ += num_samples; - } - - void Close() { - WavHeader header(total_num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fseek(fp_, 0L, SEEK_SET); - fwrite(&header, 1, sizeof(header), fp_); - fclose(fp_); - } - - private: - FILE* fp_; - int num_channel_; - int sample_rate_; - int bits_per_sample_; - size_t total_num_samples_; -}; - -} 
// namespace wenet - -#endif // FRONTEND_WAV_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/CMakeLists.txt deleted file mode 100644 index b072309e44b90dcee44ea31e9bcbc1741e73f151..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -cmake_minimum_required(VERSION 3.10 FATAL_ERROR) - -project(kaldi) - -# include_directories() is called in the root CMakeLists.txt - -add_library(kaldi-util - base/kaldi-error.cc - base/kaldi-math.cc - util/kaldi-io.cc - util/parse-options.cc - util/simple-io-funcs.cc - util/text-utils.cc -) -target_link_libraries(kaldi-util PUBLIC utils) - -add_library(kaldi-decoder - lat/determinize-lattice-pruned.cc - lat/lattice-functions.cc - decoder/lattice-faster-decoder.cc - decoder/lattice-faster-online-decoder.cc -) -target_link_libraries(kaldi-decoder PUBLIC kaldi-util) - -if(GRAPH_TOOLS) - # Arpa binary - add_executable(arpa2fst - lm/arpa-file-parser.cc - lm/arpa-lm-compiler.cc - lmbin/arpa2fst.cc - ) - target_link_libraries(arpa2fst PUBLIC kaldi-util) - - # FST tools binary - set(FST_BINS - fstaddselfloops - fstdeterminizestar - fstisstochastic - fstminimizeencoded - fsttablecompose - ) - - if(NOT MSVC) - # dl is for dynamic linking, otherwise there is a linking error on linux - link_libraries(dl) - endif() - foreach(name IN LISTS FST_BINS) - add_executable(${name} - fstbin/${name}.cc - fstext/kaldi-fst-io.cc - ) - target_link_libraries(${name} PUBLIC kaldi-util) - endforeach() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/README.md deleted file mode 100644 index 4eb9c9173b747686f00b658afc5e1e0dfdc17e68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/README.md +++ /dev/null @@ -1,21 +0,0 @@ -We use Kaldi decoder to implement TLG based language model integration, -so we copied related files to this directory. -The main changes are: - -1. To minimize the change, we use the same directories tree as Kaldi. - -2. We replace Kaldi log system with glog in the following way. - -``` c++ -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_INFO \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) -``` - -3. We lint all the files to satisfy the lint in WeNet. 
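With the glog-backed macro definitions from the Kaldi README above in place, the copied Kaldi sources log exactly like ordinary glog streams. A minimal hedged example of those macros in use (it assumes glog is available and linked; the `beam` value is arbitrary):

```cpp
#include "glog/logging.h"

// Same mapping as the README above.
#define KALDI_WARN \
  google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream()
#define KALDI_ASSERT(condition) CHECK(condition)

int main(int argc, char* argv[]) {
  google::InitGoogleLogging(argv[0]);
  FLAGS_logtostderr = true;  // keep the demo output on stderr

  int beam = 16;
  KALDI_ASSERT(beam > 0);            // expands to glog's CHECK(...)
  KALDI_WARN << "beam is " << beam;  // expands to a glog WARNING stream
  return 0;
}
```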
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/io-funcs-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/io-funcs-inl.h deleted file mode 100644 index 9397400833676b323492321183c989cec2f41c3f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/io-funcs-inl.h +++ /dev/null @@ -1,329 +0,0 @@ -// base/io-funcs-inl.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian; -// Johns Hopkins University (Author: Daniel Povey) -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_INL_H_ -#define KALDI_BASE_IO_FUNCS_INL_H_ 1 - -// Do not include this file directly. It is included by base/io-funcs.h - -#include -#include -#include - -namespace kaldi { - -// Template that covers integers. -template -void WriteBasicType(std::ostream &os, bool binary, T t) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char len_c = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(t)); - os.put(len_c); - os.write(reinterpret_cast(&t), sizeof(t)); - } else { - if (sizeof(t) == 1) - os << static_cast(t) << " "; - else - os << t << " "; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteBasicType."; - } -} - -// Template that covers integers. -template -inline void ReadBasicType(std::istream &is, bool binary, T *t) { - KALDI_PARANOID_ASSERT(t != NULL); - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - int len_c_in = is.get(); - if (len_c_in == -1) - KALDI_ERR << "ReadBasicType: encountered end of stream."; - char len_c = static_cast(len_c_in), - len_c_expected = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(*t)); - if (len_c != len_c_expected) { - KALDI_ERR << "ReadBasicType: did not get expected integer type, " - << static_cast(len_c) << " vs. " - << static_cast(len_c_expected) - << ". You can change this code to successfully" - << " read it later, if needed."; - // insert code here to read "wrong" type. Might have a switch statement. - } - is.read(reinterpret_cast(t), sizeof(*t)); - } else { - if (sizeof(*t) == 1) { - int16 i; - is >> i; - *t = i; - } else { - is >> *t; - } - } - if (is.fail()) { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << is.peek(); - } -} - -// Template that covers integers. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. 
- os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz * 2); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector >::const_iterator iter = v.begin(), - end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(iter->first) << ',' - << static_cast(iter->second) << ' '; - else - os << iter->first << ',' << iter->second << ' '; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerPairVector."; - } -} - -// Template that covers integers. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerPairVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz * 2); - } - } else { - std::vector > tmp_v; // use temporary so v doesn't use - // extra memory due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. - int16 next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::make_pair((T)next_t1, (T)next_t2)); - } else { - T next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::pair(next_t1, next_t2)); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. - } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerPairVector: read failure at file position " - << is.tellg(); -} - -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. 
- os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(*iter) << " "; - else - os << *iter << " "; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerVector."; - } -} - -template -inline void ReadIntegerVector(std::istream &is, bool binary, - std::vector *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz); - } - } else { - std::vector tmp_v; // use temporary so v doesn't use extra memory - // due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. - int16 next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back((T)next_t); - } else { - T next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(next_t); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. - } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerVector: read failure at file position " - << is.tellg(); -} - -// Initialize an opened stream for writing by writing an optional binary -// header and modifying the floating-point precision. -inline void InitKaldiOutputStream(std::ostream &os, bool binary) { - // This does not throw exceptions (does not check for errors). - if (binary) { - os.put('\0'); - os.put('B'); - } - // Note, in non-binary mode we may at some point want to mess with - // the precision a bit. - // 7 is a bit more than the precision of float.. - if (os.precision() < 7) os.precision(7); -} - -/// Initialize an opened stream for reading by detecting the binary header and -// setting the "binary" value appropriately. -inline bool InitKaldiInputStream(std::istream &is, bool *binary) { - // Sets the 'binary' variable. - // Throws exception in the very unusual situation that stream - // starts with '\0' but not then 'B'. - - if (is.peek() == '\0') { // seems to be binary - is.get(); - if (is.peek() != 'B') { - return false; - } - is.get(); - *binary = true; - return true; - } else { - *binary = false; - return true; - } -} - -} // end namespace kaldi. 
- -#endif // KALDI_BASE_IO_FUNCS_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/io-funcs.cc deleted file mode 100644 index bd6c350780d1096ff8c452fd00864aa07a30ac65..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/io-funcs.cc +++ /dev/null @@ -1,215 +0,0 @@ -// base/io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" - -namespace kaldi { - -template <> -void WriteBasicType(std::ostream &os, bool binary, bool b) { - os << (b ? "T" : "F"); - if (!binary) os << " "; - if (os.fail()) KALDI_ERR << "Write failure in WriteBasicType"; -} - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b) { - KALDI_PARANOID_ASSERT(b != NULL); - if (!binary) is >> std::ws; // eat up whitespace. - char c = is.peek(); - if (c == 'T') { - *b = true; - is.get(); - } else if (c == 'F') { - *b = false; - is.get(); - } else { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << CharToString(c); - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, float f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f) { - KALDI_PARANOID_ASSERT(f != NULL); - if (binary) { - double d; - int c = is.peek(); - if (c == sizeof(*f)) { - is.get(); - is.read(reinterpret_cast(f), sizeof(*f)); - } else if (c == sizeof(d)) { - ReadBasicType(is, binary, &d); - *f = d; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *f; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at file position " - << is.tellg(); - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, double *d) { - KALDI_PARANOID_ASSERT(d != NULL); - if (binary) { - float f; - int c = is.peek(); - if (c == sizeof(*d)) { - is.get(); - is.read(reinterpret_cast(d), sizeof(*d)); - } else if (c == sizeof(f)) { - ReadBasicType(is, binary, &f); - *d = f; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *d; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at file position " - << is.tellg(); - } -} - -void CheckToken(const 
char *token) { - if (*token == '\0') KALDI_ERR << "Token is empty (not a valid token)"; - const char *orig_token = token; - while (*token != '\0') { - if (::isspace(*token)) - KALDI_ERR << "Token is not a valid token (contains space): '" - << orig_token << "'"; - token++; - } -} - -void WriteToken(std::ostream &os, bool binary, const char *token) { - // binary mode is ignored; - // we use space as termination character in either case. - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - os << token << " "; - if (os.fail()) { - KALDI_ERR << "Write failure in WriteToken."; - } -} - -int Peek(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // eat up whitespace. - return is.peek(); -} - -void WriteToken(std::ostream &os, bool binary, const std::string &token) { - WriteToken(os, binary, token.c_str()); -} - -void ReadToken(std::istream &is, bool binary, std::string *str) { - KALDI_ASSERT(str != NULL); - if (!binary) is >> std::ws; // consume whitespace. - is >> *str; - if (is.fail()) { - KALDI_ERR << "ReadToken, failed to read token at file position " - << is.tellg(); - } - if (!isspace(is.peek())) { - KALDI_ERR << "ReadToken, expected space after token, saw instead " - << CharToString(static_cast(is.peek())) - << ", at file position " << is.tellg(); - } - is.get(); // consume the space. -} - -int PeekToken(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // consume whitespace. - bool read_bracket; - if (static_cast(is.peek()) == '<') { - read_bracket = true; - is.get(); - } else { - read_bracket = false; - } - int ans = is.peek(); - if (read_bracket) { - if (!is.unget()) { - // Clear the bad bit. This code can be (and is in fact) reached, since the - // C++ standard does not guarantee that a call to unget() must succeed. - is.clear(); - } - } - return ans; -} - -void ExpectToken(std::istream &is, bool binary, const char *token) { - int pos_at_start = is.tellg(); - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - if (!binary) is >> std::ws; // consume whitespace. - std::string str; - is >> str; - is.get(); // consume the space. - if (is.fail()) { - KALDI_ERR << "Failed to read token [started at file position " - << pos_at_start << "], expected " << token; - } - // The second half of the '&&' expression below is so that if we're expecting - // "", we will accept "Foo>" instead. This is so that the model-reading - // code will tolerate errors in PeekToken where is.unget() failed; search for - // is.clear() in PeekToken() for an explanation. 
- if (strcmp(str.c_str(), token) != 0 && - !(token[0] == '<' && strcmp(str.c_str(), token + 1) == 0)) { - KALDI_ERR << "Expected token \"" << token << "\", got instead \"" << str - << "\"."; - } -} - -void ExpectToken(std::istream &is, bool binary, const std::string &token) { - ExpectToken(is, binary, token.c_str()); -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/io-funcs.h deleted file mode 100644 index 06ad1e3d2d8dc8385886a7c6653f620642c7c05a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/io-funcs.h +++ /dev/null @@ -1,246 +0,0 @@ -// base/io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_H_ -#define KALDI_BASE_IO_FUNCS_H_ - -// This header only contains some relatively low-level I/O functions. -// The full Kaldi I/O declarations are in ../util/kaldi-io.h -// and ../util/kaldi-table.h -// They were put in util/ in order to avoid making the Matrix library -// dependent on them. - -#include -#include -#include -#include - -#include "base/io-funcs-inl.h" -#include "base/kaldi-common.h" - -namespace kaldi { - -/* - This comment describes the Kaldi approach to I/O. All objects can be written - and read in two modes: binary and text. In addition we want to make the I/O - work if we redefine the typedef "BaseFloat" between floats and doubles. - We also want to have control over whitespace in text mode without affecting - the meaning of the file, for pretty-printing purposes. - - Errors are handled by throwing a KaldiFatalError exception. - - For integer and floating-point types (and boolean values): - - WriteBasicType(std::ostream &, bool binary, const T&); - ReadBasicType(std::istream &, bool binary, T*); - - and we expect these functions to be defined in such a way that they work when - the type T changes between float and double, so you can read float into double - and vice versa]. Note that for efficiency and space-saving reasons, the - Vector and Matrix classes do not use these functions [but they preserve the - type interchangeability in their own way] - - For a class (or struct) C: - class C { - .. - Write(std::ostream &, bool binary, [possibly extra optional args for - specific classes]) const; Read(std::istream &, bool binary, [possibly extra - optional args for specific classes]); - .. - } - NOTE: The only actual optional args we used are the "add" arguments in - Vector/Matrix classes, which specify whether we should sum the data already - in the class with the data being read. 
- - For types which are typedef's involving stl classes, I/O is as follows: - typedef std::vector > MyTypedefName; - - The user should define something like: - - WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t); - ReadMyTypedefName(std::ostream &, bool binary, MyTypedefName *t); - - The user would have to write these functions. - - For a type std::vector: - - void WriteIntegerVector(std::ostream &os, bool binary, const std::vector - &v); void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - - For other types, e.g. vectors of pairs, the user should create a routine of - the type WriteMyTypedefName. This is to avoid introducing confusing templated - functions; we could easily create templated functions to handle most of these - cases but they would have to share the same name. - - It also often happens that the user needs to write/read special tokens as part - of a file. These might be class headers, or separators/identifiers in the - class. We provide special functions for manipulating these. These special - tokens must be nonempty and must not contain any whitespace. - - void WriteToken(std::ostream &os, bool binary, const char*); - void WriteToken(std::ostream &os, bool binary, const std::string & token); - int Peek(std::istream &is, bool binary); - void ReadToken(std::istream &is, bool binary, std::string *str); - void PeekToken(std::istream &is, bool binary, std::string *str); - - WriteToken writes the token and one space (whether in binary or text mode). - - Peek returns the first character of the next token, by consuming whitespace - (in text mode) and then returning the peek() character. It returns -1 at EOF; - it doesn't throw. It's useful if a class can have various forms based on - typedefs and virtual classes, and wants to know which version to read. - - ReadToken allows the caller to obtain the next token. PeekToken works just - like ReadToken, but seeks back to the beginning of the token. A subsequent - call to ReadToken will read the same token again. This is useful when - different object types are written to the same file; using PeekToken one can - decide which of the objects to read. - - There is currently no special functionality for writing/reading strings (where - the strings contain data rather than "special tokens" that are whitespace-free - and nonempty). This is because Kaldi is structured in such a way that strings - don't appear, except as OpenFst symbol table entries (and these have their own - format). - - - NOTE: you should not call ReadIntegerType and WriteIntegerType with types, - such as int and size_t, that are machine-independent -- at least not - if you want your file formats to port between machines. Use int32 and - int64 where necessary. There is no way to detect this using compile-time - assertions because C++ only keeps track of the internal representation of - the type. -*/ - -/// \addtogroup io_funcs_basic -/// @{ - -/// WriteBasicType is the name of the write function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void WriteBasicType(std::ostream &os, bool binary, T t); - -/// ReadBasicType is the name of the read function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void ReadBasicType(std::istream &is, bool binary, T *t); - -// Declare specialization for bool. 
-template <> -void WriteBasicType(std::ostream &os, bool binary, bool b); - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b); - -// Declare specializations for float and double. -template <> -void WriteBasicType(std::ostream &os, bool binary, float f); - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f); - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f); - -template <> -void ReadBasicType(std::istream &is, bool binary, double *f); - -// Define ReadBasicType that accepts an "add" parameter to add to -// the destination. Caution: if used in Read functions, be careful -// to initialize the parameters concerned to zero in the default -// constructor. -template -inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) { - if (!add) { - ReadBasicType(is, binary, t); - } else { - T tmp = T(0); - ReadBasicType(is, binary, &tmp); - *t += tmp; - } -} - -/// Function for writing STL vectors of integer types. -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v); - -/// Function for reading STL vector of integer types. -template -inline void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - -/// Function for writing STL vectors of pairs of integer types. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v); - -/// Function for reading STL vector of pairs of integer types. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v); - -/// The WriteToken functions are for writing nonempty sequences of non-space -/// characters. They are not for general strings. -void WriteToken(std::ostream &os, bool binary, const char *token); -void WriteToken(std::ostream &os, bool binary, const std::string &token); - -/// Peek consumes whitespace (if binary == false) and then returns the peek() -/// value of the stream. -int Peek(std::istream &is, bool binary); - -/// ReadToken gets the next token and puts it in str (exception on failure). If -/// PeekToken() had been previously called, it is possible that the stream had -/// failed to unget the starting '<' character. In this case ReadToken() returns -/// the token string without the leading '<'. You must be prepared to handle -/// this case. ExpectToken() handles this internally, and is not affected. -void ReadToken(std::istream &is, bool binary, std::string *token); - -/// PeekToken will return the first character of the next token, or -1 if end of -/// file. It's the same as Peek(), except if the first character is '<' it will -/// skip over it and will return the next character. It will attempt to unget -/// the '<' so the stream is where it was before you did PeekToken(), however, -/// this is not guaranteed (see ReadToken()). -int PeekToken(std::istream &is, bool binary); - -/// ExpectToken tries to read in the given token, and throws an exception -/// on failure. -void ExpectToken(std::istream &is, bool binary, const char *token); -void ExpectToken(std::istream &is, bool binary, const std::string &token); - -/// ExpectPretty attempts to read the text in "token", but only in non-binary -/// mode. Throws exception on failure. It expects an exact match except that -/// arbitrary whitespace matches arbitrary whitespace. 
-void ExpectPretty(std::istream &is, bool binary, const char *token); -void ExpectPretty(std::istream &is, bool binary, const std::string &token); - -/// @} end "addtogroup io_funcs_basic" - -/// InitKaldiOutputStream initializes an opened stream for writing by writing an -/// optional binary header and modifying the floating-point precision; it will -/// typically not be called by users directly. -inline void InitKaldiOutputStream(std::ostream &os, bool binary); - -/// InitKaldiInputStream initializes an opened stream for reading by detecting -/// the binary header and setting the "binary" value appropriately; -/// It will typically not be called by users directly. -inline bool InitKaldiInputStream(std::istream &is, bool *binary); - -} // end namespace kaldi. -#endif // KALDI_BASE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-common.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-common.h deleted file mode 100644 index eee5f34d7234e7c029e6bb59584d3ee65ff5a875..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-common.h +++ /dev/null @@ -1,41 +0,0 @@ -// base/kaldi-common.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_COMMON_H_ -#define KALDI_BASE_KALDI_COMMON_H_ 1 - -#include -#include -#include // C string stuff like strcpy -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-utils.h" -#include "base/kaldi-error.h" -#include "base/kaldi-types.h" -// #include "base/io-funcs.h" -#include "base/kaldi-math.h" -// #include "base/timer.h" - -#endif // KALDI_BASE_KALDI_COMMON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-error.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-error.cc deleted file mode 100644 index 77edc6af6e56bb8fa3431d519e58fda9ee0bac6a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-error.cc +++ /dev/null @@ -1,42 +0,0 @@ -// base/kaldi-error.cc - -// Copyright 2019 LAIX (Yi Sun) -// Copyright 2019 SmartAction LLC (kkm) -// Copyright 2016 Brno University of Technology (author: Karel Vesely) -// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-error.h" - -#include - -namespace kaldi { - -/***** GLOBAL VARIABLES FOR LOGGING *****/ - -int32 g_kaldi_verbose_level = 0; -static std::string program_name; // NOLINT - -void SetProgramName(const char *basename) { - // Using the 'static std::string' for the program name is mostly harmless, - // because (a) Kaldi logging is undefined before main(), and (b) no stdc++ - // string implementation has been found in the wild that would not be just - // an empty string when zero-initialized but not yet constructed. - program_name = basename; -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-error.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-error.h deleted file mode 100644 index 0f65db372b5f05a8017433eed7c95badc819a0a6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-error.h +++ /dev/null @@ -1,57 +0,0 @@ -// base/kaldi-error.h - -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_ERROR_H_ -#define KALDI_BASE_KALDI_ERROR_H_ 1 - -#include "utils/log.h" - -namespace kaldi { - -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_LOG \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) - - -/***** PROGRAM NAME AND VERBOSITY LEVEL *****/ - -/// Called by ParseOptions to set base name (no directory) of the executing -/// program. The name is printed in logging code along with every message, -/// because in our scripts, we often mix together the stderr of many programs. -/// This function is very thread-unsafe. -void SetProgramName(const char *basename); - -/// This is set by util/parse-options.{h,cc} if you set --verbose=? option. -/// Do not use directly, prefer {Get,Set}VerboseLevel(). -extern int32 g_kaldi_verbose_level; - -/// Get verbosity level, usually set via command line '--verbose=' switch. -inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; } - -/// This should be rarely used, except by programs using Kaldi as library; -/// command-line programs set the verbose level automatically from ParseOptions. 
-inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; } - -} // namespace kaldi - -#endif // KALDI_BASE_KALDI_ERROR_H_ - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-math.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-math.cc deleted file mode 100644 index 175d9f49b6c5216645e90e146f4e2eab5572c342..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-math.cc +++ /dev/null @@ -1,164 +0,0 @@ -// base/kaldi-math.cc - -// Copyright 2009-2011 Microsoft Corporation; Yanmin Qian; -// Saarland University; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-math.h" -#ifndef _MSC_VER -#include -#include -#endif -#include -#include - -namespace kaldi { -// These routines are tested in matrix/matrix-test.cc - -int32 RoundUpToNearestPowerOfTwo(int32 n) { - KALDI_ASSERT(n > 0); - n--; - n |= n >> 1; - n |= n >> 2; - n |= n >> 4; - n |= n >> 8; - n |= n >> 16; - return n+1; -} - -static std::mutex _RandMutex; - -int Rand(struct RandomState* state) { -#if !defined(_POSIX_THREAD_SAFE_FUNCTIONS) - // On Windows and Cygwin, just call Rand() - return rand(); -#else - if (state) { - return rand_r(&(state->seed)); - } else { - std::lock_guard lock(_RandMutex); - return rand(); - } -#endif -} - -RandomState::RandomState() { - // we initialize it as Rand() + 27437 instead of just Rand(), because on some - // systems, e.g. at the very least Mac OSX Yosemite and later, it seems to be - // the case that rand_r when initialized with rand() will give you the exact - // same sequence of numbers that rand() will give if you keep calling rand() - // after that initial call. This can cause problems with repeated sequences. - // For example if you initialize two RandomState structs one after the other - // without calling rand() in between, they would give you the same sequence - // offset by one (if we didn't have the "+ 27437" in the code). 27437 is just - // a randomly chosen prime number. - seed = unsigned(Rand()) + 27437; -} - -bool WithProb(BaseFloat prob, struct RandomState* state) { - KALDI_ASSERT(prob >= 0 && prob <= 1.1); // prob should be <= 1.0, - // but we allow slightly larger values that could arise from roundoff in - // previous calculations. - KALDI_COMPILE_TIME_ASSERT(RAND_MAX > 128 * 128); - if (prob == 0) { - return false; - } else if (prob == 1.0) { - return true; - } else if (prob * RAND_MAX < 128.0) { - // prob is very small but nonzero, and the "main algorithm" - // wouldn't work that well. So: with probability 1/128, we - // return WithProb (prob * 128), else return false. - if (Rand(state) < RAND_MAX / 128) { // with probability 128... - // Note: we know that prob * 128.0 < 1.0, because - // we asserted RAND_MAX > 128 * 128. 
- return WithProb(prob * 128.0); - } else { - return false; - } - } else { - return (Rand(state) < ((RAND_MAX + static_cast(1.0)) * prob)); - } -} - -int32 RandInt(int32 min_val, int32 max_val, struct RandomState* state) { - // This is not exact. - KALDI_ASSERT(max_val >= min_val); - if (max_val == min_val) return min_val; - -#ifdef _MSC_VER - // RAND_MAX is quite small on Windows -> may need to handle larger numbers. - if (RAND_MAX > (max_val-min_val)*8) { - // *8 to avoid large inaccuracies in probability, from the modulus... - return min_val + - ((unsigned int)Rand(state) % (unsigned int)(max_val+1-min_val)); - } else { - if ((unsigned int)(RAND_MAX*RAND_MAX) > - (unsigned int)((max_val+1-min_val)*8)) { - // *8 to avoid inaccuracies in probability, from the modulus... - return min_val + ( (unsigned int)( (Rand(state)+RAND_MAX*Rand(state))) - % (unsigned int)(max_val+1-min_val)); - } else { - KALDI_ERR << "rand_int failed because we do not support such large " - "random numbers. (Extend this function)."; - } - } -#else - return min_val + - (static_cast(Rand(state)) % static_cast(max_val+1-min_val)); -#endif -} - -// Returns poisson-distributed random number. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state) { - // Knuth's algorithm. - KALDI_ASSERT(lambda >= 0); - float L = expf(-lambda), p = 1.0; - int32 k = 0; - do { - k++; - float u = RandUniform(state); - p *= u; - } while (p > L); - return k-1; -} - -void RandGauss2(float *a, float *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float u1 = RandUniform(state); - float u2 = RandUniform(state); - u1 = sqrtf(-2.0f * logf(u1)); - u2 = 2.0f * M_PI * u2; - *a = u1 * cosf(u2); - *b = u1 * sinf(u2); -} - -void RandGauss2(double *a, double *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float a_float, b_float; - // Just because we're using doubles doesn't mean we need super-high-quality - // random numbers, so we just use the floating-point version internally. - RandGauss2(&a_float, &b_float, state); - *a = a_float; - *b = b_float; -} - - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-math.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-math.h deleted file mode 100644 index 93c265ee96e704893da26b9083a44a9e60c6c192..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-math.h +++ /dev/null @@ -1,363 +0,0 @@ -// base/kaldi-math.h - -// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Yanmin Qian; -// Jan Silovsky; Saarland University -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef KALDI_BASE_KALDI_MATH_H_ -#define KALDI_BASE_KALDI_MATH_H_ 1 - -#ifdef _MSC_VER -#include -#endif - -#include -#include -#include - -#include "base/kaldi-types.h" -#include "base/kaldi-common.h" - - -#ifndef DBL_EPSILON -#define DBL_EPSILON 2.2204460492503131e-16 -#endif -#ifndef FLT_EPSILON -#define FLT_EPSILON 1.19209290e-7f -#endif - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif - -#ifndef M_SQRT2 -#define M_SQRT2 1.4142135623730950488016887 -#endif - -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -#ifndef M_SQRT1_2 -#define M_SQRT1_2 0.7071067811865475244008443621048490 -#endif - -#ifndef M_LOG_2PI -#define M_LOG_2PI 1.8378770664093454835606594728112 -#endif - -#ifndef M_LN2 -#define M_LN2 0.693147180559945309417232121458 -#endif - -#ifndef M_LN10 -#define M_LN10 2.302585092994045684017991454684 -#endif - - -#define KALDI_ISNAN std::isnan -#define KALDI_ISINF std::isinf -#define KALDI_ISFINITE(x) std::isfinite(x) - -#if !defined(KALDI_SQR) -# define KALDI_SQR(x) ((x) * (x)) -#endif - -namespace kaldi { - -#if !defined(_MSC_VER) || (_MSC_VER >= 1900) -inline double Exp(double x) { return exp(x); } -#ifndef KALDI_NO_EXPF -inline float Exp(float x) { return expf(x); } -#else -inline float Exp(float x) { return exp(static_cast(x)); } -#endif // KALDI_NO_EXPF -#else -inline double Exp(double x) { return exp(x); } -#if !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -// Microsoft CL v18.0 buggy 64-bit implementation of -// expf() incorrectly returns -inf for exp(-inf). -inline float Exp(float x) { return exp(static_cast(x)); } -#else -inline float Exp(float x) { return expf(x); } -#endif // !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) - -inline double Log(double x) { return log(x); } -inline float Log(float x) { return logf(x); } - -#if !defined(_MSC_VER) || (_MSC_VER >= 1700) -inline double Log1p(double x) { return log1p(x); } -inline float Log1p(float x) { return log1pf(x); } -#else -inline double Log1p(double x) { - const double cutoff = 1.0e-08; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} - -inline float Log1p(float x) { - const float cutoff = 1.0e-07; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} -#endif - -static const double kMinLogDiffDouble = Log(DBL_EPSILON); // negative! -static const float kMinLogDiffFloat = Log(FLT_EPSILON); // negative! - -// -infinity -const float kLogZeroFloat = -std::numeric_limits::infinity(); -const double kLogZeroDouble = -std::numeric_limits::infinity(); -const BaseFloat kLogZeroBaseFloat = -std::numeric_limits::infinity(); - -// Returns a random integer between 0 and RAND_MAX, inclusive -int Rand(struct RandomState* state = NULL); - -// State for thread-safe random number generator -struct RandomState { - RandomState(); - unsigned seed; -}; - -// Returns a random integer between first and last inclusive. -int32 RandInt(int32 first, int32 last, struct RandomState* state = NULL); - -// Returns true with probability "prob", -bool WithProb(BaseFloat prob, struct RandomState* state = NULL); -// with 0 <= prob <= 1 [we check this]. -// Internally calls Rand(). This function is carefully implemented so -// that it should work even if prob is very small. - -/// Returns a random number strictly between 0 and 1. 
-inline float RandUniform(struct RandomState* state = NULL) { - return static_cast((Rand(state) + 1.0) / (RAND_MAX+2.0)); -} - -inline float RandGauss(struct RandomState* state = NULL) { - return static_cast(sqrtf (-2 * Log(RandUniform(state))) - * cosf(2*M_PI*RandUniform(state))); -} - -// Returns poisson-distributed random number. Uses Knuth's algorithm. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state = NULL); - -// Returns a pair of gaussian random numbers. Uses Box-Muller transform -void RandGauss2(float *a, float *b, RandomState *state = NULL); -void RandGauss2(double *a, double *b, RandomState *state = NULL); - -// Also see Vector::RandCategorical(). - -// This is a randomized pruning mechanism that preserves expectations, -// that we typically use to prune posteriors. -template -inline Float RandPrune(Float post, BaseFloat prune_thresh, - struct RandomState* state = NULL) { - KALDI_ASSERT(prune_thresh >= 0.0); - if (post == 0.0 || std::abs(post) >= prune_thresh) - return post; - return (post >= 0 ? 1.0 : -1.0) * - (RandUniform(state) <= fabs(post)/prune_thresh ? prune_thresh : 0.0); -} - -// returns log(exp(x) + exp(y)). -inline double LogAdd(double x, double y) { - double diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffDouble) { - double res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) + exp(y)). -inline float LogAdd(float x, float y) { - float diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffFloat) { - float res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) - exp(y)). -inline double LogSub(double x, double y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - double diff = y - x; // Will be negative. - double res = x + Log(1.0 - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroDouble; - return res; -} - - -// returns log(exp(x) - exp(y)). -inline float LogSub(float x, float y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - float diff = y - x; // Will be negative. - float res = x + Log(1.0f - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroFloat; - return res; -} - -/// return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)). -static inline bool ApproxEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. - if (a == b) return true; - float diff = std::abs(a-b); - if (diff == std::numeric_limits::infinity() - || diff != diff) return false; // diff is +inf or nan. - return (diff <= relative_tolerance*(std::abs(a)+std::abs(b))); -} - -/// assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b)) -static inline void AssertEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. 
- KALDI_ASSERT(ApproxEqual(a, b, relative_tolerance)); -} - - -// RoundUpToNearestPowerOfTwo does the obvious thing. It crashes if n <= 0. -int32 RoundUpToNearestPowerOfTwo(int32 n); - -/// Returns a / b, rounding towards negative infinity in all cases. -static inline int32 DivideRoundingDown(int32 a, int32 b) { - KALDI_ASSERT(b != 0); - if (a * b >= 0) - return a / b; - else if (a < 0) - return (a - b + 1) / b; - else - return (a - b - 1) / b; -} - -template I Gcd(I m, I n) { - if (m == 0 || n == 0) { - if (m == 0 && n == 0) { // gcd not defined, as all integers are divisors. - KALDI_ERR << "Undefined GCD since m = 0, n = 0."; - } - return (m == 0 ? (n > 0 ? n : -n) : ( m > 0 ? m : -m)); - // return absolute value of whichever is nonzero - } - // could use compile-time assertion - // but involves messing with complex template stuff. - KALDI_ASSERT(std::numeric_limits::is_integer); - while (1) { - m %= n; - if (m == 0) return (n > 0 ? n : -n); - n %= m; - if (n == 0) return (m > 0 ? m : -m); - } -} - -/// Returns the least common multiple of two integers. Will -/// crash unless the inputs are positive. -template I Lcm(I m, I n) { - KALDI_ASSERT(m > 0 && n > 0); - I gcd = Gcd(m, n); - return gcd * (m/gcd) * (n/gcd); -} - - -template void Factorize(I m, std::vector *factors) { - // Splits a number into its prime factors, in sorted order from - // least to greatest, with duplication. A very inefficient - // algorithm, which is mainly intended for use in the - // mixed-radix FFT computation (where we assume most factors - // are small). - KALDI_ASSERT(factors != NULL); - KALDI_ASSERT(m >= 1); // Doesn't work for zero or negative numbers. - factors->clear(); - I small_factors[10] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 }; - - // First try small factors. - for (I i = 0; i < 10; i++) { - if (m == 1) return; // We're done. - while (m % small_factors[i] == 0) { - m /= small_factors[i]; - factors->push_back(small_factors[i]); - } - } - // Next try all odd numbers starting from 31. - for (I j = 31;; j += 2) { - if (m == 1) return; - while (m % j == 0) { - m /= j; - factors->push_back(j); - } - } -} - -inline double Hypot(double x, double y) { return hypot(x, y); } -inline float Hypot(float x, float y) { return hypotf(x, y); } - - - - -} // namespace kaldi - - -#endif // KALDI_BASE_KALDI_MATH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-types.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-types.h deleted file mode 100644 index 7ebf4f85386192a65e176d8f0ecde9bb348af4a0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-types.h +++ /dev/null @@ -1,75 +0,0 @@ -// base/kaldi-types.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_TYPES_H_ -#define KALDI_BASE_KALDI_TYPES_H_ 1 - -namespace kaldi { -// TYPEDEFS .................................................................. -#if (KALDI_DOUBLEPRECISION != 0) -typedef double BaseFloat; -#else -typedef float BaseFloat; -#endif -} - -#ifdef _MSC_VER -#include -#define ssize_t SSIZE_T -#endif - -// we can do this a different way if some platform -// we find in the future lacks stdint.h -#include - -// for discussion on what to do if you need compile kaldi -// without OpenFST, see the bottom of this this file -#include - -namespace kaldi { - using ::int16; - using ::int32; - using ::int64; - using ::uint16; - using ::uint32; - using ::uint64; - typedef float float32; - typedef double double64; -} // end namespace kaldi - -// In a theoretical case you decide compile Kaldi without the OpenFST -// comment the previous namespace statement and uncomment the following -/* -namespace kaldi { - typedef int8_t int8; - typedef int16_t int16; - typedef int32_t int32; - typedef int64_t int64; - - typedef uint8_t uint8; - typedef uint16_t uint16; - typedef uint32_t uint32; - typedef uint64_t uint64; - typedef float float32; - typedef double double64; -} // end namespace kaldi -*/ - -#endif // KALDI_BASE_KALDI_TYPES_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-utils.h deleted file mode 100644 index bd434d09ed92ec94bc4208f53a4416f941edfdb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/base/kaldi-utils.h +++ /dev/null @@ -1,155 +0,0 @@ -// base/kaldi-utils.h - -// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; -// Saarland University; Karel Vesely; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_UTILS_H_ -#define KALDI_BASE_KALDI_UTILS_H_ 1 - -#if defined(_MSC_VER) -# define WIN32_LEAN_AND_MEAN -# define NOMINMAX -# include -#endif - -#ifdef _MSC_VER -#include -#define unlink _unlink -#else -#include -#endif - -#include -#include - -#if defined(_MSC_VER) -#pragma warning(disable: 4244 4056 4305 4800 4267 4996 4756 4661) -#if _MSC_VER < 1400 -#define __restrict__ -#else -#define __restrict__ __restrict -#endif -#endif - -#if defined(_MSC_VER) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = _aligned_malloc(size, align)) -# define KALDI_MEMALIGN_FREE(x) _aligned_free(x) -#elif defined(__CYGWIN__) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = aligned_alloc(align, size)) -# define KALDI_MEMALIGN_FREE(x) free(x) -#else -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (!posix_memalign(pp_orig, align, size) ? 
*(pp_orig) : NULL) -# define KALDI_MEMALIGN_FREE(x) free(x) -#endif - -#ifdef __ICC -#pragma warning(disable: 383) // ICPC remark we don't want. -#pragma warning(disable: 810) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. -#pragma warning(disable: 1418) // ICPC remark we don't want. -#pragma warning(disable: 444) // ICPC remark we don't want. -#pragma warning(disable: 869) // ICPC remark we don't want. -#pragma warning(disable: 1287) // ICPC remark we don't want. -#pragma warning(disable: 279) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. -#endif - - -namespace kaldi { - - -// CharToString prints the character in a human-readable form, for debugging. -std::string CharToString(const char &c); - - -inline int MachineIsLittleEndian() { - int check = 1; - return (*reinterpret_cast(&check) != 0); -} - -// This function kaldi::Sleep() provides a portable way -// to sleep for a possibly fractional -// number of seconds. On Windows it's only accurate to microseconds. -void Sleep(float seconds); -} // namespace kaldi - -#define KALDI_SWAP8(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[7];\ - (reinterpret_cast(&a))[7] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[6];\ - (reinterpret_cast(&a))[6] = t;\ - t = (reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=(reinterpret_cast(&a))[5];\ - (reinterpret_cast(&a))[5] = t;\ - t = (reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3]=(reinterpret_cast(&a))[4];\ - (reinterpret_cast(&a))[4] = t;} while (0) -#define KALDI_SWAP4(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=t;} while (0) -#define KALDI_SWAP2(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1] = t;} while (0) - - -// Makes copy constructor and operator= private. 
-#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \ - type(const type&); \ - void operator = (const type&) - -template class KaldiCompileTimeAssert { }; -template<> class KaldiCompileTimeAssert { - public: - static inline void Check() { } -}; - -#define KALDI_COMPILE_TIME_ASSERT(b) KaldiCompileTimeAssert<(b)>::Check() - -#define KALDI_ASSERT_IS_INTEGER_TYPE(I) \ - KaldiCompileTimeAssert::is_specialized \ - && std::numeric_limits::is_integer>::Check() - -#define KALDI_ASSERT_IS_FLOATING_TYPE(F) \ - KaldiCompileTimeAssert::is_specialized \ - && !std::numeric_limits::is_integer>::Check() - -#if defined(_MSC_VER) -#define KALDI_STRCASECMP _stricmp -#elif defined(__CYGWIN__) -#include -#define KALDI_STRCASECMP strcasecmp -#else -#define KALDI_STRCASECMP strcasecmp -#endif -#ifdef _MSC_VER -# define KALDI_STRTOLL(cur_cstr, end_cstr) _strtoi64(cur_cstr, end_cstr, 10); -#else -# define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10); -#endif - -#endif // KALDI_BASE_KALDI_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-decoder.cc deleted file mode 100644 index 06f77557fa49a23f6a44d07c327a1b3b081c6dec..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-decoder.cc +++ /dev/null @@ -1,1101 +0,0 @@ -// decoder/lattice-faster-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2018 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "decoder/lattice-faster-decoder.h" -// #include "lat/lattice-functions.h" - -namespace kaldi { - -// instantiate this class once for each thing you have to decode. -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : fst_(&fst), - delete_fst_(false), - config_(config), - num_toks_(0), - context_graph_(context_graph) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. -} - -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const LatticeFasterDecoderConfig &config, FST *fst) - : fst_(fst), delete_fst_(true), config_(config), num_toks_(0) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. 
-} - -template -LatticeFasterDecoderTpl::~LatticeFasterDecoderTpl() { - DeleteElems(toks_.Clear()); - ClearActiveTokens(); - if (delete_fst_) delete fst_; -} - -template -void LatticeFasterDecoderTpl::InitDecoding() { - // clean up from last time: - DeleteElems(toks_.Clear()); - cost_offsets_.clear(); - ClearActiveTokens(); - warned_ = false; - num_toks_ = 0; - decoding_finalized_ = false; - final_costs_.clear(); - StateId start_state = fst_->Start(); - KALDI_ASSERT(start_state != fst::kNoStateId); - active_toks_.resize(1); - Token *start_tok = new Token(0.0, 0.0, NULL, NULL, NULL); - active_toks_[0].toks = start_tok; - toks_.Insert(start_state, start_tok); - num_toks_++; - ProcessNonemitting(config_.beam); -} - -// Returns true if any kind of traceback is available (not necessarily from -// a final state). It should only very rarely return false; this indicates -// an unusual search error. -template -bool LatticeFasterDecoderTpl::Decode( - DecodableInterface *decodable) { - InitDecoding(); - // We use 1-based indexing for frames in this decoder (if you view it in - // terms of features), but note that the decodable object uses zero-based - // numbering, which we have to correct for when we call it. - AdvanceDecoding(decodable); - FinalizeDecoding(); - - // Returns true if we have any kind of traceback available (not necessarily - // to the end state; query ReachedFinal() for that). - return !active_toks_.empty() && active_toks_.back().toks != NULL; -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - Lattice raw_lat; - GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, olat); - return (olat->NumStates() != 0); -} - -// Outputs an FST corresponding to the raw, state-level lattice -template -bool LatticeFasterDecoderTpl::GetRawLattice( - Lattice *ofst, bool use_final_probs) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (decoding_finalized_ ? final_costs_ : final_costs_local); - if (!decoding_finalized_ && use_final_probs) - ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - const int32 bucket_count = num_toks_ / 2 + 3; - unordered_map tok_map(bucket_count); - // First create all states. - std::vector token_list; - for (int32 f = 0; f <= num_frames; f++) { - if (active_toks_[f].toks == NULL) { - KALDI_WARN << "GetRawLattice: no tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - TopSortTokens(active_toks_[f].toks, &token_list); - for (size_t i = 0; i < token_list.size(); i++) - if (token_list[i] != NULL) tok_map[token_list[i]] = ofst->AddState(); - } - // The next statement sets the start state of the output FST. Because we - // topologically sorted the tokens, state zero must be the start-state. 
- ofst->SetStart(0); - - KALDI_VLOG(4) << "init:" << num_toks_ / 2 + 3 - << " buckets:" << tok_map.bucket_count() - << " load:" << tok_map.load_factor() - << " max:" << tok_map.max_load_factor(); - // Now create all arcs. - for (int32 f = 0; f <= num_frames; f++) { - for (Token *tok = active_toks_[f].toks; tok != NULL; tok = tok->next) { - StateId cur_state = tok_map[tok]; - for (ForwardLinkT *l = tok->links; l != NULL; l = l->next) { - typename unordered_map::const_iterator iter = - tok_map.find(l->next_tok); - StateId nextstate = iter->second; - KALDI_ASSERT(iter != tok_map.end()); - BaseFloat cost_offset = 0.0; - if (l->ilabel != 0) { // emitting.. - KALDI_ASSERT(f >= 0 && f < cost_offsets_.size()); - cost_offset = cost_offsets_[f]; - } - - StateId state = cur_state; - if (l->is_start_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->start_tag_id(), Weight(0, 0), tmp); - ofst->AddArc(state, arc); - state = tmp; - } - if (l->is_end_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->end_tag_id(), Weight(0, 0), nextstate); - ofst->AddArc(tmp, arc); - nextstate = tmp; - } - - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(state, arc); - } - if (f == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - } - - fst::TopSort(ofst); - return (ofst->NumStates() > 0); -} - -// This function is now deprecated, since now we do determinization from outside -// the LatticeFasterDecoder class. Outputs an FST corresponding to the -// lattice-determinized lattice (one path per word sequence). -template -bool LatticeFasterDecoderTpl::GetLattice( - CompactLattice *ofst, bool use_final_probs) const { - Lattice raw_fst; - GetRawLattice(&raw_fst, use_final_probs); - Invert(&raw_fst); // make it so word labels are on the input. - // (in phase where we get backward-costs). - fst::ILabelCompare ilabel_comp; - ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes - // lattice-determinization more efficient. - - fst::DeterminizeLatticePrunedOptions lat_opts; - lat_opts.max_mem = config_.det_opts.max_mem; - - DeterminizeLatticePruned(raw_fst, config_.lattice_beam, ofst, lat_opts); - raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed. - Connect(ofst); // Remove unreachable states... there might be - // a small number of these, in some cases. - // Note: if something went wrong and the raw lattice was empty, - // we should still get to this point in the code without warnings or failures. - return (ofst->NumStates() != 0); -} - -template -void LatticeFasterDecoderTpl::PossiblyResizeHash(size_t num_toks) { - size_t new_sz = static_cast(static_cast(num_toks) * - config_.hash_ratio); - if (new_sz > toks_.Size()) { - toks_.SetSize(new_sz); - } -} - -/* - A note on the definition of extra_cost. - - extra_cost is used in pruning tokens, to save memory. - - extra_cost can be thought of as a beta (backward) cost assuming - we had set the betas on currently-active tokens to all be the negative - of the alphas for those tokens. (So all currently active tokens would - be on (tied) best paths). - - We can use the extra_cost to accurately prune away tokens that we know will - never appear in the lattice. 
If the extra_cost is greater than the desired - lattice beam, the token would provably never appear in the lattice, so we can - prune away the token. - - (Note: we don't update all the extra_costs every time we update a frame; we - only do it every 'config_.prune_interval' frames). - */ - -// FindOrAddToken either locates a token in hash of toks_, -// or if necessary inserts a new, empty token (i.e. with no forward links) -// for the current frame. [note: it's inserted if necessary into hash toks_ -// and also into the singly linked list of tokens active on this frame -// (whose head is at active_toks_[frame]). -template -inline typename LatticeFasterDecoderTpl::Elem * -LatticeFasterDecoderTpl::FindOrAddToken(StateId state, - int32 frame_plus_one, - BaseFloat tot_cost, - Token *backpointer, - bool *changed) { - // Returns the Token pointer. Sets "changed" (if non-NULL) to true - // if the token was newly created or the cost changed. - KALDI_ASSERT(frame_plus_one < active_toks_.size()); - Token *&toks = active_toks_[frame_plus_one].toks; - Elem *e_found = toks_.Insert(state, NULL); - if (e_found->val == NULL) { // no such token presently. - const BaseFloat extra_cost = 0.0; - // tokens on the currently final frame have zero extra_cost - // as any of them could end up - // on the winning path. - Token *new_tok = new Token(tot_cost, extra_cost, NULL, toks, backpointer); - // NULL: no forward links yet - toks = new_tok; - num_toks_++; - e_found->val = new_tok; - if (changed) *changed = true; - return e_found; - } else { - Token *tok = e_found->val; // There is an existing Token for this state. - if (tok->tot_cost > tot_cost) { // replace old token - tok->tot_cost = tot_cost; - // SetBackpointer() just does tok->backpointer = backpointer in - // the case where Token == BackpointerToken, else nothing. - tok->SetBackpointer(backpointer); - // we don't allocate a new token, the old stays linked in active_toks_ - // we only replace the tot_cost - // in the current frame, there are no forward links (and no extra_cost) - // only in ProcessNonemitting we have to delete forward links - // in case we visit a state for the second time - // those forward links, that lead to this replaced token before: - // they remain and will hopefully be pruned later (PruneForwardLinks...) - if (changed) *changed = true; - } else { - if (changed) *changed = false; - } - return e_found; - } -} - -// prunes outgoing links for all tokens in active_toks_[frame] -// it's called by PruneActiveTokens -// all links, that have link_extra_cost > lattice_beam are pruned -template -void LatticeFasterDecoderTpl::PruneForwardLinks( - int32 frame_plus_one, bool *extra_costs_changed, bool *links_pruned, - BaseFloat delta) { - // delta is the amount by which the extra_costs must change - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - - *extra_costs_changed = false; - *links_pruned = false; - KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size()); - if (active_toks_[frame_plus_one].toks == - NULL) { // empty list; should not happen. - if (!warned_) { - KALDI_WARN << "No tokens alive [doing pruning].. warning first " - "time only for each utterance\n"; - warned_ = true; - } - } - - // We have to iterate until there is no more change, because the links - // are not guaranteed to be in topological order. 
- bool changed = true; // difference new minus old extra cost >= delta ? - while (changed) { - changed = false; - for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL; - tok = tok->next) { - ForwardLinkT *link, *prev_link = NULL; - // will recompute tok_extra_cost for tok. - BaseFloat tok_extra_cost = std::numeric_limits::infinity(); - // tok_extra_cost is the best (min) of link_extra_cost of outgoing links - for (link = tok->links; link != NULL;) { - // See if we need to excise this link... - Token *next_tok = link->next_tok; - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); // difference in brackets is >= 0 - // link_exta_cost is the difference in score between the best paths - // through link source state and through link destination state - KALDI_ASSERT(link_extra_cost == link_extra_cost); // check for NaN - // the graph_cost contatins the context score - // if it's the score of the backoff arc, it should be removed. - if (link->context_score < 0) { - link_extra_cost += link->context_score; - } - if (link_extra_cost > config_.lattice_beam) { // excise link - ForwardLinkT *next_link = link->next; - if (prev_link != NULL) - prev_link->next = next_link; - else - tok->links = next_link; - delete link; - link = next_link; // advance link but leave prev_link the same. - *links_pruned = true; - } else { // keep the link and update the tok_extra_cost if needed. - if (link_extra_cost < 0.0) { // this is just a precaution. - // if (link_extra_cost < -0.01) - // KALDI_WARN << "Negative extra_cost: " << link_extra_cost; - link_extra_cost = 0.0; - } - if (link_extra_cost < tok_extra_cost) - tok_extra_cost = link_extra_cost; - prev_link = link; // move to next link - link = link->next; - } - } // for all outgoing links - if (fabs(tok_extra_cost - tok->extra_cost) > delta) - changed = true; // difference new minus old is bigger than delta - tok->extra_cost = tok_extra_cost; - // will be +infinity or <= lattice_beam_. - // infinity indicates, that no forward link survived pruning - } // for all Token on active_toks_[frame] - if (changed) *extra_costs_changed = true; - - // Note: it's theoretically possible that aggressive compiler - // optimizations could cause an infinite loop here for small delta and - // high-dynamic-range scores. - } // while changed -} - -// PruneForwardLinksFinal is a version of PruneForwardLinks that we call -// on the final frame. If there are final tokens active, it uses -// the final-probs for pruning, otherwise it treats all tokens as final. -template -void LatticeFasterDecoderTpl::PruneForwardLinksFinal() { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame_plus_one = active_toks_.size() - 1; - - if (active_toks_[frame_plus_one].toks == - NULL) // empty list; should not happen. - KALDI_WARN << "No tokens alive at end of file"; - - typedef typename unordered_map::const_iterator IterType; - ComputeFinalCosts(&final_costs_, &final_relative_cost_, &final_best_cost_); - decoding_finalized_ = true; - // We call DeleteElems() as a nicety, not because it's really necessary; - // otherwise there would be a time, after calling PruneTokensForFrame() on the - // final frame, when toks_.GetList() or toks_.Clear() would contain pointers - // to nonexistent tokens. - DeleteElems(toks_.Clear()); - - // Now go through tokens on this frame, pruning forward links... may have to - // iterate a few times until there is no more change, because the list is not - // in topological order. 
This is a modified version of the code in - // PruneForwardLinks, but here we also take account of the final-probs. - bool changed = true; - BaseFloat delta = 1.0e-05; - while (changed) { - changed = false; - for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL; - tok = tok->next) { - ForwardLinkT *link, *prev_link = NULL; - // will recompute tok_extra_cost. It has a term in it that corresponds - // to the "final-prob", so instead of initializing tok_extra_cost to - // infinity below we set it to the difference between the - // (score+final_prob) of this token, and the best such (score+final_prob). - BaseFloat final_cost; - if (final_costs_.empty()) { - final_cost = 0.0; - } else { - IterType iter = final_costs_.find(tok); - if (iter != final_costs_.end()) - final_cost = iter->second; - else - final_cost = std::numeric_limits::infinity(); - } - BaseFloat tok_extra_cost = tok->tot_cost + final_cost - final_best_cost_; - // tok_extra_cost will be a "min" over either directly being final, or - // being indirectly final through other links, and the loop below may - // decrease its value: - for (link = tok->links; link != NULL;) { - // See if we need to excise this link... - Token *next_tok = link->next_tok; - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); - if (link_extra_cost > config_.lattice_beam) { // excise link - ForwardLinkT *next_link = link->next; - if (prev_link != NULL) - prev_link->next = next_link; - else - tok->links = next_link; - delete link; - link = next_link; // advance link but leave prev_link the same. - } else { // keep the link and update the tok_extra_cost if needed. - if (link_extra_cost < 0.0) { // this is just a precaution. - // if (link_extra_cost < -0.01) - // KALDI_WARN << "Negative extra_cost: " << link_extra_cost; - link_extra_cost = 0.0; - } - if (link_extra_cost < tok_extra_cost) - tok_extra_cost = link_extra_cost; - prev_link = link; - link = link->next; - } - } - // prune away tokens worse than lattice_beam above best path. This step - // was not necessary in the non-final case because then, this case - // showed up as having no forward links. Here, the tok_extra_cost has - // an extra component relating to the final-prob. - if (tok_extra_cost > config_.lattice_beam) - tok_extra_cost = std::numeric_limits::infinity(); - // to be pruned in PruneTokensForFrame - - if (!ApproxEqual(tok->extra_cost, tok_extra_cost, delta)) changed = true; - tok->extra_cost = - tok_extra_cost; // will be +infinity or <= lattice_beam_. - } - } // while changed -} - -template -BaseFloat LatticeFasterDecoderTpl::FinalRelativeCost() const { - if (!decoding_finalized_) { - BaseFloat relative_cost; - ComputeFinalCosts(NULL, &relative_cost, NULL); - return relative_cost; - } else { - // we're not allowed to call that function if FinalizeDecoding() has - // been called; return a cached value. - return final_relative_cost_; - } -} - -// Prune away any tokens on this frame that have no forward links. -// [we don't do this in PruneForwardLinks because it would give us -// a problem with dangling pointers]. 
-// It's called by PruneActiveTokens if any forward links have been pruned
-template <typename FST, typename Token>
-void LatticeFasterDecoderTpl<FST, Token>::PruneTokensForFrame(
-    int32 frame_plus_one) {
-  KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size());
-  Token *&toks = active_toks_[frame_plus_one].toks;
-  if (toks == NULL) KALDI_WARN << "No tokens alive [doing pruning]";
-  Token *tok, *next_tok, *prev_tok = NULL;
-  for (tok = toks; tok != NULL; tok = next_tok) {
-    next_tok = tok->next;
-    if (tok->extra_cost == std::numeric_limits<BaseFloat>::infinity()) {
-      // token is unreachable from end of graph; (no forward links survived)
-      // excise tok from list and delete tok.
-      if (prev_tok != NULL)
-        prev_tok->next = tok->next;
-      else
-        toks = tok->next;
-      delete tok;
-      num_toks_--;
-    } else {  // fetch next Token
-      prev_tok = tok;
-    }
-  }
-}
-
-// Go backwards through still-alive tokens, pruning them, starting not from
-// the current frame (where we want to keep all tokens) but from the frame
-// before that. We go backwards through the frames and stop when we reach a
-// point where the delta-costs are not changing (and the delta controls when we
-// consider a cost to have "not changed").
-template <typename FST, typename Token>
-void LatticeFasterDecoderTpl<FST, Token>::PruneActiveTokens(BaseFloat delta) {
-  int32 cur_frame_plus_one = NumFramesDecoded();
-  int32 num_toks_begin = num_toks_;
-  // The index "f" below represents a "frame plus one", i.e. you'd have to
-  // subtract one to get the corresponding index for the decodable object.
-  for (int32 f = cur_frame_plus_one - 1; f >= 0; f--) {
-    // Reason why we need to prune forward links in this situation:
-    // (1) we have never pruned them (new TokenList)
-    // (2) we have not yet pruned the forward links to the next f,
-    // after any of those tokens have changed their extra_cost.
- if (active_toks_[f].must_prune_forward_links) { - bool extra_costs_changed = false, links_pruned = false; - PruneForwardLinks(f, &extra_costs_changed, &links_pruned, delta); - if (extra_costs_changed && f > 0) // any token has changed extra_cost - active_toks_[f - 1].must_prune_forward_links = true; - if (links_pruned) // any link was pruned - active_toks_[f].must_prune_tokens = true; - active_toks_[f].must_prune_forward_links = false; // job done - } - if (f + 1 < cur_frame_plus_one && // except for last f (no forward links) - active_toks_[f + 1].must_prune_tokens) { - PruneTokensForFrame(f + 1); - active_toks_[f + 1].must_prune_tokens = false; - } - } - KALDI_VLOG(4) << "PruneActiveTokens: pruned tokens from " << num_toks_begin - << " to " << num_toks_; -} - -template -void LatticeFasterDecoderTpl::ComputeFinalCosts( - unordered_map *final_costs, - BaseFloat *final_relative_cost, BaseFloat *final_best_cost) const { - KALDI_ASSERT(!decoding_finalized_); - if (final_costs != NULL) final_costs->clear(); - const Elem *final_toks = toks_.GetList(); - BaseFloat infinity = std::numeric_limits::infinity(); - BaseFloat best_cost = infinity, best_cost_with_final = infinity; - - while (final_toks != NULL) { - StateId state = final_toks->key; - Token *tok = final_toks->val; - const Elem *next = final_toks->tail; - BaseFloat final_cost = fst_->Final(state).Value(); - BaseFloat cost = tok->tot_cost, cost_with_final = cost + final_cost; - best_cost = std::min(cost, best_cost); - best_cost_with_final = std::min(cost_with_final, best_cost_with_final); - if (final_costs != NULL && final_cost != infinity) - (*final_costs)[tok] = final_cost; - final_toks = next; - } - if (final_relative_cost != NULL) { - if (best_cost == infinity && best_cost_with_final == infinity) { - // Likely this will only happen if there are no tokens surviving. - // This seems the least bad way to handle it. - *final_relative_cost = infinity; - } else { - *final_relative_cost = best_cost_with_final - best_cost; - } - } - if (final_best_cost != NULL) { - if (best_cost_with_final != infinity) { // final-state exists. - *final_best_cost = best_cost_with_final; - } else { // no final-state exists. - *final_best_cost = best_cost; - } - } -} - -template -void LatticeFasterDecoderTpl::AdvanceDecoding( - DecodableInterface *decodable, int32 max_num_frames) { - if (std::is_same >::value) { - // if the type 'FST' is the FST base-class, then see if the FST type of fst_ - // is actually VectorFst or ConstFst. If so, call the AdvanceDecoding() - // function after casting *this to the more specific type. - if (fst_->Type() == "const") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } else if (fst_->Type() == "vector") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } - } - - KALDI_ASSERT(!active_toks_.empty() && !decoding_finalized_ && - "You must call InitDecoding() before AdvanceDecoding"); - int32 num_frames_ready = decodable->NumFramesReady(); - // num_frames_ready must be >= num_frames_decoded, or else - // the number of frames ready must have decreased (which doesn't - // make sense) or the decodable object changed between calls - // (which isn't allowed). 
- KALDI_ASSERT(num_frames_ready >= NumFramesDecoded()); - int32 target_frames_decoded = num_frames_ready; - if (max_num_frames >= 0) - target_frames_decoded = - std::min(target_frames_decoded, NumFramesDecoded() + max_num_frames); - while (NumFramesDecoded() < target_frames_decoded) { - if (NumFramesDecoded() % config_.prune_interval == 0) { - PruneActiveTokens(config_.lattice_beam * config_.prune_scale); - } - BaseFloat cost_cutoff = ProcessEmitting(decodable); - ProcessNonemitting(cost_cutoff); - } -} - -// FinalizeDecoding() is a version of PruneActiveTokens that we call -// (optionally) on the final frame. Takes into account the final-prob of -// tokens. This function used to be called PruneActiveTokensFinal(). -template -void LatticeFasterDecoderTpl::FinalizeDecoding() { - int32 final_frame_plus_one = NumFramesDecoded(); - int32 num_toks_begin = num_toks_; - // PruneForwardLinksFinal() prunes final frame (with final-probs), and - // sets decoding_finalized_. - PruneForwardLinksFinal(); - for (int32 f = final_frame_plus_one - 1; f >= 0; f--) { - bool b1, b2; // values not used. - BaseFloat dontcare = 0.0; // delta of zero means we must always update - PruneForwardLinks(f, &b1, &b2, dontcare); - PruneTokensForFrame(f + 1); - } - PruneTokensForFrame(0); - KALDI_VLOG(4) << "pruned tokens from " << num_toks_begin << " to " - << num_toks_; -} - -/// Gets the weight cutoff. Also counts the active tokens. -template -BaseFloat LatticeFasterDecoderTpl::GetCutoff( - Elem *list_head, size_t *tok_count, BaseFloat *adaptive_beam, - Elem **best_elem) { - BaseFloat best_weight = std::numeric_limits::infinity(); - // positive == high cost == bad. - size_t count = 0; - if (config_.max_active == std::numeric_limits::max() && - config_.min_active == 0) { - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = static_cast(e->val->tot_cost); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - if (adaptive_beam != NULL) *adaptive_beam = config_.beam; - return best_weight + config_.beam; - } else { - tmp_array_.clear(); - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = e->val->tot_cost; - tmp_array_.push_back(w); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - - BaseFloat beam_cutoff = best_weight + config_.beam, - min_active_cutoff = std::numeric_limits::infinity(), - max_active_cutoff = std::numeric_limits::infinity(); - - KALDI_VLOG(6) << "Number of tokens active on frame " << NumFramesDecoded() - << " is " << tmp_array_.size(); - - if (tmp_array_.size() > static_cast(config_.max_active)) { - std::nth_element(tmp_array_.begin(), - tmp_array_.begin() + config_.max_active, - tmp_array_.end()); - max_active_cutoff = tmp_array_[config_.max_active]; - } - if (max_active_cutoff < beam_cutoff) { // max_active is tighter than beam. - if (adaptive_beam) - *adaptive_beam = max_active_cutoff - best_weight + config_.beam_delta; - return max_active_cutoff; - } - if (tmp_array_.size() > static_cast(config_.min_active)) { - if (config_.min_active == 0) { - min_active_cutoff = best_weight; - } else { - std::nth_element( - tmp_array_.begin(), tmp_array_.begin() + config_.min_active, - tmp_array_.size() > static_cast(config_.max_active) - ? 
tmp_array_.begin() + config_.max_active - : tmp_array_.end()); - min_active_cutoff = tmp_array_[config_.min_active]; - } - } - if (min_active_cutoff > beam_cutoff) { // min_active is looser than beam. - if (adaptive_beam) - *adaptive_beam = min_active_cutoff - best_weight + config_.beam_delta; - return min_active_cutoff; - } else { - *adaptive_beam = config_.beam; - return beam_cutoff; - } - } -} - -template -BaseFloat LatticeFasterDecoderTpl::ProcessEmitting( - DecodableInterface *decodable) { - KALDI_ASSERT(active_toks_.size() > 0); - int32 frame = - active_toks_.size() - 1; // frame is the frame-index - // (zero-based) used to get likelihoods - // from the decodable object. - active_toks_.resize(active_toks_.size() + 1); - - Elem *final_toks = - toks_.Clear(); // analogous to swapping prev_toks_ / cur_toks_ - // in simple-decoder.h. Removes the Elems from - // being indexed in the hash in toks_. - Elem *best_elem = NULL; - BaseFloat adaptive_beam; - size_t tok_cnt; - BaseFloat cur_cutoff = - GetCutoff(final_toks, &tok_cnt, &adaptive_beam, &best_elem); - KALDI_VLOG(6) << "Adaptive beam on frame " << NumFramesDecoded() << " is " - << adaptive_beam; - - PossiblyResizeHash( - tok_cnt); // This makes sure the hash is always big enough. - - BaseFloat next_cutoff = std::numeric_limits::infinity(); - // pruning "online" before having seen all tokens - - BaseFloat cost_offset = 0.0; // Used to keep probabilities in a good - // dynamic range. - - // First process the best token to get a hopefully - // reasonably tight bound on the next cutoff. The only - // products of the next block are "next_cutoff" and "cost_offset". - if (best_elem) { - StateId state = best_elem->key; - Token *tok = best_elem->val; - cost_offset = -tok->tot_cost; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. - BaseFloat new_weight = arc.weight.Value() + cost_offset - - decodable->LogLikelihood(frame, arc.ilabel) + - tok->tot_cost; - if (state != arc.nextstate) { - new_weight += config_.length_penalty; - } - if (new_weight + adaptive_beam < next_cutoff) - next_cutoff = new_weight + adaptive_beam; - } - } - } - - // Store the offset on the acoustic likelihoods that we're applying. - // Could just do cost_offsets_.push_back(cost_offset), but we - // do it this way as it's more robust to future code changes. - cost_offsets_.resize(frame + 1, 0.0); - cost_offsets_[frame] = cost_offset; - - // the tokens are now owned here, in final_toks, and the hash is empty. - // 'owned' is a complex thing here; the point is we need to call DeleteElem - // on each elem 'e' to let toks_ know we're done with them. - for (Elem *e = final_toks, *e_tail; e != NULL; e = e_tail) { - // loop this way because we delete "e" as we go. - StateId state = e->key; - Token *tok = e->val; - if (tok->tot_cost <= cur_cutoff) { - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. 
- BaseFloat ac_cost = cost_offset - - decodable->LogLikelihood(frame, arc.ilabel), - graph_cost = arc.weight.Value(); - if (state != arc.nextstate) { - graph_cost += config_.length_penalty; - } - BaseFloat cur_cost = tok->tot_cost, - tot_cost = cur_cost + ac_cost + graph_cost; - if (tot_cost >= next_cutoff) - continue; - else if (tot_cost + adaptive_beam < next_cutoff) - next_cutoff = - tot_cost + adaptive_beam; // prune by best current token - // Note: the frame indexes into active_toks_ are one-based, - // hence the + 1. - Elem *e_next = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, NULL); - // NULL: no change indicator needed - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_next->val->context_state = tok->context_state; - } else { - e_next->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - // Add ForwardLink from tok to next_tok (put on head of list - // tok->links) - tok->links = new ForwardLinkT(e_next->val, arc.ilabel, arc.olabel, - graph_cost, ac_cost, is_start_boundary, - is_end_boundary, tok->links); - tok->links->context_score = context_score; - } - } // for all arcs - } - e_tail = e->tail; - toks_.Delete(e); // delete Elem - } - return next_cutoff; -} - -// static inline -template -void LatticeFasterDecoderTpl::DeleteForwardLinks(Token *tok) { - ForwardLinkT *l = tok->links, *m; - while (l != NULL) { - m = l->next; - delete l; - l = m; - } - tok->links = NULL; -} - -template -void LatticeFasterDecoderTpl::ProcessNonemitting(BaseFloat cutoff) { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame = static_cast(active_toks_.size()) - 2; - // Note: "frame" is the time-index we just processed, or -1 if - // we are processing the nonemitting transitions before the - // first frame (called from InitDecoding()). - - // Processes nonemitting arcs for one frame. Propagates within toks_. - // Note-- this queue structure is not very optimal as - // it may cause us to process states unnecessarily (e.g. more than once), - // but in the baseline code, turning this vector into a set to fix this - // problem did not improve overall speed. - - KALDI_ASSERT(queue_.empty()); - - if (toks_.GetList() == NULL) { - if (!warned_) { - KALDI_WARN << "Error, no surviving tokens: frame is " << frame; - warned_ = true; - } - } - - int before = 0, after = 0; - for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail) { - StateId state = e->key; - if (fst_->NumInputEpsilons(state) != 0) queue_.push_back(e); - ++before; - } - - while (!queue_.empty()) { - ++after; - const Elem *e = queue_.back(); - queue_.pop_back(); - - StateId state = e->key; - Token *tok = - e->val; // would segfault if e is a NULL pointer but this can't happen. - BaseFloat cur_cost = tok->tot_cost; - if (cur_cost >= cutoff) // Don't bother processing successors. - continue; - // If "tok" has any existing forward links, delete them, - // because we're about to regenerate them. This is a kind - // of non-optimality (remember, this is the simple decoder), - // but since most states are emitting it's not a huge issue. - DeleteForwardLinks(tok); // necessary when re-visiting - tok->links = NULL; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel == 0) { // propagate nonemitting only... 
- BaseFloat graph_cost = arc.weight.Value(), - tot_cost = cur_cost + graph_cost; - if (tot_cost < cutoff) { - bool changed; - - Elem *e_new = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, &changed); - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_new->val->context_state = tok->context_state; - } else { - e_new->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - - tok->links = - new ForwardLinkT(e_new->val, 0, arc.olabel, graph_cost, 0, - is_start_boundary, is_end_boundary, tok->links); - tok->links->context_score = context_score; - - // "changed" tells us whether the new token has a different - // cost from before, or is new [if so, add into queue]. - if (changed && fst_->NumInputEpsilons(arc.nextstate) != 0) - queue_.push_back(e_new); - } - } - } // for all arcs - } // while queue not empty - KALDI_VLOG(3) << "ProcessNonemitting " << before << " " << after; -} - -template -void LatticeFasterDecoderTpl::DeleteElems(Elem *list) { - for (Elem *e = list, *e_tail; e != NULL; e = e_tail) { - e_tail = e->tail; - toks_.Delete(e); - } -} - -template -void LatticeFasterDecoderTpl< - FST, Token>::ClearActiveTokens() { // a cleanup routine, at utt end/begin - for (size_t i = 0; i < active_toks_.size(); i++) { - // Delete all tokens alive on this frame, and any forward - // links they may have. - for (Token *tok = active_toks_[i].toks; tok != NULL;) { - DeleteForwardLinks(tok); - Token *next_tok = tok->next; - delete tok; - num_toks_--; - tok = next_tok; - } - } - active_toks_.clear(); - KALDI_ASSERT(num_toks_ == 0); -} - -// static -template -void LatticeFasterDecoderTpl::TopSortTokens( - Token *tok_list, std::vector *topsorted_list) { - unordered_map token2pos; - using std::unordered_set; - typedef typename unordered_map::iterator IterType; - int32 num_toks = 0; - for (Token *tok = tok_list; tok != NULL; tok = tok->next) num_toks++; - int32 cur_pos = 0; - // We assign the tokens numbers num_toks - 1, ... , 2, 1, 0. - // This is likely to be in closer to topological order than - // if we had given them ascending order, because of the way - // new tokens are put at the front of the list. - for (Token *tok = tok_list; tok != NULL; tok = tok->next) - token2pos[tok] = num_toks - ++cur_pos; - - unordered_set reprocess; - - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) { - Token *tok = iter->first; - int32 pos = iter->second; - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - // We only need to consider epsilon links, since non-epsilon links - // transition between frames and this function only needs to sort a list - // of tokens from a single frame. - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { // another token on this - // frame, so must consider it. - int32 next_pos = following_iter->second; - if (next_pos < pos) { // reassign the position of the next Token. - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - // In case we had previously assigned this token to be reprocessed, we can - // erase it from that set because it's "happy now" (we just processed it). - reprocess.erase(tok); - } - - size_t max_loop = 1000000, - loop_count; // max_loop is to detect epsilon cycles. 
- for (loop_count = 0; !reprocess.empty() && loop_count < max_loop; - ++loop_count) { - std::vector reprocess_vec; - for (typename unordered_set::iterator iter = reprocess.begin(); - iter != reprocess.end(); ++iter) - reprocess_vec.push_back(*iter); - reprocess.clear(); - for (typename std::vector::iterator iter = reprocess_vec.begin(); - iter != reprocess_vec.end(); ++iter) { - Token *tok = *iter; - int32 pos = token2pos[tok]; - // Repeat the processing we did above (for comments, see above). - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { - int32 next_pos = following_iter->second; - if (next_pos < pos) { - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - } - } - KALDI_ASSERT(loop_count < max_loop && - "Epsilon loops exist in your decoding " - "graph (this is not allowed!)"); - - topsorted_list->clear(); - topsorted_list->resize(cur_pos, - NULL); // create a list with NULLs in between. - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) - (*topsorted_list)[iter->second] = iter->first; -} - -// Instantiate the template for the combination of token types and FST types -// that we'll need. -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; - -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-decoder.h deleted file mode 100644 index 0152b85447e354b770745b748d266b1ca2d57024..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-decoder.h +++ /dev/null @@ -1,558 +0,0 @@ -// decoder/lattice-faster-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef KALDI_DECODER_LATTICE_FASTER_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_DECODER_H_ - -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "decoder/context_graph.h" -#include "fst/fstlib.h" -#include "fstext/fstext-lib.h" -#include "itf/decodable-itf.h" -#include "lat/determinize-lattice-pruned.h" -#include "lat/kaldi-lattice.h" -#include "util/hash-list.h" - -namespace kaldi { - -struct LatticeFasterDecoderConfig { - BaseFloat beam; - int32 max_active; - int32 min_active; - BaseFloat lattice_beam; - int32 prune_interval; - bool determinize_lattice; // not inspected by this class... used in - // command-line program. - BaseFloat beam_delta; - BaseFloat hash_ratio; - // Note: we don't make prune_scale configurable on the command line, it's not - // a very important parameter. It affects the algorithm that prunes the - // tokens as we go. - BaseFloat prune_scale; - BaseFloat length_penalty; // for balancing the del/ins ratio, suggested -3.0 - - // Most of the options inside det_opts are not actually queried by the - // LatticeFasterDecoder class itself, but by the code that calls it, for - // example in the function DecodeUtteranceLatticeFaster. - fst::DeterminizeLatticePhonePrunedOptions det_opts; - - LatticeFasterDecoderConfig() - : beam(16.0), - max_active(std::numeric_limits::max()), - min_active(200), - lattice_beam(10.0), - prune_interval(25), - determinize_lattice(true), - beam_delta(0.5), - hash_ratio(2.0), - prune_scale(0.1), - length_penalty(0.0) {} - void Register(OptionsItf *opts) { - det_opts.Register(opts); - opts->Register("beam", &beam, - "Decoding beam. Larger->slower, more accurate."); - opts->Register("max-active", &max_active, - "Decoder max active states. Larger->slower; " - "more accurate"); - opts->Register("min-active", &min_active, - "Decoder minimum #active states."); - opts->Register("lattice-beam", &lattice_beam, - "Lattice generation beam. Larger->slower, " - "and deeper lattices"); - opts->Register("prune-interval", &prune_interval, - "Interval (in frames) at " - "which to prune tokens"); - opts->Register( - "determinize-lattice", &determinize_lattice, - "If true, " - "determinize the lattice (lattice-determinization, keeping only " - "best pdf-sequence for each word-sequence)."); - opts->Register( - "beam-delta", &beam_delta, - "Increment used in decoding-- this " - "parameter is obscure and relates to a speedup in the way the " - "max-active constraint is applied. Larger is more accurate."); - opts->Register("hash-ratio", &hash_ratio, - "Setting used in decoder to " - "control hash behavior"); - } - void Check() const { - KALDI_ASSERT(beam > 0.0 && max_active > 1 && lattice_beam > 0.0 && - min_active <= max_active && prune_interval > 0 && - beam_delta > 0.0 && hash_ratio >= 1.0 && prune_scale > 0.0 && - prune_scale < 1.0); - } -}; - -namespace decoder { -// We will template the decoder on the token type as well as the FST type; this -// is a mechanism so that we can use the same underlying decoder code for -// versions of the decoder that support quickly getting the best path -// (LatticeFasterOnlineDecoder, see lattice-faster-online-decoder.h) and also -// those that do not (LatticeFasterDecoder). - -// ForwardLinks are the links from a token to a token on the next frame. -// or sometimes on the current frame (for input-epsilon links). 
-template -struct ForwardLink { - using Label = fst::StdArc::Label; - - Token *next_tok; // the next token [or NULL if represents final-state] - Label ilabel; // ilabel on arc - Label olabel; // olabel on arc - BaseFloat graph_cost; // graph cost of traversing arc (contains LM, etc.) - BaseFloat acoustic_cost; // acoustic cost (pre-scaled) of traversing arc - bool is_start_boundary; - bool is_end_boundary; - float context_score; - ForwardLink *next; // next in singly-linked list of forward arcs (arcs - // in the state-level lattice) from a token. - inline ForwardLink(Token *next_tok, Label ilabel, Label olabel, - BaseFloat graph_cost, BaseFloat acoustic_cost, - bool is_start_boundary, bool is_end_boundary, - ForwardLink *next) - : next_tok(next_tok), - ilabel(ilabel), - olabel(olabel), - graph_cost(graph_cost), - acoustic_cost(acoustic_cost), - is_start_boundary(is_start_boundary), - is_end_boundary(is_end_boundary), - context_score(0), - next(next) {} -}; - -struct StdToken { - using ForwardLinkT = ForwardLink; - using Token = StdToken; - - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. After calling PruneForwardLinks, this equals the - // minimum difference between the cost of the best path that this link is a - // part of, and the cost of the absolute best path, under the assumption that - // any of the currently active states at the decoding front may eventually - // succeed (e.g. if you were to take the currently active states one by one - // and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - Token *next; - - // This function does nothing and should be optimized out; it's needed - // so we can share the regular LatticeFasterDecoderTpl code and the code - // for LatticeFasterOnlineDecoder that supports fast traceback. - inline void SetBackpointer(Token *backpointer) {} - - // This constructor just ignores the 'backpointer' argument. That argument is - // needed so that we can use the same decoder code for LatticeFasterDecoderTpl - // and LatticeFasterOnlineDecoderTpl (which needs backpointers to support a - // fast way to obtain the best path). - inline StdToken(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLinkT *links, - Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - context_state(0), - next(next) {} -}; - -struct BackpointerToken { - using ForwardLinkT = ForwardLink; - using Token = BackpointerToken; - - // BackpointerToken is like Token but also - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. 
After calling PruneForwardLinks, this equals - // the minimum difference between the cost of the best path, and the cost of - // this is on, and the cost of the absolute best path, under the assumption - // that any of the currently active states at the decoding front may - // eventually succeed (e.g. if you were to take the currently active states - // one by one and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - BackpointerToken *next; - - // Best preceding BackpointerToken (could be a on this frame, connected to - // this via an epsilon transition, or on a previous frame). This is only - // required for an efficient GetBestPath function in - // LatticeFasterOnlineDecoderTpl; it plays no part in the lattice generation - // (the "links" list is what stores the forward links, for that). - Token *backpointer; - - inline void SetBackpointer(Token *backpointer) { - this->backpointer = backpointer; - } - - inline BackpointerToken(BaseFloat tot_cost, BaseFloat extra_cost, - ForwardLinkT *links, Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - next(next), - backpointer(backpointer), - context_state(0) {} -}; - -} // namespace decoder - -/** This is the "normal" lattice-generating decoder. - See \ref lattices_generation \ref decoders_faster and \ref decoders_simple - for more information. - - The decoder is templated on the FST type and the token type. The token type - will normally be StdToken, but also may be BackpointerToken which is to - support quick lookup of the current best path (see - lattice-faster-online-decoder.h) - - The FST you invoke this decoder which is expected to equal - Fst::Fst, a.k.a. StdFst, or GrammarFst. If you invoke it with - FST == StdFst and it notices that the actual FST type is - fst::VectorFst or fst::ConstFst, the decoder object - will internally cast itself to one that is templated on those more specific - types; this is an optimization for speed. - */ -template -class LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph); - - // This version of the constructor takes ownership of the fst, and will delete - // it when this object is destroyed. - LatticeFasterDecoderTpl(const LatticeFasterDecoderConfig &config, FST *fst); - - void SetOptions(const LatticeFasterDecoderConfig &config) { - config_ = config; - } - - const LatticeFasterDecoderConfig &GetOptions() const { return config_; } - - ~LatticeFasterDecoderTpl(); - - /// Decodes until there are no more frames left in the "decodable" object.. - /// note, this may block waiting for input if the "decodable" object blocks. - /// Returns true if any kind of traceback is available (not necessarily from a - /// final state). - bool Decode(DecodableInterface *decodable); - - /// says whether a final-state was active on the last frame. 
If it was not, - /// the lattice (or traceback) will end with states that are not final-states. - bool ReachedFinal() const { - return FinalRelativeCost() != std::numeric_limits::infinity(); - } - - /// Outputs an FST corresponding to the single best path through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. Note: this just calls - /// GetRawLattice() and figures out the shortest path. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// Outputs an FST corresponding to the raw, state-level - /// tracebacks. Returns true if result is nonempty. - /// If "use_final_probs" is true AND we reached the final-state - /// of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - /// The raw lattice will be topologically sorted. - /// - /// See also GetRawLatticePruned in lattice-faster-online-decoder.h, - /// which also supports a pruning beam, in case for some reason - /// you want it pruned tighter than the regular lattice beam. - /// We could put that here in future needed. - bool GetRawLattice(Lattice *ofst, bool use_final_probs = true) const; - - /// [Deprecated, users should now use GetRawLattice and determinize it - /// themselves, e.g. using DeterminizeLatticePhonePrunedWrapper]. - /// Outputs an FST corresponding to the lattice-determinized - /// lattice (one path per word sequence). Returns true if result is - /// nonempty. If "use_final_probs" is true AND we reached the final-state of - /// the graph then it will include those as final-probs, else it will treat - /// all final-probs as one. - bool GetLattice(CompactLattice *ofst, bool use_final_probs = true) const; - - /// InitDecoding initializes the decoding, and should only be used if you - /// intend to call AdvanceDecoding(). If you call Decode(), you don't need to - /// call this. You can also call InitDecoding if you have already decoded an - /// utterance and want to start with a new utterance. - void InitDecoding(); - - /// This will decode until there are no more frames ready in the decodable - /// object. You can keep calling it each time more frames become available. - /// If max_num_frames is specified, it specifies the maximum number of frames - /// the function will decode before returning. - void AdvanceDecoding(DecodableInterface *decodable, - int32 max_num_frames = -1); - - /// This function may be optionally called after AdvanceDecoding(), when you - /// do not plan to decode any further. It does an extra pruning step that - /// will help to prune the lattices output by GetLattice and (particularly) - /// GetRawLattice more completely, particularly toward the end of the - /// utterance. If you call this, you cannot call AdvanceDecoding again (it - /// will fail), and you cannot call GetLattice() and related functions with - /// use_final_probs = false. Used to be called PruneActiveTokensFinal(). - void FinalizeDecoding(); - - /// FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives - /// more information. It returns the difference between the best (final-cost - /// plus cost) of any token on the final frame, and the best cost of any token - /// on the final frame. If it is infinity it means no final-states were - /// present on the final frame. It will usually be nonnegative. 
If it not - /// too positive (e.g. < 5 is my first guess, but this is not tested) you can - /// take it as a good indication that we reached the final-state with - /// reasonable likelihood. - BaseFloat FinalRelativeCost() const; - - // Returns the number of frames decoded so far. The value returned changes - // whenever we call ProcessEmitting(). - inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; } - - protected: - // we make things protected instead of private, as code in - // LatticeFasterOnlineDecoderTpl, which inherits from this, also uses the - // internals. - - // Deletes the elements of the singly linked list tok->links. - inline static void DeleteForwardLinks(Token *tok); - - // head of per-frame list of Tokens (list is in topological order), - // and something saying whether we ever pruned it using PruneForwardLinks. - struct TokenList { - Token *toks; - bool must_prune_forward_links; - bool must_prune_tokens; - TokenList() - : toks(NULL), must_prune_forward_links(true), must_prune_tokens(true) {} - }; - - using Elem = typename HashList::Elem; - // Equivalent to: - // struct Elem { - // StateId key; - // Token *val; - // Elem *tail; - // }; - - void PossiblyResizeHash(size_t num_toks); - - // FindOrAddToken either locates a token in hash of toks_, or if necessary - // inserts a new, empty token (i.e. with no forward links) for the current - // frame. [note: it's inserted if necessary into hash toks_ and also into the - // singly linked list of tokens active on this frame (whose head is at - // active_toks_[frame]). The frame_plus_one argument is the acoustic frame - // index plus one, which is used to index into the active_toks_ array. - // Returns the Token pointer. Sets "changed" (if non-NULL) to true if the - // token was newly created or the cost changed. - // If Token == StdToken, the 'backpointer' argument has no purpose (and will - // hopefully be optimized out). - inline Elem *FindOrAddToken(StateId state, int32 frame_plus_one, - BaseFloat tot_cost, Token *backpointer, - bool *changed); - - // prunes outgoing links for all tokens in active_toks_[frame] - // it's called by PruneActiveTokens - // all links, that have link_extra_cost > lattice_beam are pruned - // delta is the amount by which the extra_costs must change - // before we set *extra_costs_changed = true. - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - void PruneForwardLinks(int32 frame_plus_one, bool *extra_costs_changed, - bool *links_pruned, BaseFloat delta); - - // This function computes the final-costs for tokens active on the final - // frame. It outputs to final-costs, if non-NULL, a map from the Token* - // pointer to the final-prob of the corresponding state, for all Tokens - // that correspond to states that have final-probs. This map will be - // empty if there were no final-probs. It outputs to - // final_relative_cost, if non-NULL, the difference between the best - // forward-cost including the final-prob cost, and the best forward-cost - // without including the final-prob cost (this will usually be positive), or - // infinity if there were no final-probs. [c.f. FinalRelativeCost(), which - // outputs this quanitity]. 
It outputs to final_best_cost, if - // non-NULL, the lowest for any token t active on the final frame, of - // forward-cost[t] + final-cost[t], where final-cost[t] is the final-cost in - // the graph of the state corresponding to token t, or the best of - // forward-cost[t] if there were no final-probs active on the final frame. - // You cannot call this after FinalizeDecoding() has been called; in that - // case you should get the answer from class-member variables. - void ComputeFinalCosts(unordered_map *final_costs, - BaseFloat *final_relative_cost, - BaseFloat *final_best_cost) const; - - // PruneForwardLinksFinal is a version of PruneForwardLinks that we call - // on the final frame. If there are final tokens active, it uses - // the final-probs for pruning, otherwise it treats all tokens as final. - void PruneForwardLinksFinal(); - - // Prune away any tokens on this frame that have no forward links. - // [we don't do this in PruneForwardLinks because it would give us - // a problem with dangling pointers]. - // It's called by PruneActiveTokens if any forward links have been pruned - void PruneTokensForFrame(int32 frame_plus_one); - - // Go backwards through still-alive tokens, pruning them if the - // forward+backward cost is more than lat_beam away from the best path. It's - // possible to prove that this is "correct" in the sense that we won't lose - // anything outside of lat_beam, regardless of what happens in the future. - // delta controls when it considers a cost to have changed enough to continue - // going backward and propagating the change. larger delta -> will recurse - // less far. - void PruneActiveTokens(BaseFloat delta); - - /// Gets the weight cutoff. Also counts the active tokens. - BaseFloat GetCutoff(Elem *list_head, size_t *tok_count, - BaseFloat *adaptive_beam, Elem **best_elem); - - /// Processes emitting arcs for one frame. Propagates from prev_toks_ to - /// cur_toks_. Returns the cost cutoff for subsequent ProcessNonemitting() to - /// use. - BaseFloat ProcessEmitting(DecodableInterface *decodable); - - /// Processes nonemitting (epsilon) arcs for one frame. Called after - /// ProcessEmitting() on each frame. The cost cutoff is computed by the - /// preceding ProcessEmitting(). - void ProcessNonemitting(BaseFloat cost_cutoff); - - // HashList defined in ../util/hash-list.h. It actually allows us to maintain - // more than one list (e.g. for current and previous frames), but only one of - // them at a time can be indexed by StateId. It is indexed by frame-index - // plus one, where the frame-index is zero-based, as used in decodable object. - // That is, the emitting probs of frame t are accounted for in tokens at - // toks_[t+1]. The zeroth frame is for nonemitting transition at the start of - // the graph. - HashList toks_; - - std::vector active_toks_; // Lists of tokens, indexed by - // frame (members of TokenList are toks, must_prune_forward_links, - // must_prune_tokens). - std::vector - queue_; // temp variable used in ProcessNonemitting, - std::vector tmp_array_; // used in GetCutoff. - - // fst_ is a pointer to the FST we are decoding from. - const FST *fst_; - // delete_fst_ is true if the pointer fst_ needs to be deleted when this - // object is destroyed. - bool delete_fst_; - - std::vector cost_offsets_; // This contains, for each - // frame, an offset that was added to the acoustic log-likelihoods on that - // frame in order to keep everything in a nice dynamic range i.e. close to - // zero, to reduce roundoff errors. 
- LatticeFasterDecoderConfig config_; - int32 num_toks_; // current total #toks allocated... - bool warned_; - - /// decoding_finalized_ is true if someone called FinalizeDecoding(). [note, - /// calling this is optional]. If true, it's forbidden to decode more. Also, - /// if this is set, then the output of ComputeFinalCosts() is in the next - /// three variables. The reason we need to do this is that after - /// FinalizeDecoding() calls PruneTokensForFrame() for the final frame, some - /// of the tokens on the last frame are freed, so we free the list from toks_ - /// to avoid having dangling pointers hanging around. - bool decoding_finalized_; - /// For the meaning of the next 3 variables, see the comment for - /// decoding_finalized_ above., and ComputeFinalCosts(). - unordered_map final_costs_; - BaseFloat final_relative_cost_; - BaseFloat final_best_cost_; - - std::shared_ptr context_graph_ = nullptr; - - // There are various cleanup tasks... the toks_ structure contains - // singly linked lists of Token pointers, where Elem is the list type. - // It also indexes them in a hash, indexed by state (this hash is only - // maintained for the most recent frame). toks_.Clear() - // deletes them from the hash and returns the list of Elems. The - // function DeleteElems calls toks_.Delete(elem) for each elem in - // the list, which returns ownership of the Elem to the toks_ structure - // for reuse, but does not delete the Token pointer. The Token pointers - // are reference-counted and are ultimately deleted in PruneTokensForFrame, - // but are also linked together on each frame by their own linked-list, - // using the "next" pointer. We delete them manually. - void DeleteElems(Elem *list); - - // This function takes a singly linked list of tokens for a single frame, and - // outputs a list of them in topological order (it will crash if no such order - // can be found, which will typically be due to decoding graphs with epsilon - // cycles, which are not allowed). Note: the output list may contain NULLs, - // which the caller should pass over; it just happens to be more efficient for - // the algorithm to output a list that contains NULLs. - static void TopSortTokens(Token *tok_list, - std::vector *topsorted_list); - - void ClearActiveTokens(); - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterDecoderTpl); -}; - -typedef LatticeFasterDecoderTpl - LatticeFasterDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-online-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-online-decoder.cc deleted file mode 100644 index 2345b4d129ff905784762e973bad279f2fb55d31..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-online-decoder.cc +++ /dev/null @@ -1,278 +0,0 @@ -// decoder/lattice-faster-online-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2014 IMSL, PKU-HKUST (author: Wei Shi) -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -// see note at the top of lattice-faster-decoder.cc, about how to maintain this -// file in sync with lattice-faster-decoder.cc - -#include -#include -#include -#include - -#include "decoder/lattice-faster-online-decoder.h" - -namespace kaldi { - -template -bool LatticeFasterOnlineDecoderTpl::TestGetBestPath( - bool use_final_probs) const { - Lattice lat1; - { - Lattice raw_lat; - this->GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, &lat1); - } - Lattice lat2; - GetBestPath(&lat2, use_final_probs); - BaseFloat delta = 0.1; - int32 num_paths = 1; - if (!fst::RandEquivalent(lat1, lat2, num_paths, delta, rand())) { - KALDI_WARN << "Best-path test failed"; - return false; - } else { - return true; - } -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterOnlineDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - olat->DeleteStates(); - BaseFloat final_graph_cost; - BestPathIterator iter = BestPathEnd(use_final_probs, &final_graph_cost); - if (iter.Done()) return false; // would have printed warning. - StateId state = olat->AddState(); - olat->SetFinal(state, LatticeWeight(final_graph_cost, 0.0)); - while (!iter.Done()) { - LatticeArc arc; - iter = TraceBackBestPath(iter, &arc); - arc.nextstate = state; - StateId new_state = olat->AddState(); - olat->AddArc(new_state, arc); - state = new_state; - } - olat->SetStart(state); - return true; -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::BestPathEnd( - bool use_final_probs, BaseFloat *final_cost_out) const { - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "BestPathEnd() with use_final_probs == false"; - KALDI_ASSERT(this->NumFramesDecoded() > 0 && - "You cannot call BestPathEnd if no frames were decoded."); - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - // Singly linked list of tokens on last frame (access list through "next" - // pointer). - BaseFloat best_cost = std::numeric_limits::infinity(); - BaseFloat best_final_cost = 0; - Token *best_tok = NULL; - for (Token *tok = this->active_toks_.back().toks; tok != NULL; - tok = tok->next) { - BaseFloat cost = tok->tot_cost, final_cost = 0.0; - if (use_final_probs && !final_costs.empty()) { - // if we are instructed to use final-probs, and any final tokens were - // active on final frame, include the final-prob in the cost of the token. 
- typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) { - final_cost = iter->second; - cost += final_cost; - } else { - cost = std::numeric_limits::infinity(); - } - } - if (cost < best_cost) { - best_cost = cost; - best_tok = tok; - best_final_cost = final_cost; - } - } - if (best_tok == - NULL) { // this should not happen, and is likely a code error or - // caused by infinities in likelihoods, but I'm not making - // it a fatal error for now. - KALDI_WARN << "No final token found."; - } - if (final_cost_out) *final_cost_out = best_final_cost; - return BestPathIterator(best_tok, this->NumFramesDecoded() - 1); -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::TraceBackBestPath(BestPathIterator iter, - LatticeArc *oarc) const { - KALDI_ASSERT(!iter.Done() && oarc != NULL); - Token *tok = static_cast(iter.tok); - int32 cur_t = iter.frame, step_t = 0; - if (tok->backpointer != NULL) { - // retrieve the correct forward link(with the best link cost) - BaseFloat best_cost = std::numeric_limits::infinity(); - ForwardLinkT *link; - for (link = tok->backpointer->links; link != NULL; link = link->next) { - if (link->next_tok == tok) { // this is a link to "tok" - BaseFloat graph_cost = link->graph_cost, - acoustic_cost = link->acoustic_cost; - BaseFloat cost = graph_cost + acoustic_cost; - if (cost < best_cost) { - oarc->ilabel = link->ilabel; - oarc->olabel = link->olabel; - if (link->ilabel != 0) { - KALDI_ASSERT(static_cast(cur_t) < - this->cost_offsets_.size()); - acoustic_cost -= this->cost_offsets_[cur_t]; - step_t = -1; - } else { - step_t = 0; - } - oarc->weight = LatticeWeight(graph_cost, acoustic_cost); - best_cost = cost; - } - } - } - if (link == NULL && - best_cost == - std::numeric_limits::infinity()) { // Did not find - // correct link. - KALDI_ERR << "Error tracing best-path back (likely " - << "bug in token-pruning algorithm)"; - } - } else { - oarc->ilabel = 0; - oarc->olabel = 0; - oarc->weight = LatticeWeight::One(); // zero costs. - } - return BestPathIterator(tok->backpointer, cur_t + step_t); -} - -template -bool LatticeFasterOnlineDecoderTpl::GetRawLatticePruned( - Lattice *ofst, bool use_final_probs, BaseFloat beam) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = this->active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - for (int32 f = 0; f <= num_frames; f++) { - if (this->active_toks_[f].toks == NULL) { - KALDI_WARN << "No tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - } - unordered_map tok_map; - std::queue > tok_queue; - // First initialize the queue and states. 
Put the initial state on the queue; - // this is the last token in the list active_toks_[0].toks. - for (Token *tok = this->active_toks_[0].toks; tok != NULL; tok = tok->next) { - if (tok->next == NULL) { - tok_map[tok] = ofst->AddState(); - ofst->SetStart(tok_map[tok]); - std::pair tok_pair(tok, 0); // #frame = 0 - tok_queue.push(tok_pair); - } - } - - // Next create states for "good" tokens - while (!tok_queue.empty()) { - std::pair cur_tok_pair = tok_queue.front(); - tok_queue.pop(); - Token *cur_tok = cur_tok_pair.first; - int32 cur_frame = cur_tok_pair.second; - KALDI_ASSERT(cur_frame >= 0 && cur_frame <= this->cost_offsets_.size()); - - typename unordered_map::const_iterator iter = - tok_map.find(cur_tok); - KALDI_ASSERT(iter != tok_map.end()); - StateId cur_state = iter->second; - - for (ForwardLinkT *l = cur_tok->links; l != NULL; l = l->next) { - Token *next_tok = l->next_tok; - if (next_tok->extra_cost < beam) { - // so both the current and the next token are good; create the arc - int32 next_frame = l->ilabel == 0 ? cur_frame : cur_frame + 1; - StateId nextstate; - if (tok_map.find(next_tok) == tok_map.end()) { - nextstate = tok_map[next_tok] = ofst->AddState(); - tok_queue.push(std::pair(next_tok, next_frame)); - } else { - nextstate = tok_map[next_tok]; - } - BaseFloat cost_offset = - (l->ilabel != 0 ? this->cost_offsets_[cur_frame] : 0); - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(cur_state, arc); - } - } - if (cur_frame == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(cur_tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - return (ofst->NumStates() != 0); -} - -// Instantiate the template for the FST types that we'll need. -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-online-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-online-decoder.h deleted file mode 100644 index dc50cfa73e6574e9625eda9045c47f674fcbc1e3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/decoder/lattice-faster-online-decoder.h +++ /dev/null @@ -1,131 +0,0 @@ -// decoder/lattice-faster-online-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -// see note at the top of lattice-faster-decoder.h, about how to maintain this -// file in sync with lattice-faster-decoder.h - -#ifndef KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ - -#include "decoder/lattice-faster-decoder.h" - -#include - -namespace kaldi { - -/** LatticeFasterOnlineDecoderTpl is as LatticeFasterDecoderTpl but also - supports an efficient way to get the best path (see the function - BestPathEnd()), which is useful in endpointing and in situations where you - might want to frequently access the best path. - - This is only templated on the FST type, since the Token type is required to - be BackpointerToken. Actually it only makes sense to instantiate - LatticeFasterDecoderTpl with Token == BackpointerToken if you do so - indirectly via this child class. - */ -template -class LatticeFasterOnlineDecoderTpl - : public LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using Token = decoder::BackpointerToken; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterOnlineDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : LatticeFasterDecoderTpl(fst, config, context_graph) {} - - // This version of the initializer takes ownership of 'fst', and will delete - // it when this object is destroyed. - LatticeFasterOnlineDecoderTpl(const LatticeFasterDecoderConfig &config, - FST *fst) - : LatticeFasterDecoderTpl(config, fst) {} - - struct BestPathIterator { - void *tok; - int32 frame; - // note, "frame" is the frame-index of the frame you'll get the - // transition-id for next time, if you call TraceBackBestPath on this - // iterator (assuming it's not an epsilon transition). Note that this - // is one less than you might reasonably expect, e.g. it's -1 for - // the nonemitting transitions before the first frame. - BestPathIterator(void *t, int32 f) : tok(t), frame(f) {} - bool Done() const { return tok == NULL; } - }; - - /// Outputs an FST corresponding to the single best path through the lattice. - /// This is quite efficient because it doesn't get the entire raw lattice and - /// find the best path through it; instead, it uses the BestPathEnd and - /// BestPathIterator so it basically traces it back through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// This function does a self-test of GetBestPath(). Returns true on - /// success; returns false and prints a warning on failure. - bool TestGetBestPath(bool use_final_probs = true) const; - - /// This function returns an iterator that can be used to trace back - /// the best path. If use_final_probs == true and at least one final state - /// survived till the end, it will use the final-probs in working out the best - /// final Token, and will output the final cost to *final_cost (if non-NULL), - /// else it will use only the forward likelihood, and will put zero in - /// *final_cost (if non-NULL). 
- /// Requires that NumFramesDecoded() > 0. - BestPathIterator BestPathEnd(bool use_final_probs, - BaseFloat *final_cost = NULL) const; - - /// This function can be used in conjunction with BestPathEnd() to trace back - /// the best path one link at a time (e.g. this can be useful in endpoint - /// detection). By "link" we mean a link in the graph; not all links cross - /// frame boundaries, but each time you see a nonzero ilabel you can interpret - /// that as a frame. The return value is the updated iterator. It outputs - /// the ilabel and olabel, and the (graph and acoustic) weight to the "arc" - /// pointer, while leaving its "nextstate" variable unchanged. - BestPathIterator TraceBackBestPath(BestPathIterator iter, - LatticeArc *arc) const; - - /// Behaves the same as GetRawLattice but only processes tokens whose - /// extra_cost is smaller than the best-cost plus the specified beam. - /// It is only worthwhile to call this function if beam is less than - /// the lattice_beam specified in the config; otherwise, it would - /// return essentially the same thing as GetRawLattice, but more slowly. - bool GetRawLatticePruned(Lattice *ofst, bool use_final_probs, - BaseFloat beam) const; - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterOnlineDecoderTpl); -}; - -typedef LatticeFasterOnlineDecoderTpl LatticeFasterOnlineDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstaddselfloops.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstaddselfloops.cc deleted file mode 100644 index 145bf006f2324136c5fea4a8d0012a7a4126c646..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstaddselfloops.cc +++ /dev/null @@ -1,100 +0,0 @@ -// fstbin/fstaddselfloops.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#include "util/simple-io-funcs.h" - -/* some test examples: - pushd ~/tmpdir - ( echo 3; echo 4) > in.list - ( echo 5; echo 6) > out.list - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstaddselfloops in.list out.list - | fstprint ( echo "0 1 0 1"; echo " 0 2 1 0"; echo "1 0"; echo "2 0"; ) | - fstcompile | fstaddselfloops in.list out.list | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Adds self-loops to states of an FST to propagate disambiguation " - "symbols through it\n" - "They are added on each final state and each state with non-epsilon " - "output symbols\n" - "on at least one arc out of the state. Useful in conjunction with " - "predeterminize\n" - "\n" - "Usage: fstaddselfloops in-disambig-list out-disambig-list [in.fst " - "[out.fst] ]\n" - "E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst\n" - "in.list and out.list are lists of integers, one per line, of the\n" - "same length.\n"; - - ParseOptions po(usage); - po.Read(argc, argv); - - if (po.NumArgs() < 2 || po.NumArgs() > 4) { - po.PrintUsage(); - exit(1); - } - - std::string disambig_in_rxfilename = po.GetArg(1), - disambig_out_rxfilename = po.GetArg(2), - fst_in_filename = po.GetOptArg(3), - fst_out_filename = po.GetOptArg(4); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - std::vector disambig_in; - if (!ReadIntegerVectorSimple(disambig_in_rxfilename, &disambig_in)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_in_rxfilename); - - std::vector disambig_out; - if (!ReadIntegerVectorSimple(disambig_out_rxfilename, &disambig_out)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_out_rxfilename); - - if (disambig_in.size() != disambig_out.size()) - KALDI_ERR - << "fstaddselfloops: mismatch in size of disambiguation symbols"; - - AddSelfLoops(fst, disambig_in, disambig_out); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstdeterminizestar.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstdeterminizestar.cc deleted file mode 100644 index e818143025c0fd5d389c28c77715d65711fe63f1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstdeterminizestar.cc +++ /dev/null @@ -1,114 +0,0 @@ -// fstbin/fstdeterminizestar.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#if !defined(_MSC_VER) && !defined(__APPLE__) -#include // Comment this line and the call to signal below if -// it causes compilation problems. It is only to enable a debugging procedure -// when determinization does not terminate. We are disabling this code if -// compiling on Windows because signal.h is not available there, and on -// MacOS due to a problem with in the initial release of Sierra. -#endif - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 1 1 0"; echo "0 0" ) | fstcompile | - fstdeterminizestar | fstprint # this last one fails [correctly]: ( echo "0 0 0 - 1"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - - cd ~/tmpdir - while true; do - fstrand > 1.fst - fstpredeterminize out.lst 1.fst | fstdeterminizestar | fstrmsymbols out.lst - > 2.fst fstequivalent --random=true 1.fst 2.fst || echo "Test failed" echo -n - "." done - - Test of debugging [with non-determinizable input]: - ( echo " 0 0 1 0 1.0"; echo "0 1 1 0"; echo "1 1 1 0 0"; echo "0 2 2 0"; echo - "2"; echo "1" ) | fstcompile | fstdeterminizestar kill -SIGUSR1 [the process-id - of fstdeterminizestar] # prints out a bunch of debugging output showing the - mess it got itself into. -*/ - -bool debug_location = false; -void signal_handler(int) { debug_location = true; } - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Removes epsilons and determinizes in one step\n" - "\n" - "Usage: fstdeterminizestar [in.fst [out.fst] ]\n" - "\n" - "See also: fstdeterminizelog, lattice-determinize\n"; - - float delta = kDelta; - int max_states = -1; - bool use_log = false; - ParseOptions po(usage); - po.Register("use-log", &use_log, "Determinize in log semiring."); - po.Register("delta", &delta, - "Delta value used to determine equivalence of weights."); - po.Register( - "max-states", &max_states, - "Maximum number of states in determinized FST before it will abort."); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_str = po.GetOptArg(1), fst_out_str = po.GetOptArg(2); - - // This enables us to get traceback info from determinization that is - // not seeming to terminate. -#if !defined(_MSC_VER) && !defined(__APPLE__) - signal(SIGUSR1, signal_handler); -#endif - // Normal case: just files. - VectorFst *fst = ReadFstKaldi(fst_in_str); - - ArcSort(fst, ILabelCompare()); // improves speed. 
- if (use_log) { - DeterminizeStarInLog(fst, delta, &debug_location, max_states); - } else { - VectorFst det_fst; - DeterminizeStar(*fst, &det_fst, delta, &debug_location, max_states); - *fst = det_fst; // will do shallow copy and then det_fst goes - // out of scope anyway. - } - WriteFstKaldi(*fst, fst_out_str); - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstisstochastic.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstisstochastic.cc deleted file mode 100644 index 468ed0daa7d37cb9a25cf25264f86e48e137b975..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstisstochastic.cc +++ /dev/null @@ -1,91 +0,0 @@ -// fstbin/fstisstochastic.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -// e.g. of test: -// echo " 0 0" | fstcompile | fstisstochastic -// should return 0 and print "0 0" [meaning, min and -// max weight are one = exp(0)] -// echo " 0 1" | fstcompile | fstisstochastic -// should return 1, not stochastic, and print 1 1 -// (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic should return 0, stochastic; it prints "0 -// -1.78e-07" for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo -// "1 0" ) | fstcompile | fstisstochastic --test-in-log=false should return 1, -// not stochastic in tropical; it prints "0 0.693147" for me (echo "0 0 0 0 0 "; -// echo "0 1 0 0 0 "; echo "1 0" ) | fstcompile | fstisstochastic -// --test-in-log=false should return 0, stochastic in tropical; it prints "0 0" -// for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic --test-in-log=false --delta=1 returns 0 even -// though not stochastic because we gave it an absurdly large delta. 
- -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Checks whether an FST is stochastic and exits with success if so.\n" - "Prints out maximum error (in log units).\n" - "\n" - "Usage: fstisstochastic [ in.fst ]\n"; - - float delta = 0.01; - bool test_in_log = true; - - ParseOptions po(usage); - po.Register("delta", &delta, "Maximum error to accept."); - po.Register("test-in-log", &test_in_log, - "Test stochasticity in log semiring."); - po.Read(argc, argv); - - if (po.NumArgs() > 1) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1); - - Fst *fst = ReadFstKaldiGeneric(fst_in_filename); - - bool ans; - StdArc::Weight min, max; - if (test_in_log) - ans = IsStochasticFstInLog(*fst, delta, &min, &max); - else - ans = IsStochasticFst(*fst, delta, &min, &max); - - std::cout << min.Value() << " " << max.Value() << '\n'; - delete fst; - if (ans) - return 0; // success; - else - return 1; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstminimizeencoded.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstminimizeencoded.cc deleted file mode 100644 index ae9ca6d75abe67d9a195572dd6d91ec3c7b44851..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fstminimizeencoded.cc +++ /dev/null @@ -1,74 +0,0 @@ -// fstbin/fstminimizeencoded.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstminimizeencoded | fstprint - ( echo "0 1 0 0"; echo " 0 2 0 0"; echo "1 0"; echo "2 0"; ) | fstcompile | - fstminimizeencoded | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Minimizes FST after encoding [similar to fstminimize, but no " - "weight-pushing]\n" - "\n" - "Usage: fstminimizeencoded [in.fst [out.fst] ]\n"; - - float delta = kDelta; - ParseOptions po(usage); - po.Register("delta", &delta, - "Delta likelihood used for quantization of weights"); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1), - fst_out_filename = po.GetOptArg(2); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - MinimizeEncoded(fst, delta); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fsttablecompose.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fsttablecompose.cc deleted file mode 100644 index bdd476da78b8cb8823c60abf33b5278e05bfd92c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstbin/fsttablecompose.cc +++ /dev/null @@ -1,133 +0,0 @@ -// fstbin/fsttablecompose.cc - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "fstext/table-matcher.h" -#include "util/parse-options.h" - -/* - cd ~/tmpdir - while true; do - fstrand | fstarcsort --sort_type=olabel > 1.fst; fstrand | fstarcsort - > 2.fst fstcompose 1.fst 2.fst > 3a.fst fsttablecompose 1.fst 2.fst > 3b.fst - fstequivalent --random=true 3a.fst 3b.fst || echo "Test failed" - echo -n "." - done - -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - /* - fsttablecompose should always give equivalent results to compose, - but it is more efficient for certain kinds of inputs. 
- In particular, it is useful when, say, the left FST has states - that typically either have epsilon olabels, or - one transition out for each of the possible symbols (as the - olabel). The same with the input symbols of the right-hand FST - is possible. - */ - - const char *usage = - "Composition algorithm [between two FSTs of standard type, in " - "tropical\n" - "semiring] that is more efficient for certain cases-- in particular,\n" - "where one of the FSTs (the left one, if --match-side=left) has large\n" - "out-degree\n" - "\n" - "Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) " - "(fst2-rxfilename|fst2-rspecifier) [(out-rxfilename|out-rspecifier)]\n"; - - ParseOptions po(usage); - - TableComposeOptions opts; - std::string match_side = "left"; - std::string compose_filter = "sequence"; - - po.Register("connect", &opts.connect, "If true, trim FST before output."); - po.Register("match-side", &match_side, - "Side of composition to do table " - "match, one of: \"left\" or \"right\"."); - po.Register("compose-filter", &compose_filter, - "Composition filter to use, " - "one of: \"alt_sequence\", \"auto\", \"match\", \"sequence\""); - - po.Read(argc, argv); - - if (match_side == "left") { - opts.table_match_type = MATCH_OUTPUT; - } else if (match_side == "right") { - opts.table_match_type = MATCH_INPUT; - } else { - KALDI_ERR << "Invalid match-side option: " << match_side; - } - - if (compose_filter == "alt_sequence") { - opts.filter_type = ALT_SEQUENCE_FILTER; - } else if (compose_filter == "auto") { - opts.filter_type = AUTO_FILTER; - } else if (compose_filter == "match") { - opts.filter_type = MATCH_FILTER; - } else if (compose_filter == "sequence") { - opts.filter_type = SEQUENCE_FILTER; - } else { - KALDI_ERR << "Invalid compose-filter option: " << compose_filter; - } - - if (po.NumArgs() < 2 || po.NumArgs() > 3) { - po.PrintUsage(); - exit(1); - } - - std::string fst1_in_str = po.GetArg(1), fst2_in_str = po.GetArg(2), - fst_out_str = po.GetOptArg(3); - - VectorFst *fst1 = ReadFstKaldi(fst1_in_str); - - VectorFst *fst2 = ReadFstKaldi(fst2_in_str); - - // Checks if is olabel sorted and is ilabel sorted. - if (fst1->Properties(fst::kOLabelSorted, true) == 0) { - KALDI_WARN << "The first FST is not olabel sorted."; - } - if (fst2->Properties(fst::kILabelSorted, true) == 0) { - KALDI_WARN << "The second FST is not ilabel sorted."; - } - - VectorFst composed_fst; - - TableCompose(*fst1, *fst2, &composed_fst, opts); - - delete fst1; - delete fst2; - - WriteFstKaldi(composed_fst, fst_out_str); - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstext/determinize-lattice-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstext/determinize-lattice-inl.h deleted file mode 100644 index 0bfbc8f41c7e439b1fac037f60490e04fdcbdd8b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/fstext/determinize-lattice-inl.h +++ /dev/null @@ -1,1357 +0,0 @@ -// fstext/determinize-lattice-inl.h - -// Copyright 2009-2012 Microsoft Corporation -// 2012-2013 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -#define KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -// Do not include this file directly. It is included by determinize-lattice.h - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fst { - -// This class maps back and forth from/to integer id's to sequences of strings. -// used in determinization algorithm. It is constructed in such a way that -// finding the string-id of the successor of (string, next-label) has constant -// time. - -// Note: class IntType, typically int32, is the type of the element in the -// string (typically a template argument of the CompactLatticeWeightTpl). - -template -class LatticeStringRepository { - public: - struct Entry { - const Entry *parent; // NULL for empty string. - IntType i; - inline bool operator==(const Entry &other) const { - return (parent == other.parent && i == other.i); - } - Entry() {} - Entry(const Entry &e) : parent(e.parent), i(e.i) {} - }; - // Note: all Entry* pointers returned in function calls are - // owned by the repository itself, not by the caller! - - // Interface guarantees empty string is NULL. - inline const Entry *EmptyString() { return NULL; } - - // Returns string of "parent" with i appended. Pointer - // owned by repository - const Entry *Successor(const Entry *parent, IntType i) { - new_entry_->parent = parent; - new_entry_->i = i; - - std::pair pr = set_.insert(new_entry_); - if (pr.second) { // Was successfully inserted (was not there). We need to - // replace the element we inserted, which resides on the - // stack, with one from the heap. - const Entry *ans = new_entry_; - new_entry_ = new Entry(); - return ans; - } else { // Was not inserted because an equivalent Entry already - // existed. - return *pr.first; - } - } - - const Entry *Concatenate(const Entry *a, const Entry *b) { - if (a == NULL) - return b; - else if (b == NULL) - return a; - std::vector v; - ConvertToVector(b, &v); - const Entry *ans = a; - for (size_t i = 0; i < v.size(); i++) ans = Successor(ans, v[i]); - return ans; - } - const Entry *CommonPrefix(const Entry *a, const Entry *b) { - std::vector a_vec, b_vec; - ConvertToVector(a, &a_vec); - ConvertToVector(b, &b_vec); - const Entry *ans = NULL; - for (size_t i = 0; - i < a_vec.size() && i < b_vec.size() && a_vec[i] == b_vec[i]; i++) - ans = Successor(ans, a_vec[i]); - return ans; - } - - // removes any elements from b that are not part of - // a common prefix with a. - void ReduceToCommonPrefix(const Entry *a, std::vector *b) { - size_t a_size = Size(a), b_size = b->size(); - while (a_size > b_size) { - a = a->parent; - a_size--; - } - if (b_size > a_size) b_size = a_size; - typename std::vector::iterator b_begin = b->begin(); - while (a_size != 0) { - if (a->i != *(b_begin + a_size - 1)) b_size = a_size - 1; - a = a->parent; - a_size--; - } - if (b_size != b->size()) b->resize(b_size); - } - - // removes the first n elements of a. 
- const Entry *RemovePrefix(const Entry *a, size_t n) { - if (n == 0) return a; - std::vector a_vec; - ConvertToVector(a, &a_vec); - assert(a_vec.size() >= n); - const Entry *ans = NULL; - for (size_t i = n; i < a_vec.size(); i++) ans = Successor(ans, a_vec[i]); - return ans; - } - - // Returns true if a is a prefix of b. If a is prefix of b, - // time taken is |b| - |a|. Else, time taken is |b|. - bool IsPrefixOf(const Entry *a, const Entry *b) const { - if (a == NULL) return true; // empty string prefix of all. - if (a == b) return true; - if (b == NULL) return false; - return IsPrefixOf(a, b->parent); - } - - inline size_t Size(const Entry *entry) const { - size_t ans = 0; - while (entry != NULL) { - ans++; - entry = entry->parent; - } - return ans; - } - - void ConvertToVector(const Entry *entry, std::vector *out) const { - size_t length = Size(entry); - out->resize(length); - if (entry != NULL) { - typename std::vector::reverse_iterator iter = out->rbegin(); - while (entry != NULL) { - *iter = entry->i; - entry = entry->parent; - ++iter; - } - } - } - - const Entry *ConvertFromVector(const std::vector &vec) { - const Entry *e = NULL; - for (size_t i = 0; i < vec.size(); i++) e = Successor(e, vec[i]); - return e; - } - - LatticeStringRepository() { new_entry_ = new Entry; } - - void Destroy() { - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) - delete *iter; - SetType tmp; - tmp.swap(set_); - if (new_entry_) { - delete new_entry_; - new_entry_ = NULL; - } - } - - // Rebuild will rebuild this object, guaranteeing only - // to preserve the Entry values that are in the vector pointed - // to (this list does not have to be unique). The point of - // this is to save memory. - void Rebuild(const std::vector &to_keep) { - SetType tmp_set; - for (typename std::vector::const_iterator iter = - to_keep.begin(); - iter != to_keep.end(); ++iter) - RebuildHelper(*iter, &tmp_set); - // Now delete all elems not in tmp_set. - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) { - if (tmp_set.count(*iter) == 0) - delete (*iter); // delete the Entry; not needed. - } - set_.swap(tmp_set); - } - - ~LatticeStringRepository() { Destroy(); } - int32 MemSize() const { - return set_.size() * sizeof(Entry) * 2; // this is a lower bound - // on the size this structure might take. - } - - private: - class EntryKey { // Hash function object. - public: - inline size_t operator()(const Entry *entry) const { - size_t prime = 49109; - return static_cast(entry->i) + - prime * reinterpret_cast(entry->parent); - } - }; - class EntryEqual { - public: - inline bool operator()(const Entry *e1, const Entry *e2) const { - return (*e1 == *e2); - } - }; - typedef std::unordered_set SetType; - - void RebuildHelper(const Entry *to_add, SetType *tmp_set) { - while (true) { - if (to_add == NULL) return; - typename SetType::iterator iter = tmp_set->find(to_add); - if (iter == tmp_set->end()) { // not in tmp_set. - tmp_set->insert(to_add); - to_add = to_add->parent; // and loop. - } else { - return; - } - } - } - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeStringRepository); - Entry *new_entry_; // We always have a pre-allocated Entry ready to use, - // to avoid unnecessary news and deletes. - SetType set_; -}; - -// class LatticeDeterminizer is templated on the same types that -// CompactLatticeWeight is templated on: the base weight (Weight), typically -// LatticeWeightTpl etc. but could also be e.g. 
TropicalWeight, and the -// IntType, typically int32, used for the output symbols in the compact -// representation of strings [note: the output symbols would usually be -// p.d.f. id's in the anticipated use of this code] It has a special requirement -// on the Weight type: that there should be a Compare function on the weights -// such that Compare(w1, w2) returns -1 if w1 < w2, 0 if w1 == w2, and +1 if w1 -// > w2. This requires that there be a total order on the weights. - -template -class LatticeDeterminizer { - public: - // Output to Gallic acceptor (so the strings go on weights, and there is a 1-1 - // correspondence between our states and the states in ofst. If destroy == - // true, release memory as we go (but we cannot output again). - - typedef CompactLatticeWeightTpl CompactWeight; - typedef ArcTpl - CompactArc; // arc in compact, acceptor form of lattice - typedef ArcTpl Arc; // arc in non-compact version of lattice - - // Output to standard FST with CompactWeightTpl as its weight type - // (the weight stores the original output-symbol strings). If destroy == - // true, release memory as we go (but we cannot output again). - void Output(MutableFst *ofst, bool destroy = true) { - assert(determinized_); - typedef typename Arc::StateId StateId; - StateId nStates = static_cast(output_arcs_.size()); - if (destroy) FreeMostMemory(); - ofst->DeleteStates(); - ofst->SetStart(kNoStateId); - if (nStates == 0) { - return; - } - for (StateId s = 0; s < nStates; s++) { - OutputStateId news = ofst->AddState(); - assert(news == s); - } - ofst->SetStart(0); - // now process transitions. - for (StateId this_state = 0; this_state < nStates; this_state++) { - std::vector &this_vec(output_arcs_[this_state]); - typename std::vector::const_iterator iter = this_vec.begin(), - end = this_vec.end(); - - for (; iter != end; ++iter) { - const TempArc &temp_arc(*iter); - CompactArc new_arc; - std::vector is not treated as epsilon, create a common end state for - // all transitions accepting the , since they do not back off. This small - // optimization saves about 2% states in an average grammar. - if (sub_eps_ == 0) { - eos_state_ = fst_->AddState(); - fst_->SetFinal(eos_state_, 0); - } -} - -template -void ArpaLmCompilerImpl::ConsumeNGram(const NGram& ngram, - bool is_highest) { - // Generally, we do the following. Suppose we are adding an n-gram "A B - // C". Then find the node for "A B", add a new node for "A B C", and connect - // them with the arc accepting "C" with the specified weight. Also, add a - // backoff arc from the new "A B C" node to its backoff state "B C". - // - // Two notable exceptions are the highest order n-grams, and final n-grams. - // - // When adding a highest order n-gram (e. g., our "A B C" is in a 3-gram LM), - // the following optimization is performed. There is no point adding a node - // for "A B C" with a "C" arc from "A B", since there will be no other - // arcs ingoing to this node, and an epsilon backoff arc into the backoff - // model "B C", with the weight of \bar{1}. To save a node, create an arc - // accepting "C" directly from "A B" to "B C". This saves as many nodes - // as there are the highest order n-grams, which is typically about half - // the size of a large 3-gram model. - // - // Indeed, this does not apply to n-grams ending in EOS, since they do not - // back off. These are special, as they do not have a back-off state, and - // the node for "(..anything..) " is always final. 
These are handled - // in one of the two possible ways, If symbols and are being - // replaced by epsilons, neither node nor arc is created, and the logprob - // of the n-gram is applied to its source node as final weight. If and - // are preserved, then a special final node for is allocated and - // used as the destination of the "" acceptor arc. - HistKey heads(ngram.words.begin(), ngram.words.end() - 1); - typename HistoryMap::iterator source_it = history_.find(heads); - if (source_it == history_.end()) { - // There was no "A B", therefore the probability of "A B C" is zero. - // Print a warning and discard current n-gram. - if (parent_->ShouldWarn()) - KALDI_WARN << parent_->LineReference() - << " skipped: no parent (n-1)-gram exists"; - return; - } - - StateId source = source_it->second; - StateId dest; - Symbol sym = ngram.words.back(); - float weight = -ngram.logprob; - if (sym == sub_eps_ || sym == 0) { - KALDI_ERR << " or disambiguation symbol " << sym - << "found in the ARPA file. "; - } - if (sym == eos_symbol_) { - if (sub_eps_ == 0) { - // Keep as a real symbol when not substituting. - dest = eos_state_; - } else { - // Treat as if it was epsilon: mark source final, with the weight - // of the n-gram. - fst_->SetFinal(source, weight); - return; - } - } else { - // For the highest order n-gram, this may find an existing state, for - // non-highest, will create one (unless there are duplicate n-grams - // in the grammar, which cannot be reliably detected if highest order, - // so we better do not do that at all). - dest = AddStateWithBackoff( - HistKey(ngram.words.begin() + (is_highest ? 1 : 0), ngram.words.end()), - -ngram.backoff); - } - - if (sym == bos_symbol_) { - weight = 0; // Accepting is always free. - if (sub_eps_ == 0) { - // is as a real symbol, only accepted in the start state. - source = fst_->AddState(); - fst_->SetStart(source); - } else { - // The new state for unigram history *is* the start state. - fst_->SetStart(dest); - return; - } - } - - // Add arc from source to dest, whichever way it was found. - fst_->AddArc(source, fst::StdArc(sym, sym, weight, dest)); - return; -} - -// Find or create a new state for n-gram defined by key, and ensure it has a -// backoff transition. The key is either the current n-gram for all but -// highest orders, or the tails of the n-gram for the highest order. The -// latter arises from the chain-collapsing optimization described above. -template -StateId ArpaLmCompilerImpl::AddStateWithBackoff(HistKey key, - float backoff) { - typename HistoryMap::iterator dest_it = history_.find(key); - if (dest_it != history_.end()) { - // Found an existing state in the history map. Invariant: if the state in - // the map, then its backoff arc is in the FST. We are done. - return dest_it->second; - } - // Otherwise create a new state and its backoff arc, and register in the map. - StateId dest = fst_->AddState(); - history_[key] = dest; - CreateBackoff(key.Tails(), dest, backoff); - return dest; -} - -// Create a backoff arc for a state. Key is a backoff destination that may or -// may not exist. When the destination is not found, naturally fall back to -// the lower order model, and all the way down until one is found (since the -// 0-gram model is always present, the search is guaranteed to terminate). 
-template -inline void ArpaLmCompilerImpl::CreateBackoff(HistKey key, - StateId state, - float weight) { - typename HistoryMap::iterator dest_it = history_.find(key); - while (dest_it == history_.end()) { - key = key.Tails(); - dest_it = history_.find(key); - } - - // The arc should transduce either or #0 to , depending on the - // epsilon substitution mode. This is the only case when input and output - // label may differ. - fst_->AddArc(state, fst::StdArc(sub_eps_, 0, weight, dest_it->second)); -} - -ArpaLmCompiler::~ArpaLmCompiler() { - if (impl_ != NULL) delete impl_; -} - -void ArpaLmCompiler::HeaderAvailable() { - KALDI_ASSERT(impl_ == NULL); - // Use optimized implementation if the grammar is 4-gram or less, and the - // maximum attained symbol id will fit into the optimized range. - int64 max_symbol = 0; - if (Symbols() != NULL) max_symbol = Symbols()->AvailableKey() - 1; - // If augmenting the symbol table, assume the worst case when all words in - // the model being read are novel. - if (Options().oov_handling == ArpaParseOptions::kAddToSymbols) - max_symbol += NgramCounts()[0]; - - if (NgramCounts().size() <= 4 && max_symbol < OptimizedHistKey::kMaxData) { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - } else { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - KALDI_LOG << "Reverting to slower state tracking because model is large: " - << NgramCounts().size() << "-gram with symbols up to " - << max_symbol; - } -} - -void ArpaLmCompiler::ConsumeNGram(const NGram& ngram) { - // is invalid in tails, in heads of an n-gram. - for (int i = 0; i < ngram.words.size(); ++i) { - if ((i > 0 && ngram.words[i] == Options().bos_symbol) || - (i + 1 < ngram.words.size() && - ngram.words[i] == Options().eos_symbol)) { - if (ShouldWarn()) - KALDI_WARN << LineReference() - << " skipped: n-gram has invalid BOS/EOS placement"; - return; - } - } - - bool is_highest = ngram.words.size() == NgramCounts().size(); - impl_->ConsumeNGram(ngram, is_highest); -} - -void ArpaLmCompiler::RemoveRedundantStates() { - fst::StdArc::Label backoff_symbol = sub_eps_; - if (backoff_symbol == 0) { - // The method of removing redundant states implemented in this function - // leads to slow determinization of L o G when people use the older style of - // usage of arpa2fst where the --disambig-symbol option was not specified. - // The issue seems to be that it creates a non-deterministic FST, while G is - // supposed to be deterministic. By 'return'ing below, we just disable this - // method if people were using an older script. This method isn't really - // that consequential anyway, and people will move to the newer-style - // scripts (see current utils/format_lm.sh), so this isn't much of a - // problem. - return; - } - - fst::StdArc::StateId num_states = fst_.NumStates(); - - // replace the #0 symbols on the input of arcs out of redundant states (states - // that are not final and have only a backoff arc leaving them), with . 
- for (fst::StdArc::StateId state = 0; state < num_states; state++) { - if (fst_.NumArcs(state) == 1 && - fst_.Final(state) == fst::TropicalWeight::Zero()) { - fst::MutableArcIterator iter(&fst_, state); - fst::StdArc arc = iter.Value(); - if (arc.ilabel == backoff_symbol) { - arc.ilabel = 0; - iter.SetValue(arc); - } - } - } - - // we could call fst::RemoveEps, and it would have the same effect in normal - // cases, where backoff_symbol != 0 and there are no epsilons in unexpected - // places, but RemoveEpsLocal is a bit safer in case something weird is going - // on; it guarantees not to blow up the FST. - fst::RemoveEpsLocal(&fst_); - KALDI_LOG << "Reduced num-states from " << num_states << " to " - << fst_.NumStates(); -} - -void ArpaLmCompiler::Check() const { - if (fst_.Start() == fst::kNoStateId) { - KALDI_ERR << "Arpa file did not contain the beginning-of-sentence symbol " - << Symbols()->Find(Options().bos_symbol) << "."; - } -} - -void ArpaLmCompiler::ReadComplete() { - fst_.SetInputSymbols(Symbols()); - fst_.SetOutputSymbols(Symbols()); - RemoveRedundantStates(); - Check(); -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/lm/arpa-lm-compiler.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/lm/arpa-lm-compiler.h deleted file mode 100644 index 069c71bd0e6f5acf0b9521ec1ef46796eb31fe4d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/lm/arpa-lm-compiler.h +++ /dev/null @@ -1,63 +0,0 @@ -// lm/arpa-lm-compiler.h - -// Copyright 2009-2011 Gilles Boulianne -// Copyright 2016 Smart Action LLC (kkm) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_LM_ARPA_LM_COMPILER_H_ -#define KALDI_LM_ARPA_LM_COMPILER_H_ - -#include - -#include "lm/arpa-file-parser.h" - -namespace kaldi { - -class ArpaLmCompilerImplInterface; - -class ArpaLmCompiler : public ArpaFileParser { - public: - ArpaLmCompiler(const ArpaParseOptions& options, int sub_eps, - fst::SymbolTable* symbols) - : ArpaFileParser(options, symbols), sub_eps_(sub_eps), impl_(NULL) {} - ~ArpaLmCompiler(); - - const fst::StdVectorFst& Fst() const { return fst_; } - fst::StdVectorFst* MutableFst() { return &fst_; } - - protected: - // ArpaFileParser overrides. - virtual void HeaderAvailable(); - virtual void ConsumeNGram(const NGram& ngram); - virtual void ReadComplete(); - - private: - // this function removes states that only have a backoff arc coming - // out of them. - void RemoveRedundantStates(); - void Check() const; - - int sub_eps_; - ArpaLmCompilerImplInterface* impl_; // Owned. 
- fst::StdVectorFst fst_; - template - friend class ArpaLmCompilerImpl; -}; - -} // namespace kaldi - -#endif // KALDI_LM_ARPA_LM_COMPILER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/lmbin/arpa2fst.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/lmbin/arpa2fst.cc deleted file mode 100644 index 881a45c5b37810247ea38dae56237f59b5554a9c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/lmbin/arpa2fst.cc +++ /dev/null @@ -1,145 +0,0 @@ -// bin/arpa2fst.cc -// -// Copyright 2009-2011 Gilles Boulianne. -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABILITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "lm/arpa-lm-compiler.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; // NOLINT - try { - const char *usage = - "Convert an ARPA format language model into an FST\n" - "Usage: arpa2fst [opts] \n" - " e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table=" - "data/lang/words.txt lm/input.arpa G.fst\n\n" - "Note: When called without switches, the output G.fst will contain\n" - "an embedded symbol table. This is compatible with the way a previous\n" - "version of arpa2fst worked.\n"; - - ParseOptions po(usage); - - ArpaParseOptions options; - options.Register(&po); - - // Option flags. - std::string bos_symbol = ""; - std::string eos_symbol = ""; - std::string disambig_symbol; - std::string read_syms_filename; - std::string write_syms_filename; - bool keep_symbols = false; - bool ilabel_sort = true; - - po.Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol"); - po.Register("eos-symbol", &eos_symbol, "End of sentence symbol"); - po.Register("disambig-symbol", &disambig_symbol, - "Disambiguator. If provided (e. g. #0), used on input side of " - "backoff links, and and are replaced with epsilons"); - po.Register("read-symbol-table", &read_syms_filename, - "Use existing symbol table"); - po.Register("write-symbol-table", &write_syms_filename, - "Write generated symbol table to a file"); - po.Register("keep-symbols", &keep_symbols, - "Store symbol table with FST. Symbols always saved to FST if " - "symbol tables are neither read or written (otherwise symbols " - "would be lost entirely)"); - po.Register("ilabel-sort", &ilabel_sort, "Ilabel-sort the output FST"); - - po.Read(argc, argv); - - if (po.NumArgs() != 1 && po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - std::string arpa_rxfilename = po.GetArg(1), - fst_wxfilename = po.GetOptArg(2); - - int64 disambig_symbol_id = 0; - - fst::SymbolTable *symbols; - if (!read_syms_filename.empty()) { - // Use existing symbols. Required symbols must be in the table. 
- kaldi::Input kisym(read_syms_filename); - symbols = fst::SymbolTable::ReadText( - kisym.Stream(), PrintableWxfilename(read_syms_filename)); - if (symbols == NULL) - KALDI_ERR << "Could not read symbol table from file " - << read_syms_filename; - - options.oov_handling = ArpaParseOptions::kSkipNGram; - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->Find(disambig_symbol); - if (disambig_symbol_id == -1) // fst::kNoSymbol - KALDI_ERR << "Symbol table " << read_syms_filename - << " has no symbol for " << disambig_symbol; - } - } else { - // Create a new symbol table and populate it from ARPA file. - symbols = new fst::SymbolTable(PrintableWxfilename(fst_wxfilename)); - options.oov_handling = ArpaParseOptions::kAddToSymbols; - symbols->AddSymbol("", 0); - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->AddSymbol(disambig_symbol); - } - } - - // Add or use existing BOS and EOS. - options.bos_symbol = symbols->AddSymbol(bos_symbol); - options.eos_symbol = symbols->AddSymbol(eos_symbol); - - // If producing new (not reading existing) symbols and not saving them, - // need to keep symbols with FST, otherwise they would be lost. - if (read_syms_filename.empty() && write_syms_filename.empty()) - keep_symbols = true; - - // Actually compile LM. - KALDI_ASSERT(symbols != NULL); - ArpaLmCompiler lm_compiler(options, disambig_symbol_id, symbols); - { - Input ki(arpa_rxfilename); - lm_compiler.Read(ki.Stream()); - } - - // Sort the FST in-place if requested by options. - if (ilabel_sort) { - fst::ArcSort(lm_compiler.MutableFst(), fst::StdILabelCompare()); - } - - // Write symbols if requested. - if (!write_syms_filename.empty()) { - kaldi::Output kosym(write_syms_filename, false); - symbols->WriteText(kosym.Stream()); - } - - // Write LM FST. - bool write_binary = true, write_header = false; - kaldi::Output kofst(fst_wxfilename, write_binary, write_header); - fst::FstWriteOptions wopts(PrintableWxfilename(fst_wxfilename)); - wopts.write_isymbols = wopts.write_osymbols = keep_symbols; - lm_compiler.Fst().Write(kofst.Stream(), wopts); - - delete symbols; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/basic-filebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/basic-filebuf.h deleted file mode 100644 index 22ec891064d5955c8b1d255e0d34781a9f505a38..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/basic-filebuf.h +++ /dev/null @@ -1,952 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// This is a modified version of the std::basic_filebuf from libc++ -// Copyright 20XX LLVM -// (http://libcxx.llvm.org/). -// It allows one to create basic_filebuf from an existing FILE* handle or file -// descriptor. -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source License licenses. See LICENSE.TXT for details (included at the -// bottom). 
-/////////////////////////////////////////////////////////////////////////////// -#ifndef KALDI_UTIL_BASIC_FILEBUF_H_ -#define KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include - -/////////////////////////////////////////////////////////////////////////////// -namespace kaldi { -/////////////////////////////////////////////////////////////////////////////// -template > -class basic_filebuf : public std::basic_streambuf { - public: - typedef CharT char_type; - typedef Traits traits_type; - typedef typename traits_type::int_type int_type; - typedef typename traits_type::pos_type pos_type; - typedef typename traits_type::off_type off_type; - typedef typename traits_type::state_type state_type; - - basic_filebuf(); - basic_filebuf(basic_filebuf&& rhs); - virtual ~basic_filebuf(); - - basic_filebuf& operator=(basic_filebuf&& rhs); - void swap(basic_filebuf& rhs); - - bool is_open() const; - basic_filebuf* open(const char* s, std::ios_base::openmode mode); - basic_filebuf* open(const std::string& s, std::ios_base::openmode mode); - basic_filebuf* open(int fd, std::ios_base::openmode mode); - basic_filebuf* open(FILE* f, std::ios_base::openmode mode); - basic_filebuf* close(); - - FILE* file() { return this->_M_file; } - int fd() { return fileno(this->_M_file); } - - protected: - int_type underflow() override; - int_type pbackfail(int_type c = traits_type::eof()) override; - int_type overflow(int_type c = traits_type::eof()) override; - std::basic_streambuf* setbuf( - char_type* s, std::streamsize n) override; - pos_type seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - pos_type seekpos(pos_type sp, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - int sync() override; - void imbue(const std::locale& loc) override; - - protected: - char* _M_extbuf; - const char* _M_extbufnext; - const char* _M_extbufend; - char _M_extbuf_min[8]; - size_t _M_ebs; - char_type* _M_intbuf; - size_t _M_ibs; - FILE* _M_file; - const std::codecvt* _M_cv; - state_type _M_st; - state_type _M_st_last; - std::ios_base::openmode _M_om; - std::ios_base::openmode _M_cm; - bool _M_owns_eb; - bool _M_owns_ib; - bool _M_always_noconv; - - const char* _M_get_mode(std::ios_base::openmode mode); - bool _M_read_mode(); - void _M_write_mode(); -}; - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf() - : _M_extbuf(nullptr), - _M_extbufnext(nullptr), - _M_extbufend(nullptr), - _M_ebs(0), - _M_intbuf(nullptr), - _M_ibs(0), - _M_file(nullptr), - _M_cv(nullptr), - _M_st(), - _M_st_last(), - _M_om(std::ios_base::openmode(0)), - _M_cm(std::ios_base::openmode(0)), - _M_owns_eb(false), - _M_owns_ib(false), - _M_always_noconv(false) { - if (std::has_facet >( - this->getloc())) { - _M_cv = &std::use_facet >( - this->getloc()); - _M_always_noconv = _M_cv->always_noconv(); - } - setbuf(0, 4096); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf(basic_filebuf&& rhs) - : std::basic_streambuf(rhs) { - if (rhs._M_extbuf == rhs._M_extbuf_min) { - _M_extbuf = _M_extbuf_min; - _M_extbufnext = _M_extbuf + (rhs._M_extbufnext - rhs._M_extbuf); - _M_extbufend = _M_extbuf + (rhs._M_extbufend - rhs._M_extbuf); - } else { - _M_extbuf = rhs._M_extbuf; - _M_extbufnext = 
rhs._M_extbufnext; - _M_extbufend = rhs._M_extbufend; - } - _M_ebs = rhs._M_ebs; - _M_intbuf = rhs._M_intbuf; - _M_ibs = rhs._M_ibs; - _M_file = rhs._M_file; - _M_cv = rhs._M_cv; - _M_st = rhs._M_st; - _M_st_last = rhs._M_st_last; - _M_om = rhs._M_om; - _M_cm = rhs._M_cm; - _M_owns_eb = rhs._M_owns_eb; - _M_owns_ib = rhs._M_owns_ib; - _M_always_noconv = rhs._M_always_noconv; - if (rhs.pbase()) { - if (rhs.pbase() == rhs._M_intbuf) - this->setp(_M_intbuf, _M_intbuf + (rhs.epptr() - rhs.pbase())); - else - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + - (rhs.epptr() - rhs.pbase())); - this->pbump(rhs.pptr() - rhs.pbase()); - } else if (rhs.eback()) { - if (rhs.eback() == rhs._M_intbuf) - this->setg(_M_intbuf, _M_intbuf + (rhs.gptr() - rhs.eback()), - _M_intbuf + (rhs.egptr() - rhs.eback())); - else - this->setg( - reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (rhs.gptr() - rhs.eback()), - reinterpret_cast(_M_extbuf) + - (rhs.egptr() - rhs.eback())); - } - rhs._M_extbuf = nullptr; - rhs._M_extbufnext = nullptr; - rhs._M_extbufend = nullptr; - rhs._M_ebs = 0; - rhs._M_intbuf = nullptr; - rhs._M_ibs = 0; - rhs._M_file = nullptr; - rhs._M_st = state_type(); - rhs._M_st_last = state_type(); - rhs._M_om = std::ios_base::openmode(0); - rhs._M_cm = std::ios_base::openmode(0); - rhs._M_owns_eb = false; - rhs._M_owns_ib = false; - rhs.setg(0, 0, 0); - rhs.setp(0, 0); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf& basic_filebuf::operator=( - basic_filebuf&& rhs) { - close(); - swap(rhs); - return *this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::~basic_filebuf() { - // try - // { - // close(); - // } - // catch (...) 
- // { - // } - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::swap(basic_filebuf& rhs) { - std::basic_streambuf::swap(rhs); - if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - std::swap(_M_extbuf, rhs._M_extbuf); - std::swap(_M_extbufnext, rhs._M_extbufnext); - std::swap(_M_extbufend, rhs._M_extbufend); - } else { - ptrdiff_t ln = _M_extbufnext - _M_extbuf; - ptrdiff_t le = _M_extbufend - _M_extbuf; - ptrdiff_t rn = rhs._M_extbufnext - rhs._M_extbuf; - ptrdiff_t re = rhs._M_extbufend - rhs._M_extbuf; - if (_M_extbuf == _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - _M_extbuf = rhs._M_extbuf; - rhs._M_extbuf = rhs._M_extbuf_min; - } else if (_M_extbuf != _M_extbuf_min && - rhs._M_extbuf == rhs._M_extbuf_min) { - rhs._M_extbuf = _M_extbuf; - _M_extbuf = _M_extbuf_min; - } - _M_extbufnext = _M_extbuf + rn; - _M_extbufend = _M_extbuf + re; - rhs._M_extbufnext = rhs._M_extbuf + ln; - rhs._M_extbufend = rhs._M_extbuf + le; - } - std::swap(_M_ebs, rhs._M_ebs); - std::swap(_M_intbuf, rhs._M_intbuf); - std::swap(_M_ibs, rhs._M_ibs); - std::swap(_M_file, rhs._M_file); - std::swap(_M_cv, rhs._M_cv); - std::swap(_M_st, rhs._M_st); - std::swap(_M_st_last, rhs._M_st_last); - std::swap(_M_om, rhs._M_om); - std::swap(_M_cm, rhs._M_cm); - std::swap(_M_owns_eb, rhs._M_owns_eb); - std::swap(_M_owns_ib, rhs._M_owns_ib); - std::swap(_M_always_noconv, rhs._M_always_noconv); - if (this->eback() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->gptr() - this->eback(); - ptrdiff_t e = this->egptr() - this->eback(); - this->setg(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + n, - reinterpret_cast(_M_extbuf_min) + e); - } else if (this->pbase() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->pptr() - this->pbase(); - ptrdiff_t e = this->epptr() - this->pbase(); - this->setp(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + e); - this->pbump(n); - } - if (rhs.eback() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.gptr() - rhs.eback(); - ptrdiff_t e = rhs.egptr() - rhs.eback(); - rhs.setg(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + n, - reinterpret_cast(rhs._M_extbuf_min) + e); - } else if (rhs.pbase() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.pptr() - rhs.pbase(); - ptrdiff_t e = rhs.epptr() - rhs.pbase(); - rhs.setp(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + e); - rhs.pbump(n); - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline void swap(basic_filebuf& x, - basic_filebuf& y) { - x.swap(y); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline bool basic_filebuf::is_open() const { - return _M_file != nullptr; -} - -/////////////////////////////////////////////////////////////////////////////// -template -const char* basic_filebuf::_M_get_mode( - std::ios_base::openmode mode) { - switch ((mode & ~std::ios_base::ate) | 0) { - case std::ios_base::out: - case std::ios_base::out | std::ios_base::trunc: - return "w"; - case std::ios_base::out | std::ios_base::app: - case std::ios_base::app: - return "a"; - break; - case std::ios_base::in: - return "r"; - case std::ios_base::in | std::ios_base::out: - return "r+"; - case std::ios_base::in | std::ios_base::out | 
std::ios_base::trunc: - return "w+"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app: - case std::ios_base::in | std::ios_base::app: - return "a+"; - case std::ios_base::out | std::ios_base::binary: - case std::ios_base::out | std::ios_base::trunc | std::ios_base::binary: - return "wb"; - case std::ios_base::out | std::ios_base::app | std::ios_base::binary: - case std::ios_base::app | std::ios_base::binary: - return "ab"; - case std::ios_base::in | std::ios_base::binary: - return "rb"; - case std::ios_base::in | std::ios_base::out | std::ios_base::binary: - return "r+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::trunc | - std::ios_base::binary: - return "w+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app | - std::ios_base::binary: - case std::ios_base::in | std::ios_base::app | std::ios_base::binary: - return "a+b"; - default: - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - const char* s, std::ios_base::openmode mode) { - basic_filebuf* rt = nullptr; - if (_M_file == nullptr) { - const char* md = _M_get_mode(mode); - if (md) { - _M_file = fopen(s, md); - if (_M_file) { - rt = this; - _M_om = mode; - if (mode & std::ios_base::ate) { - if (fseek(_M_file, 0, SEEK_END)) { - fclose(_M_file); - _M_file = nullptr; - rt = nullptr; - } - } - } - } - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf* basic_filebuf::open( - const std::string& s, std::ios_base::openmode mode) { - return open(s.c_str(), mode); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - int fd, std::ios_base::openmode mode) { - const char* md = this->_M_get_mode(mode); - if (md) { - this->_M_file = fdopen(fd, md); - this->_M_om = mode; - return this; - } else { - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - FILE* f, std::ios_base::openmode mode) { - this->_M_file = f; - this->_M_om = mode; - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::close() { - basic_filebuf* rt = nullptr; - if (_M_file) { - rt = this; - std::unique_ptr h(_M_file, fclose); - if (sync()) rt = nullptr; - if (fclose(h.release()) == 0) - _M_file = nullptr; - else - rt = nullptr; - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::underflow() { - if (_M_file == nullptr) return traits_type::eof(); - bool initial = _M_read_mode(); - char_type buf; - if (this->gptr() == nullptr) this->setg(&buf, &buf + 1, &buf + 1); - const size_t unget_sz = - initial ? 
0 : std::min((this->egptr() - this->eback()) / 2, 4); - int_type c = traits_type::eof(); - if (this->gptr() == this->egptr()) { - memmove(this->eback(), this->egptr() - unget_sz, - unget_sz * sizeof(char_type)); - if (_M_always_noconv) { - size_t nmemb = - static_cast(this->egptr() - this->eback() - unget_sz); - nmemb = fread(this->eback() + unget_sz, 1, nmemb, _M_file); - if (nmemb != 0) { - this->setg(this->eback(), this->eback() + unget_sz, - this->eback() + unget_sz + nmemb); - c = traits_type::to_int_type(*this->gptr()); - } - } else { - memmove(_M_extbuf, _M_extbufnext, _M_extbufend - _M_extbufnext); - _M_extbufnext = _M_extbuf + (_M_extbufend - _M_extbufnext); - _M_extbufend = - _M_extbuf + - (_M_extbuf == _M_extbuf_min ? sizeof(_M_extbuf_min) : _M_ebs); - size_t nmemb = - std::min(static_cast(_M_ibs - unget_sz), - static_cast(_M_extbufend - _M_extbufnext)); - std::codecvt_base::result r; - _M_st_last = _M_st; - size_t nr = - fread(reinterpret_cast(const_cast(_M_extbufnext)), - 1, nmemb, _M_file); - if (nr != 0) { - if (!_M_cv) throw std::bad_cast(); - _M_extbufend = _M_extbufnext + nr; - char_type* inext; - r = _M_cv->in(_M_st, _M_extbuf, _M_extbufend, _M_extbufnext, - this->eback() + unget_sz, this->eback() + _M_ibs, inext); - if (r == std::codecvt_base::noconv) { - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf), - const_cast(_M_extbufend)); - c = traits_type::to_int_type(*this->gptr()); - } else if (inext != this->eback() + unget_sz) { - this->setg(this->eback(), this->eback() + unget_sz, inext); - c = traits_type::to_int_type(*this->gptr()); - } - } - } - } else { - c = traits_type::to_int_type(*this->gptr()); - } - if (this->eback() == &buf) this->setg(0, 0, 0); - return c; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::pbackfail(int_type c) { - if (_M_file && this->eback() < this->gptr()) { - if (traits_type::eq_int_type(c, traits_type::eof())) { - this->gbump(-1); - return traits_type::not_eof(c); - } - if ((_M_om & std::ios_base::out) || - traits_type::eq(traits_type::to_char_type(c), this->gptr()[-1])) { - this->gbump(-1); - *this->gptr() = traits_type::to_char_type(c); - return c; - } - } - return traits_type::eof(); -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::overflow(int_type c) { - if (_M_file == nullptr) return traits_type::eof(); - _M_write_mode(); - char_type buf; - char_type* pb_save = this->pbase(); - char_type* epb_save = this->epptr(); - if (!traits_type::eq_int_type(c, traits_type::eof())) { - if (this->pptr() == nullptr) this->setp(&buf, &buf + 1); - *this->pptr() = traits_type::to_char_type(c); - this->pbump(1); - } - if (this->pptr() != this->pbase()) { - if (_M_always_noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), sizeof(char_type), nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else { - char* extbe = _M_extbuf; - std::codecvt_base::result r; - do { - if (!_M_cv) throw std::bad_cast(); - const char_type* e; - r = _M_cv->out(_M_st, this->pbase(), this->pptr(), e, _M_extbuf, - _M_extbuf + _M_ebs, extbe); - if (e == this->pbase()) return traits_type::eof(); - if (r == std::codecvt_base::noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else if (r == std::codecvt_base::ok 
|| - r == std::codecvt_base::partial) { - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - if (r == std::codecvt_base::partial) { - this->setp(const_cast(e), this->pptr()); - this->pbump(this->epptr() - this->pbase()); - } - } else { - return traits_type::eof(); - } - } while (r == std::codecvt_base::partial); - } - this->setp(pb_save, epb_save); - } - return traits_type::not_eof(c); -} - -/////////////////////////////////////////////////////////////////////////////// -template -std::basic_streambuf* basic_filebuf::setbuf( - char_type* s, std::streamsize n) { - this->setg(0, 0, 0); - this->setp(0, 0); - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; - _M_ebs = n; - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv && s) { - _M_extbuf = reinterpret_cast(s); - _M_owns_eb = false; - } else { - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } - } else { - _M_extbuf = _M_extbuf_min; - _M_ebs = sizeof(_M_extbuf_min); - _M_owns_eb = false; - } - if (!_M_always_noconv) { - _M_ibs = std::max(n, sizeof(_M_extbuf_min)); - if (s && _M_ibs >= sizeof(_M_extbuf_min)) { - _M_intbuf = s; - _M_owns_ib = false; - } else { - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } else { - _M_ibs = 0; - _M_intbuf = 0; - _M_owns_ib = false; - } - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode) { - if (!_M_cv) throw std::bad_cast(); - int width = _M_cv->encoding(); - if (_M_file == nullptr || (width <= 0 && off != 0) || sync()) - return pos_type(off_type(-1)); - // width > 0 || off == 0 - int whence; - switch (way) { - case std::ios_base::beg: - whence = SEEK_SET; - break; - case std::ios_base::cur: - whence = SEEK_CUR; - break; - case std::ios_base::end: - whence = SEEK_END; - break; - default: - return pos_type(off_type(-1)); - } -#if _WIN32 - if (fseek(_M_file, width > 0 ? width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftell(_M_file); -#else - if (fseeko(_M_file, width > 0 ? 
width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftello(_M_file); -#endif - r.state(_M_st); - return r; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekpos(pos_type sp, std::ios_base::openmode) { - if (_M_file == nullptr || sync()) return pos_type(off_type(-1)); -#if _WIN32 - if (fseek(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#else - if (fseeko(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#endif - _M_st = sp.state(); - return sp; -} - -/////////////////////////////////////////////////////////////////////////////// -template -int basic_filebuf::sync() { - if (_M_file == nullptr) return 0; - if (!_M_cv) throw std::bad_cast(); - if (_M_cm & std::ios_base::out) { - if (this->pptr() != this->pbase()) - if (overflow() == traits_type::eof()) return -1; - std::codecvt_base::result r; - do { - char* extbe; - r = _M_cv->unshift(_M_st, _M_extbuf, _M_extbuf + _M_ebs, extbe); - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) return -1; - } while (r == std::codecvt_base::partial); - if (r == std::codecvt_base::error) return -1; - if (fflush(_M_file)) return -1; - } else if (_M_cm & std::ios_base::in) { - off_type c; - state_type state = _M_st_last; - bool update_st = false; - if (_M_always_noconv) { - c = this->egptr() - this->gptr(); - } else { - int width = _M_cv->encoding(); - c = _M_extbufend - _M_extbufnext; - if (width > 0) { - c += width * (this->egptr() - this->gptr()); - } else { - if (this->gptr() != this->egptr()) { - const int off = _M_cv->length(state, _M_extbuf, _M_extbufnext, - this->gptr() - this->eback()); - c += _M_extbufnext - _M_extbuf - off; - update_st = true; - } - } - } -#if _WIN32 - if (fseek(_M_file_, -c, SEEK_CUR)) return -1; -#else - if (fseeko(_M_file, -c, SEEK_CUR)) return -1; -#endif - if (update_st) _M_st = state; - _M_extbufnext = _M_extbufend = _M_extbuf; - this->setg(0, 0, 0); - _M_cm = std::ios_base::openmode(0); - } - return 0; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::imbue(const std::locale& loc) { - sync(); - _M_cv = &std::use_facet >(loc); - bool old_anc = _M_always_noconv; - _M_always_noconv = _M_cv->always_noconv(); - if (old_anc != _M_always_noconv) { - this->setg(0, 0, 0); - this->setp(0, 0); - // invariant, char_type is char, else we couldn't get here - // need to dump _M_intbuf - if (_M_always_noconv) { - if (_M_owns_eb) delete[] _M_extbuf; - _M_owns_eb = _M_owns_ib; - _M_ebs = _M_ibs; - _M_extbuf = reinterpret_cast(_M_intbuf); - _M_ibs = 0; - _M_intbuf = nullptr; - _M_owns_ib = false; - } else { // need to obtain an _M_intbuf. 
- // If _M_extbuf is user-supplied, use it, else new _M_intbuf - if (!_M_owns_eb && _M_extbuf != _M_extbuf_min) { - _M_ibs = _M_ebs; - _M_intbuf = reinterpret_cast(_M_extbuf); - _M_owns_ib = false; - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } else { - _M_ibs = _M_ebs; - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -bool basic_filebuf::_M_read_mode() { - if (!(_M_cm & std::ios_base::in)) { - this->setp(0, 0); - if (_M_always_noconv) - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + _M_ebs, - reinterpret_cast(_M_extbuf) + _M_ebs); - else - this->setg(_M_intbuf, _M_intbuf + _M_ibs, _M_intbuf + _M_ibs); - _M_cm = std::ios_base::in; - return true; - } - return false; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::_M_write_mode() { - if (!(_M_cm & std::ios_base::out)) { - this->setg(0, 0, 0); - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv) - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (_M_ebs - 1)); - else - this->setp(_M_intbuf, _M_intbuf + (_M_ibs - 1)); - } else { - this->setp(0, 0); - } - _M_cm = std::ios_base::out; - } -} - -/////////////////////////////////////////////////////////////////////////////// -} // namespace kaldi - -/////////////////////////////////////////////////////////////////////////////// -#endif // KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// - -/* - * ============================================================================ - * libc++ License - * ============================================================================ - * - * The libc++ library is dual licensed under both the University of Illinois - * "BSD-Like" license and the MIT license. As a user of this code you may - * choose to use it under either license. As a contributor, you agree to allow - * your code to be used under both. - * - * Full text of the relevant licenses is included below. - * - * ============================================================================ - * - * University of Illinois/NCSA - * Open Source License - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * All rights reserved. - * - * Developed by: - * - * LLVM Team - * - * University of Illinois at Urbana-Champaign - * - * http://llvm.org - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * with the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimers in the - * documentation and/or other materials provided with the distribution. 
- * - * * Neither the names of the LLVM Team, University of Illinois at - * Urbana-Champaign, nor the names of its contributors may be used to - * endorse or promote products derived from this Software without specific - * prior written permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH - * THE SOFTWARE. - * - * ============================================================================== - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * ============================================================================== - * - * This file is a partial list of people who have contributed to the LLVM/libc++ - * project. If you have contributed a patch or made some other contribution to - * LLVM/libc++, please submit a patch to this file to add yourself, and it will - * be done! - * - * The list is sorted by surname and formatted to allow easy grepping and - * beautification by scripts. The fields are: name (N), email (E), web-address - * (W), PGP key ID and fingerprint (P), description (D), and snail-mail address - * (S). - * - * N: Saleem Abdulrasool - * E: compnerd@compnerd.org - * D: Minor patches and Linux fixes. - * - * N: Dimitry Andric - * E: dimitry@andric.com - * D: Visibility fixes, minor FreeBSD portability patches. - * - * N: Holger Arnold - * E: holgerar@gmail.com - * D: Minor fix. - * - * N: Ruben Van Boxem - * E: vanboxem dot ruben at gmail dot com - * D: Initial Windows patches. - * - * N: David Chisnall - * E: theraven at theravensnest dot org - * D: FreeBSD and Solaris ports, libcxxrt support, some atomics work. - * - * N: Marshall Clow - * E: mclow.lists@gmail.com - * E: marshall@idio.com - * D: C++14 support, patches and bug fixes. - * - * N: Bill Fisher - * E: william.w.fisher@gmail.com - * D: Regex bug fixes. - * - * N: Matthew Dempsky - * E: matthew@dempsky.org - * D: Minor patches and bug fixes. - * - * N: Google Inc. 
- * D: Copyright owner and contributor of the CityHash algorithm - * - * N: Howard Hinnant - * E: hhinnant@apple.com - * D: Architect and primary author of libc++ - * - * N: Hyeon-bin Jeong - * E: tuhertz@gmail.com - * D: Minor patches and bug fixes. - * - * N: Argyrios Kyrtzidis - * E: kyrtzidis@apple.com - * D: Bug fixes. - * - * N: Bruce Mitchener, Jr. - * E: bruce.mitchener@gmail.com - * D: Emscripten-related changes. - * - * N: Michel Morin - * E: mimomorin@gmail.com - * D: Minor patches to is_convertible. - * - * N: Andrew Morrow - * E: andrew.c.morrow@gmail.com - * D: Minor patches and Linux fixes. - * - * N: Arvid Picciani - * E: aep at exys dot org - * D: Minor patches and musl port. - * - * N: Bjorn Reese - * E: breese@users.sourceforge.net - * D: Initial regex prototype - * - * N: Nico Rieck - * E: nico.rieck@gmail.com - * D: Windows fixes - * - * N: Jonathan Sauer - * D: Minor patches, mostly related to constexpr - * - * N: Craig Silverstein - * E: csilvers@google.com - * D: Implemented Cityhash as the string hash function on 64-bit machines - * - * N: Richard Smith - * D: Minor patches. - * - * N: Joerg Sonnenberger - * E: joerg@NetBSD.org - * D: NetBSD port. - * - * N: Stephan Tolksdorf - * E: st@quanttec.com - * D: Minor fix - * - * N: Michael van der Westhuizen - * E: r1mikey at gmail dot com - * - * N: Klaas de Vries - * E: klaas at klaasgaaf dot nl - * D: Minor bug fix. - * - * N: Zhang Xiongpang - * E: zhangxiongpang@gmail.com - * D: Minor patches and bug fixes. - * - * N: Xing Xue - * E: xingxue@ca.ibm.com - * D: AIX port - * - * N: Zhihao Yuan - * E: lichray@gmail.com - * D: Standard compatibility fixes. - * - * N: Jeffrey Yasskin - * E: jyasskin@gmail.com - * E: jyasskin@google.com - * D: Linux fixes. - */ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/const-integer-set-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/const-integer-set-inl.h deleted file mode 100644 index b93846148a3e4595774507f638396ce13393ac0e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/const-integer-set-inl.h +++ /dev/null @@ -1,87 +0,0 @@ -// util/const-integer-set-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_INL_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_INL_H_ - -// Do not include this file directly. It is included by const-integer-set.h - -namespace kaldi { - -template -void ConstIntegerSet::InitInternal() { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - quick_set_.clear(); // just in case we previously had data. 
- if (slow_set_.size() == 0) { - lowest_member_ = (I)1; - highest_member_ = (I)0; - contiguous_ = false; - quick_ = false; - } else { - lowest_member_ = slow_set_.front(); - highest_member_ = slow_set_.back(); - size_t range = highest_member_ + 1 - lowest_member_; - if (range == slow_set_.size()) { - contiguous_ = true; - quick_ = false; - } else { - contiguous_ = false; - // If it would be more compact to store as bool - if (range < slow_set_.size() * 8 * sizeof(I)) { - // (assuming 1 bit per element)... - quick_set_.resize(range, false); - for (size_t i = 0; i < slow_set_.size(); i++) - quick_set_[slow_set_[i] - lowest_member_] = true; - quick_ = true; - } else { - quick_ = false; - } - } - } -} - -template -int ConstIntegerSet::count(I i) const { - if (i < lowest_member_ || i > highest_member_) { - return 0; - } else { - if (contiguous_) return true; - if (quick_) { - return (quick_set_[i - lowest_member_] ? 1 : 0); - } else { - bool ans = std::binary_search(slow_set_.begin(), slow_set_.end(), i); - return (ans ? 1 : 0); - } - } -} - -template -void ConstIntegerSet::Write(std::ostream &os, bool binary) const { - WriteIntegerVector(os, binary, slow_set_); -} - -template -void ConstIntegerSet::Read(std::istream &is, bool binary) { - ReadIntegerVector(is, binary, &slow_set_); - InitInternal(); -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_CONST_INTEGER_SET_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/const-integer-set.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/const-integer-set.h deleted file mode 100644 index 809a56a7c83804bfaa4badb5e28059734bfcad1e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/const-integer-set.h +++ /dev/null @@ -1,96 +0,0 @@ -// util/const-integer-set.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_H_ -#include -#include -#include -#include -#include -#include "util/stl-utils.h" - -/* ConstIntegerSet is a way to efficiently test whether something is in a - supplied set of integers. It can be initialized from a vector or set, but - never changed after that. It either uses a sorted vector or an array of - bool, depending on the input. It behaves like a const version of an STL set, - with only a subset of the functionality, except all the member functions are - upper-case. - - Note that we could get rid of the member slow_set_, but we'd have to - do more work to implement an iterator type. This would save memory. 
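// Editor's aside: an illustrative usage sketch (not part of the deleted header) for
// the ConstIntegerSet described above. It uses only the documented interface:
// construct once from a std::vector, then query membership with count().
#include <iostream>
#include <vector>
#include "util/const-integer-set.h"

static void ConstIntegerSetDemo() {
  std::vector<int> members = {3, 7, 7, 42};      // duplicates are removed internally
  kaldi::ConstIntegerSet<int> frequent(members); // immutable after construction
  std::cout << frequent.count(7) << " " << frequent.count(8) << "\n";  // prints "1 0"
  for (kaldi::ConstIntegerSet<int>::iterator it = frequent.begin();
       it != frequent.end(); ++it)
    std::cout << *it << " ";                     // iterates the sorted, unique members
  std::cout << "\n";
}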
-*/ - -namespace kaldi { - -template -class ConstIntegerSet { - public: - ConstIntegerSet() : lowest_member_(1), highest_member_(0) {} - - void Init(const std::vector &input) { - slow_set_ = input; - SortAndUniq(&slow_set_); - InitInternal(); - } - - void Init(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - - explicit ConstIntegerSet(const std::vector &input) : slow_set_(input) { - SortAndUniq(&slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const ConstIntegerSet &other) - : slow_set_(other.slow_set_) { - InitInternal(); - } - - int count(I i) const; // returns 1 or 0. - - typedef typename std::vector::const_iterator iterator; - iterator begin() const { return slow_set_.begin(); } - iterator end() const { return slow_set_.end(); } - size_t size() const { return slow_set_.size(); } - bool empty() const { return slow_set_.empty(); } - - void Write(std::ostream &os, bool binary) const; - void Read(std::istream &is, bool binary); - - private: - I lowest_member_; - I highest_member_; - bool contiguous_; - bool quick_; - std::vector quick_set_; - std::vector slow_set_; - void InitInternal(); -}; - -} // end namespace kaldi - -#include "util/const-integer-set-inl.h" - -#endif // KALDI_UTIL_CONST_INTEGER_SET_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/hash-list-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/hash-list-inl.h deleted file mode 100644 index 063fa7131ec618f0aae9dc30f4edd26c9dcce7fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/hash-list-inl.h +++ /dev/null @@ -1,193 +0,0 @@ -// util/hash-list-inl.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_INL_H_ -#define KALDI_UTIL_HASH_LIST_INL_H_ - -// Do not include this file directly. It is included by fast-hash.h - -namespace kaldi { - -template -HashList::HashList() { - list_head_ = NULL; - bucket_list_tail_ = static_cast(-1); // invalid. - hash_size_ = 0; - freed_head_ = NULL; -} - -template -void HashList::SetSize(size_t size) { - hash_size_ = size; - KALDI_ASSERT(list_head_ == NULL && - bucket_list_tail_ == - static_cast(-1)); // make sure empty. - if (size > buckets_.size()) buckets_.resize(size, HashBucket(0, NULL)); -} - -template -typename HashList::Elem *HashList::Clear() { - // Clears the hashtable and gives ownership of the currently contained list - // to the user. 
- for (size_t cur_bucket = bucket_list_tail_; - cur_bucket != static_cast(-1); - cur_bucket = buckets_[cur_bucket].prev_bucket) { - buckets_[cur_bucket].last_elem = NULL; // this is how we indicate "empty". - } - bucket_list_tail_ = static_cast(-1); - Elem *ans = list_head_; - list_head_ = NULL; - return ans; -} - -template -const typename HashList::Elem *HashList::GetList() const { - return list_head_; -} - -template -inline void HashList::Delete(Elem *e) { - e->tail = freed_head_; - freed_head_ = e; -} - -template -inline typename HashList::Elem *HashList::Find(I key) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - if (bucket.last_elem == NULL) { - return NULL; // empty bucket. - } else { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - return NULL; // Not found. - } -} - -template -inline typename HashList::Elem *HashList::New() { - if (freed_head_) { - Elem *ans = freed_head_; - freed_head_ = freed_head_->tail; - return ans; - } else { - Elem *tmp = new Elem[allocate_block_size_]; - for (size_t i = 0; i + 1 < allocate_block_size_; i++) - tmp[i].tail = tmp + i + 1; - tmp[allocate_block_size_ - 1].tail = NULL; - freed_head_ = tmp; - allocated_.push_back(tmp); - return this->New(); - } -} - -template -HashList::~HashList() { - // First test whether we had any memory leak within the - // HashList, i.e. things for which the user did not call Delete(). - size_t num_in_list = 0, num_allocated = 0; - for (Elem *e = freed_head_; e != NULL; e = e->tail) num_in_list++; - for (size_t i = 0; i < allocated_.size(); i++) { - num_allocated += allocate_block_size_; - delete[] allocated_[i]; - } - if (num_in_list != num_allocated) { - KALDI_WARN << "Possible memory leak: " << num_in_list - << " != " << num_allocated - << ": you might have forgotten to call Delete on " - << "some Elems"; - } -} - -template -inline typename HashList::Elem *HashList::Insert(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - // Check the element is existing or not. - if (bucket.last_elem != NULL) { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - } - - // This is a new element. Insert it. - Elem *elem = New(); - elem->key = key; - elem->val = val; - if (bucket.last_elem == NULL) { // Unoccupied bucket. Insert at - // head of bucket list (which is tail of regular list, they go in - // opposite directions). - if (bucket_list_tail_ == static_cast(-1)) { - // list was empty so this is the first elem. - KALDI_ASSERT(list_head_ == NULL); - list_head_ = elem; - } else { - // link in to the chain of Elems - buckets_[bucket_list_tail_].last_elem->tail = elem; - } - elem->tail = NULL; - bucket.last_elem = elem; - bucket.prev_bucket = bucket_list_tail_; - bucket_list_tail_ = index; - } else { - // Already-occupied bucket. Insert at tail of list of elements within - // the bucket. 
- elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - } - return elem; -} - -template -void HashList::InsertMore(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - Elem *elem = New(); - elem->key = key; - elem->val = val; - - KALDI_ASSERT(bucket.last_elem != NULL); // assume one element is already here - if (bucket.last_elem->key == key) { // standard behavior: add as last element - elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - return; - } - Elem *e = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail); - // find place to insert in linked list - while (e != bucket.last_elem->tail && e->key != key) e = e->tail; - KALDI_ASSERT(e->key == key); // not found? - should not happen - elem->tail = e->tail; - e->tail = elem; -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_HASH_LIST_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/hash-list.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/hash-list.h deleted file mode 100644 index 31cc9bdc4870773475f8c5139539e320746bf5fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/hash-list.h +++ /dev/null @@ -1,146 +0,0 @@ -// util/hash-list.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_H_ -#define KALDI_UTIL_HASH_LIST_H_ - -#include -#include -#include -#include -#include - -#include "base/kaldi-error.h" - -/* This header provides utilities for a structure that's used in a decoder (but - is quite generic in nature so we implement and test it separately). - Basically it's a singly-linked list, but implemented in such a way that we - can quickly search for elements in the list. We give it a slightly richer - interface than just a hash and a list. The idea is that we want to separate - the hash part and the list part: basically, in the decoder, we want to have a - single hash for the current frame and the next frame, because by the time we - need to access the hash for the next frame we no longer need the hash for the - previous frame. So we have an operation that clears the hash but leaves the - list structure intact. We also control memory management inside this object, - to avoid repeated new's/deletes. - - See hash-list-test.cc for an example of how to use this object. -*/ - -namespace kaldi { - -template -class HashList { - public: - struct Elem { - I key; - T val; - Elem *tail; - }; - - /// Constructor takes no arguments. - /// Call SetSize to inform it of the likely size. 
- HashList(); - - /// Clears the hash and gives the head of the current list to the user; - /// ownership is transferred to the user (the user must call Delete() - /// for each element in the list, at his/her leisure). - Elem *Clear(); - - /// Gives the head of the current list to the user. Ownership retained in the - /// class. Caution: in December 2013 the return type was changed to const - /// Elem* and this function was made const. You may need to change some types - /// of local Elem* variables to const if this produces compilation errors. - const Elem *GetList() const; - - /// Think of this like delete(). It is to be called for each Elem in turn - /// after you "obtained ownership" by doing Clear(). This is not the opposite - /// of. Insert, it is the opposite of New. It's really a memory operation. - inline void Delete(Elem *e); - - /// This should probably not be needed to be called directly by the user. - /// Think of it as opposite - /// to Delete(); - inline Elem *New(); - - /// Find tries to find this element in the current list using the hashtable. - /// It returns NULL if not present. The Elem it returns is not owned by the - /// user, it is part of the internal list owned by this object, but the user - /// is free to modify the "val" element. - inline Elem *Find(I key); - - /// Insert inserts a new element into the hashtable/stored list. - /// Because element keys in a hashtable are unique, this operation checks - /// whether each inserted element has a key equivalent to the one of an - /// element already in the hashtable. If so, the element is not inserted, - /// returning an pointer to this existing element. - inline Elem *Insert(I key, T val); - - /// Insert inserts another element with same key into the hashtable/ - /// stored list. - /// By calling this, the user asserts that one element with that key is - /// already present. - /// We insert it that way, that all elements with the same key - /// follow each other. - /// Find() will return the first one of the elements with the same key. - inline void InsertMore(I key, T val); - - /// SetSize tells the object how many hash buckets to allocate (should - /// typically be at least twice the number of objects we expect to go in the - /// structure, for fastest performance). It must be called while the hash - /// is empty (e.g. after Clear() or after initializing the object, but before - /// adding anything to the hash. - void SetSize(size_t sz); - - /// Returns current number of hash buckets. - inline size_t Size() { return hash_size_; } - - ~HashList(); - - private: - struct HashBucket { - size_t prev_bucket; // index to next bucket (-1 if list tail). Note: - // list of buckets goes in opposite direction to list of Elems. - Elem *last_elem; // pointer to last element in this bucket (NULL if empty) - inline HashBucket(size_t i, Elem *e) : prev_bucket(i), last_elem(e) {} - }; - - Elem *list_head_; // head of currently stored list. - size_t bucket_list_tail_; // tail of list of active hash buckets. - - size_t hash_size_; // number of hash buckets. - - std::vector buckets_; - - Elem *freed_head_; // head of list of currently freed elements. [ready for - // allocation] - - std::vector allocated_; // list of allocated blocks. - - static const size_t allocate_block_size_ = 1024; // Number of Elements to - // allocate in one block. Must be largish so storing allocated_ doesn't - // become a problem. 
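// Editor's aside: an illustrative usage sketch (not part of the deleted header)
// showing the intended lifecycle of HashList as documented above: SetSize() while
// empty, Insert()/Find() during a frame, then Clear() and Delete() every element
// of the returned list before the object is reused.
#include "util/hash-list.h"

static void HashListDemo() {
  kaldi::HashList<int, double> hash;
  hash.SetSize(128);                        // must be called while the hash is empty

  hash.Insert(5, 1.0);                      // new key: an element is created
  kaldi::HashList<int, double>::Elem *e = hash.Find(5);
  if (e != NULL) e->val += 0.5;             // "val" may be modified in place

  // Insert() does not overwrite: for an existing key it returns the old element,
  // so again->val is still 1.5 at this point.
  kaldi::HashList<int, double>::Elem *again = hash.Insert(5, 99.0);
  (void)again;

  // Take ownership of the stored list and release each element in turn.
  for (kaldi::HashList<int, double>::Elem *p = hash.Clear(); p != NULL; ) {
    kaldi::HashList<int, double>::Elem *next = p->tail;
    hash.Delete(p);
    p = next;
  }
}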
-}; - -} // end namespace kaldi - -#include "util/hash-list-inl.h" - -#endif // KALDI_UTIL_HASH_LIST_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-io-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-io-inl.h deleted file mode 100644 index 8b0c92131c4af2113eb33da6f3cfa9dc4dee83e1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-io-inl.h +++ /dev/null @@ -1,40 +0,0 @@ -// util/kaldi-io-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_KALDI_IO_INL_H_ -#define KALDI_UTIL_KALDI_IO_INL_H_ - -#include - -namespace kaldi { - -bool Input::Open(const std::string &rxfilename, bool *binary) { - return OpenInternal(rxfilename, true, binary); -} - -bool Input::OpenTextMode(const std::string &rxfilename) { - return OpenInternal(rxfilename, false, NULL); -} - -bool Input::IsOpen() { return impl_ != NULL; } - -bool Output::IsOpen() { return impl_ != NULL; } - -} // end namespace kaldi. - -#endif // KALDI_UTIL_KALDI_IO_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-io.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-io.cc deleted file mode 100644 index 5f8ec4870138df32f6aca9c12383cf3885411741..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-io.cc +++ /dev/null @@ -1,898 +0,0 @@ -// util/kaldi-io.cc - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
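// Editor's aside: an illustrative sketch (not part of the deleted sources) of the
// Input/Output pattern that kaldi-io-inl.h above and kaldi-io.cc below support.
// The file name "foo.txt" is a hypothetical placeholder; extended filenames such
// as "-", "gunzip -c foo.gz|" or "|gzip -c > bar.gz" are dispatched by the
// Classify*xfilename() routines defined below.
#include <string>
#include "util/kaldi-io.h"

static void KaldiIoDemo() {
  // Write: a plain path is classified as kFileOutput; "|cmd" would be a pipe.
  kaldi::Output ko("foo.txt", /*binary=*/false);
  ko.Stream() << "hello\n";
  ko.Close();

  // Read: "cmd |" is classified as kPipeInput, "-" as standard input, and
  // "file.ark:12345" as an offset into a file.
  bool binary_in;
  kaldi::Input ki("foo.txt", &binary_in);
  std::string word;
  ki.Stream() >> word;
  ki.Close();
}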
-#include "util/kaldi-io.h" - -#include -#include -#include - -#include - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" -#include "util/kaldi-pipebuf.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -#ifdef KALDI_CYGWIN_COMPAT -#include "util/kaldi-cygwin-io-inl.h" -#define MapOsPath(x) MapCygwinPath(x) -#else // KALDI_CYGWIN_COMPAT -#define MapOsPath(x) x -#endif // KALDI_CYGWIN_COMPAT - -#if defined(_MSC_VER) -static FILE *popen(const char *command, const char *mode) { -#ifdef KALDI_CYGWIN_COMPAT - return kaldi::CygwinCompatPopen(command, mode); -#else // KALDI_CYGWIN_COMPAT - return _popen(command, mode); -#endif // KALDI_CYGWIN_COMPAT -} -#endif // _MSC_VER - -namespace kaldi { - -#ifndef _MSC_VER // on VS, we don't need this type. -// could replace basic_pipebuf with stdio_filebuf on some platforms. -// Would mean we could use less of our own code. -typedef basic_pipebuf PipebufType; -#endif -} // namespace kaldi - -namespace kaldi { - -std::string PrintableRxfilename(const std::string &rxfilename) { - if (rxfilename == "" || rxfilename == "-") { - return "standard input"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return rxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(rxfilename); - } -} - -std::string PrintableWxfilename(const std::string &wxfilename) { - if (wxfilename == "" || wxfilename == "-") { - return "standard output"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return wxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(wxfilename); - } -} - -OutputType ClassifyWxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardOutput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardOutput; - } else if (first_char == '|') { - return kPipeOutput; // An output pipe like "|blah". - } else if (isspace(first_char) || isspace(last_char) || last_char == '|') { - return kNoOutput; // Leading or trailing space: can't interpret this. - // Final '|' would represent an input pipe, not an - // output pipe. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoOutput; - } else if (isdigit(last_char)) { - // This could be a file, but we have to see if it's an offset into a file - // (like foo.ark:4314328), which is not allowed for writing (but is - // allowed for reaching). 
This eliminates some things which would be - // valid UNIX filenames but are not allowed by Kaldi. (Even if we allowed - // such filenames for writing, we woudln't be able to correctly read them). - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') return kNoOutput; - // else it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but we - // check for internal '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify wxfilename with pipe symbol in the" - " wrong place (pipe without | at the beginning?): " - << filename; - return kNoOutput; - } - return kFileOutput; // It matched no other pattern: assume it's a filename. -} - -InputType ClassifyRxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardInput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardInput; - } else if (first_char == '|') { - return kNoInput; // An output pipe like "|blah": not - // valid for input. - } else if (last_char == '|') { - return kPipeInput; - } else if (isspace(first_char) || isspace(last_char)) { - return kNoInput; // We don't allow leading or trailing space in a filename. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoInput; - } else if (isdigit(last_char)) { - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') - return kOffsetFileInput; // Filename is like - // some_file:12345 - // otherwise it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but - // we check for '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified in this case. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify rxfilename with pipe symbol in the" - " wrong place (pipe without | at the end?): " - << filename; - return kNoInput; - } - return kFileInput; // It matched no other pattern: assume it's a filename. -} - -class OutputImplBase { - public: - // Open will open it as a file (no header), and return true - // on success. It cannot be called on an already open stream. 
- virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::ostream &Stream() = 0; - virtual bool Close() = 0; - virtual ~OutputImplBase() {} -}; - -class FileOutputImpl : public OutputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (os_.is_open()) - KALDI_ERR << "FileOutputImpl::Open(), " - << "open called on already open file."; - filename_ = filename; - os_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out); - return os_.is_open(); - } - - virtual std::ostream &Stream() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return os_; - } - - virtual bool Close() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - os_.close(); - return !(os_.fail()); - } - virtual ~FileOutputImpl() { - if (os_.is_open()) { - os_.close(); - if (os_.fail()) KALDI_ERR << "Error closing output file " << filename_; - } - } - - private: - std::string filename_; - std::ofstream os_; -}; - -class StandardOutputImpl : public OutputImplBase { - public: - StandardOutputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardOutputImpl::Open(), " - "open called on already open file."; -#ifdef _MSC_VER - _setmode(_fileno(stdout), binary ? _O_BINARY : _O_TEXT); -#endif - is_open_ = std::cout.good(); - return is_open_; - } - - virtual std::ostream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return std::cout; - } - - virtual bool Close() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Close(), file is not open."; - is_open_ = false; - std::cout << std::flush; - return !(std::cout.fail()); - } - virtual ~StandardOutputImpl() { - if (is_open_) { - std::cout << std::flush; - if (std::cout.fail()) KALDI_ERR << "Error writing to standard output"; - } - } - - private: - bool is_open_; -}; - -class PipeOutputImpl : public OutputImplBase { - public: - PipeOutputImpl() : f_(NULL), os_(NULL) {} - - virtual bool Open(const std::string &wxfilename, bool binary) { - filename_ = wxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(wxfilename.length() != 0 && wxfilename[0] == '|'); // should - // start with '|' - std::string cmd_name(wxfilename, 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "wb" : "w")); -#else - f_ = popen(cmd_name.c_str(), "w"); -#endif - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for writing, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't make the - // destructor try to close the stream when - // we're done. - (binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - os_ = new std::ostream(fb_); -#else - os_ = new std::ofstream(f_); -#endif - return os_->good(); - } - } - - virtual std::ostream &Stream() { - if (os_ == NULL) - KALDI_ERR << "PipeOutputImpl::Stream()," - " object not initialized."; - // I believe this error can only arise from coding error. 
- return *os_; - } - - virtual bool Close() { - if (os_ == NULL) KALDI_ERR << "PipeOutputImpl::Close(), file is not open."; - bool ok = true; - os_->flush(); - if (os_->fail()) ok = false; - delete os_; - os_ = NULL; - int status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return ok; - } - virtual ~PipeOutputImpl() { - if (os_) { - if (!Close()) - KALDI_ERR << "Error writing to pipe " << PrintableWxfilename(filename_); - } - } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::ostream *os_; -}; - -class InputImplBase { - public: - // Open will open it as a file, and return true on success. - // May be called twice only for kOffsetFileInput (otherwise, - // if called twice, we just create a new Input object, to avoid - // having to deal with the extra hassle of reopening with the - // same object. - // Note that we will to call Open with true (binary) for - // for text-mode Kaldi files; the only actual text-mode input - // is for non-Kaldi files. - virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::istream &Stream() = 0; - virtual int32 Close() = 0; // We only need to check failure in the case of - // kPipeInput. - // on close for input streams. - virtual InputType MyType() = 0; // Because if it's kOffsetFileInput, we may - // call Open twice - // (has efficiency benefits). - - virtual ~InputImplBase() {} -}; - -class FileInputImpl : public InputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (is_.is_open()) - KALDI_ERR << "FileInputImpl::Open(), " - << "open called on already open file."; - is_.open( - MapOsPath(filename).c_str(), - binary ? std::ios_base::in | std::ios_base::binary : std::ios_base::in); - return is_.is_open(); - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kFileInput; } - - virtual ~FileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::ifstream is_; -}; - -class StandardInputImpl : public InputImplBase { - public: - StandardInputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardInputImpl::Open(), " - "open called on already open file."; - is_open_ = true; -#ifdef _MSC_VER - _setmode(_fileno(stdin), binary ? _O_BINARY : _O_TEXT); -#endif - return true; // Don't check good() because would be false if - // eof, which may be valid input. - } - - virtual std::istream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. 
- return std::cin; - } - - virtual InputType MyType() { return kStandardInput; } - - virtual int32 Close() { - if (!is_open_) KALDI_ERR << "StandardInputImpl::Close(), file is not open."; - is_open_ = false; - return 0; - } - virtual ~StandardInputImpl() {} - - private: - bool is_open_; -}; - -class PipeInputImpl : public InputImplBase { - public: - PipeInputImpl() : f_(NULL), is_(NULL) {} - - virtual bool Open(const std::string &rxfilename, bool binary) { - filename_ = rxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(rxfilename.length() != 0 && - rxfilename[rxfilename.length() - 1] == - '|'); // should end with '|' - std::string cmd_name(rxfilename, 0, rxfilename.length() - 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "rb" : "r")); -#else - f_ = popen(cmd_name.c_str(), "r"); -#endif - - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for reading, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't lead the - // destructor to close the stream. - (binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - is_ = new std::istream(fb_); -#else - is_ = new std::ifstream(f_); -#endif - if (is_->fail() || is_->bad()) return false; - if (is_->eof()) { - KALDI_WARN << "Pipe opened with command " - << PrintableRxfilename(rxfilename) << " is empty."; - // don't return false: empty may be valid. - } - return true; - } - } - - virtual std::istream &Stream() { - if (is_ == NULL) - KALDI_ERR << "PipeInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return *is_; - } - - virtual int32 Close() { - if (is_ == NULL) KALDI_ERR << "PipeInputImpl::Close(), file is not open."; - delete is_; - is_ = NULL; - int32 status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return status; - } - virtual ~PipeInputImpl() { - if (is_) Close(); - } - virtual InputType MyType() { return kPipeInput; } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::istream *is_; -}; - -/* -#else - -// Just have an empty implementation of the pipe input that crashes if -// called. -class PipeInputImpl: public InputImplBase { - public: - PipeInputImpl() { KALDI_ASSERT(0 && "Pipe input not yet supported on this - platform."); } - virtual bool Open(const std::string, bool) { return 0; } - virtual std::istream &Stream() const { return NULL; } - virtual void Close() {} - virtual InputType MyType() { return kPipeInput; } -}; - -#endif -*/ - -class OffsetFileInputImpl : public InputImplBase { - // This class is a bit more complicated than the - - public: - // splits a filename like /my/file:123 into /my/file and the - // number 123. Crashes if not this format. - static void SplitFilename(const std::string &rxfilename, - std::string *filename, size_t *offset) { - size_t pos = rxfilename.find_last_of(':'); - KALDI_ASSERT(pos != std::string::npos); // would indicate error in calling - // code, as the filename is supposed to be of the correct form at this - // point. 
- *filename = std::string(rxfilename, 0, pos); - std::string number(rxfilename, pos + 1); - bool ans = ConvertStringToInteger(number, offset); - if (!ans) - KALDI_ERR << "Cannot get offset from filename " << rxfilename - << " (possibly you compiled in 32-bit and have a >32-bit" - << " byte offset into a file; you'll have to compile 64-bit."; - } - - bool Seek(size_t offset) { - size_t cur_pos = is_.tellg(); - if (cur_pos == offset) { - return true; - } else if (cur_pos < offset && cur_pos + 100 > offset) { - // We're close enough that it may be faster to just - // read that data, rather than seek. - for (size_t i = cur_pos; i < offset; i++) is_.get(); - return (is_.tellg() == std::streampos(offset)); - } - // Try to actually seek. - is_.seekg(offset, std::ios_base::beg); - if (is_.fail()) { // failbit or badbit is set [error happened] - is_.close(); - return false; // failure. - } else { - is_.clear(); // Clear any failure bits (e.g. eof). - return true; // success. - } - } - - // This Open routine is unusual in that it is designed to work even - // if it was already open. This for efficiency when seeking multiple - // times. - virtual bool Open(const std::string &rxfilename, bool binary) { - if (is_.is_open()) { - // We are opening when we have an already-open file. - // We may have to seek within this file, or else close it and - // open a different one. - std::string tmp_filename; - size_t offset; - SplitFilename(rxfilename, &tmp_filename, &offset); - if (tmp_filename == filename_ && binary == binary_) { // Just seek - is_.clear(); // clear fail bit, etc. - return Seek(offset); - } else { - is_.close(); // don't bother checking error status of is_. - filename_ = tmp_filename; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } else { - size_t offset; - SplitFilename(rxfilename, &filename_, &offset); - binary_ = binary; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kOffsetFileInput; } - - virtual ~OffsetFileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::string filename_; // the actual filename - bool binary_; // true if was opened in binary mode. - std::ifstream is_; -}; - -Output::Output(const std::string &wxfilename, bool binary, bool write_header) - : impl_(NULL) { - if (!Open(wxfilename, binary, write_header)) { - if (impl_) { - delete impl_; - impl_ = NULL; - } - KALDI_ERR << "Error opening output stream " - << PrintableWxfilename(wxfilename); - } -} - -bool Output::Close() { - if (!impl_) { - return false; // error to call Close if not open. 
- } else { - bool ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } -} - -Output::~Output() { - if (impl_) { - bool ok = impl_->Close(); - delete impl_; - impl_ = NULL; - if (!ok) - KALDI_ERR << "Error closing output file " - << PrintableWxfilename(filename_) - << (ClassifyWxfilename(filename_) == kFileOutput - ? " (disk full?)" - : ""); - } -} - -std::ostream &Output::Stream() { // will throw if not open; else returns - // stream. - if (!impl_) KALDI_ERR << "Output::Stream() called but not open."; - return impl_->Stream(); -} - -bool Output::Open(const std::string &wxfn, bool binary, bool header) { - if (IsOpen()) { - if (!Close()) { // Throw here rather than return status, as it's an error - // about something else: if the user wanted to avoid the exception he/she - // could have called Close(). - KALDI_ERR << "Output::Open(), failed to close output stream: " - << PrintableWxfilename(filename_); - } - } - - filename_ = wxfn; - - OutputType type = ClassifyWxfilename(wxfn); - KALDI_ASSERT(impl_ == NULL); - - if (type == kFileOutput) { - impl_ = new FileOutputImpl(); - } else if (type == kStandardOutput) { - impl_ = new StandardOutputImpl(); - } else if (type == kPipeOutput) { - impl_ = new PipeOutputImpl(); - } else { // type == kNoOutput - KALDI_WARN << "Invalid output filename format " - << PrintableWxfilename(wxfn); - return false; - } - if (!impl_->Open(wxfn, binary)) { - delete impl_; - impl_ = NULL; - return false; // failed to open. - } else { // successfully opened it. - if (header) { - InitKaldiOutputStream(impl_->Stream(), binary); - bool ok = impl_->Stream().good(); // still OK? - if (!ok) { - delete impl_; - impl_ = NULL; - return false; - } - return true; - } else { - return true; - } - } -} - -Input::Input(const std::string &rxfilename, bool *binary) : impl_(NULL) { - if (!Open(rxfilename, binary)) { - KALDI_ERR << "Error opening input stream " - << PrintableRxfilename(rxfilename); - } -} - -int32 Input::Close() { - if (impl_) { - int32 ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } else { - return 0; - } -} - -bool Input::OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary) { - InputType type = ClassifyRxfilename(rxfilename); - if (IsOpen()) { - // May have to close the stream first. - if (type == kOffsetFileInput && impl_->MyType() == kOffsetFileInput) { - // We want to use the same object to Open... this is in case - // the files are the same, so we can just seek. - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always open in binary. - delete impl_; - impl_ = NULL; - return false; - } - // read the binary header, if requested. - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; - } else { - Close(); - // and fall through to code below which actually opens the file. - } - } - if (type == kFileInput) { - impl_ = new FileInputImpl(); - } else if (type == kStandardInput) { - impl_ = new StandardInputImpl(); - } else if (type == kPipeInput) { - impl_ = new PipeInputImpl(); - } else if (type == kOffsetFileInput) { - impl_ = new OffsetFileInputImpl(); - } else { // type == kNoInput - KALDI_WARN << "Invalid input filename format " - << PrintableRxfilename(rxfilename); - return false; - } - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always read in binary. 
- delete impl_; - impl_ = NULL; - return false; - } - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; -} - -Input::~Input() { - if (impl_) Close(); -} - -std::istream &Input::Stream() { - if (!IsOpen()) KALDI_ERR << "Input::Stream(), not open."; - return impl_->Stream(); -} - -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-io.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-io.h deleted file mode 100644 index 2175ca8f89ed5f3e3bade26528e924208df692c6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-io.h +++ /dev/null @@ -1,266 +0,0 @@ -// util/kaldi-io.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
-#ifndef KALDI_UTIL_KALDI_IO_H_ -#define KALDI_UTIL_KALDI_IO_H_ - -#ifdef _MSC_VER -#include -#include -#endif -#include // For isspace. -#include -#include -#include "base/kaldi-common.h" -// #include "matrix/kaldi-matrix.h" - -namespace kaldi { - -class OutputImplBase; // Forward decl; defined in a .cc file -class InputImplBase; // Forward decl; defined in a .cc file - -/// \addtogroup io_group -/// @{ - -// The Output and Input classes handle stream-opening for "extended" filenames -// that include actual files, standard-input/standard-output, pipes, and -// offsets into actual files. They also handle reading and writing the -// binary-mode headers for Kaldi files, where applicable. The classes have -// versions of the Open routines that throw and do not throw, depending whether -// the calling code wants to catch the errors or not; there are also versions -// that write (or do not write) the Kaldi binary-mode header that says if it's -// binary mode. Generally files that contain Kaldi objects will have the header -// on, so we know upon reading them whether they have the header. So you would -// use the OpenWithHeader routines for these (or the constructor); but other -// types of objects (e.g. FSTs) would have files without a header so you would -// use OpenNoHeader. - -// We now document the types of extended filenames that we use. -// -// A "wxfilename" is an extended filename for writing. It can take three forms: -// (1) Filename: e.g. "/some/filename", "./a/b/c", "c:\Users\dpovey\My -// Documents\\boo" -// (whatever the actual file-system interprets) -// (2) Standard output: "" or "-" -// (3) A pipe: e.g. "| gzip -c > /tmp/abc.gz" -// -// -// A "rxfilename" is an extended filename for reading. It can take four forms: -// (1) An actual filename, whatever the file-system can read, e.g. "/my/file". -// (2) Standard input: "" or "-" -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) An offset into a file, e.g.: "/mnt/blah/data/1.ark:24871" -// [these are created by the Table and TableWriter classes; I may also write -// a program that creates them for arbitrary files] -// - -// Typical usage: -// ... -// bool binary; -// MyObject.Write(Output(some_filename, binary).Stream(), binary); -// -// ... more extensive example: -// { -// Output ko(some_filename, binary); -// MyObject1.Write(ko.Stream(), binary); -// MyObject2.Write(ko.Stream(), binary); -// } - -enum OutputType { kNoOutput, kFileOutput, kStandardOutput, kPipeOutput }; - -/// ClassifyWxfilename interprets filenames as follows: -/// - kNoOutput: invalid filenames (leading or trailing space, things that look -/// like wspecifiers and rspecifiers or like pipes to read from with leading -/// |. -/// - kFileOutput: Normal filenames -/// - kStandardOutput: The empty string or "-", interpreted as standard output -/// - kPipeOutput: pipes, e.g. "| gzip -c > /tmp/abc.gz" -OutputType ClassifyWxfilename(const std::string &wxfilename); - -enum InputType { - kNoInput, - kFileInput, - kStandardInput, - kOffsetFileInput, - kPipeInput -}; - -/// ClassifyRxfilenames interprets filenames for reading as follows: -/// - kNoInput: invalid filenames (leading or trailing space, things that -/// look like wspecifiers and rspecifiers or pipes to write to -/// with trailing |. -/// - kFileInput: normal filenames -/// - kStandardInput: the empty string or "-" -/// - kPipeInput: e.g. "gunzip -c /tmp/abc.gz |" -/// - kOffsetFileInput: offsets into files, e.g. 
/some/filename:12970 -InputType ClassifyRxfilename(const std::string &rxfilename); - -class Output { - public: - // The normal constructor, provided for convenience. - // Equivalent to calling with default constructor then Open() - // with these arguments. - Output(const std::string &filename, bool binary, bool write_header = true); - - Output() : impl_(NULL) {} - - /// This opens the stream, with the given mode (binary or text). It returns - /// true on success and false on failure. However, it will throw if something - /// was already open and could not be closed (to avoid this, call Close() - /// first. if write_header == true and binary == true, it writes the Kaldi - /// binary-mode header ('\0' then 'B'). You may call Open even if it is - /// already open; it will close the existing stream and reopen (however if - /// closing the old stream failed it will throw). - bool Open(const std::string &wxfilename, bool binary, bool write_header); - - inline bool IsOpen(); // return true if we have an open stream. Does not - // imply stream is good for writing. - - std::ostream &Stream(); // will throw if not open; else returns stream. - - // Close closes the stream. Calling Close is never necessary unless you - // want to avoid exceptions being thrown. There are times when calling - // Close will hurt efficiency (basically, when using offsets into files, - // and using the same Input object), - // but most of the time the user won't be doing this directly, it will - // be done in kaldi-table.{h, cc}, so you don't have to worry about it. - bool Close(); - - // This will throw if stream could not be closed (to check error status, - // call Close()). - ~Output(); - - private: - OutputImplBase *impl_; // non-NULL if open. - std::string filename_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Output); -}; - -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject.Read(ki.Stream(), binary_in); -// -// ... more extensive example: -// -// { -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject1.Read(ki.Stream(), &binary_in); -// MyObject2.Write(ki.Stream(), &binary_in); -// } -// Note that to catch errors you need to use try.. catch. -// Input communicates errors by throwing exceptions. - -// Input interprets four kinds of filenames: -// (1) Normal filenames -// (2) The empty string or "-", interpreted as standard output -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) Offsets into [real] files, e.g. "/my/filename:12049" -// The last one has no correspondence in Output. - -class Input { - public: - /// The normal constructor. Opens the stream in binary mode. - /// Equivalent to calling the default constructor followed by Open(); then, if - /// binary != NULL, it calls ReadHeader(), putting the output in "binary"; it - /// throws on error. - explicit Input(const std::string &rxfilename, bool *contents_binary = NULL); - - Input() : impl_(NULL) {} - - // Open opens the stream for reading (the mode, where relevant, is binary; use - // OpenTextMode for text-mode, we made this a separate function rather than a - // boolean argument, to avoid confusion with Kaldi's text/binary distinction, - // since reading in the file system's text mode is unusual.) If - // contents_binary != NULL, it reads the binary-mode header and puts it in the - // "binary" variable. Returns true on success. If it returns false it will - // not be open. 
You may call Open even if it is already open; it will close - // the existing stream and reopen (however if closing the old stream failed it - // will throw). - inline bool Open(const std::string &rxfilename, bool *contents_binary = NULL); - - // As Open but (if the file system has text/binary modes) opens in text mode; - // you shouldn't ever have to use this as in Kaldi we read even text files in - // binary mode (and ignore the \r). - inline bool OpenTextMode(const std::string &rxfilename); - - // Return true if currently open for reading and Stream() will - // succeed. Does not guarantee that the stream is good. - inline bool IsOpen(); - - // It is never necessary or helpful to call Close, except if - // you are concerned about to many filehandles being open. - // Close does not throw. It returns the exit code as int32 - // in the case of a pipe [kPipeInput], and always zero otherwise. - int32 Close(); - - // Returns the underlying stream. Throws if !IsOpen() - std::istream &Stream(); - - // Destructor does not throw: input streams may legitimately fail so we - // don't worry about the status when we close them. - ~Input(); - - private: - bool OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary); - InputImplBase *impl_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Input); -}; - -template -void ReadKaldiObject(const std::string &filename, C *c) { - bool binary_in; - Input ki(filename, &binary_in); - c->Read(ki.Stream(), binary_in); -} - -// Specialize the template for reading matrices, because we want to be able to -// support reading 'ranges' (row and column ranges), like foo.mat[10:20]. -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); -// -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); - -template -inline void WriteKaldiObject(const C &c, const std::string &filename, - bool binary) { - Output ko(filename, binary); - c.Write(ko.Stream(), binary); -} - -/// PrintableRxfilename turns the rxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard input". -std::string PrintableRxfilename(const std::string &rxfilename); - -/// PrintableWxfilename turns the wxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard output". -std::string PrintableWxfilename(const std::string &wxfilename); - -/// @} - -} // end namespace kaldi. - -#include "util/kaldi-io-inl.h" - -#endif // KALDI_UTIL_KALDI_IO_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-pipebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-pipebuf.h deleted file mode 100644 index bcee80ccb1a6fa8ce3195483ac144c5ff66d2f89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/kaldi-pipebuf.h +++ /dev/null @@ -1,86 +0,0 @@ -// util/kaldi-pipebuf.h - -// Copyright 2009-2011 Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -/** @file kaldi-pipebuf.h - * This is an Kaldi C++ Library header. - */ - -#ifndef KALDI_UTIL_KALDI_PIPEBUF_H_ -#define KALDI_UTIL_KALDI_PIPEBUF_H_ - -#include -#if !defined(_LIBCPP_VERSION) // libc++ -#include -#else -#include "util/basic-filebuf.h" -#endif - -namespace kaldi { -// This class provides a way to initialize a filebuf with a FILE* pointer -// directly; it will not close the file pointer when it is deleted. -// The C++ standard does not allow implementations of C++ to provide -// this constructor within basic_filebuf, which makes it hard to deal -// with pipes using completely native C++. This is a workaround - -#ifdef _MSC_VER -#elif defined(_LIBCPP_VERSION) // libc++ -template > -class basic_pipebuf : public basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : basic_filebuf() { - this->open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - } -}; // class basic_pipebuf -#else -template > -class basic_pipebuf : public std::basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : std::basic_filebuf() { - this->_M_file.sys_open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - this->_M_mode = mode; - this->_M_buf_size = BUFSIZ; - this->_M_allocate_internal_buffer(); - this->_M_reading = false; - this->_M_writing = false; - this->_M_set_buffer(-1); - } -}; // class basic_pipebuf -#endif // _MSC_VER - -} // namespace kaldi - -#endif // KALDI_UTIL_KALDI_PIPEBUF_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/parse-options.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/parse-options.cc deleted file mode 100644 index 1f2ef844d28d67ed58d2e0c9d7c7b674e8209df8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/parse-options.cc +++ /dev/null @@ -1,636 +0,0 @@ -// util/parse-options.cc - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey); -// Frantisek Skala; Arnab Ghoshal -// Copyright 2013 Tanel Alumae -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -namespace kaldi { - -ParseOptions::ParseOptions(const std::string &prefix, OptionsItf *other) - : print_args_(false), help_(false), usage_(""), argc_(0), argv_(NULL) { - ParseOptions *po = dynamic_cast(other); - if (po != NULL && po->other_parser_ != NULL) { - // we get here if this constructor is used twice, recursively. - other_parser_ = po->other_parser_; - } else { - other_parser_ = other; - } - if (po != NULL && po->prefix_ != "") { - prefix_ = po->prefix_ + std::string(".") + prefix; - } else { - prefix_ = prefix; - } -} - -void ParseOptions::Register(const std::string &name, bool *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, int32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, uint32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, float *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, double *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, std::string *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -// old-style, used for registering application-specific parameters -template -void ParseOptions::RegisterTmpl(const std::string &name, T *ptr, - const std::string &doc) { - if (other_parser_ == NULL) { - this->RegisterCommon(name, ptr, doc, false); - } else { - KALDI_ASSERT(prefix_ != "" && - "Cannot use empty prefix when registering with prefix."); - std::string new_name = prefix_ + '.' + name; // name becomes prefix.name - other_parser_->Register(new_name, ptr, doc); - } -} - -// does the common part of the job of registering a parameter -template -void ParseOptions::RegisterCommon(const std::string &name, T *ptr, - const std::string &doc, bool is_standard) { - KALDI_ASSERT(ptr != NULL); - std::string idx = name; - NormalizeArgName(&idx); - if (doc_map_.find(idx) != doc_map_.end()) - KALDI_WARN << "Registering option twice, ignoring second time: " << name; - this->RegisterSpecific(name, idx, ptr, doc, is_standard); -} - -// used to register standard parameters (those that are present in all of the -// applications) -template -void ParseOptions::RegisterStandard(const std::string &name, T *ptr, - const std::string &doc) { - this->RegisterCommon(name, ptr, doc, true); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, bool *b, - const std::string &doc, bool is_standard) { - bool_map_[idx] = b; - doc_map_[idx] = - DocInfo(name, doc + " (bool, default = " + ((*b) ? 
"true)" : "false)"), - is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, int32 *i, - const std::string &doc, bool is_standard) { - int_map_[idx] = i; - std::ostringstream ss; - ss << doc << " (int, default = " << *i << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, uint32 *u, - const std::string &doc, bool is_standard) { - uint_map_[idx] = u; - std::ostringstream ss; - ss << doc << " (uint, default = " << *u << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, float *f, - const std::string &doc, bool is_standard) { - float_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (float, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, double *f, - const std::string &doc, bool is_standard) { - double_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (double, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, std::string *s, - const std::string &doc, bool is_standard) { - string_map_[idx] = s; - doc_map_[idx] = - DocInfo(name, doc + " (string, default = \"" + *s + "\")", is_standard); -} -void ParseOptions::DisableOption(const std::string &name) { - if (argv_ != NULL) - KALDI_ERR << "DisableOption must not be called after calling Read()."; - if (doc_map_.erase(name) == 0) - KALDI_ERR << "Option " << name - << " was not registered so cannot be disabled: "; - bool_map_.erase(name); - int_map_.erase(name); - uint_map_.erase(name); - float_map_.erase(name); - double_map_.erase(name); - string_map_.erase(name); -} - -int ParseOptions::NumArgs() const { return positional_args_.size(); } - -std::string ParseOptions::GetArg(int i) const { - // use KALDI_ERR if code error - if (i < 1 || i > static_cast(positional_args_.size())) - KALDI_ERR << "ParseOptions::GetArg, invalid index " << i; - return positional_args_[i - 1]; -} - -// We currently do not support any other options. -enum ShellType { kBash = 0 }; - -// This can be changed in the code if it ever does need to be changed (as it's -// unlikely that one compilation of this tool-set would use both shells). -static ShellType kShellType = kBash; - -// Returns true if we need to escape a string before putting it into -// a shell (mainly thinking of bash shell, but should work for others) -// This is for the convenience of the user so command-lines that are -// printed out by ParseOptions::Read (with --print-args=true) are -// paste-able into the shell and will run. If you use a different type of -// shell, it might be necessary to change this function. -// But it's mostly a cosmetic issue as it basically affects how -// the program echoes its command-line arguments to the screen. -static bool MustBeQuoted(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - const char *c = str.c_str(); - if (*c == '\0') { - return true; // Must quote empty string - } else { - const char *ok_chars[2]; - - // These seem not to be interpreted as long as there are no other "bad" - // characters involved (e.g. 
"," would be interpreted as part of something - // like a{b,c}, but not on its own. - ok_chars[kBash] = "[]~#^_-+=:.,/"; - - // Just want to make sure that a space character doesn't get automatically - // inserted here via an automated style-checking script, like it did before. - KALDI_ASSERT(!strchr(ok_chars[kBash], ' ')); - - for (; *c != '\0'; c++) { - // For non-alphanumeric characters we have a list of characters which - // are OK. All others are forbidden (this is easier since the shell - // interprets most non-alphanumeric characters). - if (!isalnum(*c)) { - const char *d; - for (d = ok_chars[st]; *d != '\0'; d++) - if (*c == *d) break; - // If not alphanumeric or one of the "ok_chars", it must be escaped. - if (*d == '\0') return true; - } - } - return false; // The string was OK. No quoting or escaping. - } -} - -// Returns a quoted and escaped version of "str" -// which has previously been determined to need escaping. -// Our aim is to print out the command line in such a way that if it's -// pasted into a shell of ShellType "st" (only bash for now), it -// will get passed to the program in the same way. -static std::string QuoteAndEscape(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - // For now we use the following rules: - // In the normal case, we quote with single-quote "'", and to escape - // a single-quote we use the string: '\'' (interpreted as closing the - // single-quote, putting an escaped single-quote from the shell, and - // then reopening the single quote). - char quote_char = '\''; - const char *escape_str = "'\\''"; // e.g. echo 'a'\''b' returns a'b - - // If the string contains single-quotes that would need escaping this - // way, and we determine that the string could be safely double-quoted - // without requiring any escaping, then we double-quote the string. - // This is the case if the characters "`$\ do not appear in the string. - // e.g. see http://www.redhat.com/mirrors/LDP/LDP/abs/html/quotingvar.html - const char *c_str = str.c_str(); - if (strchr(c_str, '\'') && !strpbrk(c_str, "\"`$\\")) { - quote_char = '"'; - escape_str = "\\\""; // should never be accessed. - } - - char buf[2]; - buf[1] = '\0'; - - buf[0] = quote_char; - std::string ans = buf; - const char *c = str.c_str(); - for (; *c != '\0'; c++) { - if (*c == quote_char) { - ans += escape_str; - } else { - buf[0] = *c; - ans += buf; - } - } - buf[0] = quote_char; - ans += buf; - return ans; -} - -// static function -std::string ParseOptions::Escape(const std::string &str) { - return MustBeQuoted(str, kShellType) ? QuoteAndEscape(str, kShellType) : str; -} - -int ParseOptions::Read(int argc, const char *const argv[]) { - argc_ = argc; - argv_ = argv; - std::string key, value; - int i; - if (argc > 0) { - // set global "const char*" g_program_name (name of the program) - // so it can be printed out in error messages; - // it's useful because often the stderr of different programs will - // be mixed together in the same log file. -#ifdef _MSC_VER - const char *c = strrchr(argv[0], '\\'); -#else - const char *c = strrchr(argv[0], '/'); -#endif - SetProgramName(c == NULL ? 
argv[0] : c + 1); - } - // first pass: look for config parameter, look for priority - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // a lone "--" marks the end of named options - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (key.compare("config") == 0) { - ReadConfigFile(value); - } - if (key.compare("help") == 0) { - PrintUsage(); - exit(0); - } - } - } - bool double_dash_seen = false; - // second pass: add the command line options - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // A lone "--" marks the end of named options. - // Skip that option and break the processing of named options - i += 1; - double_dash_seen = true; - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << argv[i]; - } - } else { - break; - } - } - - // process remaining arguments as positional - for (; i < argc; i++) { - if ((std::strcmp(argv[i], "--") == 0) && !double_dash_seen) { - double_dash_seen = true; - } else { - positional_args_.push_back(std::string(argv[i])); - } - } - - // if the user did not suppress this with --print-args = false.... - if (print_args_) { - std::ostringstream strm; - for (int j = 0; j < argc; j++) strm << Escape(argv[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } - return i; -} - -void ParseOptions::PrintUsage(bool print_command_line) { - std::cerr << '\n' << usage_ << '\n'; - DocMapType::iterator it; - // first we print application-specific options - bool app_specific_header_printed = false; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == false) { // application-specific option - if (app_specific_header_printed == false) { // header was not yet printed - std::cerr << "Options:" << '\n'; - app_specific_header_printed = true; - } - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - if (app_specific_header_printed == true) { - std::cerr << '\n'; - } - - // then the standard options - std::cerr << "Standard options:" << '\n'; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == true) { // we have standard option - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - std::cerr << '\n'; - if (print_command_line) { - std::ostringstream strm; - strm << "Command line was: "; - for (int j = 0; j < argc_; j++) strm << Escape(argv_[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } -} - -void ParseOptions::PrintConfig(std::ostream &os) { - os << '\n' << "[[ Configuration of UI-Registered options ]]" << '\n'; - std::string key; - DocMapType::iterator it; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - key = it->first; - os << it->second.name_ << " = "; - if (bool_map_.end() != bool_map_.find(key)) { - os << (*bool_map_[key] ? 
"true" : "false"); - } else if (int_map_.end() != int_map_.find(key)) { - os << (*int_map_[key]); - } else if (uint_map_.end() != uint_map_.find(key)) { - os << (*uint_map_[key]); - } else if (float_map_.end() != float_map_.find(key)) { - os << (*float_map_[key]); - } else if (double_map_.end() != double_map_.find(key)) { - os << (*double_map_[key]); - } else if (string_map_.end() != string_map_.find(key)) { - os << "'" << *string_map_[key] << "'"; - } else { - KALDI_ERR << "PrintConfig: unrecognized option " << key << "[code error]"; - } - os << '\n'; - } - os << '\n'; -} - -void ParseOptions::ReadConfigFile(const std::string &filename) { - std::ifstream is(filename.c_str(), std::ifstream::in); - if (!is.good()) { - KALDI_ERR << "Cannot open config file: " << filename; - } - - std::string line, key, value; - int32 line_number = 0; - while (std::getline(is, line)) { - line_number++; - // trim out the comments - size_t pos; - if ((pos = line.find_first_of('#')) != std::string::npos) { - line.erase(pos); - } - // skip empty lines - Trim(&line); - if (line.length() == 0) continue; - - if (line.substr(0, 2) != "--") { - KALDI_ERR << "Reading config file " << filename << ": line " - << line_number << " does not look like a line " - << "from a Kaldi command-line program's config file: should " - << "be of the form --x=y. Note: config files intended to " - << "be sourced by shell scripts lack the '--'."; - } - - // parse option - bool has_equal_sign; - SplitLongArg(line, &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << line << " in config file " << filename; - } - } -} - -void ParseOptions::SplitLongArg(const std::string &in, std::string *key, - std::string *value, bool *has_equal_sign) { - KALDI_ASSERT(in.substr(0, 2) == "--"); // precondition. - size_t pos = in.find_first_of('=', 0); - if (pos == std::string::npos) { // we allow --option for bools - // defaults to empty. We handle this differently in different cases. - *key = in.substr(2, in.size() - 2); // 2 because starts with --. - *value = ""; - *has_equal_sign = false; - } else if (pos == 2) { // we also don't allow empty keys: --=value - PrintUsage(true); - KALDI_ERR << "Invalid option (no key): " << in; - } else { // normal case: --option=value - *key = in.substr(2, pos - 2); // 2 because starts with --. 
- *value = in.substr(pos + 1); - *has_equal_sign = true; - } -} - -void ParseOptions::NormalizeArgName(std::string *str) { - std::string out; - std::string::iterator it; - - for (it = str->begin(); it != str->end(); ++it) { - if (*it == '_') - out += '-'; // convert _ to - - else - out += std::tolower(*it); - } - *str = out; - - KALDI_ASSERT(str->length() > 0); -} - -bool ParseOptions::SetOption(const std::string &key, const std::string &value, - bool has_equal_sign) { - if (bool_map_.end() != bool_map_.find(key)) { - if (has_equal_sign && value == "") - KALDI_ERR << "Invalid option --" << key << "="; - *(bool_map_[key]) = ToBool(value); - } else if (int_map_.end() != int_map_.find(key)) { - *(int_map_[key]) = ToInt(value); - } else if (uint_map_.end() != uint_map_.find(key)) { - *(uint_map_[key]) = ToUint(value); - } else if (float_map_.end() != float_map_.find(key)) { - *(float_map_[key]) = ToFloat(value); - } else if (double_map_.end() != double_map_.find(key)) { - *(double_map_[key]) = ToDouble(value); - } else if (string_map_.end() != string_map_.find(key)) { - if (!has_equal_sign) - KALDI_ERR << "Invalid option --" << key << " (option format is --x=y)."; - *(string_map_[key]) = value; - } else { - return false; - } - return true; -} - -bool ParseOptions::ToBool(std::string str) { - std::transform(str.begin(), str.end(), str.begin(), ::tolower); - - // allow "" as a valid option for "true", so that --x is the same as --x=true - if ((str.compare("true") == 0) || (str.compare("t") == 0) || - (str.compare("1") == 0) || (str.compare("") == 0)) { - return true; - } - if ((str.compare("false") == 0) || (str.compare("f") == 0) || - (str.compare("0") == 0)) { - return false; - } - // if it is neither true nor false: - PrintUsage(true); - KALDI_ERR << "Invalid format for boolean argument [expected true or false]: " - << str; - return false; // never reached -} - -int32 ParseOptions::ToInt(const std::string &str) { - int32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -uint32 ParseOptions::ToUint(const std::string &str) { - uint32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -float ParseOptions::ToFloat(const std::string &str) { - float ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -double ParseOptions::ToDouble(const std::string &str) { - double ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -// instantiate templates -template void ParseOptions::RegisterTmpl(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, int32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, float *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, double *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterStandard(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - int32 *ptr, - const std::string &doc); 
-template void ParseOptions::RegisterStandard(const std::string &name, - uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - float *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - double *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterCommon(const std::string &name, bool *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, int32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, uint32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, float *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, double *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, - std::string *ptr, - const std::string &doc, - bool is_standard); - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/parse-options.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/parse-options.h deleted file mode 100644 index 93a060f4a411dfd63298a91bb313e0b66d337a75..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/parse-options.h +++ /dev/null @@ -1,265 +0,0 @@ -// util/parse-options.h - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Frantisek Skala; Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_PARSE_OPTIONS_H_ -#define KALDI_UTIL_PARSE_OPTIONS_H_ - -#include -#include -#include - -#include "base/kaldi-common.h" -#include "itf/options-itf.h" - -namespace kaldi { - -/// The class ParseOptions is for parsing command-line options; see -/// \ref parse_options for more documentation. -class ParseOptions : public OptionsItf { - public: - explicit ParseOptions(const char *usage) - : print_args_(true), - help_(false), - usage_(usage), - argc_(0), - argv_(NULL), - prefix_(""), - other_parser_(NULL) { -#if !defined(_MSC_VER) && \ - !defined(__CYGWIN__) // This is just a convenient place to set the stderr - // to line - setlinebuf(stderr); // buffering mode, since it's called at program start. -#endif // This helps ensure different programs' output is not mixed up. 
- RegisterStandard("config", &config_, - "Configuration file to read (this " - "option may be repeated)"); - RegisterStandard("print-args", &print_args_, - "Print the command line arguments (to stderr)"); - RegisterStandard("help", &help_, "Print out usage message"); - RegisterStandard("verbose", &g_kaldi_verbose_level, - "Verbose level (higher->more logging)"); - } - - /** - This is a constructor for the special case where some options are - registered with a prefix to avoid conflicts. The object thus created will - only be used temporarily to register an options class with the original - options parser (which is passed as the *other pointer) using the given - prefix. It should not be used for any other purpose, and the prefix must - not be the empty string. It seems to be the least bad way of implementing - options with prefixes at this point. - Example of usage is: - ParseOptions po; // original ParseOptions object - ParseOptions po_mfcc("mfcc", &po); // object with prefix. - MfccOptions mfcc_opts; - mfcc_opts.Register(&po_mfcc); - The options will now get registered as, e.g., --mfcc.frame-shift=10.0 - instead of just --frame-shift=10.0 - */ - ParseOptions(const std::string &prefix, OptionsItf *other); - - ~ParseOptions() {} - - // Methods from the interface - void Register(const std::string &name, bool *ptr, const std::string &doc); - void Register(const std::string &name, int32 *ptr, const std::string &doc); - void Register(const std::string &name, uint32 *ptr, const std::string &doc); - void Register(const std::string &name, float *ptr, const std::string &doc); - void Register(const std::string &name, double *ptr, const std::string &doc); - void Register(const std::string &name, std::string *ptr, - const std::string &doc); - - /// If called after registering an option and before calling - /// Read(), disables that option from being used. Will crash - /// at runtime if that option had not been registered. - void DisableOption(const std::string &name); - - /// This one is used for registering standard parameters of all the programs - template - void RegisterStandard(const std::string &name, T *ptr, - const std::string &doc); - - /** - Parses the command line options and fills the ParseOptions-registered - variables. This must be called after all the variables were registered!!! - - Initially the variables have implicit values, - then the config file values are set-up, - finally the command line values given. - Returns the first position in argv that was not used. - [typically not useful: use NumParams() and GetParam(). ] - */ - int Read(int argc, const char *const *argv); - - /// Prints the usage documentation [provided in the constructor]. - void PrintUsage(bool print_command_line = false); - /// Prints the actual configuration of all the registered variables - void PrintConfig(std::ostream &os); - - /// Reads the options values from a config file. Must be called after - /// registering all options. This is usually used internally after the - /// standard --config option is used, but it may also be called from a - /// program. - void ReadConfigFile(const std::string &filename); - - /// Number of positional parameters (c.f. argc-1). - int NumArgs() const; - - /// Returns one of the positional parameters; 1-based indexing for argc/argv - /// compatibility. Will crash if param is not >=1 and <=NumArgs(). - std::string GetArg(int param) const; - - std::string GetOptArg(int param) const { - return (param <= NumArgs() ? 
GetArg(param) : ""); - } - - /// The following function will return a possibly quoted and escaped - /// version of "str", according to the current shell. Currently - /// this is just hardwired to bash. It's useful for debug output. - static std::string Escape(const std::string &str); - - private: - /// Template to register various variable types, - /// used for program-specific parameters - template - void RegisterTmpl(const std::string &name, T *ptr, const std::string &doc); - - // Following functions do just the datatype-specific part of the job - /// Register boolean variable - void RegisterSpecific(const std::string &name, const std::string &idx, - bool *b, const std::string &doc, bool is_standard); - /// Register int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - int32 *i, const std::string &doc, bool is_standard); - /// Register unsinged int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - uint32 *u, const std::string &doc, bool is_standard); - /// Register float variable - void RegisterSpecific(const std::string &name, const std::string &idx, - float *f, const std::string &doc, bool is_standard); - /// Register double variable [useful as we change BaseFloat type]. - void RegisterSpecific(const std::string &name, const std::string &idx, - double *f, const std::string &doc, bool is_standard); - /// Register string variable - void RegisterSpecific(const std::string &name, const std::string &idx, - std::string *s, const std::string &doc, - bool is_standard); - - /// Does the actual job for both kinds of parameters - /// Does the common part of the job for all datatypes, - /// then calls RegisterSpecific - template - void RegisterCommon(const std::string &name, T *ptr, const std::string &doc, - bool is_standard); - - /// Set option with name "key" to "value"; will crash if can't do it. - /// "has_equal_sign" is used to allow --x for a boolean option x, - /// and --y=, for a string option y. - bool SetOption(const std::string &key, const std::string &value, - bool has_equal_sign); - - bool ToBool(std::string str); - int32 ToInt(const std::string &str); - uint32 ToUint(const std::string &str); - float ToFloat(const std::string &str); - double ToDouble(const std::string &str); - - // maps for option variables - std::map bool_map_; - std::map int_map_; - std::map uint_map_; - std::map float_map_; - std::map double_map_; - std::map string_map_; - - /** - Structure for options' documentation - */ - struct DocInfo { - DocInfo() {} - DocInfo(const std::string &name, const std::string &usemsg) - : name_(name), use_msg_(usemsg), is_standard_(false) {} - DocInfo(const std::string &name, const std::string &usemsg, - bool is_standard) - : name_(name), use_msg_(usemsg), is_standard_(is_standard) {} - - std::string name_; - std::string use_msg_; - bool is_standard_; - }; - typedef std::map DocMapType; - DocMapType doc_map_; ///< map for the documentation - - bool print_args_; ///< variable for the implicit --print-args parameter - bool help_; ///< variable for the implicit --help parameter - std::string config_; ///< variable for the implicit --config parameter - std::vector positional_args_; - const char *usage_; - int argc_; - const char *const *argv_; - - /// These members are not normally used. 
They are only used when the object - /// is constructed with a prefix - std::string prefix_; - OptionsItf *other_parser_; - - protected: - /// SplitLongArg parses an argument of the form --a=b, --a=, or --a, - /// and sets "has_equal_sign" to true if an equals-sign was parsed.. - /// this is needed in order to correctly allow --x for a boolean option - /// x, and --y= for a string option y, and to disallow --x= and --y. - void SplitLongArg(const std::string &in, std::string *key, std::string *value, - bool *has_equal_sign); - - void NormalizeArgName(std::string *str); -}; - -/// This template is provided for convenience in reading config classes from -/// files; this is not the standard way to read configuration options, but may -/// occasionally be needed. This function assumes the config has a function -/// "void Register(OptionsItf *opts)" which it can call to register the -/// ParseOptions object. -template -void ReadConfigFromFile(const std::string &config_filename, C *c) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << config_filename << "'"; - ParseOptions po(usage_str.str().c_str()); - c->Register(&po); - po.ReadConfigFile(config_filename); -} - -/// This variant of the template ReadConfigFromFile is for if you need to read -/// two config classes from the same file. -template -void ReadConfigsFromFile(const std::string &conf, C1 *c1, C2 *c2) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << conf << "'"; - ParseOptions po(usage_str.str().c_str()); - c1->Register(&po); - c2->Register(&po); - po.ReadConfigFile(conf); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_PARSE_OPTIONS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/simple-io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/simple-io-funcs.cc deleted file mode 100644 index 5ace601b6a2bb186dec78b0b25cb5a3227c48bc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/simple-io-funcs.cc +++ /dev/null @@ -1,80 +0,0 @@ -// util/simple-io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#include "util/simple-io-funcs.h" -#include "util/text-utils.h" - -namespace kaldi { - -bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. 
- if (!ko.Open(wxfilename, false, false)) return false; - for (size_t i = 0; i < list.size(); i++) ko.Stream() << list[i] << '\n'; - return ko.Close(); -} - -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - int32 i; - list->clear(); - while (!(is >> i).fail()) list->push_back(i); - is >> std::ws; - return is.eof(); // should be eof, or junk at end of file. -} - -bool WriteIntegerVectorVectorSimple( - const std::string &wxfilename, - const std::vector > &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. - if (!ko.Open(wxfilename, false, false)) return false; - std::ostream &os = ko.Stream(); - for (size_t i = 0; i < list.size(); i++) { - for (size_t j = 0; j < list[i].size(); j++) { - os << list[i][j]; - if (j + 1 < list[i].size()) os << ' '; - } - os << '\n'; - } - return ko.Close(); -} - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - list->clear(); - std::string line; - while (std::getline(is, line)) { - std::vector v; - if (!SplitStringToIntegers(line, " \t\r", true, &v)) { - list->clear(); - return false; - } - list->push_back(v); - } - return is.eof(); // if we're not at EOF, something weird happened. -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/simple-io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/simple-io-funcs.h deleted file mode 100644 index 1ead12790ba9bd6a44ccdff855918270191b8ebd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/simple-io-funcs.h +++ /dev/null @@ -1,61 +0,0 @@ -// util/simple-io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_SIMPLE_IO_FUNCS_H_ -#define KALDI_UTIL_SIMPLE_IO_FUNCS_H_ - -#include -#include -#include "util/kaldi-io.h" - -// This header contains some utilities for reading some common, simple text -// formats:integers in files, one per line, and integers in files, possibly -// multiple per line. these are not really fully native Kaldi formats; they are -// mostly for small files that might be generated by scripts, and can be read -// all at one time. for longer files of this type, we would probably use the -// Table code. - -namespace kaldi { - -/// WriteToList attempts to write this list of integers, one per line, -/// to the given file, in text format. -/// returns true if succeeded. 
-bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &v); - -/// ReadFromList attempts to read this list of integers, one per line, -/// from the given file, in text format. -/// returns true if succeeded. -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *v); - -// This is a file format like: -// 1 2 -// 3 -// -// 4 5 6 -// etc. -bool WriteIntegerVectorVectorSimple(const std::string &wxfilename, - const std::vector > &v); - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *v); - -} // end namespace kaldi. - -#endif // KALDI_UTIL_SIMPLE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/stl-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/stl-utils.h deleted file mode 100644 index 8a29cd582c77b3078277aa9713b8676032bbc5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/stl-utils.h +++ /dev/null @@ -1,310 +0,0 @@ -// util/stl-utils.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_STL_UTILS_H_ -#define KALDI_UTIL_STL_UTILS_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -using std::unordered_map; -using std::unordered_set; - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Sorts and uniq's (removes duplicates) from a vector. -template -inline void SortAndUniq(std::vector *vec) { - std::sort(vec->begin(), vec->end()); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Returns true if the vector is sorted. -template -inline bool IsSorted(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter < *iter) return false; - iter = next_iter; - } -} - -/// Returns true if the vector is sorted and contains each element -/// only once. -template -inline bool IsSortedAndUniq(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter <= *iter) return false; - iter = next_iter; - } -} - -/// Removes duplicate elements from a sorted list. -template -inline void Uniq(std::vector *vec) { // must be already sorted. 
- KALDI_PARANOID_ASSERT(IsSorted(*vec)); - KALDI_ASSERT(vec); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Copies the elements of a set to a vector. -template -void CopySetToVector(const std::set &s, std::vector *v) { - // copies members of s into v, in sorted order from lowest to highest - // (because the set was in sorted order). - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename std::set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -template -void CopySetToVector(const unordered_set &s, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename unordered_set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -/// Copies the (key, value) pairs in a map to a vector of pairs. -template -void CopyMapToVector(const std::map &m, - std::vector > *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector >::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = std::make_pair(miter->first, miter->second); - // do it like this because of const casting. - } -} - -/// Copies the keys in a map to a vector. -template -void CopyMapKeysToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->first; - } -} - -/// Copies the values in a map to a vector. -template -void CopyMapValuesToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->second; - } -} - -/// Copies the keys in a map to a set. -template -void CopyMapKeysToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) { - s->insert(s->end(), miter->first); - } -} - -/// Copies the values in a map to a set. -template -void CopyMapValuesToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) s->insert(s->end(), miter->second); -} - -/// Copies the contents of a vector to a set. -template -void CopyVectorToSet(const std::vector &v, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) s->insert(s->end(), *iter); - // s->end() is a hint in case v was sorted. will work regardless. -} - -/// Deletes any non-NULL pointers in the vector v, and sets -/// the corresponding entries of v to NULL -template -void DeletePointers(std::vector *v) { - KALDI_ASSERT(v != NULL); - typename std::vector::iterator iter = v->begin(), end = v->end(); - for (; iter != end; ++iter) { - if (*iter != NULL) { - delete *iter; - *iter = NULL; // set to NULL for extra safety. - } - } -} - -/// Returns true if the vector of pointers contains NULL pointers. 
-template -bool ContainsNullPointers(const std::vector &v) { - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) - if (*iter == static_cast(NULL)) return true; - return false; -} - -/// Copies the contents a vector of one type to a vector -/// of another type. -template -void CopyVectorToVector(const std::vector &vec_in, std::vector *vec_out) { - KALDI_ASSERT(vec_out != NULL); - vec_out->resize(vec_in.size()); - for (size_t i = 0; i < vec_in.size(); i++) - (*vec_out)[i] = static_cast(vec_in[i]); -} - -/// A hashing function-object for vectors. -template -struct VectorHasher { // hashing function for vector. - size_t operator()(const std::vector &x) const noexcept { - size_t ans = 0; - typename std::vector::const_iterator iter = x.begin(), end = x.end(); - for (; iter != end; ++iter) { - ans *= kPrime; - ans += *iter; - } - return ans; - } - VectorHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - } - - private: - static const int kPrime = 7853; -}; - -/// A hashing function-object for pairs of ints -template -struct PairHasher { // hashing function for pair - size_t operator()(const std::pair &x) const noexcept { - // 7853 was chosen at random from a list of primes. - return x.first + x.second * 7853; - } - PairHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int1); - KALDI_ASSERT_IS_INTEGER_TYPE(Int2); - } -}; - -/// A hashing function object for strings. -struct StringHasher { // hashing function for std::string - size_t operator()(const std::string &str) const noexcept { - size_t ans = 0, len = str.length(); - const char *c = str.c_str(), *end = c + len; - for (; c != end; c++) { - ans *= kPrime; - ans += *c; - } - return ans; - } - - private: - static const int kPrime = 7853; -}; - -/// Reverses the contents of a vector. -template -inline void ReverseVector(std::vector *vec) { - KALDI_ASSERT(vec != NULL); - size_t sz = vec->size(); - for (size_t i = 0; i < sz / 2; i++) std::swap((*vec)[i], (*vec)[sz - 1 - i]); -} - -/// Comparator object for pairs that compares only the first pair. -template -struct CompareFirstMemberOfPair { - inline bool operator()(const std::pair &p1, const std::pair &p2) { - return p1.first < p2.first; - } -}; - -/// For a vector of pair where I is an integer and F a floating-point or -/// integer type, this function sorts a vector of type vector > on -/// the I value and then merges elements with equal I values, summing these over -/// the F component and then removing any F component with zero value. This -/// is for where the vector of pairs represents a map from the integer to float -/// component, with an "adding" type of semantics for combining the elements. -template -inline void MergePairVectorSumming(std::vector > *vec) { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - CompareFirstMemberOfPair c; - std::sort(vec->begin(), vec->end(), c); // sort on 1st element. - typename std::vector >::iterator out = vec->begin(), - in = vec->begin(), - end = vec->end(); - // special case: while there is nothing to be changed, skip over - // initial input (avoids unnecessary copying). - while (in + 1 < end && in[0].first != in[1].first && in[0].second != 0.0) { - in++; - out++; - } - while (in < end) { - // We reach this point only at the first element of - // each stretch of identical .first elements. - *out = *in; - ++in; - while (in < end && in->first == out->first) { - out->second += in->second; // this is the merge operation. 
- ++in; - } - if (out->second != static_cast(0)) // Don't keep zero elements. - out++; - } - vec->erase(out, end); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_STL_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/text-utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/text-utils.cc deleted file mode 100644 index fd70889644f6b4e14793ddd4f5b0d71a66768699..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/text-utils.cc +++ /dev/null @@ -1,580 +0,0 @@ -// util/text-utils.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "util/text-utils.h" - -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out) { - KALDI_ASSERT(out != NULL); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - F f = 0; - if (!ConvertStringToReal(split[i], &f)) return false; - (*out)[i] = f; - } - return true; -} - -// Instantiate the template above for float and double. 
-template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); -template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out) { - std::string tmp_str; - for (size_t i = 0; i < vec_in.size(); i++) { - if (!omit_empty_strings || !vec_in[i].empty()) { - tmp_str.append(vec_in[i]); - if (i < vec_in.size() - 1) - if (!omit_empty_strings || !vec_in[i + 1].empty()) - tmp_str.append(delim); - } - } - str_out->swap(tmp_str); -} - -void Trim(std::string *str) { - const char *white_chars = " \t\n\r\f\v"; - - std::string::size_type pos = str->find_last_not_of(white_chars); - if (pos != std::string::npos) { - str->erase(pos + 1); - pos = str->find_first_not_of(white_chars); - if (pos != std::string::npos) str->erase(0, pos); - } else { - str->erase(str->begin(), str->end()); - } -} - -bool IsToken(const std::string &token) { - size_t l = token.length(); - if (l == 0) return false; - for (size_t i = 0; i < l; i++) { - unsigned char c = token[i]; - if ((!isprint(c) || isspace(c)) && (isascii(c) || c == (unsigned char)255)) - return false; - // The "&& (isascii(c) || c == 255)" was added so that we won't reject - // non-ASCII characters such as French characters with accents [except for - // 255 which is "nbsp", a form of space]. - } - return true; -} - -void SplitStringOnFirstSpace(const std::string &str, std::string *first, - std::string *rest) { - const char *white_chars = " \t\n\r\f\v"; - typedef std::string::size_type I; - const I npos = std::string::npos; - I first_nonwhite = str.find_first_not_of(white_chars); - if (first_nonwhite == npos) { - first->clear(); - rest->clear(); - return; - } - // next_white is first whitespace after first nonwhitespace. - I next_white = str.find_first_of(white_chars, first_nonwhite); - - if (next_white == npos) { // no more whitespace... - *first = std::string(str, first_nonwhite); - rest->clear(); - return; - } - I next_nonwhite = str.find_first_not_of(white_chars, next_white); - if (next_nonwhite == npos) { - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - rest->clear(); - return; - } - - I last_nonwhite = str.find_last_not_of(white_chars); - KALDI_ASSERT(last_nonwhite != npos); // or coding error. 
- - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - *rest = std::string(str, next_nonwhite, last_nonwhite + 1 - next_nonwhite); -} - -bool IsLine(const std::string &line) { - if (line.find('\n') != std::string::npos) return false; - if (line.empty()) return true; - if (isspace(*(line.begin()))) return false; - if (isspace(*(line.rbegin()))) return false; - std::string::const_iterator iter = line.begin(), end = line.end(); - for (; iter != end; iter++) - if (!isprint(*iter)) return false; - return true; -} - -template -class NumberIstream { - public: - explicit NumberIstream(std::istream &i) : in_(i) {} - - NumberIstream &operator>>(T &x) { - if (!in_.good()) return *this; - in_ >> x; - if (!in_.fail() && RemainderIsOnlySpaces()) return *this; - return ParseOnFail(&x); - } - - private: - std::istream &in_; - - bool RemainderIsOnlySpaces() { - if (in_.tellg() != std::istream::pos_type(-1)) { - std::string rem; - in_ >> rem; - - if (rem.find_first_not_of(' ') != std::string::npos) { - // there is not only spaces - return false; - } - } - - in_.clear(); - return true; - } - - NumberIstream &ParseOnFail(T *x) { - std::string str; - in_.clear(); - in_.seekg(0); - // If the stream is broken even before trying - // to read from it or if there are many tokens, - // it's pointless to try. - if (!(in_ >> str) || !RemainderIsOnlySpaces()) { - in_.setstate(std::ios_base::failbit); - return *this; - } - - std::map inf_nan_map; - // we'll keep just uppercase values. - inf_nan_map["INF"] = std::numeric_limits::infinity(); - inf_nan_map["+INF"] = std::numeric_limits::infinity(); - inf_nan_map["-INF"] = -std::numeric_limits::infinity(); - inf_nan_map["INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["+INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["-INFINITY"] = -std::numeric_limits::infinity(); - inf_nan_map["NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["+NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-NAN"] = -std::numeric_limits::quiet_NaN(); - // MSVC - inf_nan_map["1.#INF"] = std::numeric_limits::infinity(); - inf_nan_map["-1.#INF"] = -std::numeric_limits::infinity(); - inf_nan_map["1.#QNAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-1.#QNAN"] = -std::numeric_limits::quiet_NaN(); - - std::transform(str.begin(), str.end(), str.begin(), ::toupper); - - if (inf_nan_map.find(str) != inf_nan_map.end()) { - *x = inf_nan_map[str]; - } else { - in_.setstate(std::ios_base::failbit); - } - - return *this; - } -}; - -template -bool ConvertStringToReal(const std::string &str, T *out) { - std::istringstream iss(str); - - NumberIstream i(iss); - - i >> *out; - - if (iss.fail()) { - // Number conversion failed. - return false; - } - - return true; -} - -template bool ConvertStringToReal(const std::string &str, float *out); -template bool ConvertStringToReal(const std::string &str, double *out); - -/* - This function is a helper function of StringsApproxEqual. It should be - thought of as a recursive function-- it was designed that way-- but rather - than actually recursing (which would cause problems with stack overflow), we - just set the args and return to the start. - - The 'decimal_places_tolerance' argument is just passed in from outside, - see the documentation for StringsApproxEqual in text-utils.h to see an - explanation. The argument 'places_into_number' provides some information - about the strings 'a' and 'b' that precedes the current pointers. 
- For purposes of this comment, let's define the 'decimal' of a number - as the part that comes after the decimal point, e.g. in '99.123', - '123' would be the decimal. If 'places_into_number' is -1, it means - we're not currently inside some place like that (i.e. it's not the - case that we're pointing to the '1' or the '2' or the '3'). - If it's 0, then we'd be pointing to the first place after the decimal, - '1' in this case. Note if one of the numbers is shorter than the - other, like '99.123' versus '99.1234' and 'a' points to the first '3' - while 'b' points to the second '4', 'places_into_number' referes to the - shorter of the two, i.e. it would be 2 in this example. - - - */ -bool StringsApproxEqualInternal(const char *a, const char *b, - int32 decimal_places_tolerance, - int32 places_into_number) { -start: - char ca = *a, cb = *b; - if (ca == cb) { - if (ca == '\0') { - return true; - } else { - if (places_into_number >= 0) { - if (isdigit(ca)) { - places_into_number++; - } else { - places_into_number = -1; - } - } else { - if (ca == '.') { - places_into_number = 0; - } - } - a++; - b++; - goto start; - } - } else { - if (places_into_number >= decimal_places_tolerance && - (isdigit(ca) || isdigit(cb))) { - // we're potentially willing to accept this difference between the - // strings. - if (isdigit(ca)) a++; - if (isdigit(cb)) b++; - // we'll have advanced at least one of the two strings. - goto start; - } else if (places_into_number >= 0 && - ((ca == '0' && !isdigit(cb)) || (cb == '0' && !isdigit(ca)))) { - // this clause is designed to ensure that, for example, - // "0.1" would count the same as "0.100001". - if (ca == '0') - a++; - else - b++; - places_into_number++; - goto start; - } else { - return false; - } - } -} - -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_tolerance) { - return StringsApproxEqualInternal(a.c_str(), b.c_str(), - decimal_places_tolerance, -1); -} - -bool ConfigLine::ParseLine(const std::string &line) { - data_.clear(); - whole_line_ = line; - if (line.size() == 0) return false; // Empty line - size_t pos = 0, size = line.size(); - while (isspace(line[pos]) && pos < size) pos++; - if (pos == size) return false; // whitespace-only line - size_t first_token_start_pos = pos; - // first get first_token_. - while (!isspace(line[pos]) && pos < size) { - if (line[pos] == '=') { - // If the first block of non-whitespace looks like "foo-bar=...", - // then we ignore it: there is no initial token, and FirstToken() - // is empty. - pos = first_token_start_pos; - break; - } - pos++; - } - first_token_ = - std::string(line, first_token_start_pos, pos - first_token_start_pos); - // first_token_ is expected to be either empty or something like - // "component-node", which actually is a slightly more restrictive set of - // strings than IsValidName() checks for this is a convenient way to check it. - if (!first_token_.empty() && !IsValidName(first_token_)) return false; - - while (pos < size) { - if (isspace(line[pos])) { - pos++; - continue; - } - - // OK, at this point we know that we are pointing at nonspace. - size_t next_equals_sign = line.find_first_of("=", pos); - if (next_equals_sign == pos || next_equals_sign == std::string::npos) { - // we're looking for something like 'key=value'. If there is no equals - // sign, or it's not preceded by something, it's a parsing failure. - return false; - } - std::string key(line, pos, next_equals_sign - pos); - if (!IsValidName(key)) return false; - - // handle any quotes. 
we support key='blah blah' or key="foo bar". - // no escaping is supported. - if (line[next_equals_sign + 1] == '\'' || - line[next_equals_sign + 1] == '"') { - char my_quote = line[next_equals_sign + 1]; - size_t next_quote = line.find_first_of(my_quote, next_equals_sign + 2); - if (next_quote == std::string::npos) { // no matching quote was found. - KALDI_WARN << "No matching quote for " << my_quote - << " in config line '" << line << "'"; - return false; - } else { - std::string value(line, next_equals_sign + 2, - next_quote - next_equals_sign - 2); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = next_quote + 1; - continue; - } - } else { - // we want to be able to parse something like "... input=Offset(a, -1) - // foo=bar": in general, config values with spaces in them, even without - // quoting. - - size_t next_next_equals_sign = - line.find_first_of("=", next_equals_sign + 1), - terminating_space = size; - - if (next_next_equals_sign != - std::string::npos) { // found a later equals sign. - size_t preceding_space = - line.find_last_of(" \t", next_next_equals_sign); - if (preceding_space != std::string::npos && - preceding_space > next_equals_sign) - terminating_space = preceding_space; - } - while (isspace(line[terminating_space - 1]) && terminating_space > 0) - terminating_space--; - - std::string value(line, next_equals_sign + 1, - terminating_space - (next_equals_sign + 1)); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = terminating_space; - } - } - return true; -} - -bool ConfigLine::GetValue(const std::string &key, std::string *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - *value = (it->second).first; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, BaseFloat *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToReal((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, int32 *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToInteger((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, std::vector *value) { - KALDI_ASSERT(value != NULL); - value->clear(); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!SplitStringToIntegers((it->second).first, ":,", true, value)) { - // KALDI_WARN << "Bad option " << (it->second).first; - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, bool *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if ((it->second).first.size() == 0) return false; - switch (((it->second).first)[0]) { - case 'F': - case 'f': - *value = false; - break; - case 'T': - case 't': - *value = true; - break; - default: - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool 
ConfigLine::HasUnusedValues() const { - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) return true; - } - return false; -} - -std::string ConfigLine::UnusedValues() const { - std::string unused_str; - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) { - if (unused_str == "") - unused_str = it->first + "=" + (it->second).first; - else - unused_str += " " + it->first + "=" + (it->second).first; - } - } - return unused_str; -} - -// This is like ExpectToken but for two tokens, and it -// will either accept token1 and then token2, or just token2. -// This is useful in Read functions where the first token -// may already have been consumed. -// void ExpectOneOrTwoTokens(std::istream &is, bool binary, -// const std::string &token1, -// const std::string &token2) { -// KALDI_ASSERT(token1 != token2); -// std::string temp; -// ReadToken(is, binary, &temp); -// if (temp == token1) { -// ExpectToken(is, binary, token2); -// } else { -// if (temp != token2) { -// KALDI_ERR << "Expecting token " << token1 << " or " << token2 -// << " but got " << temp; -// } -// } -// } - -bool IsValidName(const std::string &name) { - if (name.size() == 0) return false; - for (size_t i = 0; i < name.size(); i++) { - if (i == 0 && !isalpha(name[i]) && name[i] != '_') return false; - if (!isalnum(name[i]) && name[i] != '_' && name[i] != '-' && name[i] != '.') - return false; - } - return true; -} - -void ReadConfigLines(std::istream &is, std::vector *lines) { - KALDI_ASSERT(lines != NULL); - std::string line; - while (std::getline(is, line)) { - if (line.size() == 0) continue; - size_t start = line.find_first_not_of(" \t"); - size_t end = line.find_first_of('#'); - if (start == std::string::npos || start == end) continue; - end = line.find_last_not_of(" \t", end - 1); - KALDI_ASSERT(end >= start); - lines->push_back(line.substr(start, end - start + 1)); - } -} - -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines) { - config_lines->resize(lines.size()); - for (size_t i = 0; i < lines.size(); i++) { - bool ret = (*config_lines)[i].ParseLine(lines[i]); - if (!ret) { - KALDI_ERR << "Error parsing config line: " << lines[i]; - } - } -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/text-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/text-utils.h deleted file mode 100644 index bc7763c4aff38214d97cbeda3b29c8717dd65318..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/kaldi/util/text-utils.h +++ /dev/null @@ -1,264 +0,0 @@ -// util/text-utils.h - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef KALDI_UTIL_TEXT_UTILS_H_ -#define KALDI_UTIL_TEXT_UTILS_H_ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Split a string using any of the single character delimiters. -/// If omit_empty_strings == true, the output will contain any -/// nonempty strings after splitting on any of the -/// characters in the delimiter. If omit_empty_strings == false, -/// the output will contain n+1 strings if there are n characters -/// in the set "delim" within the input string. In this case -/// the empty string is split to a single empty string. -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -/// Joins the elements of a vector of strings into a single string using -/// "delim" as the delimiter. If omit_empty_strings == true, any empty strings -/// in the vector are skipped. A vector of empty strings results in an empty -/// string on the output. -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out); - -/** - \brief Split a string (e.g. 1:2:3) into a vector of integers. - - \param [in] delim String containing a list of characters, any of which - is allowed as a delimiter. - \param [in] omit_empty_strings If true, empty strings between delimiters are - allowed and will not produce an output integer; if false, - instances of characters in 'delim' that are consecutive or - at the start or end of the string would be an error. - You'll normally want this to be true if 'delim' consists - of spaces, and false otherwise. - \param [out] out The output list of integers. -*/ -template -bool SplitStringToIntegers(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false [but - // should probably be true - // if "delim" is spaces]. - std::vector *out) { - KALDI_ASSERT(out != NULL); - KALDI_ASSERT_IS_INTEGER_TYPE(I); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - const char *this_str = split[i].c_str(); - char *end = NULL; - int64 j = 0; - j = KALDI_STRTOLL(this_str, &end); - if (end == this_str || *end != '\0') { - out->clear(); - return false; - } else { - I jI = static_cast(j); - if (static_cast(jI) != j) { - // output type cannot fit this integer. - out->clear(); - return false; - } - (*out)[i] = jI; - } - } - return true; -} - -// This is defined for F = float and double. -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out); - -/// Converts a string into an integer via strtoll and returns false if there was -/// any kind of problem (i.e. the string was not an integer or contained extra -/// non-whitespace junk, or the integer was too large to fit into the type it is -/// being converted into). Only sets *out if everything was OK and it returns -/// true. 
-template -bool ConvertStringToInteger(const std::string &str, Int *out) { - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - const char *this_str = str.c_str(); - char *end = NULL; - errno = 0; - int64 i = KALDI_STRTOLL(this_str, &end); - if (end != this_str) - while (isspace(*end)) end++; - if (end == this_str || *end != '\0' || errno != 0) return false; - Int iInt = static_cast(i); - if (static_cast(iInt) != i || - (i < 0 && !std::numeric_limits::is_signed)) { - return false; - } - *out = iInt; - return true; -} - -/// ConvertStringToReal converts a string into either float or double -/// and returns false if there was any kind of problem (i.e. the string -/// was not a floating point number or contained extra non-whitespace junk). -/// Be careful- this function will successfully read inf's or nan's. -template -bool ConvertStringToReal(const std::string &str, T *out); - -/// Removes the beginning and trailing whitespaces from a string -void Trim(std::string *str); - -/// Removes leading and trailing white space from the string, then splits on the -/// first section of whitespace found (if present), putting the part before the -/// whitespace in "first" and the rest in "rest". If there is no such space, -/// everything that remains after removing leading and trailing whitespace goes -/// in "first". -void SplitStringOnFirstSpace(const std::string &line, std::string *first, - std::string *rest); - -/// Returns true if "token" is nonempty, and all characters are -/// printable and whitespace-free. -bool IsToken(const std::string &token); - -/// Returns true if "line" is free of \n characters and unprintable -/// characters, and does not contain leading or trailing whitespace. -bool IsLine(const std::string &line); - -/** - This function returns true when two text strings are approximately equal, and - false when they are not. The definition of 'equal' is normal string - equality, except that two substrings like "0.31134" and "0.311341" would be - considered equal. 'decimal_places_tolerance' controls how many digits after - the '.' have to match up. - E.g. StringsApproxEqual("hello 0.23 there", "hello 0.24 there", 2) would - return false because there is a difference in the 2nd decimal, but with - an argument of 1 it would return true. - */ -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_check = 2); - -/** - This class is responsible for parsing input like - hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' - baz="a b c d='a b' e" and giving you access to the fields, in this case - - FirstToken() == "hi-there", and key->value pairs: - - xx->yyy, a->"b c", empty->"", f-oo->"Append(bar, sss)", ba_z->"123", - bing->"a b c", baz->"a b c d='a b' e" - - The first token is optional, if the line started with a key-value pair then - FirstValue() will be empty. - - Note: it can parse value fields with space inside them only if they are free - of the '=' character. If values are going to contain the '=' character, you - need to quote them with either single or double quotes. - - Key values may contain -_a-zA-Z0-9, but must begin with a-zA-Z_. - */ -class ConfigLine { - public: - // Tries to parse the line as a config-file line. Returns false - // if it could not for some reason, e.g. parsing failure. In most cases - // prints no warnings; the user should do this. Does not expect comments. - bool ParseLine(const std::string &line); - - // the GetValue functions are overloaded for various types. 
They return true - // if the key exists with value that can be converted to that type, and false - // otherwise. They also mark the key-value pair as having been read. It is - // not an error to read values twice. - bool GetValue(const std::string &key, std::string *value); - bool GetValue(const std::string &key, BaseFloat *value); - bool GetValue(const std::string &key, int32 *value); - // Values may be separated by ":" or by ",". - bool GetValue(const std::string &key, std::vector *value); - bool GetValue(const std::string &key, bool *value); - - bool HasUnusedValues() const; - /// returns e.g. foo=bar xxx=yyy if foo and xxx were not consumed by one - /// of the GetValue() functions. - std::string UnusedValues() const; - - const std::string &FirstToken() const { return first_token_; } - - const std::string WholeLine() { return whole_line_; } - // use default assignment operator and copy constructor. - private: - std::string whole_line_; - // the first token of the line, e.g. if line is - // foo-bar baz=bing - // then first_token_ would be "foo-bar". - std::string first_token_; - - // data_ maps from key to (value, is-this-value-consumed?). - std::map > data_; -}; - -/// This function is like ExpectToken but for two tokens, and it will either -/// accept token1 and then token2, or just token2. This is useful in Read -/// functions where the first token may already have been consumed. -void ExpectOneOrTwoTokens(std::istream &is, bool binary, - const std::string &token1, const std::string &token2); - -/** - This function reads in a config file and *appends* its contents to a vector - of lines; it is responsible for removing comments (anything after '#') and - stripping out any lines that contain only whitespace after comment removal. - */ -void ReadConfigLines(std::istream &is, std::vector *lines); - -/** - This function converts config-lines from a simple sequence of strings - as output by ReadConfigLines(), into a sequence of first-tokens and - name-value pairs. The general format is: - "command-type bar=baz xx=yyy" - etc., although there are subtleties as to what exactly is allowed, see - documentation for class ConfigLine for details. - This function will die if there was a parsing failure. - */ -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines); - -/// Returns true if 'name' would be a valid name for a component or node in a -/// nnet3Nnet. This is a nonempty string beginning with A-Za-z_, and containing -/// only -/// '-', '_', '.', A-Z, a-z, or 0-9. 
-bool IsValidName(const std::string &name); - -} // namespace kaldi - -#endif // KALDI_UTIL_TEXT_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/CPPLINT.cfg b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/CPPLINT.cfg deleted file mode 100644 index 51ff339c18435a6c3a3be03131080d7b8ab8de86..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/CPPLINT.cfg +++ /dev/null @@ -1 +0,0 @@ -exclude_files=.* diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/CMakeLists.txt deleted file mode 100644 index 04051ef5ae46c04a40c1ffccc98c37fa594ad13e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ - -#-DHAVE_CONFIG_H -I./../include -fno-exceptions -funsigned-char -std=c++11 -MT symbol-table.lo -MD -MP -MF .deps/symbol-table.Tpo -c symbol-table.cc -fno-common -DPIC -o .libs/symbol-table.o - -include_directories(./include/) -install(DIRECTORY include/ DESTINATION include/ - FILES_MATCHING PATTERN "*.h") - -add_subdirectory(lib) - -if(HAVE_SCRIPT) - add_subdirectory(script) -endif(HAVE_SCRIPT) - -if(HAVE_BIN) - add_subdirectory(bin) -endif(HAVE_BIN) - -add_subdirectory(extensions) - -if(BUILD_TESTING) - enable_testing() - add_subdirectory(test) -endif(BUILD_TESTING) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/extensions/special/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/extensions/special/CMakeLists.txt deleted file mode 100644 index 9c71b750a72ffe3c2dafde657273361c3dbae409..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/extensions/special/CMakeLists.txt +++ /dev/null @@ -1,67 +0,0 @@ -file(GLOB HEADER_FILES ../../include/fst/extensions/special/*.h) -message(STATUS "${HEADER_FILES}") - -if(HAVE_BIN) - add_executable(fstspecial-bin - ../../bin/fstconvert.cc - ../../bin/fstconvert-main.cc - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ) - - set_target_properties(fstspecial-bin PROPERTIES - FOLDER special/bin - OUTPUT_NAME fstspecial - ) - - target_link_libraries(fstspecial-bin - fstscript - fst - ${CMAKE_DL_LIBS} - ) -endif(HAVE_BIN) - - -add_library(fstspecial - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ${HEADER_FILES} -) - -set_target_properties(fstspecial PROPERTIES - SOVERSION "${SOVERSION}" - FOLDER special -) -target_link_libraries(fstspecial - fst -) - -set(FST_SPECIAL_INSTALL_TARGETS fstspecial) -if(HAVE_BIN) - list(APPEND FST_SPECIAL_INSTALL_TARGETS fstspecial-bin) -endif() - -install(TARGETS ${FST_SPECIAL_INSTALL_TARGETS} - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib -) - -function (add_module _name) - add_library(${ARGV}) - if (TARGET ${_name}) - target_link_libraries(${_name} fst) - set_target_properties(${_name} - PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true - FOLDER special/modules - ) - endif() - - install(TARGETS ${_name} LIBRARY DESTINATION lib/fst) -endfunction() - -add_module(phi-fst MODULE phi-fst.cc) -add_module(rho-fst MODULE rho-fst.cc) -add_module(sigma-fst MODULE sigma-fst.cc) diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/include/fst/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/include/fst/flags.h deleted file mode 100644 index b5ec8ff7416774a0612ae0fe7e008a630b289dd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/include/fst/flags.h +++ /dev/null @@ -1,228 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style flag handling declarations and inline definitions. - -#ifndef FST_LIB_FLAGS_H_ -#define FST_LIB_FLAGS_H_ - -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include "gflags/gflags.h" -#include "glog/logging.h" - -using std::string; - -// FLAGS USAGE: -// -// Definition example: -// -// DEFINE_int32(length, 0, "length"); -// -// This defines variable FLAGS_length, initialized to 0. -// -// Declaration example: -// -// DECLARE_int32(length); -// -// SET_FLAGS() can be used to set flags from the command line -// using, for example, '--length=2'. -// -// ShowUsage() can be used to print out command and flag usage. - -// #define DECLARE_bool(name) extern bool FLAGS_ ## name -// #define DECLARE_string(name) extern string FLAGS_ ## name -// #define DECLARE_int32(name) extern int32 FLAGS_ ## name -// #define DECLARE_int64(name) extern int64 FLAGS_ ## name -// #define DECLARE_double(name) extern double FLAGS_ ## name - -template -struct FlagDescription { - FlagDescription(T *addr, const char *doc, const char *type, - const char *file, const T val) - : address(addr), - doc_string(doc), - type_name(type), - file_name(file), - default_value(val) {} - - T *address; - const char *doc_string; - const char *type_name; - const char *file_name; - const T default_value; -}; - -template -class FlagRegister { - public: - static FlagRegister *GetRegister() { - static auto reg = new FlagRegister; - return reg; - } - - const FlagDescription &GetFlagDescription(const string &name) const { - fst::MutexLock l(&flag_lock_); - auto it = flag_table_.find(name); - return it != flag_table_.end() ? 
it->second : 0; - } - - void SetDescription(const string &name, - const FlagDescription &desc) { - fst::MutexLock l(&flag_lock_); - flag_table_.insert(make_pair(name, desc)); - } - - bool SetFlag(const string &val, bool *address) const { - if (val == "true" || val == "1" || val.empty()) { - *address = true; - return true; - } else if (val == "false" || val == "0") { - *address = false; - return true; - } - else { - return false; - } - } - - bool SetFlag(const string &val, string *address) const { - *address = val; - return true; - } - - bool SetFlag(const string &val, int32 *address) const { - char *p = 0; - *address = strtol(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, int64 *address) const { - char *p = 0; - *address = strtoll(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, double *address) const { - char *p = 0; - *address = strtod(val.c_str(), &p); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &arg, const string &val) const { - for (typename std::map< string, FlagDescription >::const_iterator it = - flag_table_.begin(); - it != flag_table_.end(); - ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - if (arg == name) - return SetFlag(val, desc.address); - } - return false; - } - - void GetUsage(std::set> *usage_set) const { - for (auto it = flag_table_.begin(); it != flag_table_.end(); ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - string usage = " --" + name; - usage += ": type = "; - usage += desc.type_name; - usage += ", default = "; - usage += GetDefault(desc.default_value) + "\n "; - usage += desc.doc_string; - usage_set->insert(make_pair(desc.file_name, usage)); - } - } - - private: - string GetDefault(bool default_value) const { - return default_value ? "true" : "false"; - } - - string GetDefault(const string &default_value) const { - return "\"" + default_value + "\""; - } - - template - string GetDefault(const V &default_value) const { - std::ostringstream strm; - strm << default_value; - return strm.str(); - } - - mutable fst::Mutex flag_lock_; // Multithreading lock. - std::map> flag_table_; -}; - -template -class FlagRegisterer { - public: - FlagRegisterer(const string &name, const FlagDescription &desc) { - auto registr = FlagRegister::GetRegister(); - registr->SetDescription(name, desc); - } - - private: - FlagRegisterer(const FlagRegisterer &) = delete; - FlagRegisterer &operator=(const FlagRegisterer &) = delete; -}; - - -#define DEFINE_VAR(type, name, value, doc) \ - type FLAGS_ ## name = value; \ - static FlagRegisterer \ - name ## _flags_registerer(#name, FlagDescription(&FLAGS_ ## name, \ - doc, \ - #type, \ - __FILE__, \ - value)) - -// #define DEFINE_bool(name, value, doc) DEFINE_VAR(bool, name, value, doc) -// #define DEFINE_string(name, value, doc) \ -// DEFINE_VAR(string, name, value, doc) -// #define DEFINE_int32(name, value, doc) DEFINE_VAR(int32, name, value, doc) -// #define DEFINE_int64(name, value, doc) DEFINE_VAR(int64, name, value, doc) -// #define DEFINE_double(name, value, doc) DEFINE_VAR(double, name, value, doc) - - -// Temporary directory. 
-DECLARE_string(tmpdir); - -void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags, - const char *src = ""); - -#define SET_FLAGS(usage, argc, argv, rmflags) \ -gflags::ParseCommandLineFlags(argc, argv, true) -// SetFlags(usage, argc, argv, rmflags, __FILE__) - -// Deprecated; for backward compatibility. -inline void InitFst(const char *usage, int *argc, char ***argv, bool rmflags) { - return SetFlags(usage, argc, argv, rmflags); -} - -void ShowUsage(bool long_usage = true); - -#endif // FST_LIB_FLAGS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/include/fst/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/include/fst/log.h deleted file mode 100644 index bf041c58ebfab73d03bb14adf28c7c7916a2217d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/patch/openfst/src/include/fst/log.h +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style logging declarations and inline definitions. - -#ifndef FST_LIB_LOG_H_ -#define FST_LIB_LOG_H_ - -#include -#include -#include - -#include -#include - -using std::string; - -DECLARE_int32(v); - -class LogMessage { - public: - LogMessage(const string &type) : fatal_(type == "FATAL") { - std::cerr << type << ": "; - } - ~LogMessage() { - std::cerr << std::endl; - if(fatal_) - exit(1); - } - std::ostream &stream() { return std::cerr; } - - private: - bool fatal_; -}; - -// #define LOG(type) LogMessage(#type).stream() -// #define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO) - -// Checks -inline void FstCheck(bool x, const char* expr, - const char *file, int line) { - if (!x) { - LOG(FATAL) << "Check failed: \"" << expr - << "\" file: " << file - << " line: " << line; - } -} - -// #define CHECK(x) FstCheck(static_cast(x), #x, __FILE__, __LINE__) -// #define CHECK_EQ(x, y) CHECK((x) == (y)) -// #define CHECK_LT(x, y) CHECK((x) < (y)) -// #define CHECK_GT(x, y) CHECK((x) > (y)) -// #define CHECK_LE(x, y) CHECK((x) <= (y)) -// #define CHECK_GE(x, y) CHECK((x) >= (y)) -// #define CHECK_NE(x, y) CHECK((x) != (y)) - -// Debug checks -// #define DCHECK(x) assert(x) -// #define DCHECK_EQ(x, y) DCHECK((x) == (y)) -// #define DCHECK_LT(x, y) DCHECK((x) < (y)) -// #define DCHECK_GT(x, y) DCHECK((x) > (y)) -// #define DCHECK_LE(x, y) DCHECK((x) <= (y)) -// #define DCHECK_GE(x, y) DCHECK((x) >= (y)) -// #define DCHECK_NE(x, y) DCHECK((x) != (y)) - - -// Ports -#define ATTRIBUTE_DEPRECATED __attribute__((deprecated)) - -#endif // FST_LIB_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/post_processor/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/post_processor/CMakeLists.txt deleted file mode 100644 index 
6113bbc26eb8fe35e4e17ffd1cab382f0fb0f1f8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/post_processor/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_library(post_processor STATIC - post_processor.cc -) -target_link_libraries(post_processor PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/post_processor/post_processor.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/post_processor/post_processor.cc deleted file mode 100644 index 315f62d34cbc441ecbaf7c07667eb35ee61c2c8d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/post_processor/post_processor.cc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#include "post_processor/post_processor.h" - -#include -#include - -#include "utils/string.h" - -namespace wenet { - -std::string PostProcessor::ProcessSpace(const std::string& str) { - std::string result = str; - // 1. remove ' ' if needed - // only spaces between mandarin words need to be removed, please note that - // if str contains '_', we assume that the decoding type must be - // `CtcPrefixBeamSearch` and this branch will do nothing since str must be - // obtained via "".join() (in function `AsrDecoder::UpdateResult()`) - if (opts_.language_type == kMandarinEnglish && !str.empty()) { - result.clear(); - // split str by ' ' - std::vector words; - std::stringstream ss(str); - std::string tmp; - while (ss >> tmp) { - words.push_back(tmp); - } - // check english word - bool is_englishword_prev = false; - bool is_englishword_now = false; - for (std::string& w : words) { - is_englishword_now = CheckEnglishWord(w); - if (is_englishword_prev && is_englishword_now) { - result += (' ' + w); - } else { - result += (w); - } - is_englishword_prev = is_englishword_now; - } - } - // 2. 
replace '_' with ' ' - // this should be done for all cases (both kMandarinEnglish and kIndoEuropean) - result = ProcessBlank(result, opts_.lowercase); - return result; -} - -std::string PostProcessor::Process(const std::string& str, bool finish) { - std::string result; - result = ProcessSpace(str); - // TODO(xcsong): do itn/punctuation if finish == true - return result; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/post_processor/post_processor.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/post_processor/post_processor.h deleted file mode 100644 index 54597845ebc88ad22e1244d2e693e2088cff6d21..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/post_processor/post_processor.h +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#ifndef POST_PROCESSOR_POST_PROCESSOR_H_ -#define POST_PROCESSOR_POST_PROCESSOR_H_ - -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -enum LanguageType { - // spaces between **mandarin words** should be removed. - // cases of processing spaces with mandarin-only, english-only - // and mandarin-english code-switch can be found in post_processor_test.cc - kMandarinEnglish = 0x00, - // spaces should be kept for most of the - // Indo-European languages (i.e., deutsch or english-deutsch code-switch). - // cases of those languages can be found in post_processor_test.cc - kIndoEuropean = 0x01 -}; - -struct PostProcessOptions { - // space options - // The decoded result may contain spaces (' ' or '_'), - // we will process those spaces according to language_type. 
More details can - // be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - LanguageType language_type = kMandarinEnglish; - // whether lowercase letters are required - bool lowercase = true; -}; - -// TODO(xcsong): add itn/punctuation related resource -struct PostProcessResource {}; - -// Post Processor -class PostProcessor { - public: - explicit PostProcessor(PostProcessOptions&& opts) : opts_(std::move(opts)) {} - explicit PostProcessor(const PostProcessOptions& opts) : opts_(opts) {} - // call other functions to do post processing - std::string Process(const std::string& str, bool finish); - // process spaces according to configurations - std::string ProcessSpace(const std::string& str); - // TODO(xcsong): add itn/punctuation - // void InverseTN(const std::string& str); - // void Punctuate(const std::string& str); - - private: - const PostProcessOptions opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(PostProcessor); -}; - -} // namespace wenet - -#endif // POST_PROCESSOR_POST_PROCESSOR_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/CMakeLists.txt deleted file mode 100644 index 145654105350e91a5f9121b47197f5fc60663f5c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -link_libraries(gtest_main gmock) - -add_executable(utils_test utils_test.cc) -target_link_libraries(utils_test PUBLIC utils) -add_test(UTILS_TEST utils_test) - -add_executable(ctc_prefix_beam_search_test ctc_prefix_beam_search_test.cc) -target_link_libraries(ctc_prefix_beam_search_test PUBLIC decoder) -add_test(CTC_PREFIX_BEAM_SEARCH_TEST ctc_prefix_beam_search_test) - -add_executable(post_processor_test post_processor_test.cc) -target_link_libraries(post_processor_test PUBLIC post_processor) -add_test(POST_PROCESSOR_TEST post_processor_test) - - -add_executable(feature_pipeline_test feature_pipeline_test.cc) -target_link_libraries(feature_pipeline_test PUBLIC frontend) -add_test(FEATURE_PIPELINE_TEST feature_pipeline_test) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/ctc_prefix_beam_search_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/ctc_prefix_beam_search_test.cc deleted file mode 100644 index d8f3b65693b934beb33f3a770795f0b6e7ce3456..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/ctc_prefix_beam_search_test.cc +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/ctc_prefix_beam_search.h" - -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include "utils/utils.h" - -TEST(CtcPrefixBeamSearchTest, CtcPrefixBeamSearchLogicTest) { - using ::testing::ElementsAre; - // See https://robin1001.github.io/2020/12/11/ctc-search for the - // graph demonstration of the data - std::vector> data = { - {0.25, 0.40, 0.35}, {0.40, 0.35, 0.25}, {0.10, 0.50, 0.40}}; - // Apply log - for (int i = 0; i < data.size(); i++) { - for (int j = 0; j < data[i].size(); j++) { - data[i][j] = std::log(data[i][j]); - } - } - wenet::CtcPrefixBeamSearchOptions option; - option.first_beam_size = 3; - option.second_beam_size = 3; - wenet::CtcPrefixBeamSearch prefix_beam_search(option); - prefix_beam_search.Search(data); - /* Test case info - | top k | result index | prefix score | viterbi score | timestamp | - |-------|--------------|--------------|---------------|-----------| - | top 1 | [2, 1] | 0.2185 | 0.07 | [0, 2] | - | top 2 | [1, 2] | 0.1550 | 0.064 | [0, 2] | - | top 3 | [1] | 0.1525 | 0.07 | [2] | - */ - const std::vector>& result = prefix_beam_search.Outputs(); - EXPECT_EQ(result.size(), 3); - ASSERT_THAT(result[0], ElementsAre(2, 1)); - ASSERT_THAT(result[1], ElementsAre(1, 2)); - ASSERT_THAT(result[2], ElementsAre(1)); - - const std::vector& likelihood = prefix_beam_search.Likelihood(); - EXPECT_EQ(likelihood.size(), 3); - EXPECT_FLOAT_EQ(std::exp(likelihood[0]), 0.2185); - EXPECT_FLOAT_EQ(std::exp(likelihood[1]), 0.1550); - EXPECT_FLOAT_EQ(std::exp(likelihood[2]), 0.1525); - - const std::vector& viterbi_likelihood = - prefix_beam_search.viterbi_likelihood(); - EXPECT_EQ(viterbi_likelihood.size(), 3); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[0]), 0.07); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[1]), 0.064); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[2]), 0.07); - - const std::vector>& times = prefix_beam_search.Times(); - EXPECT_EQ(times.size(), 3); - ASSERT_THAT(times[0], ElementsAre(0, 2)); - ASSERT_THAT(times[1], ElementsAre(0, 2)); - ASSERT_THAT(times[2], ElementsAre(2)); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/feature_pipeline_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/feature_pipeline_test.cc deleted file mode 100644 index 244ec0735b6086211b476e8d97569e1ee5959bc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/feature_pipeline_test.cc +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) 2022 Roney -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include - -#include "frontend/feature_pipeline.h" -#include "utils/blocking_queue.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -void pushQueue(const std::shared_ptr>& que, - std::vector vec) { - que->Push(vec); -} - -void popQueue(const std::shared_ptr>& que, int num, - int back_data) { - auto pop_data = que->Pop(num); - ASSERT_EQ(pop_data[num - 1], back_data); -} - -TEST(FeaturePipelineTest, BlockingQueueTest) { - auto capacity_queue = std::make_shared>(2); - std::vector test_data{1, 2, 3, 4, 5}; - std::thread push_thread(&pushQueue, capacity_queue, test_data); - ASSERT_EQ(capacity_queue->Pop(), 1); - ASSERT_LE(capacity_queue->Size(), 2); // capacity_queue: 2 or 2,3 - auto pop_data = capacity_queue->Pop(3); // 2,3,4 num > capacity - ASSERT_EQ(pop_data.size(), 3); - ASSERT_EQ(pop_data[2], 4); - push_thread.join(); - ASSERT_EQ(capacity_queue->Size(), 1); // capacity_queue:5 - - std::thread pop_thread(&popQueue, capacity_queue, 3, 0); // num > capacity - capacity_queue->Push(9); // capacity_queue:5,9 - capacity_queue->Push(0); // capacity_queue:5,9,0 - pop_thread.join(); // capacity_queue: - ASSERT_EQ(capacity_queue->Size(), 0); - - pop_data = capacity_queue->Pop(0); - ASSERT_TRUE(pop_data.empty()); -} - -TEST(FeaturePipelineTest, PipelineTest) { - wenet::FeaturePipelineConfig config(80, 8000); - wenet::FeaturePipeline feature_pipeline(config); - int audio_len = 8 * 55; // audio len 55ms,4 frames - std::vector pcm(audio_len, 0); - feature_pipeline.AcceptWaveform(pcm.data(), audio_len); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 4); - - std::vector> out_feats; - auto b = feature_pipeline.Read(2, &out_feats); - ASSERT_TRUE(b); - ASSERT_EQ(out_feats.size(), 2); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 2); - - std::vector out_feat; - b = feature_pipeline.ReadOne(&out_feat); - ASSERT_TRUE(b); - ASSERT_FALSE(out_feat.empty()); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 1); - - feature_pipeline.set_input_finished(); - b = feature_pipeline.Read(2, &out_feats); - ASSERT_FALSE(b); - ASSERT_EQ(out_feats.size(), 1); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 0); - - feature_pipeline.AcceptWaveform(pcm.data(), audio_len); - feature_pipeline.Read(2, &out_feats); - feature_pipeline.Reset(); - feature_pipeline.set_input_finished(); - b = feature_pipeline.Read(2, &out_feats); - ASSERT_FALSE(b); - ASSERT_EQ(out_feats.size(), 0); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 0); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/post_processor_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/post_processor_test.cc deleted file mode 100644 index fa11fa29231032d62389a93fd00b0ec782bf8a3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/post_processor_test.cc +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License - -#include "post_processor/post_processor.h" - -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include "utils/utils.h" - -TEST(PostProcessorTest, ProcessSpacekMandarinEnglishTest) { - wenet::PostProcessOptions opts_lowercase; - wenet::PostProcessor post_processor_lowercase(opts_lowercase); - - wenet::PostProcessOptions opts_uppercase; - opts_uppercase.lowercase = false; - wenet::PostProcessor post_processor_uppercase(opts_uppercase); - - std::vector input = { - // modeling unit: mandarin character - // decode type: CtcPrefixBeamSearch, "".join() - "震东好帅", - // modeling unit: mandarin word - // decode type: CtcWfstBeamSearch, " ".join() - " 吴迪 也 好帅", - // modeling unit: english wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "▁binbin▁is▁also▁handsome", - // modeling unit: english word - // decode type: CtcWfstBeamSearch, " ".join() - " life is short i use wenet", - // modeling unit: mandarin character + english wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "超哥▁is▁the▁most▁handsome", - // modeling unit: mandarin word + english word - // decode type: CtcWfstBeamSearch, " ".join() - " 人生 苦短 i use wenet", - }; - - std::vector result_lowercase = { - "震东好帅", - "吴迪也好帅", - "binbin is also handsome", - "life is short i use wenet", - "超哥 is the most handsome", - "人生苦短i use wenet", - }; - - std::vector result_uppercase = { - "震东好帅", - "吴迪也好帅", - "BINBIN IS ALSO HANDSOME", - "LIFE IS SHORT I USE WENET", - "超哥 IS THE MOST HANDSOME", - "人生苦短I USE WENET", - }; - - for (size_t i = 0; i < input.size(); ++i) { - EXPECT_EQ(post_processor_lowercase.ProcessSpace(input[i]), - result_lowercase[i]); - EXPECT_EQ(post_processor_uppercase.ProcessSpace(input[i]), - result_uppercase[i]); - } -} - -TEST(PostProcessorTest, ProcessSpacekIndoEuropeanTest) { - wenet::PostProcessOptions opts_lowercase; - opts_lowercase.language_type = wenet::kIndoEuropean; - wenet::PostProcessor post_processor_lowercase(opts_lowercase); - - wenet::PostProcessOptions opts_uppercase; - opts_uppercase.language_type = wenet::kIndoEuropean; - opts_uppercase.lowercase = false; - wenet::PostProcessor post_processor_uppercase(opts_uppercase); - - std::vector input = { - // modeling unit: wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "▁zhendong▁ist▁so▁schön", - // modeling unit: word - // decode type: CtcWfstBeamSearch, " ".join() - " zhendong ist so schön"}; - - std::vector result_lowercase = {"zhendong ist so schön", - "zhendong ist so schön"}; - - std::vector result_uppercase = {"ZHENDONG IST SO SCHÖN", - "ZHENDONG IST SO SCHÖN"}; - - for (size_t i = 0; i < input.size(); ++i) { - EXPECT_EQ(post_processor_lowercase.ProcessSpace(input[i]), - result_lowercase[i]); - EXPECT_EQ(post_processor_uppercase.ProcessSpace(input[i]), - result_uppercase[i]); - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/utils_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/utils_test.cc deleted file mode 100644 index 6b2bbac25e000ce854d5e55a50cb51109d62d758..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/test/utils_test.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "utils/utils.h" - -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -TEST(UtilsTest, TopKTest) { - using ::testing::ElementsAre; - using ::testing::FloatNear; - using ::testing::Pointwise; - std::vector data = {1, 3, 5, 7, 9, 2, 4, 6, 8, 10}; - std::vector values; - std::vector indices; - wenet::TopK(data, 3, &values, &indices); - EXPECT_THAT(values, Pointwise(FloatNear(1e-8), {10, 9, 8})); - ASSERT_THAT(indices, ElementsAre(9, 4, 8)); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/toolchains/aarch64-linux-gnu.toolchain.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/toolchains/aarch64-linux-gnu.toolchain.cmake deleted file mode 100644 index 9ad37cba9eb6fa58aa194ece96cf9a5da472a76d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/toolchains/aarch64-linux-gnu.toolchain.cmake +++ /dev/null @@ -1,5 +0,0 @@ -set(CMAKE_SYSTEM_NAME Linux) -SET (CMAKE_SYSTEM_PROCESSOR aarch64) - -set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc) -set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/toolchains/ios.toolchain.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/toolchains/ios.toolchain.cmake deleted file mode 100644 index 2bcb0adf7b07c0c5fd5bf16d1b687050579ba673..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/toolchains/ios.toolchain.cmake +++ /dev/null @@ -1,1014 +0,0 @@ -# This file is part of the ios-cmake project. It was retrieved from -# https://github.com/leetal/ios-cmake.git, which is a fork of -# https://github.com/gerstrong/ios-cmake.git, which is a fork of -# https://github.com/cristeab/ios-cmake.git, which is a fork of -# https://code.google.com/p/ios-cmake/. Which in turn is based off of -# the Platform/Darwin.cmake and Platform/UnixPaths.cmake files which -# are included with CMake 2.8.4 -# -# The ios-cmake project is licensed under the new BSD license. -# -# Copyright (c) 2014, Bogdan Cristea and LTE Engineering Software, -# Kitware, Inc., Insight Software Consortium. All rights reserved. -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# -# This file is based off of the Platform/Darwin.cmake and -# Platform/UnixPaths.cmake files which are included with CMake 2.8.4 -# It has been altered for iOS development. -# -# Updated by Alex Stewart (alexs.mac@gmail.com) -# -# ***************************************************************************** -# Now maintained by Alexander Widerberg (widerbergaren [at] gmail.com) -# under the BSD-3-Clause license -# https://github.com/leetal/ios-cmake -# ***************************************************************************** -# -# INFORMATION / HELP -# -############################################################################### -# OPTIONS # -############################################################################### -# -# PLATFORM: (default "OS64") -# OS = Build for iPhoneOS. -# OS64 = Build for arm64 iphoneOS. -# OS64COMBINED = Build for arm64 x86_64 iphoneOS + iphoneOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) -# SIMULATOR = Build for x86 i386 iphoneOS Simulator. -# SIMULATOR64 = Build for x86_64 iphoneOS Simulator. -# SIMULATORARM64 = Build for arm64 iphoneOS Simulator. -# TVOS = Build for arm64 tvOS. -# TVOSCOMBINED = Build for arm64 x86_64 tvOS + tvOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) -# SIMULATOR_TVOS = Build for x86_64 tvOS Simulator. -# WATCHOS = Build for armv7k arm64_32 for watchOS. -# WATCHOSCOMBINED = Build for armv7k arm64_32 x86_64 watchOS + watchOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) -# SIMULATOR_WATCHOS = Build for x86_64 for watchOS Simulator. -# MAC = Build for x86_64 macOS. -# MAC_ARM64 = Build for Apple Silicon macOS. -# MAC_CATALYST = Build for x86_64 macOS with Catalyst support (iOS toolchain on macOS). -# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS -# MAC_CATALYST_ARM64 = Build for Apple Silicon macOS with Catalyst support (iOS toolchain on macOS). -# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS -# -# CMAKE_OSX_SYSROOT: Path to the SDK to use. By default this is -# automatically determined from PLATFORM and xcodebuild, but -# can also be manually specified (although this should not be required). -# -# CMAKE_DEVELOPER_ROOT: Path to the Developer directory for the platform -# being compiled for. 
By default this is automatically determined from -# CMAKE_OSX_SYSROOT, but can also be manually specified (although this should -# not be required). -# -# DEPLOYMENT_TARGET: Minimum SDK version to target. Default 2.0 on watchOS and 9.0 on tvOS+iOS -# -# NAMED_LANGUAGE_SUPPORT: -# ON (default) = Will require "enable_language(OBJC) and/or enable_language(OBJCXX)" for full OBJC|OBJCXX support -# OFF = Will embed the OBJC and OBJCXX flags into the CMAKE_C_FLAGS and CMAKE_CXX_FLAGS (legacy behaviour, CMake version < 3.16) -# -# ENABLE_BITCODE: (ON|OFF) Enables or disables bitcode support. Default ON -# -# ENABLE_ARC: (ON|OFF) Enables or disables ARC support. Default ON (ARC enabled by default) -# -# ENABLE_VISIBILITY: (ON|OFF) Enables or disables symbol visibility support. Default OFF (visibility hidden by default) -# -# ENABLE_STRICT_TRY_COMPILE: (ON|OFF) Enables or disables strict try_compile() on all Check* directives (will run linker -# to actually check if linking is possible). Default OFF (will set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY) -# -# ARCHS: (armv7 armv7s armv7k arm64 arm64_32 i386 x86_64) If specified, will override the default architectures for the given PLATFORM -# OS = armv7 armv7s arm64 (if applicable) -# OS64 = arm64 (if applicable) -# SIMULATOR = i386 -# SIMULATOR64 = x86_64 -# SIMULATORARM64 = arm64 -# TVOS = arm64 -# SIMULATOR_TVOS = x86_64 (i386 has since long been deprecated) -# WATCHOS = armv7k arm64_32 (if applicable) -# SIMULATOR_WATCHOS = x86_64 (i386 has since long been deprecated) -# MAC = x86_64 -# MAC_ARM64 = arm64 -# MAC_CATALYST = x86_64 -# MAC_CATALYST_ARM64 = arm64 -# -# NOTE: When manually specifying ARCHS, put a semi-colon between the entries. E.g., -DARCHS="armv7;arm64" -# -############################################################################### -# END OPTIONS # -############################################################################### -# -# This toolchain defines the following properties (available via get_property()) for use externally: -# -# PLATFORM: The currently targeted platform. -# XCODE_VERSION: Version number (not including Build version) of Xcode detected. -# SDK_VERSION: Version of SDK being used. -# OSX_ARCHITECTURES: Architectures being compiled for (generated from PLATFORM). -# APPLE_TARGET_TRIPLE: Used by autoconf build systems. NOTE: If "ARCHS" are overridden, this will *NOT* be set! -# -# This toolchain defines the following macros for use externally: -# -# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE XCODE_VARIANT) -# A convenience macro for setting xcode specific properties on targets. -# Available variants are: All, Release, RelWithDebInfo, Debug, MinSizeRel -# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1" "all"). -# -# find_host_package (PROGRAM ARGS) -# A macro used to find executable programs on the host system, not within the -# environment. Thanks to the android-cmake project for providing the -# command. -# - -cmake_minimum_required(VERSION 3.8.0) - -# CMake invokes the toolchain file twice during the first build, but only once during subsequent rebuilds. 
-if(DEFINED ENV{_IOS_TOOLCHAIN_HAS_RUN}) - return() -endif() -set(ENV{_IOS_TOOLCHAIN_HAS_RUN} true) - -# List of supported platform values -list(APPEND _supported_platforms - "OS" "OS64" "OS64COMBINED" "SIMULATOR" "SIMULATOR64" "SIMULATORARM64" - "TVOS" "TVOSCOMBINED" "SIMULATOR_TVOS" - "WATCHOS" "WATCHOSCOMBINED" "SIMULATOR_WATCHOS" - "MAC" "MAC_ARM64" - "MAC_CATALYST" "MAC_CATALYST_ARM64") - -# Cache what generator is used -set(USED_CMAKE_GENERATOR "${CMAKE_GENERATOR}") - -# Check if using a CMake version capable of building combined FAT builds (simulator and target slices combined in one static lib) -if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14") - set(MODERN_CMAKE YES) -endif() - -# Get the Xcode version being used. -# Problem: CMake runs toolchain files multiple times, but can't read cache variables on some runs. -# Workaround: On first run (in which cache variables are always accessible), set an intermediary environment variable. -# -# NOTE: This pattern is used i many places in this toolchain to speed up checks of all sorts -if(DEFINED XCODE_VERSION_INT) - # Environment variables are always preserved. - set(ENV{_XCODE_VERSION_INT} "${XCODE_VERSION_INT}") -elseif(DEFINED ENV{_XCODE_VERSION_INT}) - set(XCODE_VERSION_INT "$ENV{_XCODE_VERSION_INT}") -elseif(NOT DEFINED XCODE_VERSION_INT) - find_program(XCODEBUILD_EXECUTABLE xcodebuild) - if(NOT XCODEBUILD_EXECUTABLE) - message(FATAL_ERROR "xcodebuild not found. Please install either the standalone commandline tools or Xcode.") - endif() - execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version - OUTPUT_VARIABLE XCODE_VERSION_INT - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION_INT "${XCODE_VERSION_INT}") - string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION_INT "${XCODE_VERSION_INT}") - set(XCODE_VERSION_INT "${XCODE_VERSION_INT}" CACHE INTERNAL "") -endif() - -# Assuming that xcode 12.0 is installed you most probably have ios sdk 14.0 or later installed (tested on Big Sur) -# if you don't set a deployment target it will be set the way you only get 64-bit builds -if(NOT DEFINED DEPLOYMENT_TARGET AND XCODE_VERSION_INT VERSION_GREATER 12.0) - # Temporarily fix the arm64 issues in CMake install-combined by excluding arm64 for simulator builds (needed for Apple Silicon...) - set(CMAKE_XCODE_ATTRIBUTE_EXCLUDED_ARCHS[sdk=iphonesimulator*] "arm64") -endif() - -# Check if the platform variable is set -if(DEFINED PLATFORM) - # Environment variables are always preserved. - set(ENV{_PLATFORM} "${PLATFORM}") -elseif(DEFINED ENV{_PLATFORM}) - set(PLATFORM "$ENV{_PLATFORM}") -elseif(NOT DEFINED PLATFORM) - message(FATAL_ERROR "PLATFORM argument not set. Bailing configure since I don't know what target you want to build for!") -endif () - -if(PLATFORM MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode") - message(FATAL_ERROR "The combined builds support requires Xcode to be used as generator via '-G Xcode' command-line argument in CMake") -endif() - -# Safeguard that the platform value is set and is one of the supported values -list(FIND _supported_platforms ${PLATFORM} contains_PLATFORM) -if("${contains_PLATFORM}" EQUAL "-1") - string(REPLACE ";" "\n * " _supported_platforms_formatted "${_supported_platforms}") - message(FATAL_ERROR " Invalid PLATFORM specified! 
Current value: ${PLATFORM}.\n" - " Supported PLATFORM values: \n * ${_supported_platforms_formatted}") -endif() - -# Check if Apple Silicon is supported -if(PLATFORM MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$" AND ${CMAKE_VERSION} VERSION_LESS "3.19.5") - message(FATAL_ERROR "Apple Silicon builds requires a minimum of CMake 3.19.5") -endif() - -# Touch toolchain variable to suppress "unused variable" warning. -# This happens if CMake is invoked with the same command line the second time. -if(CMAKE_TOOLCHAIN_FILE) -endif() - -# Fix for PThread library not in path -set(CMAKE_THREAD_LIBS_INIT "-lpthread") -set(CMAKE_HAVE_THREADS_LIBRARY 1) -set(CMAKE_USE_WIN32_THREADS_INIT 0) -set(CMAKE_USE_PTHREADS_INIT 1) - -# Specify named language support defaults. -if(NOT DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16") - set(NAMED_LANGUAGE_SUPPORT ON) - message(STATUS "[DEFAULTS] Using explicit named language support! E.g., enable_language(CXX) is needed in the project files.") -elseif(NOT DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_LESS "3.16") - set(NAMED_LANGUAGE_SUPPORT OFF) - message(STATUS "[DEFAULTS] Disabling explicit named language support. Falling back to legacy behaviour.") -elseif(DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_LESS "3.16") - message(FATAL_ERROR "CMake named language support for OBJC and OBJCXX was added in CMake 3.16.") -endif() -set(NAMED_LANGUAGE_SUPPORT_INT ${NAMED_LANGUAGE_SUPPORT} CACHE BOOL - "Whether or not to enable explicit named language support" FORCE) - -# Specify minimum version of deployment target. -if(NOT DEFINED DEPLOYMENT_TARGET) - if (PLATFORM MATCHES "WATCHOS") - # Unless specified, SDK version 4.0 is used by default as minimum target version (watchOS). - set(DEPLOYMENT_TARGET "4.0") - elseif(PLATFORM STREQUAL "MAC") - # Unless specified, SDK version 10.13 (High sierra) is used by default as minimum target version (macos). - set(DEPLOYMENT_TARGET "10.13") - elseif(PLATFORM STREQUAL "MAC_ARM64") - # Unless specified, SDK version 11.0 (Big Sur) is used by default as minimum target version (macos on arm). - set(DEPLOYMENT_TARGET "11.0") - elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64") - # Unless specified, SDK version 13.0 is used by default as minimum target version (mac catalyst minimum requirement). - set(DEPLOYMENT_TARGET "13.1") - else() - # Unless specified, SDK version 11.0 is used by default as minimum target version (iOS, tvOS). - set(DEPLOYMENT_TARGET "11.0") - endif() - message(STATUS "[DEFAULTS] Using the default min-version since DEPLOYMENT_TARGET not provided!") -elseif(DEFINED DEPLOYMENT_TARGET AND PLATFORM MATCHES "^MAC_CATALYST" AND ${DEPLOYMENT_TARGET} VERSION_LESS "13.1") - message(FATAL_ERROR "Mac Catalyst builds requires a minimum deployment target of 13.1!") -endif() - -# Store the DEPLOYMENT_TARGET in the cache -set(DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}" CACHE INTERNAL "") - -# Handle the case where we are targeting iOS and a version above 10.3.4 (32-bit support dropped officially) -if(PLATFORM STREQUAL "OS" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) - set(PLATFORM "OS64") - message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.") -elseif(PLATFORM STREQUAL "SIMULATOR" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) - set(PLATFORM "SIMULATOR64") - message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. 
Dropping 32-bit support.") -endif() - -set(PLATFORM_INT "${PLATFORM}") - -if(DEFINED ARCHS) - string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") -endif() - -# Determine the platform name and architectures for use in xcodebuild commands -# from the specified PLATFORM_INT name. -if(PLATFORM_INT STREQUAL "OS") - set(SDK_NAME iphoneos) - if(NOT ARCHS) - set(ARCHS armv7 armv7s arm64) - set(APPLE_TARGET_TRIPLE_INT arm-apple-ios${DEPLOYMENT_TARGET}) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) - endif() -elseif(PLATFORM_INT STREQUAL "OS64") - set(SDK_NAME iphoneos) - if(NOT ARCHS) - if (XCODE_VERSION_INT VERSION_GREATER 10.0) - set(ARCHS arm64) # FIXME: Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missung bitcode markers for example - else() - set(ARCHS arm64) - endif() - set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios${DEPLOYMENT_TARGET}) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) - endif() -elseif(PLATFORM_INT STREQUAL "OS64COMBINED") - set(SDK_NAME iphoneos) - if(MODERN_CMAKE) - if(NOT ARCHS) - if (XCODE_VERSION_INT VERSION_GREATER 10.0) - set(ARCHS arm64 x86_64) # FIXME: Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missung bitcode markers for example - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") - else() - set(ARCHS arm64 x86_64) - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") - endif() - set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-ios${DEPLOYMENT_TARGET}) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) - endif() - else() - message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the OS64COMBINED setting work") - endif() -elseif(PLATFORM_INT STREQUAL "SIMULATOR") - set(SDK_NAME iphonesimulator) - if(NOT ARCHS) - set(ARCHS i386) - set(APPLE_TARGET_TRIPLE_INT i386-apple-ios${DEPLOYMENT_TARGET}-simulator) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) - endif() - message(DEPRECATION "SIMULATOR IS DEPRECATED. 
Consider using SIMULATOR64 instead.") -elseif(PLATFORM_INT STREQUAL "SIMULATOR64") - set(SDK_NAME iphonesimulator) - if(NOT ARCHS) - set(ARCHS x86_64) - set(APPLE_TARGET_TRIPLE_INT x86_64-apple-ios${DEPLOYMENT_TARGET}-simulator) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) - endif() -elseif(PLATFORM_INT STREQUAL "SIMULATORARM64") - set(SDK_NAME iphonesimulator) - if(NOT ARCHS) - set(ARCHS arm64) - set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios${DEPLOYMENT_TARGET}-simulator) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) - endif() -elseif(PLATFORM_INT STREQUAL "TVOS") - set(SDK_NAME appletvos) - if(NOT ARCHS) - set(ARCHS arm64) - set(APPLE_TARGET_TRIPLE_INT aarch64-apple-tvos${DEPLOYMENT_TARGET}) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}) - endif() -elseif (PLATFORM_INT STREQUAL "TVOSCOMBINED") - set(SDK_NAME appletvos) - if(MODERN_CMAKE) - if(NOT ARCHS) - set(ARCHS arm64 x86_64) - set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-tvos${DEPLOYMENT_TARGET}) - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvsimulator*] "x86_64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvsimulator*] "x86_64") - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}) - endif() - else() - message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the TVOSCOMBINED setting work") - endif() -elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") - set(SDK_NAME appletvsimulator) - if(NOT ARCHS) - set(ARCHS x86_64) - set(APPLE_TARGET_TRIPLE_INT x86_64-apple-tvos${DEPLOYMENT_TARGET}-simulator) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}-simulator) - endif() -elseif(PLATFORM_INT STREQUAL "WATCHOS") - set(SDK_NAME watchos) - if(NOT ARCHS) - if (XCODE_VERSION_INT VERSION_GREATER 10.0) - set(ARCHS armv7k arm64_32) - set(APPLE_TARGET_TRIPLE_INT aarch64_32-apple-watchos${DEPLOYMENT_TARGET}) - else() - set(ARCHS armv7k) - set(APPLE_TARGET_TRIPLE_INT arm-apple-watchos${DEPLOYMENT_TARGET}) - endif() - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}) - endif() -elseif(PLATFORM_INT STREQUAL "WATCHOSCOMBINED") - set(SDK_NAME watchos) - if(MODERN_CMAKE) - if(NOT ARCHS) - if (XCODE_VERSION_INT VERSION_GREATER 10.0) - set(ARCHS armv7k arm64_32 i386) - set(APPLE_TARGET_TRIPLE_INT aarch64_32-i386-apple-watchos${DEPLOYMENT_TARGET}) - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k arm64_32") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k arm64_32") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") - else() - set(ARCHS armv7k i386) - set(APPLE_TARGET_TRIPLE_INT arm-i386-apple-watchos${DEPLOYMENT_TARGET}) - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") - endif() - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}) - endif() - else() - message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the WATCHOSCOMBINED setting work") - endif() -elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") - set(SDK_NAME 
watchsimulator) - if(NOT ARCHS) - set(ARCHS i386) - set(APPLE_TARGET_TRIPLE_INT i386-apple-watchos${DEPLOYMENT_TARGET}-simulator) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}-simulator) - endif() -elseif(PLATFORM_INT STREQUAL "MAC" OR PLATFORM_INT STREQUAL "MAC_CATALYST") - set(SDK_NAME macosx) - if(NOT ARCHS) - set(ARCHS x86_64) - endif() - string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") - if(PLATFORM_INT STREQUAL "MAC") - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) - elseif(PLATFORM_INT STREQUAL "MAC_CATALYST") - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) - endif() -elseif(PLATFORM_INT MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$") - set(SDK_NAME macosx) - if(NOT ARCHS) - set(ARCHS arm64) - endif() - string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") - if(PLATFORM_INT STREQUAL "MAC_ARM64") - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) - elseif(PLATFORM_INT STREQUAL "MAC_CATALYST_ARM64") - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) - endif() -else() - message(FATAL_ERROR "Invalid PLATFORM: ${PLATFORM_INT}") -endif() - -string(REPLACE ";" " " ARCHS_SPACED "${ARCHS}") - -if(MODERN_CMAKE AND PLATFORM_INT MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode") - message(FATAL_ERROR "The COMBINED options only work with Xcode generator, -G Xcode") -endif() - -if(CMAKE_GENERATOR MATCHES "Xcode" AND PLATFORM_INT MATCHES "^MAC_CATALYST") - set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") - set(CMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS "macosx") - set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-maccatalyst") - if(NOT DEFINED MACOSX_DEPLOYMENT_TARGET) - set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "10.15") - else() - set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "${MACOSX_DEPLOYMENT_TARGET}") - endif() -elseif(CMAKE_GENERATOR MATCHES "Xcode") - set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") - set(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}") - if(NOT PLATFORM_INT MATCHES ".*COMBINED") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}") - endif() -endif() - -# If user did not specify the SDK root to use, then query xcodebuild for it. -if(DEFINED CMAKE_OSX_SYSROOT_INT) - # Environment variables are always preserved. - set(ENV{_CMAKE_OSX_SYSROOT_INT} "${CMAKE_OSX_SYSROOT_INT}") -elseif(DEFINED ENV{_CMAKE_OSX_SYSROOT_INT}) - set(CMAKE_OSX_SYSROOT_INT "$ENV{_CMAKE_OSX_SYSROOT_INT}") -elseif(NOT DEFINED CMAKE_OSX_SYSROOT_INT) - execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version -sdk ${SDK_NAME} Path - OUTPUT_VARIABLE CMAKE_OSX_SYSROOT_INT - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - -if (NOT DEFINED CMAKE_OSX_SYSROOT_INT AND NOT DEFINED CMAKE_OSX_SYSROOT) - message(SEND_ERROR "Please make sure that Xcode is installed and that the toolchain" - "is pointing to the correct path. Please run:" - "sudo xcode-select -s /Applications/Xcode.app/Contents/Developer" - "and see if that fixes the problem for you.") - message(FATAL_ERROR "Invalid CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT} " - "does not exist.") -elseif(DEFINED CMAKE_OSX_SYSROOT_INT) - set(CMAKE_OSX_SYSROOT_INT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") - # Specify the location or name of the platform SDK to be used in CMAKE_OSX_SYSROOT. 
- set(CMAKE_OSX_SYSROOT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") -endif() - -# Use bitcode or not -if(NOT DEFINED ENABLE_BITCODE AND NOT ARCHS MATCHES "((^|;|, )(i386|x86_64))+") - # Unless specified, enable bitcode support by default - message(STATUS "[DEFAULTS] Enabling bitcode support by default. ENABLE_BITCODE not provided!") - set(ENABLE_BITCODE ON) -elseif(NOT DEFINED ENABLE_BITCODE) - message(STATUS "[DEFAULTS] Disabling bitcode support by default on simulators. ENABLE_BITCODE not provided for override!") - set(ENABLE_BITCODE OFF) -endif() -set(ENABLE_BITCODE_INT ${ENABLE_BITCODE} CACHE BOOL - "Whether or not to enable bitcode" FORCE) -# Use ARC or not -if(NOT DEFINED ENABLE_ARC) - # Unless specified, enable ARC support by default - set(ENABLE_ARC ON) - message(STATUS "[DEFAULTS] Enabling ARC support by default. ENABLE_ARC not provided!") -endif() -set(ENABLE_ARC_INT ${ENABLE_ARC} CACHE BOOL "Whether or not to enable ARC" FORCE) -# Use hidden visibility or not -if(NOT DEFINED ENABLE_VISIBILITY) - # Unless specified, disable symbols visibility by default - set(ENABLE_VISIBILITY OFF) - message(STATUS "[DEFAULTS] Hiding symbols visibility by default. ENABLE_VISIBILITY not provided!") -endif() -set(ENABLE_VISIBILITY_INT ${ENABLE_VISIBILITY} CACHE BOOL "Whether or not to hide symbols from the dynamic linker (-fvisibility=hidden)" FORCE) -# Set strict compiler checks or not -if(NOT DEFINED ENABLE_STRICT_TRY_COMPILE) - # Unless specified, disable strict try_compile() - set(ENABLE_STRICT_TRY_COMPILE OFF) - message(STATUS "[DEFAULTS] Using NON-strict compiler checks by default. ENABLE_STRICT_TRY_COMPILE not provided!") -endif() -set(ENABLE_STRICT_TRY_COMPILE_INT ${ENABLE_STRICT_TRY_COMPILE} CACHE BOOL - "Whether or not to use strict compiler checks" FORCE) - -# Get the SDK version information. -if(DEFINED SDK_VERSION) - # Environment variables are always preserved. - set(ENV{_SDK_VERSION} "${SDK_VERSION}") -elseif(DEFINED ENV{_SDK_VERSION}) - set(SDK_VERSION "$ENV{_SDK_VERSION}") -elseif(NOT DEFINED SDK_VERSION) - execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -sdk ${CMAKE_OSX_SYSROOT_INT} -version SDKVersion - OUTPUT_VARIABLE SDK_VERSION - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - -# Find the Developer root for the specific iOS platform being compiled for -# from CMAKE_OSX_SYSROOT. Should be ../../ from SDK specified in -# CMAKE_OSX_SYSROOT. There does not appear to be a direct way to obtain -# this information from xcrun or xcodebuild. -if (NOT DEFINED CMAKE_DEVELOPER_ROOT AND NOT CMAKE_GENERATOR MATCHES "Xcode") - get_filename_component(PLATFORM_SDK_DIR ${CMAKE_OSX_SYSROOT_INT} PATH) - get_filename_component(CMAKE_DEVELOPER_ROOT ${PLATFORM_SDK_DIR} PATH) - if (NOT EXISTS "${CMAKE_DEVELOPER_ROOT}") - message(FATAL_ERROR "Invalid CMAKE_DEVELOPER_ROOT: ${CMAKE_DEVELOPER_ROOT} does not exist.") - endif() -endif() - -# Find the C & C++ compilers for the specified SDK. -if(DEFINED CMAKE_C_COMPILER) - # Environment variables are always preserved. - set(ENV{_CMAKE_C_COMPILER} "${CMAKE_C_COMPILER}") -elseif(DEFINED ENV{_CMAKE_C_COMPILER}) - set(CMAKE_C_COMPILER "$ENV{_CMAKE_C_COMPILER}") - set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) -elseif(NOT DEFINED CMAKE_C_COMPILER) - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang - OUTPUT_VARIABLE CMAKE_C_COMPILER - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) -endif() -if(DEFINED CMAKE_CXX_COMPILER) - # Environment variables are always preserved. 
- set(ENV{_CMAKE_CXX_COMPILER} "${CMAKE_CXX_COMPILER}") -elseif(DEFINED ENV{_CMAKE_CXX_COMPILER}) - set(CMAKE_CXX_COMPILER "$ENV{_CMAKE_CXX_COMPILER}") -elseif(NOT DEFINED CMAKE_CXX_COMPILER) - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang++ - OUTPUT_VARIABLE CMAKE_CXX_COMPILER - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() -# Find (Apple's) libtool. -if(DEFINED BUILD_LIBTOOL) - # Environment variables are always preserved. - set(ENV{_BUILD_LIBTOOL} "${BUILD_LIBTOOL}") -elseif(DEFINED ENV{_BUILD_LIBTOOL}) - set(BUILD_LIBTOOL "$ENV{_BUILD_LIBTOOL}") -elseif(NOT DEFINED BUILD_LIBTOOL) - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find libtool - OUTPUT_VARIABLE BUILD_LIBTOOL - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() -# Find the toolchain's provided install_name_tool if none is found on the host -if(DEFINED CMAKE_INSTALL_NAME_TOOL) - # Environment variables are always preserved. - set(ENV{_CMAKE_INSTALL_NAME_TOOL} "${CMAKE_INSTALL_NAME_TOOL}") -elseif(DEFINED ENV{_CMAKE_INSTALL_NAME_TOOL}) - set(CMAKE_INSTALL_NAME_TOOL "$ENV{_CMAKE_INSTALL_NAME_TOOL}") -elseif(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find install_name_tool - OUTPUT_VARIABLE CMAKE_INSTALL_NAME_TOOL_INT - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - set(CMAKE_INSTALL_NAME_TOOL ${CMAKE_INSTALL_NAME_TOOL_INT} CACHE INTERNAL "") -endif() - -# Configure libtool to be used instead of ar + ranlib to build static libraries. -# This is required on Xcode 7+, but should also work on previous versions of -# Xcode. -get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) -foreach(lang ${languages}) - set(CMAKE_${lang}_CREATE_STATIC_LIBRARY "${BUILD_LIBTOOL} -static -o " CACHE INTERNAL "") -endforeach() - -# CMake 3.14+ support building for iOS, watchOS and tvOS out of the box. -if(MODERN_CMAKE) - if(SDK_NAME MATCHES "iphone") - set(CMAKE_SYSTEM_NAME iOS) - elseif(SDK_NAME MATCHES "macosx") - set(CMAKE_SYSTEM_NAME Darwin) - elseif(SDK_NAME MATCHES "appletv") - set(CMAKE_SYSTEM_NAME tvOS) - elseif(SDK_NAME MATCHES "watch") - set(CMAKE_SYSTEM_NAME watchOS) - endif() - # Provide flags for a combined FAT library build on newer CMake versions - if(PLATFORM_INT MATCHES ".*COMBINED") - set(CMAKE_XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO") - set(CMAKE_IOS_INSTALL_COMBINED YES) - endif() -elseif(NOT DEFINED CMAKE_SYSTEM_NAME AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.10") - # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified - set(CMAKE_SYSTEM_NAME iOS) -elseif(NOT DEFINED CMAKE_SYSTEM_NAME) - # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified - set(CMAKE_SYSTEM_NAME Darwin) -endif() -# Standard settings. -set(CMAKE_SYSTEM_VERSION ${SDK_VERSION} CACHE INTERNAL "") -set(UNIX ON CACHE BOOL "") -set(APPLE ON CACHE BOOL "") -if(PLATFORM STREQUAL "MAC" OR PLATFORM STREQUAL "MAC_ARM64") - set(IOS OFF CACHE BOOL "") - set(MACOS ON CACHE BOOL "") -elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64") - set(IOS ON CACHE BOOL "") - set(MACOS ON CACHE BOOL "") -else() - set(IOS ON CACHE BOOL "") -endif() -set(CMAKE_AR ar CACHE FILEPATH "" FORCE) -set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE) -set(CMAKE_STRIP strip CACHE FILEPATH "" FORCE) -# Set the architectures for which to build. 
-set(CMAKE_OSX_ARCHITECTURES ${ARCHS} CACHE INTERNAL "") -# Change the type of target generated for try_compile() so it'll work when cross-compiling, weak compiler checks -if(NOT ENABLE_STRICT_TRY_COMPILE_INT) - set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) -endif() -# All iOS/Darwin specific settings - some may be redundant. -set(CMAKE_MACOSX_BUNDLE YES) -set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "NO") -set(CMAKE_SHARED_LIBRARY_PREFIX "lib") -set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") -set(CMAKE_SHARED_MODULE_PREFIX "lib") -set(CMAKE_SHARED_MODULE_SUFFIX ".so") -set(CMAKE_C_COMPILER_ABI ELF) -set(CMAKE_CXX_COMPILER_ABI ELF) -set(CMAKE_C_HAS_ISYSROOT 1) -set(CMAKE_CXX_HAS_ISYSROOT 1) -set(CMAKE_MODULE_EXISTS 1) -set(CMAKE_DL_LIBS "") -set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") -set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") -set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") -set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") - -if(ARCHS MATCHES "((^|;|, )(arm64|arm64e|x86_64))+") - set(CMAKE_C_SIZEOF_DATA_PTR 8) - set(CMAKE_CXX_SIZEOF_DATA_PTR 8) - if(ARCHS MATCHES "((^|;|, )(arm64|arm64e))+") - set(CMAKE_SYSTEM_PROCESSOR "aarch64") - else() - set(CMAKE_SYSTEM_PROCESSOR "x86_64") - endif() -else() - set(CMAKE_C_SIZEOF_DATA_PTR 4) - set(CMAKE_CXX_SIZEOF_DATA_PTR 4) - set(CMAKE_SYSTEM_PROCESSOR "arm") -endif() - -# Note that only Xcode 7+ supports the newer more specific: -# -m${SDK_NAME}-version-min flags, older versions of Xcode use: -# -m(ios/ios-simulator)-version-min instead. -if(${CMAKE_VERSION} VERSION_LESS "3.11") - if(PLATFORM_INT STREQUAL "OS" OR PLATFORM_INT STREQUAL "OS64") - if(XCODE_VERSION_INT VERSION_LESS 7.0) - set(SDK_NAME_VERSION_FLAGS - "-mios-version-min=${DEPLOYMENT_TARGET}") - else() - # Xcode 7.0+ uses flags we can build directly from SDK_NAME. - set(SDK_NAME_VERSION_FLAGS - "-m${SDK_NAME}-version-min=${DEPLOYMENT_TARGET}") - endif() - elseif(PLATFORM_INT STREQUAL "TVOS") - set(SDK_NAME_VERSION_FLAGS - "-mtvos-version-min=${DEPLOYMENT_TARGET}") - elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") - set(SDK_NAME_VERSION_FLAGS - "-mtvos-simulator-version-min=${DEPLOYMENT_TARGET}") - elseif(PLATFORM_INT STREQUAL "WATCHOS") - set(SDK_NAME_VERSION_FLAGS - "-mwatchos-version-min=${DEPLOYMENT_TARGET}") - elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") - set(SDK_NAME_VERSION_FLAGS - "-mwatchos-simulator-version-min=${DEPLOYMENT_TARGET}") - elseif(PLATFORM_INT STREQUAL "MAC") - set(SDK_NAME_VERSION_FLAGS - "-mmacosx-version-min=${DEPLOYMENT_TARGET}") - else() - # SIMULATOR or SIMULATOR64 both use -mios-simulator-version-min. 
- set(SDK_NAME_VERSION_FLAGS - "-mios-simulator-version-min=${DEPLOYMENT_TARGET}") - endif() -elseif(NOT PLATFORM_INT MATCHES "^MAC_CATALYST") - # Newer versions of CMake sets the version min flags correctly, skip this for Mac Catalyst targets - set(CMAKE_OSX_DEPLOYMENT_TARGET ${DEPLOYMENT_TARGET}) -endif() - -if(DEFINED APPLE_TARGET_TRIPLE_INT) - set(APPLE_TARGET_TRIPLE ${APPLE_TARGET_TRIPLE_INT} CACHE INTERNAL "") - set(CMAKE_C_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) - set(CMAKE_CXX_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) - set(CMAKE_ASM_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) -endif() - -if(PLATFORM_INT MATCHES "^MAC_CATALYST") - set(C_TARGET_FLAGS "-isystem ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/usr/include -iframework ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks") -endif() - -if(ENABLE_BITCODE_INT) - set(BITCODE "-fembed-bitcode") - set(CMAKE_XCODE_ATTRIBUTE_BITCODE_GENERATION_MODE "bitcode") - set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "YES") -else() - set(BITCODE "") - set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "NO") -endif() - -if(ENABLE_ARC_INT) - set(FOBJC_ARC "-fobjc-arc") - set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES") -else() - set(FOBJC_ARC "-fno-objc-arc") - set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "NO") -endif() - -if(NAMED_LANGUAGE_SUPPORT_INT) - set(OBJC_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0") - set(OBJC_LEGACY_VARS "") -else() - set(OBJC_VARS "") - set(OBJC_LEGACY_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0") -endif() - -if(NOT ENABLE_VISIBILITY_INT) - foreach(lang ${languages}) - set(CMAKE_${lang}_VISIBILITY_PRESET "hidden" CACHE INTERNAL "") - endforeach() - set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "YES") - set(VISIBILITY "-fvisibility=hidden -fvisibility-inlines-hidden") -else() - foreach(lang ${languages}) - set(CMAKE_${lang}_VISIBILITY_PRESET "default" CACHE INTERNAL "") - endforeach() - set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "NO") - set(VISIBILITY "-fvisibility=default") -endif() - -if(DEFINED APPLE_TARGET_TRIPLE) - set(APPLE_TARGET_TRIPLE_FLAG "-target ${APPLE_TARGET_TRIPLE}") -endif() - -#Check if Xcode generator is used, since that will handle these flags automagically -if(CMAKE_GENERATOR MATCHES "Xcode") - message(STATUS "Not setting any manual command-line buildflags, since Xcode is selected as generator. 
Modifying the Xcode build-settings directly instead.") -else() - set(CMAKE_C_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${OBJC_LEGACY_VARS} ${BITCODE} ${VISIBILITY} ${CMAKE_C_FLAGS}") - set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_C_FLAGS_DEBUG}") - set(CMAKE_C_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_C_FLAGS_MINSIZEREL}") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_C_FLAGS_RELWITHDEBINFO}") - set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_C_FLAGS_RELEASE}") - set(CMAKE_CXX_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${OBJC_LEGACY_VARS} ${BITCODE} ${VISIBILITY} ${CMAKE_CXX_FLAGS}") - set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_CXX_FLAGS_DEBUG}") - set(CMAKE_CXX_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_CXX_FLAGS_MINSIZEREL}") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") - set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_CXX_FLAGS_RELEASE}") - if(NAMED_LANGUAGE_SUPPORT_INT) - set(CMAKE_OBJC_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} ${FOBJC_ARC} ${OBJC_VARS} ${CMAKE_OBJC_FLAGS}") - set(CMAKE_OBJC_FLAGS_DEBUG "-O0 -g ${CMAKE_OBJC_FLAGS_DEBUG}") - set(CMAKE_OBJC_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_OBJC_FLAGS_MINSIZEREL}") - set(CMAKE_OBJC_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_OBJC_FLAGS_RELWITHDEBINFO}") - set(CMAKE_OBJC_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_OBJC_FLAGS_RELEASE}") - set(CMAKE_OBJCXX_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} ${FOBJC_ARC} ${OBJC_VARS} ${CMAKE_OBJCXX_FLAGS}") - set(CMAKE_OBJCXX_FLAGS_DEBUG "-O0 -g ${CMAKE_OBJCXX_FLAGS_DEBUG}") - set(CMAKE_OBJCXX_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_OBJCXX_FLAGS_MINSIZEREL}") - set(CMAKE_OBJCXX_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_OBJCXX_FLAGS_RELWITHDEBINFO}") - set(CMAKE_OBJCXX_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_OBJCXX_FLAGS_RELEASE}") - endif() - set(CMAKE_C_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") - set(CMAKE_CXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") - if(NAMED_LANGUAGE_SUPPORT_INT) - set(CMAKE_OBJC_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_OBJC_LINK_FLAGS}") - set(CMAKE_OBJCXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_OBJCXX_LINK_FLAGS}") - endif() - set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -x assembler-with-cpp -arch ${CMAKE_OSX_ARCHITECTURES} ${APPLE_TARGET_TRIPLE_FLAG}") -endif() - -## Print status messages to inform of the current state -message(STATUS "Configuring ${SDK_NAME} build for platform: ${PLATFORM_INT}, architecture(s): ${ARCHS}") -message(STATUS "Using SDK: ${CMAKE_OSX_SYSROOT_INT}") -message(STATUS "Using C compiler: ${CMAKE_C_COMPILER}") -message(STATUS "Using CXX compiler: ${CMAKE_CXX_COMPILER}") -message(STATUS "Using libtool: ${BUILD_LIBTOOL}") -message(STATUS "Using install name tool: ${CMAKE_INSTALL_NAME_TOOL}") -if(DEFINED APPLE_TARGET_TRIPLE) - message(STATUS "Autoconf target triple: ${APPLE_TARGET_TRIPLE}") -endif() -message(STATUS "Using minimum deployment version: ${DEPLOYMENT_TARGET}" - " (SDK version: ${SDK_VERSION})") -if(MODERN_CMAKE) - message(STATUS "Merging integrated CMake 3.14+ iOS,tvOS,watchOS,macOS toolchain(s) with this toolchain!") - if(PLATFORM_INT MATCHES ".*COMBINED") - message(STATUS "Will combine built 
(static) artifacts into FAT lib...") - endif() -endif() -if(CMAKE_GENERATOR MATCHES "Xcode") - message(STATUS "Using Xcode version: ${XCODE_VERSION_INT}") -endif() -message(STATUS "CMake version: ${CMAKE_VERSION}") -if(DEFINED SDK_NAME_VERSION_FLAGS) - message(STATUS "Using version flags: ${SDK_NAME_VERSION_FLAGS}") -endif() -message(STATUS "Using a data_ptr size of: ${CMAKE_CXX_SIZEOF_DATA_PTR}") -if(ENABLE_BITCODE_INT) - message(STATUS "Bitcode: Enabled") -else() - message(STATUS "Bitcode: Disabled") -endif() - -if(ENABLE_ARC_INT) - message(STATUS "ARC: Enabled") -else() - message(STATUS "ARC: Disabled") -endif() - -if(ENABLE_VISIBILITY_INT) - message(STATUS "Hiding symbols: Disabled") -else() - message(STATUS "Hiding symbols: Enabled") -endif() - -# Set global properties -set_property(GLOBAL PROPERTY PLATFORM "${PLATFORM}") -set_property(GLOBAL PROPERTY APPLE_TARGET_TRIPLE "${APPLE_TARGET_TRIPLE_INT}") -set_property(GLOBAL PROPERTY SDK_VERSION "${SDK_VERSION}") -set_property(GLOBAL PROPERTY XCODE_VERSION "${XCODE_VERSION_INT}") -set_property(GLOBAL PROPERTY OSX_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}") - -# Export configurable variables for the try_compile() command. -set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES - PLATFORM - XCODE_VERSION_INT - SDK_VERSION - NAMED_LANGUAGE_SUPPORT - DEPLOYMENT_TARGET - CMAKE_DEVELOPER_ROOT - CMAKE_OSX_SYSROOT_INT - ENABLE_BITCODE - ENABLE_ARC - CMAKE_ASM_COMPILER - CMAKE_C_COMPILER - CMAKE_C_COMPILER_TARGET - CMAKE_CXX_COMPILER - CMAKE_CXX_COMPILER_TARGET - BUILD_LIBTOOL - CMAKE_INSTALL_NAME_TOOL - CMAKE_C_FLAGS - CMAKE_C_DEBUG - CMAKE_C_MINSIZEREL - CMAKE_C_RELWITHDEBINFO - CMAKE_C_RELEASE - CMAKE_CXX_FLAGS - CMAKE_CXX_FLAGS_DEBUG - CMAKE_CXX_FLAGS_MINSIZEREL - CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_CXX_FLAGS_RELEASE - CMAKE_C_LINK_FLAGS - CMAKE_CXX_LINK_FLAGS - CMAKE_ASM_FLAGS -) - -if(NAMED_LANGUAGE_SUPPORT_INT) - list(APPEND CMAKE_TRY_COMPILE_PLATFORM_VARIABLES - CMAKE_OBJC_FLAGS - CMAKE_OBJC_DEBUG - CMAKE_OBJC_MINSIZEREL - CMAKE_OBJC_RELWITHDEBINFO - CMAKE_OBJC_RELEASE - CMAKE_OBJCXX_FLAGS - CMAKE_OBJCXX_DEBUG - CMAKE_OBJCXX_MINSIZEREL - CMAKE_OBJCXX_RELWITHDEBINFO - CMAKE_OBJCXX_RELEASE - CMAKE_OBJC_LINK_FLAGS - CMAKE_OBJCXX_LINK_FLAGS - ) -endif() - -set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) -set(CMAKE_SHARED_LINKER_FLAGS "-rpath @executable_path/Frameworks -rpath @loader_path/Frameworks") -set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -Wl,-headerpad_max_install_names") -set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -Wl,-headerpad_max_install_names") -set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") -set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") -set(CMAKE_FIND_LIBRARY_SUFFIXES ".tbd" ".dylib" ".so" ".a") -set(CMAKE_SHARED_LIBRARY_SONAME_C_FLAG "-install_name") - -# Set the find root to the SDK developer roots. -# Note: CMAKE_FIND_ROOT_PATH is only useful when cross-compiling. Thus, do not set on macOS builds. -if(NOT PLATFORM_INT MATCHES "^MAC.*$") - list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") - set(CMAKE_IGNORE_PATH "/System/Library/Frameworks;/usr/local/lib" CACHE INTERNAL "") -endif() - -# Default to searching for frameworks first. -set(CMAKE_FIND_FRAMEWORK FIRST) - -# Set up the default search directories for frameworks. 
-if(PLATFORM_INT MATCHES "^MAC_CATALYST") - set(CMAKE_FRAMEWORK_PATH - ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks - ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks - ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks - ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "") -else() - set(CMAKE_FRAMEWORK_PATH - ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks - ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks - ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "") -endif() - -# By default, search both the specified iOS SDK and the remainder of the host filesystem. -if(NOT CMAKE_FIND_ROOT_PATH_MODE_PROGRAM) - set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH CACHE INTERNAL "") -endif() -if(NOT CMAKE_FIND_ROOT_PATH_MODE_LIBRARY) - set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH CACHE INTERNAL "") -endif() -if(NOT CMAKE_FIND_ROOT_PATH_MODE_INCLUDE) - set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH CACHE INTERNAL "") -endif() -if(NOT CMAKE_FIND_ROOT_PATH_MODE_PACKAGE) - set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH CACHE INTERNAL "") -endif() - -# -# Some helper-macros below to simplify and beautify the CMakeFile -# - -# This little macro lets you set any Xcode specific property. -macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE XCODE_RELVERSION) - set(XCODE_RELVERSION_I "${XCODE_RELVERSION}") - if(XCODE_RELVERSION_I STREQUAL "All") - set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} "${XCODE_VALUE}") - else() - set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY}[variant=${XCODE_RELVERSION_I}] "${XCODE_VALUE}") - endif() -endmacro(set_xcode_property) - -# This macro lets you find executable programs on the host system. -macro(find_host_package) - set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) - set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) - set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) - set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE NEVER) - set(_TOOLCHAIN_IOS ${IOS}) - set(IOS OFF) - find_package(${ARGN}) - set(IOS ${_TOOLCHAIN_IOS}) - set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) - set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) - set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) - set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH) -endmacro(find_host_package) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/CMakeLists.txt deleted file mode 100644 index 686362688c050d48224ca0a01e0d24b03d94758a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_library(utils STATIC - string.cc - utils.cc -) - -if(NOT ANDROID) - if(MSVC) - target_link_libraries(utils PUBLIC fst) - else() - target_link_libraries(utils PUBLIC fst dl) - endif() -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/blocking_queue.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/blocking_queue.h deleted file mode 100644 index 9bf0127d9298fbfae2eeebb9431c680fc5dd7647..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/blocking_queue.h +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_BLOCKING_QUEUE_H_ -#define UTILS_BLOCKING_QUEUE_H_ - -#include -#include -#include -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -template -class BlockingQueue { - public: - explicit BlockingQueue(size_t capacity = std::numeric_limits::max()) - : capacity_(capacity) {} - - void Push(const T& value) { - { - std::unique_lock lock(mutex_); - while (queue_.size() >= capacity_) { - not_full_condition_.wait(lock); - } - queue_.push(value); - } - not_empty_condition_.notify_one(); - } - - void Push(T&& value) { - { - std::unique_lock lock(mutex_); - while (queue_.size() >= capacity_) { - not_full_condition_.wait(lock); - } - queue_.push(std::move(value)); - } - not_empty_condition_.notify_one(); - } - - void Push(const std::vector& values) { - { - std::unique_lock lock(mutex_); - for (auto& value : values) { - while (queue_.size() >= capacity_) { - not_empty_condition_.notify_one(); - not_full_condition_.wait(lock); - } - queue_.push(value); - } - } - not_empty_condition_.notify_one(); - } - - void Push(std::vector&& values) { - std::unique_lock lock(mutex_); - for (auto& value : values) { - while (queue_.size() >= capacity_) { - not_empty_condition_.notify_one(); - not_full_condition_.wait(lock); - } - queue_.push(std::move(value)); - } - not_empty_condition_.notify_one(); - } - - T Pop() { - std::unique_lock lock(mutex_); - while (queue_.empty()) { - not_empty_condition_.wait(lock); - } - T t(std::move(queue_.front())); - queue_.pop(); - not_full_condition_.notify_one(); - return t; - } - - // num can be greater than capacity,but it needs to be used with care - std::vector Pop(size_t num) { - std::unique_lock lock(mutex_); - std::vector block_data; - while (block_data.size() < num) { - while (queue_.empty()) { - not_full_condition_.notify_one(); - not_empty_condition_.wait(lock); - } - block_data.push_back(std::move(queue_.front())); - queue_.pop(); - } - not_full_condition_.notify_one(); - return block_data; - } - - bool Empty() const { - std::lock_guard lock(mutex_); - return queue_.empty(); - } - - size_t Size() const { - std::lock_guard lock(mutex_); - return queue_.size(); - } - - void Clear() { - while (!Empty()) { - Pop(); - } - } - - private: - size_t capacity_; - mutable std::mutex mutex_; - std::condition_variable not_full_condition_; - std::condition_variable not_empty_condition_; - std::queue queue_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(BlockingQueue); -}; - -} // namespace wenet - -#endif // UTILS_BLOCKING_QUEUE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/file.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/file.h deleted file mode 100644 index 83ad9c8c52fecd334b3549285bf39cd4f59b9f2b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/file.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_FILE_H_ -#define UTILS_FILE_H_ - -#include -#include - -namespace wenet { - -inline bool FileExists(const std::string& path) { - std::ifstream f(path.c_str()); - return f.good(); -} - -} // namespace wenet - -#endif // UTILS_FILE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/flags.h deleted file mode 100644 index 3432aa78847322edec8d6d2aec59ed7ca5352fcd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/flags.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_FLAGS_H_ -#define UTILS_FLAGS_H_ - -// Because openfst is a dynamic library compiled with gflags/glog, we must use -// the gflags/glog from openfst to avoid them linked both statically and -// dynamically into the executable. -#include "fst/flags.h" - -#endif // UTILS_FLAGS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/json.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/json.h deleted file mode 100644 index bf8d94a3e42504139b10daa39b8f8e7a8b2d93cc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/json.h +++ /dev/null @@ -1,754 +0,0 @@ -// Copyright (c) From https://github.com/nbsdx/SimpleJSON -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
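For reference, the `wenet::BlockingQueue` removed in the hunk above is a bounded producer/consumer queue: `Push` blocks once the queue holds `capacity_` items and `Pop` blocks while it is empty, with the capacity defaulting to an effectively unbounded value. A minimal usage sketch, illustrative only, assuming a checkout where the deleted header is still reachable as `utils/blocking_queue.h` (this code is not part of the patch):

```cpp
// Producer/consumer round-trip through wenet::BlockingQueue.
#include <iostream>
#include <thread>

#include "utils/blocking_queue.h"

int main() {
  wenet::BlockingQueue<int> queue(/*capacity=*/4);  // Push blocks once 4 items are pending.

  std::thread producer([&queue] {
    for (int i = 0; i < 8; ++i) {
      queue.Push(i);  // Blocks while the queue is at capacity.
    }
  });

  int sum = 0;
  for (int i = 0; i < 8; ++i) {
    sum += queue.Pop();  // Blocks while the queue is empty.
  }
  producer.join();
  std::cout << "sum = " << sum << std::endl;  // 0 + 1 + ... + 7 = 28
  return 0;
}
```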
- -#ifndef UTILS_JSON_H_ -#define UTILS_JSON_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace json { - -using std::deque; -using std::enable_if; -using std::initializer_list; -using std::is_convertible; -using std::is_floating_point; -using std::is_integral; -using std::is_same; -using std::map; -using std::string; - -namespace { // NOLINT -string json_escape(const string& str) { - string output; - for (unsigned i = 0; i < str.length(); ++i) switch (str[i]) { - case '\"': - output += "\\\""; - break; - case '\\': - output += "\\\\"; - break; - case '\b': - output += "\\b"; - break; - case '\f': - output += "\\f"; - break; - case '\n': - output += "\\n"; - break; - case '\r': - output += "\\r"; - break; - case '\t': - output += "\\t"; - break; - default: - output += str[i]; - break; - } - return std::move(output); -} -} // namespace - -class JSON { - union BackingData { - BackingData(double d) : Float(d) {} - BackingData(int l) : Int(l) {} - BackingData(bool b) : Bool(b) {} - BackingData(string s) : String(new string(s)) {} - BackingData() : Int(0) {} - - deque* List; - map* Map; - string* String; - double Float; - int Int; - bool Bool; - } Internal; - - public: - enum class Class { Null, Object, Array, String, Floating, Integral, Boolean }; - - template - class JSONWrapper { - Container* object; - - public: - explicit JSONWrapper(Container* val) : object(val) {} - explicit JSONWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::iterator begin() { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::iterator end() { - return object ? object->end() : typename Container::iterator(); - } - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::const_iterator end() const { - return object ? object->end() : typename Container::iterator(); - } - }; - - template - class JSONConstWrapper { - const Container* object; - - public: - explicit JSONConstWrapper(const Container* val) : object(val) {} - explicit JSONConstWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::const_iterator(); - } - typename Container::const_iterator end() const { - return object ? 
object->end() : typename Container::const_iterator(); - } - }; - - JSON() : Internal(), Type(Class::Null) {} - - explicit JSON(initializer_list list) : JSON() { - SetType(Class::Object); - for (auto i = list.begin(), e = list.end(); i != e; ++i, ++i) - operator[](i->ToString()) = *std::next(i); - } - - JSON(JSON&& other) : Internal(other.Internal), Type(other.Type) { - other.Type = Class::Null; - other.Internal.Map = nullptr; - } - - JSON& operator=(JSON&& other) { - ClearInternal(); - Internal = other.Internal; - Type = other.Type; - other.Internal.Map = nullptr; - other.Type = Class::Null; - return *this; - } - - JSON(const JSON& other) { - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - } - - JSON& operator=(const JSON& other) { - ClearInternal(); - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - return *this; - } - - ~JSON() { - switch (Type) { - case Class::Array: - delete Internal.List; - break; - case Class::Object: - delete Internal.Map; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - template - explicit JSON(T b, typename enable_if::value>::type* = 0) - : Internal(b), Type(Class::Boolean) {} - - template - explicit JSON(T i, typename enable_if::value && - !is_same::value>::type* = 0) - : Internal(static_cast(i)), Type(Class::Integral) {} - - template - explicit JSON(T f, typename enable_if::value>::type* = 0) - : Internal(static_cast(f)), Type(Class::Floating) {} - - template - explicit JSON(T s, - typename enable_if::value>::type* = 0) - : Internal(string(s)), Type(Class::String) {} - - explicit JSON(std::nullptr_t) : Internal(), Type(Class::Null) {} - - static JSON Make(Class type) { - JSON ret; - ret.SetType(type); - return ret; - } - - static JSON Load(const string&); - - template - void append(T arg) { - SetType(Class::Array); - Internal.List->emplace_back(arg); - } - - template - void append(T arg, U... 
args) { - append(arg); - append(args...); - } - - template - typename enable_if::value, JSON&>::type operator=(T b) { - SetType(Class::Boolean); - Internal.Bool = b; - return *this; - } - - template - typename enable_if::value && !is_same::value, - JSON&>::type - operator=(T i) { - SetType(Class::Integral); - Internal.Int = i; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=(T f) { - SetType(Class::Floating); - Internal.Float = f; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=( - T s) { - SetType(Class::String); - *Internal.String = string(s); - return *this; - } - - JSON& operator[](const string& key) { - SetType(Class::Object); - return Internal.Map->operator[](key); - } - - JSON& operator[](unsigned index) { - SetType(Class::Array); - if (index >= Internal.List->size()) Internal.List->resize(index + 1); - return Internal.List->operator[](index); - } - - JSON& at(const string& key) { return operator[](key); } - - const JSON& at(const string& key) const { return Internal.Map->at(key); } - - JSON& at(unsigned index) { return operator[](index); } - - const JSON& at(unsigned index) const { return Internal.List->at(index); } - - int length() const { - if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - bool hasKey(const string& key) const { - if (Type == Class::Object) - return Internal.Map->find(key) != Internal.Map->end(); - return false; - } - - int size() const { - if (Type == Class::Object) - return Internal.Map->size(); - else if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - Class JSONType() const { return Type; } - - /// Functions for getting primitives from the JSON object. - bool IsNull() const { return Type == Class::Null; } - - string ToString() const { - bool b; - return std::move(ToString(&b)); - } - string ToString(bool* ok) const { - *ok = (Type == Class::String); - return *ok ? std::move(json_escape(*Internal.String)) : string(""); - } - - double ToFloat() const { - bool b; - return ToFloat(&b); - } - double ToFloat(bool* ok) const { - *ok = (Type == Class::Floating); - return *ok ? Internal.Float : 0.0; - } - - int ToInt() const { - bool b; - return ToInt(&b); - } - int ToInt(bool* ok) const { - *ok = (Type == Class::Integral); - return *ok ? Internal.Int : 0; - } - - bool ToBool() const { - bool b; - return ToBool(&b); - } - bool ToBool(bool* ok) const { - *ok = (Type == Class::Boolean); - return *ok ? 
Internal.Bool : false; - } - - JSONWrapper> ObjectRange() { - if (Type == Class::Object) - return JSONWrapper>(Internal.Map); - return JSONWrapper>(nullptr); - } - - JSONWrapper> ArrayRange() { - if (Type == Class::Array) return JSONWrapper>(Internal.List); - return JSONWrapper>(nullptr); - } - - JSONConstWrapper> ObjectRange() const { - if (Type == Class::Object) - return JSONConstWrapper>(Internal.Map); - return JSONConstWrapper>(nullptr); - } - - JSONConstWrapper> ArrayRange() const { - if (Type == Class::Array) - return JSONConstWrapper>(Internal.List); - return JSONConstWrapper>(nullptr); - } - - string dump(int depth = 1, string tab = " ") const { - string pad = ""; - for (int i = 0; i < depth; ++i, pad += tab) { - } - - switch (Type) { - case Class::Null: - return "null"; - case Class::Object: { - string s = "{\n"; - bool skip = true; - for (auto& p : *Internal.Map) { - if (!skip) s += ",\n"; - s += (pad + "\"" + p.first + "\" : " + p.second.dump(depth + 1, tab)); - skip = false; - } - s += ("\n" + pad.erase(0, 2) + "}"); - return s; - } - case Class::Array: { - string s = "["; - bool skip = true; - for (auto& p : *Internal.List) { - if (!skip) s += ", "; - s += p.dump(depth + 1, tab); - skip = false; - } - s += "]"; - return s; - } - case Class::String: - return "\"" + json_escape(*Internal.String) + "\""; - case Class::Floating: - return std::to_string(Internal.Float); - case Class::Integral: - return std::to_string(Internal.Int); - case Class::Boolean: - return Internal.Bool ? "true" : "false"; - default: - return ""; - } - return ""; - } - - friend std::ostream& operator<<(std::ostream&, const JSON&); - - private: - void SetType(Class type) { - if (type == Type) return; - - ClearInternal(); - - switch (type) { - case Class::Null: - Internal.Map = nullptr; - break; - case Class::Object: - Internal.Map = new map(); - break; - case Class::Array: - Internal.List = new deque(); - break; - case Class::String: - Internal.String = new string(); - break; - case Class::Floating: - Internal.Float = 0.0; - break; - case Class::Integral: - Internal.Int = 0; - break; - case Class::Boolean: - Internal.Bool = false; - break; - } - - Type = type; - } - - private: - /* beware: only call if YOU know that Internal is allocated. No checks - performed here. This function should be called in a constructed JSON just - before you are going to overwrite Internal... -*/ - void ClearInternal() { - switch (Type) { - case Class::Object: - delete Internal.Map; - break; - case Class::Array: - delete Internal.List; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - private: - Class Type = Class::Null; -}; - -JSON Array() { return std::move(JSON::Make(JSON::Class::Array)); } - -template -JSON Array(T... 
args) { - JSON arr = JSON::Make(JSON::Class::Array); - arr.append(args...); - return std::move(arr); -} - -JSON Object() { return std::move(JSON::Make(JSON::Class::Object)); } - -std::ostream& operator<<(std::ostream& os, const JSON& json) { - os << json.dump(); - return os; -} - -namespace { // NOLINT -JSON parse_next(const string&, size_t&); - -void consume_ws(const string& str, size_t& offset) { // NOLINT - while (isspace(str[offset])) ++offset; -} - -JSON parse_object(const string& str, size_t& offset) { // NOLINT - JSON Object = JSON::Make(JSON::Class::Object); - - ++offset; - consume_ws(str, offset); - if (str[offset] == '}') { - ++offset; - return std::move(Object); - } - - while (true) { - JSON Key = parse_next(str, offset); - consume_ws(str, offset); - if (str[offset] != ':') { - std::cerr << "Error: Object: Expected colon, found '" << str[offset] - << "'\n"; - break; - } - consume_ws(str, ++offset); - JSON Value = parse_next(str, offset); - Object[Key.ToString()] = Value; - - consume_ws(str, offset); - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == '}') { - ++offset; - break; - } else { - std::cerr << "ERROR: Object: Expected comma, found '" << str[offset] - << "'\n"; - break; - } - } - - return std::move(Object); -} - -JSON parse_array(const string& str, size_t& offset) { // NOLINT - JSON Array = JSON::Make(JSON::Class::Array); - unsigned index = 0; - - ++offset; - consume_ws(str, offset); - if (str[offset] == ']') { - ++offset; - return std::move(Array); - } - - while (true) { - Array[index++] = parse_next(str, offset); - consume_ws(str, offset); - - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == ']') { - ++offset; - break; - } else { - std::cerr << "ERROR: Array: Expected ',' or ']', found '" << str[offset] - << "'\n"; - return std::move(JSON::Make(JSON::Class::Array)); - } - } - - return std::move(Array); -} - -JSON parse_string(const string& str, size_t& offset) { // NOLINT - JSON String; - string val; - for (char c = str[++offset]; c != '\"'; c = str[++offset]) { - if (c == '\\') { - switch (str[++offset]) { - case '\"': - val += '\"'; - break; - case '\\': - val += '\\'; - break; - case '/': - val += '/'; - break; - case 'b': - val += '\b'; - break; - case 'f': - val += '\f'; - break; - case 'n': - val += '\n'; - break; - case 'r': - val += '\r'; - break; - case 't': - val += '\t'; - break; - case 'u': { - val += "\\u"; - for (unsigned i = 1; i <= 4; ++i) { - c = str[offset + i]; - if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || - (c >= 'A' && c <= 'F')) { - val += c; - } else { - std::cerr << "ERROR: String: Expected hex character in unicode " - "escape, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::String)); - } - } - offset += 4; - } break; - default: - val += '\\'; - break; - } - } else { - val += c; - } - } - ++offset; - String = val; - return std::move(String); -} - -JSON parse_number(const string& str, size_t& offset) { // NOLINT - JSON Number; - string val, exp_str; - char c; - bool isDouble = false; - int exp = 0; - while (true) { - c = str[offset++]; - if ((c == '-') || (c >= '0' && c <= '9')) { - val += c; - } else if (c == '.') { - val += c; - isDouble = true; - } else { - break; - } - } - if (c == 'E' || c == 'e') { - c = str[offset++]; - if (c == '-') { - ++offset; - exp_str += '-'; - } - while (true) { - c = str[offset++]; - if (c >= '0' && c <= '9') { - exp_str += c; - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: 
Expected a number for exponent, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } else { - break; - } - } - exp = std::stol(exp_str); - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: unexpected character '" << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - --offset; - - if (isDouble) { - Number = std::stod(val) * std::pow(10, exp); - } else { - if (!exp_str.empty()) - Number = std::stol(val) * std::pow(10, exp); - else - Number = std::stol(val); - } - return std::move(Number); -} - -JSON parse_bool(const string& str, size_t& offset) { // NOLINT - JSON Bool; - if (str.substr(offset, 4) == "true") { - Bool = true; - } else if (str.substr(offset, 5) == "false") { - Bool = false; - } else { - std::cerr << "ERROR: Bool: Expected 'true' or 'false', found '" - << str.substr(offset, 5) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += (Bool.ToBool() ? 4 : 5); - return std::move(Bool); -} - -JSON parse_null(const string& str, size_t& offset) { // NOLINT - JSON Null; - if (str.substr(offset, 4) != "null") { - std::cerr << "ERROR: Null: Expected 'null', found '" - << str.substr(offset, 4) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += 4; - return std::move(Null); -} - -JSON parse_next(const string& str, size_t& offset) { // NOLINT - char value; - consume_ws(str, offset); - value = str[offset]; - switch (value) { - case '[': - return std::move(parse_array(str, offset)); - case '{': - return std::move(parse_object(str, offset)); - case '\"': - return std::move(parse_string(str, offset)); - case 't': - case 'f': - return std::move(parse_bool(str, offset)); - case 'n': - return std::move(parse_null(str, offset)); - default: - if ((value <= '9' && value >= '0') || value == '-') - return std::move(parse_number(str, offset)); - } - std::cerr << "ERROR: Parse: Unknown starting character '" << value << "'\n"; - return JSON(); -} -} // namespace - -JSON JSON::Load(const string& str) { - size_t offset = 0; - return std::move(parse_next(str, offset)); -} - -} // namespace json - -#endif // UTILS_JSON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/log.h deleted file mode 100644 index c2bf03f261a8711f74da819d80d68e8eb9fb124a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/log.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_LOG_H_ -#define UTILS_LOG_H_ - -// Because openfst is a dynamic library compiled with gflags/glog, we must use -// the gflags/glog from openfst to avoid them linked both statically and -// dynamically into the executable. 
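For reference, the `json.h` removed above vendors the SimpleJSON parser (github.com/nbsdx/SimpleJSON) into the runtime; parse errors are reported on `std::cerr` and yield a null or empty node rather than throwing, as the `parse_*` helpers show. A minimal round-trip sketch, illustrative only, assuming the deleted header were still available as `utils/json.h`:

```cpp
// Load, read, modify, and re-serialize a small JSON document.
#include <iostream>

#include "utils/json.h"

int main() {
  json::JSON obj = json::JSON::Load("{\"nbest\": 3, \"text\": \"hello\"}");
  std::cout << obj["text"].ToString() << " / nbest=" << obj["nbest"].ToInt() << std::endl;

  obj["rescored"] = true;              // assignment converts the node to Boolean
  std::cout << obj.dump() << std::endl;  // serializes the object back to JSON text
  return 0;
}
```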
-#include "fst/log.h" - -#endif // UTILS_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/string.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/string.cc deleted file mode 100644 index 1ab93adf3cac1bc5a42c0b8c6cadbde399678fef..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/string.cc +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "utils/string.h" - -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -void SplitString(const std::string& str, std::vector* strs) { - SplitStringToVector(Trim(str), " \t", true, strs); -} - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars) { - chars->clear(); - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - assert((str[i] & 0xF8) <= 0xF0); - if ((str[i] & 0x80) == 0x00) { - // The first 128 characters (US-ASCII) in UTF-8 format only need one byte. - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - // The next 1,920 characters need two bytes to encode, - // which covers the remainder of almost all Latin-script alphabets. - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - // Three bytes are needed for characters in the rest of - // the Basic Multilingual Plane, which contains virtually all characters - // in common use, including most Chinese, Japanese and Korean characters. - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - // Four bytes are needed for characters in the other planes of Unicode, - // which include less common CJK characters, various historic scripts, - // mathematical symbols, and emoji (pictographic symbols). 
- bytes = 4; - } - chars->push_back(str.substr(i, bytes)); - } -} - -int UTF8StringLength(const std::string& str) { - int len = 0; - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - if ((str[i] & 0x80) == 0x00) { - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - bytes = 4; - } - ++len; - } - return len; -} - -bool CheckEnglishChar(const std::string& ch) { - // all english characters should be encoded in one byte - if (ch.size() != 1) return false; - // english words may contain apostrophe, i.e., "He's" - return isalpha(ch[0]) || ch[0] == '\''; -} - -bool CheckEnglishWord(const std::string& word) { - std::vector chars; - SplitUTF8StringToChars(word, &chars); - for (size_t k = 0; k < chars.size(); k++) { - if (!CheckEnglishChar(chars[k])) { - return false; - } - } - return true; -} - -std::string JoinString(const std::string& c, - const std::vector& strs) { - std::string result; - if (strs.size() > 0) { - for (int i = 0; i < strs.size() - 1; i++) { - result += (strs[i] + c); - } - result += strs.back(); - } - return result; -} - -bool IsAlpha(const std::string& str) { - for (size_t i = 0; i < str.size(); i++) { - if (!isalpha(str[i])) { - return false; - } - } - return true; -} - -std::string ProcessBlank(const std::string& str, bool lowercase) { - std::string result; - if (!str.empty()) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - for (std::string& ch : chars) { - if (ch != kSpaceSymbol) { - result.append(ch); - } else { - // Ignore consecutive space or located in head - if (!result.empty() && result.back() != ' ') { - result.push_back(' '); - } - } - } - // Ignore tailing space - if (!result.empty() && result.back() == ' ') { - result.pop_back(); - } - // NOTE: convert string to wstring - // see issue 745: https://github.com/wenet-e2e/wenet/issues/745 - std::locale loc(""); - std::wstring_convert, wchar_t> converter; - std::wstring wsresult = converter.from_bytes(result); - for (auto& c : wsresult) { - c = lowercase ? tolower(c, loc) : toupper(c, loc); - } - result = converter.to_bytes(wsresult); - } - return result; -} - -std::string Ltrim(const std::string& str) { - size_t start = str.find_first_not_of(WHITESPACE); - return (start == std::string::npos) ? "" : str.substr(start); -} - -std::string Rtrim(const std::string& str) { - size_t end = str.find_last_not_of(WHITESPACE); - return (end == std::string::npos) ? 
"" : str.substr(0, end + 1); -} - -std::string Trim(const std::string& str) { return Rtrim(Ltrim(str)); } - -std::string JoinPath(const std::string& left, const std::string& right) { - std::string path(left); - if (path.size() && path.back() != '/') { - path.push_back('/'); - } - path.append(right); - return path; -} - -#ifdef _MSC_VER -std::wstring ToWString(const std::string& str) { - unsigned len = str.size() * 2; - setlocale(LC_CTYPE, ""); - wchar_t* p = new wchar_t[len]; - mbstowcs(p, str.c_str(), len); - std::wstring wstr(p); - delete[] p; - return wstr; -} -#endif - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/string.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/string.h deleted file mode 100644 index bf7a52ae09bce45ab7e34a5277652d7ae91bae1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/string.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_STRING_H_ -#define UTILS_STRING_H_ - -#include -#include -#include -#include -#include - -#include "fst/symbol-table.h" - -namespace wenet { - -const char WHITESPACE[] = " \n\r\t\f\v"; - -// Split the string with space or tab. -void SplitString(const std::string& str, std::vector* strs); - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out); - -// NOTE(Xingchen Song): we add this function to make it possible to -// support multilingual recipe in the future, in which characters of -// different languages are all encoded in UTF-8 format. -// UTF-8 REF: https://en.wikipedia.org/wiki/UTF-8#Encoding -// Split the UTF-8 string into chars. -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars); - -int UTF8StringLength(const std::string& str); - -// Check whether the UTF-8 char is alphabet or '. -bool CheckEnglishChar(const std::string& ch); - -// Check whether the UTF-8 word is only contains alphabet or '. -bool CheckEnglishWord(const std::string& word); - -std::string JoinString(const std::string& c, - const std::vector& strs); - -bool IsAlpha(const std::string& str); - -// Split the UTF-8 string into words by symbol table. -// Return whether not contains oov. -bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - -// Replace ▁ with space, then remove head, tail and consecutive space. 
-std::string ProcessBlank(const std::string& str, bool lowercase); - -std::string Ltrim(const std::string& str); - -std::string Rtrim(const std::string& str); - -std::string Trim(const std::string& str); - -std::string JoinPath(const std::string& left, const std::string& right); - -#ifdef _MSC_VER -std::wstring ToWString(const std::string& str); -#endif - -} // namespace wenet - -#endif // UTILS_STRING_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/thread_pool.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/thread_pool.h deleted file mode 100644 index a78162995d90bf079ad091cf14cb9f2cd4476d05..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/thread_pool.h +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2012 Jakob Progsch, Václav Zeman - -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. - -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: - -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. - -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. - -// 3. This notice may not be removed or altered from any source -// distribution. - -#ifndef UTILS_THREAD_POOL_H_ -#define UTILS_THREAD_POOL_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -class ThreadPool { - public: - explicit ThreadPool(size_t); - template - auto enqueue(F&& f, Args&&... args) - -> std::future::type>; - ~ThreadPool(); - - private: - // need to keep track of threads so we can join them - std::vector workers; - // the task queue - std::queue > tasks; - - // synchronization - std::mutex queue_mutex; - std::condition_variable condition; - bool stop; -}; - -// the constructor just launches some amount of workers -inline ThreadPool::ThreadPool(size_t threads) : stop(false) { - for (size_t i = 0; i < threads; ++i) - workers.emplace_back([this] { - for (;;) { - std::function task; - - { - std::unique_lock lock(this->queue_mutex); - this->condition.wait( - lock, [this] { return this->stop || !this->tasks.empty(); }); - if (this->stop && this->tasks.empty()) return; - task = std::move(this->tasks.front()); - this->tasks.pop(); - } - - task(); - } - }); -} - -// add new work item to the pool -template -auto ThreadPool::enqueue(F&& f, Args&&... 
args) - -> std::future::type> { - using return_type = typename std::result_of::type; - - auto task = std::make_shared >( - std::bind(std::forward(f), std::forward(args)...)); - - std::future res = task->get_future(); - { - std::unique_lock lock(queue_mutex); - - // don't allow enqueueing after stopping the pool - if (stop) { - throw std::runtime_error("enqueue on stopped ThreadPool"); - } - - tasks.emplace([task]() { (*task)(); }); - } - condition.notify_one(); - return res; -} - -// the destructor joins all threads -inline ThreadPool::~ThreadPool() { - { - std::unique_lock lock(queue_mutex); - stop = true; - } - condition.notify_all(); - for (std::thread& worker : workers) { - worker.join(); - } -} - -#endif // UTILS_THREAD_POOL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/timer.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/timer.h deleted file mode 100644 index 068519f98d140ba0eef68babe2ad2fdcb798c074..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/timer.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_TIMER_H_ -#define UTILS_TIMER_H_ - -#include - -namespace wenet { - -class Timer { - public: - Timer() : time_start_(std::chrono::steady_clock::now()) {} - void Reset() { time_start_ = std::chrono::steady_clock::now(); } - // return int in milliseconds - int Elapsed() const { - auto time_now = std::chrono::steady_clock::now(); - return std::chrono::duration_cast(time_now - - time_start_) - .count(); - } - - private: - std::chrono::time_point time_start_; -}; -} // namespace wenet - -#endif // UTILS_TIMER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/utils.cc deleted file mode 100644 index c37e36c6e9f629e0a4b11cf21a791aefd58b659f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/utils.cc +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
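For reference, the `ThreadPool` removed above is the well-known Progsch/Zeman pool: `enqueue` wraps a callable in a `std::packaged_task`, returns its `std::future`, and a fixed set of worker threads drains the task queue until the destructor sets `stop` and joins them. Combined with `wenet::Timer` from the deleted `timer.h`, a minimal sketch (illustrative only, assuming both headers were still present):

```cpp
// Run a batch of tasks on the pool and time them.
#include <future>
#include <iostream>
#include <vector>

#include "utils/thread_pool.h"
#include "utils/timer.h"

int main() {
  ThreadPool pool(4);   // four worker threads
  wenet::Timer timer;   // starts at construction

  std::vector<std::future<int>> results;
  for (int i = 0; i < 8; ++i) {
    results.emplace_back(pool.enqueue([i] { return i * i; }));
  }

  int sum = 0;
  for (auto& r : results) {
    sum += r.get();     // blocks until the corresponding task has run
  }
  std::cout << "sum=" << sum << ", elapsed=" << timer.Elapsed() << "ms" << std::endl;
  return 0;
}
```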
- -#include "utils/utils.h" - -#include -#include -#include -#include -#include -#include - -#include "utils/log.h" - -namespace wenet { - -float LogAdd(float x, float y) { - static float num_min = -std::numeric_limits::max(); - if (x <= num_min) return y; - if (y <= num_min) return x; - float xmax = std::max(x, y); - return std::log(std::exp(x - xmax) + std::exp(y - xmax)) + xmax; -} - -template -struct ValueComp { - bool operator()(const std::pair& lhs, - const std::pair& rhs) const { - return lhs.first > rhs.first || - (lhs.first == rhs.first && lhs.second < rhs.second); - } -}; - -// We refer the pytorch topk implementation -// https://github.com/pytorch/pytorch/blob/master/caffe2/operators/top_k.cc -template -void TopK(const std::vector& data, int32_t k, std::vector* values, - std::vector* indices) { - std::vector> heap_data; - int n = data.size(); - for (int32_t i = 0; i < k && i < n; ++i) { - heap_data.emplace_back(data[i], i); - } - std::priority_queue, std::vector>, - ValueComp> - pq(ValueComp(), std::move(heap_data)); - for (int32_t i = k; i < n; ++i) { - if (pq.top().first < data[i]) { - pq.pop(); - pq.emplace(data[i], i); - } - } - - values->resize(std::min(k, n)); - indices->resize(std::min(k, n)); - int32_t cur = values->size() - 1; - while (!pq.empty()) { - const auto& item = pq.top(); - (*values)[cur] = item.first; - (*indices)[cur] = item.second; - pq.pop(); - cur -= 1; - } -} - -template void TopK(const std::vector& data, int32_t k, - std::vector* values, - std::vector* indices); - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/utils.h deleted file mode 100644 index f9957c0b6e8ae27d9260e75cf55e786055827801..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/ios/utils/utils.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef UTILS_UTILS_H_ -#define UTILS_UTILS_H_ - -#include -#include -#include - -namespace wenet { - -#define WENET_DISALLOW_COPY_AND_ASSIGN(Type) \ - Type(const Type&) = delete; \ - Type& operator=(const Type&) = delete; - -const float kFloatMax = std::numeric_limits::max(); -// kSpaceSymbol in UTF-8 is: ▁ -const char kSpaceSymbol[] = "\xe2\x96\x81"; - -// Return the sum of two probabilities in log scale -float LogAdd(float x, float y); - -template -void TopK(const std::vector& data, int32_t k, std::vector* values, - std::vector* indices); - -} // namespace wenet - -#endif // UTILS_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/CMakeLists.txt deleted file mode 100644 index 71628eb7f5e12b00f6c52b390f140e37465de43b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/CMakeLists.txt +++ /dev/null @@ -1,66 +0,0 @@ -cmake_minimum_required(VERSION 3.14 FATAL_ERROR) - -project(wenet VERSION 0.1) - -option(CXX11_ABI "whether to use CXX11_ABI libtorch" OFF) -option(GRAPH_TOOLS "whether to build TLG graph tools" OFF) -option(BUILD_TESTING "whether to build unit test" OFF) - -option(GRPC "whether to build with gRPC" OFF) -# TODO(Binbin Zhang): Change websocket to OFF since it depends on boost -# which is a very big library -option(WEBSOCKET "whether to build with websocket" OFF) -option(XPU "whether to build with XPU" ON) - -set(CMAKE_VERBOSE_MAKEFILE OFF) - -include(FetchContent) -set(FETCHCONTENT_QUIET OFF) -get_filename_component(fc_base "fc_base" REALPATH BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -set(FETCHCONTENT_BASE_DIR ${fc_base}) - -list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) - -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -pthread -fPIC") - -# Include all dependency -include(openfst) -# This CMakeLists.txt is only used for kunlun xpu, so remove the contents -# about onnx, libtorch, gpu and windows. 
-include(xpu) -# Compile xpu_conformer.a and conformer_test -add_subdirectory(xpu) - -include_directories( - ${CMAKE_CURRENT_SOURCE_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}/kaldi -) - -# Build all libraries -add_subdirectory(utils) -add_subdirectory(frontend) -add_subdirectory(post_processor) -add_subdirectory(kaldi) # kaldi: wfst based decoder -add_subdirectory(decoder) -add_subdirectory(api) - -# Optionally, you can build with websocket -if(WEBSOCKET) - include(boost) - add_subdirectory(websocket) -endif() - -# Optionally, you can build with gRPC -if(GRPC) - include(grpc) - add_subdirectory(grpc) -endif() - -# Build all bins -add_subdirectory(bin) - -# Unit Test -if(BUILD_TESTING) - include(gtest) - add_subdirectory(test) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/README.md deleted file mode 100644 index 2e096b796a603571b79372ecc8955a2981f2913e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/README.md +++ /dev/null @@ -1,83 +0,0 @@ -# 在昆仑芯片上运行Wenet -## 介绍 -下面的示例展示了如何在XPU上部署WeNet离线或在线的ASR模型。XPU是一种由昆仑芯100%自主研发的通用人工智能计算核心架构。 - -## 准备XPU运行环境 - -在开始之前,请确认您获得以下必须的环境。 - - XRE(XPU Runtime Environment):昆仑芯片的基础运行环境,包括芯片驱动程序、runtime api库、固件FW工具等功能模块。 - XDNN(XPU Deep Neural Network Library):加速深度神经网络的昆仑芯片库,提供应用程序中使用的高性能DNN功能库。 - -如果您需要任何帮助,或是想要进一步了解昆仑芯片,请通过官方网址联系我们: -https://www.kunlunxin.com.cn/ - -## 操作步骤 -- 第一步:构建,需要cmake 3.14及以上版本 - -``` sh -export CXX=${your_g++_path} -export CC=${your_gcc_path} -export XPU_API_PATH=${your_api_path} - -# -r : release version; -d : debug version -bash ./compile.sh -r -``` - -- 第二步:测试,测试结果将在控制台输出 - -``` sh -## set KUNLUN XPU visible device -export XPU_VISIBLE_DEVICES=0 -export XPUSIM_DEVICE_MODEL=KUNLUN2 -## set logging level -export GLOG_logtostderr=1 -export GLOG_v=3 -## set speech wav and model/weight path -wav_path=${your_test_wav_path} -xpu_model_dir=${your_xpu_weight_dir} -units=${your_units.txt} -## executive command -./build/bin/decoder_main \ - --chunk_size -1 \ - --wav_path ${wav_path} \ - --xpu_model_dir ${xpu_model_di} \ - --unit_path ${units} \ - --device_id 0 \ - --nbest 3 2>&1 | tee log.txt -``` - -单条语音执行结果如下所示: - -``` sh -XPURT /docker_workspace/icode-api/baidu/xpu/api/../runtime/output/so/libxpurt.so loaded -I1027 06:06:21.933722 111767 params.h:152] Reading XPU WeNet model weight from /docker_workspace/icode-api/baidu/xpu/api/example/wenet-conformer/all_data/ -I1027 06:06:21.934103 111767 xpu_asr_model.cc:46] XPU weight_dir is: /docker_workspace/icode-api/baidu/xpu/api/example/wenet-conformer/all_data//model_weights/ -I1027 06:06:23.832731 111767 xpu_asr_model.cc:65] ======= XPU Kunlun Model Info: ======= -I1027 06:06:23.832749 111767 xpu_asr_model.cc:66] subsampling_rate 4 -I1027 06:06:23.832777 111767 xpu_asr_model.cc:67] right_context 6 -I1027 06:06:23.832789 111767 xpu_asr_model.cc:68] sos 5538 -I1027 06:06:23.832795 111767 xpu_asr_model.cc:69] eos 5538 -I1027 06:06:23.832799 111767 xpu_asr_model.cc:70] is bidirectional decoder 1 -I1027 06:06:23.832804 111767 params.h:165] Reading unit table /docker_workspace/icode-api/baidu/xpu/api/example/wenet-conformer/all_data/dict -I1027 06:06:23.843475 111776 decoder_main.cc:54] num frames 418 -I1027 06:06:23.843521 111776 asr_decoder.cc:104] Required 2147483647 get 418 -I1027 06:06:23.843528 111776 xpu_asr_model.cc:116] Now Use XPU:0! 
-I1027 06:06:23.843616 111776 xpu_asr_model.cc:173] max_seqlen is 418 -I1027 06:06:23.843619 111776 xpu_asr_model.cc:174] q_seqlen is 103 -I1027 06:06:23.843623 111776 xpu_asr_model.cc:175] att_dim is 512 -I1027 06:06:23.843626 111776 xpu_asr_model.cc:176] ctc_dim is 5538 -I1027 06:06:23.852284 111776 asr_decoder.cc:113] forward takes 7 ms, search takes 1 ms -I1027 06:06:23.852383 111776 asr_decoder.cc:194] Partial CTC result 甚至出现交易几乎停滞的情况 -I1027 06:06:23.852530 111776 asr_decoder.cc:194] Partial CTC result 甚至出现交易几乎停滞的情况 -I1027 06:06:23.852537 111776 xpu_asr_model.cc:248] num_hyps is 3 -I1027 06:06:23.852541 111776 xpu_asr_model.cc:249] beam_size is 3 -I1027 06:06:23.852545 111776 xpu_asr_model.cc:250] new_bs is 3 -I1027 06:06:23.852545 111776 xpu_asr_model.cc:251] max_hyps_len is 14 -I1027 06:06:23.853902 111776 asr_decoder.cc:84] Rescoring cost latency: 1ms. -I1027 06:06:23.853911 111776 decoder_main.cc:72] Partial result: 甚至出现交易几乎停滞的情况 -I1027 06:06:23.853914 111776 decoder_main.cc:104] test Final result: 甚至出现交易几乎停滞的情况 -I1027 06:06:23.853924 111776 decoder_main.cc:105] Decoded 4203ms audio taken 10ms. -test 甚至出现交易几乎停滞的情况 -I1027 06:06:23.853984 111767 decoder_main.cc:180] Total: decoded 4203ms audio taken 10ms. -``` diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/README_EN.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/README_EN.md deleted file mode 100644 index ff78792f995e8c7074339c1b4b9fb9439fa18de7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/README_EN.md +++ /dev/null @@ -1,87 +0,0 @@ -# WeNet running on KUNLUNXIN XPU device -## Introduction -The below example shows how to deploy WeNet offline and online ASR models on XPUs. -XPU is a core architecture 100% independently developed by KUNLUNXIN for general artificial intelligence computing. - -## Setup environment for XPU device - -Before the start, makesure you have these necessary environment - - XRE(XPU Runtime Environment):The basic operating environment of the XPUs - includes functional modules such as chip drivers, runtime api library, and firmware tools. - - XDNN(XPU Deep Neural Network Library): XPU library for accelerating deep neural networks, providing high-performance DNN function library used in applications. - -If you would like to know more about XPUs or need any help, please contact us through the official website: - -https://www.kunlunxin.com.cn/ - -## Instruction -- Step 1. Build, the build requires cmake 3.14 or above. - -``` sh -export CXX=${your_g++_path} -export CC=${your_gcc_path} -export XPU_API_PATH=${your_api_path} - -# -r : release version; -d : debug version -bash ./compile.sh -r -``` - -- Step 2. Testing, the result is shown in the console. 
- -``` sh -## set KUNLUN XPU visible device -export XPU_VISIBLE_DEVICES=0 -export XPUSIM_DEVICE_MODEL=KUNLUN2 -## set logging level -export GLOG_logtostderr=1 -export GLOG_v=3 -## set speech wav and model/weight/units path -wav_path=${your_test_wav_path} -xpu_model_dir=${your_xpu_weight_dir} -units=${your_units.txt} -## executive command -./build/bin/decoder_main \ - --chunk_size -1 \ - --wav_path $wav_path \ - --xpu_model_dir $xpu_model_dir \ - --unit_path $units \ - --device_id 0 \ - --nbest 3 2>&1 | tee log.txt -``` - -A typical output result is as following: - -``` sh -XPURT /docker_workspace/icode-api/baidu/xpu/api/../runtime/output/so/libxpurt.so loaded -I1027 06:06:21.933722 111767 params.h:152] Reading XPU WeNet model weight from /docker_workspace/icode-api/baidu/xpu/api/example/wenet-conformer/all_data/ -I1027 06:06:21.934103 111767 xpu_asr_model.cc:46] XPU weight_dir is: /docker_workspace/icode-api/baidu/xpu/api/example/wenet-conformer/all_data//model_weights/ -I1027 06:06:23.832731 111767 xpu_asr_model.cc:65] ======= XPU Kunlun Model Info: ======= -I1027 06:06:23.832749 111767 xpu_asr_model.cc:66] subsampling_rate 4 -I1027 06:06:23.832777 111767 xpu_asr_model.cc:67] right_context 6 -I1027 06:06:23.832789 111767 xpu_asr_model.cc:68] sos 5538 -I1027 06:06:23.832795 111767 xpu_asr_model.cc:69] eos 5538 -I1027 06:06:23.832799 111767 xpu_asr_model.cc:70] is bidirectional decoder 1 -I1027 06:06:23.832804 111767 params.h:165] Reading unit table /docker_workspace/icode-api/baidu/xpu/api/example/wenet-conformer/all_data/dict -I1027 06:06:23.843475 111776 decoder_main.cc:54] num frames 418 -I1027 06:06:23.843521 111776 asr_decoder.cc:104] Required 2147483647 get 418 -I1027 06:06:23.843528 111776 xpu_asr_model.cc:116] Now Use XPU:0! -I1027 06:06:23.843616 111776 xpu_asr_model.cc:173] max_seqlen is 418 -I1027 06:06:23.843619 111776 xpu_asr_model.cc:174] q_seqlen is 103 -I1027 06:06:23.843623 111776 xpu_asr_model.cc:175] att_dim is 512 -I1027 06:06:23.843626 111776 xpu_asr_model.cc:176] ctc_dim is 5538 -I1027 06:06:23.852284 111776 asr_decoder.cc:113] forward takes 7 ms, search takes 1 ms -I1027 06:06:23.852383 111776 asr_decoder.cc:194] Partial CTC result 甚至出现交易几乎停滞的情况 -I1027 06:06:23.852530 111776 asr_decoder.cc:194] Partial CTC result 甚至出现交易几乎停滞的情况 -I1027 06:06:23.852537 111776 xpu_asr_model.cc:248] num_hyps is 3 -I1027 06:06:23.852541 111776 xpu_asr_model.cc:249] beam_size is 3 -I1027 06:06:23.852545 111776 xpu_asr_model.cc:250] new_bs is 3 -I1027 06:06:23.852545 111776 xpu_asr_model.cc:251] max_hyps_len is 14 -I1027 06:06:23.853902 111776 asr_decoder.cc:84] Rescoring cost latency: 1ms. -I1027 06:06:23.853911 111776 decoder_main.cc:72] Partial result: 甚至出现交易几乎停滞的情况 -I1027 06:06:23.853914 111776 decoder_main.cc:104] test Final result: 甚至出现交易几乎停滞的情况 -I1027 06:06:23.853924 111776 decoder_main.cc:105] Decoded 4203ms audio taken 10ms. -test 甚至出现交易几乎停滞的情况 -I1027 06:06:23.853984 111767 decoder_main.cc:180] Total: decoded 4203ms audio taken 10ms. 
-``` diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/CMakeLists.txt deleted file mode 100644 index 8d61ca8477f0f0b6128f1effe0a2738494b2620f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -if(TORCH) - add_library(wenet_api SHARED wenet_api.cc) - target_link_libraries(wenet_api PUBLIC decoder) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/README.md deleted file mode 100644 index 5eaa13b977eb4836eb930452f4434dc9f2ea4139..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# WeNet API - -We refer [vosk](https://github.com/alphacep/vosk-api/blob/master/src/vosk_api.h) -for the interface design. - - -We are going to implement the following interfaces: - -- [x] non-streaming recognition -- [] streaming recognition -- [] nbest -- [] contextual biasing word -- [] alignment -- [] language support(post processor) -- [] label check diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/wenet_api.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/wenet_api.cc deleted file mode 100644 index cb1e0c8552e0126e2db274a29075578fe351a25f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/wenet_api.cc +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "api/wenet_api.h" - -#include -#include -#include - -#include "decoder/asr_decoder.h" -#include "decoder/torch_asr_model.h" -#include "post_processor/post_processor.h" -#include "utils/file.h" -#include "utils/json.h" -#include "utils/string.h" - -class Recognizer { - public: - explicit Recognizer(const std::string& model_dir) { - // FeaturePipeline init - feature_config_ = std::make_shared(80, 16000); - feature_pipeline_ = - std::make_shared(*feature_config_); - // Resource init - resource_ = std::make_shared(); - wenet::TorchAsrModel::InitEngineThreads(); - std::string model_path = wenet::JoinPath(model_dir, "final.zip"); - CHECK(wenet::FileExists(model_path)); - - auto model = std::make_shared(); - model->Read(model_path); - resource_->model = model; - - // units.txt: E2E model unit - std::string unit_path = wenet::JoinPath(model_dir, "units.txt"); - CHECK(wenet::FileExists(unit_path)); - resource_->unit_table = std::shared_ptr( - fst::SymbolTable::ReadText(unit_path)); - - std::string fst_path = wenet::JoinPath(model_dir, "TLG.fst"); - if (wenet::FileExists(fst_path)) { // With LM - resource_->fst = std::shared_ptr>( - fst::Fst::Read(fst_path)); - - std::string symbol_path = wenet::JoinPath(model_dir, "words.txt"); - CHECK(wenet::FileExists(symbol_path)); - resource_->symbol_table = std::shared_ptr( - fst::SymbolTable::ReadText(symbol_path)); - } else { // Without LM, symbol_table is the same as unit_table - resource_->symbol_table = resource_->unit_table; - } - - // Context config init - context_config_ = std::make_shared(); - decode_options_ = std::make_shared(); - post_process_opts_ = std::make_shared(); - } - - void Reset() { - if (feature_pipeline_ != nullptr) { - feature_pipeline_->Reset(); - } - if (decoder_ != nullptr) { - decoder_->Reset(); - } - result_.clear(); - } - - void InitDecoder() { - CHECK(decoder_ == nullptr); - // Optional init context graph - if (context_.size() > 0) { - context_config_->context_score = context_score_; - auto context_graph = - std::make_shared(*context_config_); - context_graph->BuildContextGraph(context_, resource_->symbol_table); - resource_->context_graph = context_graph; - } - // PostProcessor - if (language_ == "chs") { // TODO(Binbin Zhang): CJK(chs, jp, kr) - post_process_opts_->language_type = wenet::kMandarinEnglish; - } else { - post_process_opts_->language_type = wenet::kIndoEuropean; - } - resource_->post_processor = - std::make_shared(*post_process_opts_); - // Init decoder - decoder_ = std::make_shared(feature_pipeline_, resource_, - *decode_options_); - } - - void Decode(const char* data, int len, int last) { - using wenet::DecodeState; - // Init decoder when it is called first time - if (decoder_ == nullptr) { - InitDecoder(); - } - // Convert to 16 bits PCM data to float - CHECK_EQ(len % 2, 0); - feature_pipeline_->AcceptWaveform(reinterpret_cast(data), - len / 2); - if (last > 0) { - feature_pipeline_->set_input_finished(); - } - - while (true) { - DecodeState state = decoder_->Decode(false); - if (state == DecodeState::kWaitFeats) { - break; - } else if (state == DecodeState::kEndFeats) { - decoder_->Rescoring(); - UpdateResult(true); - break; - } else if (state == DecodeState::kEndpoint && continuous_decoding_) { - decoder_->Rescoring(); - UpdateResult(true); - decoder_->ResetContinuousDecoding(); - } else { // kEndBatch - UpdateResult(false); - } - } - } - - void UpdateResult(bool final_result) { - json::JSON obj; - obj["type"] = final_result ? "final_result" : "partial_result"; - int nbest = final_result ? 
-    obj["nbest"] = json::Array();
-    for (int i = 0; i < nbest && i < decoder_->result().size(); i++) {
-      json::JSON one;
-      one["sentence"] = decoder_->result()[i].sentence;
-      if (final_result && enable_timestamp_) {
-        one["word_pieces"] = json::Array();
-        for (const auto& word_piece : decoder_->result()[i].word_pieces) {
-          json::JSON piece;
-          piece["word"] = word_piece.word;
-          piece["start"] = word_piece.start;
-          piece["end"] = word_piece.end;
-          one["word_pieces"].append(piece);
-        }
-      }
-      one["sentence"] = decoder_->result()[i].sentence;
-      obj["nbest"].append(one);
-    }
-    result_ = obj.dump();
-  }
-
-  const char* GetResult() { return result_.c_str(); }
-
-  void set_nbest(int n) { nbest_ = n; }
-  void set_enable_timestamp(bool flag) { enable_timestamp_ = flag; }
-  void AddContext(const char* word) { context_.emplace_back(word); }
-  void set_context_score(float score) { context_score_ = score; }
-  void set_language(const char* lang) { language_ = lang; }
-  void set_continuous_decoding(bool flag) { continuous_decoding_ = flag; }
-
- private:
-  // NOTE(Binbin Zhang): All use shared_ptr for clone in the future
-  std::shared_ptr<wenet::FeaturePipelineConfig> feature_config_ = nullptr;
-  std::shared_ptr<wenet::FeaturePipeline> feature_pipeline_ = nullptr;
-  std::shared_ptr<wenet::DecodeResource> resource_ = nullptr;
-  std::shared_ptr<wenet::DecodeOptions> decode_options_ = nullptr;
-  std::shared_ptr<wenet::AsrDecoder> decoder_ = nullptr;
-  std::shared_ptr<wenet::ContextConfig> context_config_ = nullptr;
-  std::shared_ptr<wenet::PostProcessOptions> post_process_opts_ = nullptr;
-
-  int nbest_ = 1;
-  std::string result_;
-  bool enable_timestamp_ = false;
-  std::vector<std::string> context_;
-  float context_score_;
-  std::string language_ = "chs";
-  bool continuous_decoding_ = false;
-};
-
-void* wenet_init(const char* model_dir) {
-  Recognizer* decoder = new Recognizer(model_dir);
-  return reinterpret_cast<void*>(decoder);
-}
-
-void wenet_free(void* decoder) {
-  delete reinterpret_cast<Recognizer*>(decoder);
-}
-
-void wenet_reset(void* decoder) {
-  Recognizer* recognizer = reinterpret_cast<Recognizer*>(decoder);
-  recognizer->Reset();
-}
-
-void wenet_decode(void* decoder, const char* data, int len, int last) {
-  Recognizer* recognizer = reinterpret_cast<Recognizer*>(decoder);
-  recognizer->Decode(data, len, last);
-}
-
-const char* wenet_get_result(void* decoder) {
-  Recognizer* recognizer = reinterpret_cast<Recognizer*>(decoder);
-  return recognizer->GetResult();
-}
-
-void wenet_set_log_level(int level) {
-  FLAGS_logtostderr = true;
-  FLAGS_v = level;
-}
-
-void wenet_set_nbest(void* decoder, int n) {
-  Recognizer* recognizer = reinterpret_cast<Recognizer*>(decoder);
-  recognizer->set_nbest(n);
-}
-
-void wenet_set_timestamp(void* decoder, int flag) {
-  Recognizer* recognizer = reinterpret_cast<Recognizer*>(decoder);
-  bool enable = flag > 0 ?
true : false; - recognizer->set_enable_timestamp(enable); -} - -void wenet_add_context(void* decoder, const char* word) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->AddContext(word); -} - -void wenet_set_context_score(void* decoder, float score) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_context_score(score); -} - -void wenet_set_language(void* decoder, const char* lang) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_language(lang); -} - -void wenet_set_continuous_decoding(void* decoder, int flag) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_continuous_decoding(flag > 0); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/wenet_api.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/wenet_api.h deleted file mode 100644 index e839aaa40166a6e50d9aa2ac0e697356bd25b941..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/api/wenet_api.h +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef API_WENET_API_H_ -#define API_WENET_API_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Init decoder from the file and returns the object - * - * @param model_dir: the model dir - * @returns model object or NULL if problem occured - */ -void* wenet_init(const char* model_dir); - -/** Free wenet decoder and corresponding resource - */ -void wenet_free(void* decoder); - -/** Reset decoder for next decoding - */ -void wenet_reset(void* decoder); - -/** Decode the input wav data - * @param data: pcm data, encoded as int16_t(16 bits) - * @param len: data length - * @param last: if it is the last package - */ -void wenet_decode(void* decoder, const char* data, int len, int last); - -/** Get decode result in json format - * It returns partial result when last is 0 - * It returns final result when last is 1 - - { - "nbest" : [{ - "sentence" : "are you okay" - "word_pieces" : [{ - "end" : 960, - "start" : 0, - "word" : "are" - }, { - "end" : 1200, - "start" : 960, - "word" : "you" - }, { - ...}] - }, { - "sentence" : "are you ok" - }], - "type" : "final_result" - } - - "type": final_result/partial_result - "nbest": nbest is enabled when n > 1 in final_result - "sentence": the ASR result - "word_pieces": optional, output timestamp when enabled - */ -const char* wenet_get_result(void* decoder); - -/** Set n-best, range 1~10 - * wenet_get_result will return top-n best results - */ -void wenet_set_nbest(void* decoder, int n); - -/** Whether to enable word level timestamp in results - disable it when flag = 0, otherwise enable - */ -void wenet_set_timestamp(void* decoder, int flag); - -/** Add one contextual biasing - */ -void wenet_add_context(void* decoder, const char* word); - -/** Set contextual biasing bonus score - */ -void wenet_set_context_score(void* decoder, float score); - -/** 
Set language, has effect on the postpocessing - * @param: lang, could be chs/en now - */ -void wenet_set_language(void* decoder, const char* lang); - -/** Set log level - * We use glog in wenet, so the level is the glog level - */ -void wenet_set_log_level(int level); - -/** Enable continous decoding or not - * flag > 0: enable, otherwise disable - */ -void wenet_set_continuous_decoding(void* decoder, int flag); - -#ifdef __cplusplus -} -#endif - -#endif // API_WENET_API_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/CMakeLists.txt deleted file mode 100644 index a117b8bcb580c8738a7ce72f88bc10ff0a450e98..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -add_executable(decoder_main decoder_main.cc) -target_link_libraries(decoder_main PUBLIC decoder) - -add_executable(label_checker_main label_checker_main.cc) -target_link_libraries(label_checker_main PUBLIC decoder) - -# if(TORCH) -# add_executable(api_main api_main.cc) -# target_link_libraries(api_main PUBLIC wenet_api) -# endif() - -if(WEBSOCKET) - add_executable(websocket_client_main websocket_client_main.cc) - target_link_libraries(websocket_client_main PUBLIC websocket) - add_executable(websocket_server_main websocket_server_main.cc) - target_link_libraries(websocket_server_main PUBLIC websocket) -endif() - -if(GRPC) - add_executable(grpc_server_main grpc_server_main.cc) - target_link_libraries(grpc_server_main PUBLIC wenet_grpc) - add_executable(grpc_client_main grpc_client_main.cc) - target_link_libraries(grpc_client_main PUBLIC wenet_grpc) -endif() - -if(HTTP) - add_executable(http_client_main http_client_main.cc) - target_link_libraries(http_client_main PUBLIC http) - add_executable(http_server_main http_server_main.cc) - target_link_libraries(http_server_main PUBLIC http) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/api_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/api_main.cc deleted file mode 100644 index 94b20d52a7b8eee5c39a12af4e1e25324d7d880f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/api_main.cc +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "api/wenet_api.h" -#include "frontend/wav.h" -#include "utils/flags.h" - -DEFINE_string(model_dir, "", "model dir path"); -DEFINE_string(wav_path, "", "single wave path"); -DEFINE_bool(enable_timestamp, false, "enable timestamps"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - wenet_set_log_level(2); - - void* decoder = wenet_init(FLAGS_model_dir.c_str()); - wenet_set_timestamp(decoder, FLAGS_enable_timestamp == true ? 1 : 0); - wenet::WavReader wav_reader(FLAGS_wav_path); - std::vector data(wav_reader.num_samples()); - for (int i = 0; i < wav_reader.num_samples(); i++) { - data[i] = static_cast(*(wav_reader.data() + i)); - } - - for (int i = 0; i < 10; i++) { - // Return the final result when last is 1 - wenet_decode(decoder, reinterpret_cast(data.data()), - data.size() * 2, 1); - const char* result = wenet_get_result(decoder); - LOG(INFO) << i << " " << result; - wenet_reset(decoder); - } - wenet_free(decoder); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/decoder_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/decoder_main.cc deleted file mode 100644 index b8f1dbae6b88390504cc9ce63f33dc9bd54a2d6a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/decoder_main.cc +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include - -#include "decoder/params.h" -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/string.h" -#include "utils/thread_pool.h" -#include "utils/timer.h" -#include "utils/utils.h" - -DEFINE_bool(simulate_streaming, false, "simulate streaming input"); -DEFINE_bool(output_nbest, false, "output n-best of decode result"); -DEFINE_string(wav_path, "", "single wave path"); -DEFINE_string(wav_scp, "", "input wav scp"); -DEFINE_string(result, "", "result output file"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); -DEFINE_int32(thread_num, 1, "num of decode thread"); -DEFINE_int32(warmup, 0, "num of warmup decode, 0 means no warmup"); - -std::shared_ptr g_decode_config; -std::shared_ptr g_feature_config; -std::shared_ptr g_decode_resource; - -std::ofstream g_result; -std::mutex g_mutex; -int g_total_waves_dur = 0; -int g_total_decode_time = 0; - -void decode(std::pair wav, bool warmup = false) { - wenet::WavReader wav_reader(wav.second); - int num_samples = wav_reader.num_samples(); - CHECK_EQ(wav_reader.sample_rate(), FLAGS_sample_rate); - - auto feature_pipeline = - std::make_shared(*g_feature_config); - feature_pipeline->AcceptWaveform(wav_reader.data(), num_samples); - feature_pipeline->set_input_finished(); - LOG(INFO) << "num frames " << feature_pipeline->num_frames(); - - wenet::AsrDecoder decoder(feature_pipeline, g_decode_resource, - *g_decode_config); - - int wave_dur = static_cast(static_cast(num_samples) / - wav_reader.sample_rate() * 1000); - int decode_time = 0; - std::string final_result; - while (true) { - wenet::Timer timer; - wenet::DecodeState state = decoder.Decode(); - if (state == wenet::DecodeState::kEndFeats) { - decoder.Rescoring(); - } - int chunk_decode_time = timer.Elapsed(); - decode_time += chunk_decode_time; - if (decoder.DecodedSomething()) { - LOG(INFO) << "Partial result: " << decoder.result()[0].sentence; - } - - if (FLAGS_continuous_decoding && state == wenet::DecodeState::kEndpoint) { - if (decoder.DecodedSomething()) { - decoder.Rescoring(); - LOG(INFO) << "Final result (continuous decoding): " - << decoder.result()[0].sentence; - final_result.append(decoder.result()[0].sentence); - } - decoder.ResetContinuousDecoding(); - } - - if (state == wenet::DecodeState::kEndFeats) { - break; - } else if (FLAGS_chunk_size > 0 && FLAGS_simulate_streaming) { - float frame_shift_in_ms = - static_cast(g_feature_config->frame_shift) / - wav_reader.sample_rate() * 1000; - auto wait_time = - decoder.num_frames_in_current_chunk() * frame_shift_in_ms - - chunk_decode_time; - if (wait_time > 0) { - LOG(INFO) << "Simulate streaming, waiting for " << wait_time << "ms"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(wait_time))); - } - } - } - if (decoder.DecodedSomething()) { - final_result.append(decoder.result()[0].sentence); - } - LOG(INFO) << wav.first << " Final result: " << final_result << std::endl; - LOG(INFO) << "Decoded " << wave_dur << "ms audio taken " << decode_time - << "ms."; - - if (!warmup) { - g_mutex.lock(); - std::ostream& buffer = FLAGS_result.empty() ? 
std::cout : g_result; - if (!FLAGS_output_nbest) { - buffer << wav.first << " " << final_result << std::endl; - } else { - buffer << "wav " << wav.first << std::endl; - auto& results = decoder.result(); - for (auto& r : results) { - if (r.sentence.empty()) continue; - buffer << "candidate " << r.score << " " << r.sentence << std::endl; - } - } - g_total_waves_dur += wave_dur; - g_total_decode_time += decode_time; - g_mutex.unlock(); - } -} - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - g_decode_config = wenet::InitDecodeOptionsFromFlags(); - g_feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - g_decode_resource = wenet::InitDecodeResourceFromFlags(); - - if (FLAGS_wav_path.empty() && FLAGS_wav_scp.empty()) { - LOG(FATAL) << "Please provide the wave path or the wav scp."; - } - std::vector> waves; - if (!FLAGS_wav_path.empty()) { - waves.emplace_back(make_pair("test", FLAGS_wav_path)); - } else { - std::ifstream wav_scp(FLAGS_wav_scp); - std::string line; - while (getline(wav_scp, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - CHECK_GE(strs.size(), 2); - waves.emplace_back(make_pair(strs[0], strs[1])); - } - - if (waves.empty()) { - LOG(FATAL) << "Please provide non-empty wav scp."; - } - } - - if (!FLAGS_result.empty()) { - g_result.open(FLAGS_result, std::ios::out); - } - - // Warmup - if (FLAGS_warmup > 0) { - LOG(INFO) << "Warming up..."; - { - ThreadPool pool(FLAGS_thread_num); - auto wav = waves[0]; - for (int i = 0; i < FLAGS_warmup; i++) { - pool.enqueue(decode, wav, true); - } - } - LOG(INFO) << "Warmup done."; - } - - { - ThreadPool pool(FLAGS_thread_num); - for (auto& wav : waves) { - pool.enqueue(decode, wav, false); - } - } - - LOG(INFO) << "Total: decoded " << g_total_waves_dur << "ms audio taken " - << g_total_decode_time << "ms."; - LOG(INFO) << "RTF: " << std::setprecision(4) - << static_cast(g_total_decode_time) / g_total_waves_dur; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/grpc_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/grpc_client_main.cc deleted file mode 100644 index f2d226d48d3757c5f095335eff3288f5d227282b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/grpc_client_main.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "frontend/wav.h" -#include "grpc/grpc_client.h" -#include "utils/flags.h" -#include "utils/timer.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of websocket server"); -DEFINE_int32(port, 10086, "port of websocket server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - wenet::GrpcClient client(FLAGS_hostname, FLAGS_port, FLAGS_nbest, - FLAGS_continuous_decoding); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - std::vector pcm_data(wav_reader.data(), - wav_reader.data() + num_samples); - // Send data every 0.5 second - const float interval = 0.5; - const int sample_interval = interval * sample_rate; - for (int start = 0; start < num_samples; start += sample_interval) { - if (client.done()) { - break; - } - int end = std::min(start + sample_interval, num_samples); - // Convert to short - std::vector data; - data.reserve(end - start); - for (int j = start; j < end; j++) { - data.push_back(static_cast(pcm_data[j])); - } - // Send PCM data - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Send " << data.size() << " samples"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(interval * 1000))); - } - wenet::Timer timer; - - client.Join(); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/grpc_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/grpc_server_main.cc deleted file mode 100644 index b00f3cbade1ee70dadfb49829e9ca73fd50c2be2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/grpc_server_main.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include - -#include "decoder/params.h" -#include "grpc/grpc_server.h" -#include "utils/log.h" - -DEFINE_int32(port, 10086, "grpc listening port"); -DEFINE_int32(workers, 4, "grpc num workers"); - -using grpc::Server; -using grpc::ServerBuilder; - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::GrpcServer service(feature_config, decode_config, decode_resource); - grpc::EnableDefaultHealthCheckService(true); - grpc::reflection::InitProtoReflectionServerBuilderPlugin(); - ServerBuilder builder; - std::string address("0.0.0.0:" + std::to_string(FLAGS_port)); - builder.AddListeningPort(address, grpc::InsecureServerCredentials()); - builder.RegisterService(&service); - builder.SetSyncServerOption(ServerBuilder::SyncServerOption::NUM_CQS, - FLAGS_workers); - std::unique_ptr server(builder.BuildAndStart()); - LOG(INFO) << "Listening at port " << FLAGS_port; - server->Wait(); - google::ShutdownGoogleLogging(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/http_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/http_client_main.cc deleted file mode 100644 index b59ee3f5f32bf08552416b183802029ac5d5afa5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/http_client_main.cc +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/timer.h" -#include "http/http_client.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of http server"); -DEFINE_int32(port, 10086, "port of http server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - // Convert to short - std::vector data; - data.reserve(num_samples); - for (int j = 0; j < num_samples; j++) { - data.push_back(static_cast(wav_reader.data()[j])); - } - // Send data - wenet::HttpClient client(FLAGS_hostname, FLAGS_port); - client.set_nbest(FLAGS_nbest); - wenet::Timer timer; - VLOG(2) << "Send " << data.size() << " samples"; - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/http_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/http_server_main.cc deleted file mode 100644 index e30cf2bcdf746c2072f023e90f470ccba5467c2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/http_server_main.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/params.h" -#include "utils/log.h" -#include "http/http_server.h" - -DEFINE_int32(port, 10086, "http listening port"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::HttpServer server(FLAGS_port, feature_config, decode_config, - decode_resource); - LOG(INFO) << "Listening at port " << FLAGS_port; - server.Start(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/label_checker_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/label_checker_main.cc deleted file mode 100644 index e36e3d5c29a38a7ebee80606ebd8e69ae8b1eb96..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/label_checker_main.cc +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include -#include -#include -#include - -#include "decoder/params.h" -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/string.h" - -DEFINE_string(text, "", "kaldi style text input file"); -DEFINE_string(wav_scp, "", "kaldi style wav scp"); -DEFINE_double(is_penalty, 1.0, - "insertion/substitution penalty for align insertion"); -DEFINE_double(del_penalty, 1.0, "deletion penalty for align insertion"); -DEFINE_string(result, "", "result output file"); -DEFINE_string(timestamp, "", "timestamp output file"); - -namespace wenet { - -const char* kDeletion = ""; -// Is: Insertion and substitution -const char* kIsStart = ""; -const char* kIsEnd = ""; - -bool MapToLabel(const std::string& text, - std::shared_ptr symbol_table, - std::vector* labels) { - labels->clear(); - // Split label to char sequence - std::vector chars; - SplitUTF8StringToChars(text, &chars); - for (size_t i = 0; i < chars.size(); i++) { - // ▁ is special symbol for white space - std::string label = chars[i] != " " ? chars[i] : "▁"; - int id = symbol_table->Find(label); - if (id != -1) { // fst::kNoSymbol - // LOG(INFO) << label << " " << id; - labels->push_back(id); - } - } - return true; -} - -std::shared_ptr MakeSymbolTableForFst( - std::shared_ptr isymbol_table) { - LOG(INFO) << isymbol_table; - CHECK(isymbol_table != nullptr); - auto osymbol_table = std::make_shared(); - osymbol_table->AddSymbol("", 0); - CHECK_EQ(isymbol_table->Find(""), 0); - osymbol_table->AddSymbol("", 1); - for (int i = 1; i < isymbol_table->NumSymbols(); i++) { - std::string symbol = isymbol_table->Find(i); - osymbol_table->AddSymbol(symbol, i + 1); - } - osymbol_table->AddSymbol(kDeletion, isymbol_table->NumSymbols() + 1); - osymbol_table->AddSymbol(kIsStart, isymbol_table->NumSymbols() + 2); - osymbol_table->AddSymbol(kIsEnd, isymbol_table->NumSymbols() + 3); - return osymbol_table; -} - -void CompileCtcFst(std::shared_ptr symbol_table, - fst::StdVectorFst* ofst) { - ofst->DeleteStates(); - int start = ofst->AddState(); - ofst->SetStart(start); - CHECK_EQ(symbol_table->Find(""), 0); - CHECK_EQ(symbol_table->Find(""), 1); - ofst->AddArc(start, fst::StdArc(1, 0, 0.0, start)); - // Exclude kDeletion and kInsertion - for (int i = 2; i < symbol_table->NumSymbols() - 3; i++) { - int s = ofst->AddState(); - ofst->AddArc(start, fst::StdArc(i, i, 0.0, s)); - ofst->AddArc(s, fst::StdArc(i, 0, 0.0, s)); - ofst->AddArc(s, fst::StdArc(0, 0, 0.0, start)); - } - ofst->SetFinal(start, fst::StdArc::Weight::One()); - fst::ArcSort(ofst, fst::StdOLabelCompare()); -} - -void CompileAlignFst(std::vector labels, - std::shared_ptr symbol_table, - fst::StdVectorFst* ofst) { - ofst->DeleteStates(); - int deletion = symbol_table->Find(kDeletion); - int insertion_start = symbol_table->Find(kIsStart); - int insertion_end = symbol_table->Find(kIsEnd); - - int start = ofst->AddState(); - ofst->SetStart(start); - // Filler State - int filler_start = ofst->AddState(); - int filler_end = ofst->AddState(); - for (int i = 2; i < symbol_table->NumSymbols() - 3; i++) { - ofst->AddArc(filler_start, fst::StdArc(i, i, FLAGS_is_penalty, 
filler_end)); - } - ofst->AddArc(filler_end, fst::StdArc(0, 0, 0.0, filler_start)); - - int prev = start; - // Alignment path and optional filler - for (size_t i = 0; i < labels.size(); i++) { - int cur = ofst->AddState(); - // 1. Insertion or Substitution - ofst->AddArc(prev, fst::StdArc(0, insertion_start, 0.0, filler_start)); - ofst->AddArc(filler_end, fst::StdArc(0, insertion_end, 0.0, prev)); - // 2. Correct - ofst->AddArc(prev, fst::StdArc(labels[i], labels[i], 0.0, cur)); - // 3. Deletion - ofst->AddArc(prev, fst::StdArc(0, deletion, FLAGS_del_penalty, cur)); - - prev = cur; - } - // Optional add endding filler - ofst->AddArc(prev, fst::StdArc(0, insertion_start, 0.0, filler_start)); - ofst->AddArc(filler_end, fst::StdArc(0, insertion_end, 0.0, prev)); - ofst->SetFinal(prev, fst::StdArc::Weight::One()); - fst::ArcSort(ofst, fst::StdILabelCompare()); -} - -} // namespace wenet - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - CHECK(decode_resource->unit_table != nullptr); - - auto wfst_symbol_table = - wenet::MakeSymbolTableForFst(decode_resource->unit_table); - // wfst_symbol_table->WriteText("fst.txt"); - // Reset symbol_table to on-the-fly generated wfst_symbol_table - decode_resource->symbol_table = wfst_symbol_table; - - // Compile ctc FST - fst::StdVectorFst ctc_fst; - wenet::CompileCtcFst(wfst_symbol_table, &ctc_fst); - // ctc_fst.Write("ctc.fst"); - - std::unordered_map wav_table; - std::ifstream wav_is(FLAGS_wav_scp); - std::string line; - while (std::getline(wav_is, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - CHECK_EQ(strs.size(), 2); - wav_table[strs[0]] = strs[1]; - } - - std::ifstream text_is(FLAGS_text); - std::ofstream result_os(FLAGS_result, std::ios::out); - std::ofstream timestamp_out; - if (!FLAGS_timestamp.empty()) { - timestamp_out.open(FLAGS_timestamp, std::ios::out); - } - std::ostream& timestamp_os = - FLAGS_timestamp.empty() ? 
std::cout : timestamp_out; - - while (std::getline(text_is, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - if (strs.size() < 2) continue; - std::string key = strs[0]; - LOG(INFO) << "Processing " << key; - if (wav_table.find(key) != wav_table.end()) { - strs.erase(strs.begin()); - std::string text = wenet::JoinString(" ", strs); - std::vector labels; - wenet::MapToLabel(text, wfst_symbol_table, &labels); - // Prepare FST for alignment decoding - fst::StdVectorFst align_fst; - wenet::CompileAlignFst(labels, wfst_symbol_table, &align_fst); - // align_fst.Write("align.fst"); - auto decoding_fst = std::make_shared(); - fst::Compose(ctc_fst, align_fst, decoding_fst.get()); - // decoding_fst->Write("decoding.fst"); - // Preapre feature pipeline - wenet::WavReader wav_reader; - if (!wav_reader.Open(wav_table[key])) { - LOG(WARNING) << "Error in reading " << wav_table[key]; - continue; - } - int num_samples = wav_reader.num_samples(); - CHECK_EQ(wav_reader.sample_rate(), FLAGS_sample_rate); - auto feature_pipeline = - std::make_shared(*feature_config); - feature_pipeline->AcceptWaveform(wav_reader.data(), num_samples); - feature_pipeline->set_input_finished(); - decode_resource->fst = decoding_fst; - LOG(INFO) << "num frames " << feature_pipeline->num_frames(); - wenet::AsrDecoder decoder(feature_pipeline, decode_resource, - *decode_config); - while (true) { - wenet::DecodeState state = decoder.Decode(); - if (state == wenet::DecodeState::kEndFeats) { - decoder.Rescoring(); - break; - } - } - std::string final_result; - std::string timestamp_str; - if (decoder.DecodedSomething()) { - const wenet::DecodeResult& result = decoder.result()[0]; - final_result = result.sentence; - std::stringstream ss; - for (const auto& w : result.word_pieces) { - ss << " " << w.word << " " << w.start << " " << w.end; - } - timestamp_str = ss.str(); - } - result_os << key << " " << final_result << std::endl; - timestamp_os << key << " " << timestamp_str << std::endl; - LOG(INFO) << key << " " << final_result; - } else { - LOG(WARNING) << "No wav file for " << key; - } - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/websocket_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/websocket_client_main.cc deleted file mode 100644 index 3eaa96069dc5f57673fbb2819bf7d4883e0d5ffa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/websocket_client_main.cc +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/timer.h" -#include "websocket/websocket_client.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of websocket server"); -DEFINE_int32(port, 10086, "port of websocket server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - wenet::WebSocketClient client(FLAGS_hostname, FLAGS_port); - client.set_nbest(FLAGS_nbest); - client.set_continuous_decoding(FLAGS_continuous_decoding); - client.SendStartSignal(); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - // Send data every 0.5 second - const float interval = 0.5; - const int sample_interval = interval * sample_rate; - for (int start = 0; start < num_samples; start += sample_interval) { - if (client.done()) { - break; - } - int end = std::min(start + sample_interval, num_samples); - // Convert to short - std::vector data; - data.reserve(end - start); - for (int j = start; j < end; j++) { - data.push_back(static_cast(wav_reader.data()[j])); - } - // TODO(Binbin Zhang): Network order? - // Send PCM data - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Send " << data.size() << " samples"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(interval * 1000))); - } - wenet::Timer timer; - client.SendEndSignal(); - client.Join(); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/websocket_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/websocket_server_main.cc deleted file mode 100644 index 796d9d2e6d151f7c08b43d66b7245c58ee086cc2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/bin/websocket_server_main.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "decoder/params.h" -#include "utils/log.h" -#include "websocket/websocket_server.h" - -DEFINE_int32(port, 10086, "websocket listening port"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::WebSocketServer server(FLAGS_port, feature_config, decode_config, - decode_resource); - LOG(INFO) << "Listening at port " << FLAGS_port; - server.Start(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/boost.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/boost.cmake deleted file mode 100644 index 8684c0ec43960da213da923dc57416f04301ea2b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/boost.cmake +++ /dev/null @@ -1,10 +0,0 @@ -FetchContent_Declare(boost - URL https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.gz - URL_HASH SHA256=aeb26f80e80945e82ee93e5939baebdca47b9dee80a07d3144be1e1a6a66dd6a -) -FetchContent_MakeAvailable(boost) -include_directories(${boost_SOURCE_DIR}) - -if(MSVC) - add_definitions(-DBOOST_ALL_DYN_LINK -DBOOST_ALL_NO_LIB) -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/bpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/bpu.cmake deleted file mode 100644 index 350d76c19d6f656fb130de09877d649cf49972a4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/bpu.cmake +++ /dev/null @@ -1,30 +0,0 @@ -if(BPU) - if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(EASY_DNN_URL "https://github.com/xingchensong/toolchain_pkg/releases/download/easy_dnn/easy_dnn.0.4.11.tar.gz") - set(URL_HASH "SHA256=a1a6f77d1baae7181d75ec5d37a2ee529ac4e1c4400babd6ceb1c007392a4904") - else() - message(FATAL_ERROR "Unsupported CMake System Processor '${CMAKE_SYSTEM_PROCESSOR}' (expected 'aarch64')") - endif() - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Linux')") - endif() - - FetchContent_Declare(easy_dnn - URL ${EASY_DNN_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(easy_dnn) - include_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/include) - link_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/lib) - link_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/lib) - link_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/lib) - - add_definitions(-DUSE_BPU) - # NOTE(xcsong): Reasons for adding flag `-fuse-ld=gold`: - # https://stackoverflow.com/questions/59915966/unknown-gcc-linker-error-but-builds-sucessfully/59916438#59916438 - # https://github.com/tensorflow/tensorflow/issues/47849 - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold") -endif() diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/gflags.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/gflags.cmake deleted file mode 100644 index 53ae5763b5a8c860b7e64d35b380eee5429f539d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/gflags.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(gflags - URL https://github.com/gflags/gflags/archive/v2.2.2.zip - URL_HASH SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5 -) -FetchContent_MakeAvailable(gflags) -include_directories(${gflags_BINARY_DIR}/include) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/glog.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/glog.cmake deleted file mode 100644 index 447ab4132f669ee2c3a52c37959dd684a39ff21b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/glog.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(glog - URL https://github.com/google/glog/archive/v0.4.0.zip - URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc -) -FetchContent_MakeAvailable(glog) -include_directories(${glog_SOURCE_DIR}/src ${glog_BINARY_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/grpc.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/grpc.cmake deleted file mode 100644 index 644093a4bf8191f3a45b0df0a72c000981c48f58..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/grpc.cmake +++ /dev/null @@ -1,9 +0,0 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/grpc) -# third_party: grpc -# On how to build grpc, you may refer to https://github.com/grpc/grpc -# We recommend manually recursive clone the repo to avoid internet connection problem -FetchContent_Declare(gRPC - GIT_REPOSITORY https://github.com/grpc/grpc - GIT_TAG v1.37.1 -) -FetchContent_MakeAvailable(gRPC) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/gtest.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/gtest.cmake deleted file mode 100644 index 30dc7c1a31d8b83991841a4dc33f61ed078b532a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/gtest.cmake +++ /dev/null @@ -1,8 +0,0 @@ -FetchContent_Declare(googletest - URL https://github.com/google/googletest/archive/release-1.11.0.zip - URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a -) -if(MSVC) - set(gtest_force_shared_crt ON CACHE BOOL "Always use msvcrt.dll" FORCE) -endif() -FetchContent_MakeAvailable(googletest) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/libtorch.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/libtorch.cmake deleted file mode 100644 index 3cd9245b2da52f8be206d27164de5f411bff171b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/libtorch.cmake +++ /dev/null @@ -1,79 +0,0 @@ -if(TORCH) - add_definitions(-DUSE_TORCH) - if(NOT ANDROID) - if(GPU) - if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - 
message(FATAL_ERROR "GPU is supported only Linux, you can use CPU version") - else() - add_definitions(-DUSE_GPU) - endif() - endif() - - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - if(${CMAKE_BUILD_TYPE} MATCHES "Release") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bece54d36377990257e9d028c687c5b6759c5cfec0a0153da83cf6f0f71f648f") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-debug-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=3cc7ba3c3865d86f03d78c2f0878fdbed8b764359476397a5c95cf3bba0d665a") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CXX11_ABI) - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=d52f63577a07adb0bfd6d77c90f7da21896e94f71eb7dcd55ed7835ccb3b2b59") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.12.0%2Bcu113.zip") - set(URL_HASH "SHA256=80f089939de20e68e3fcad4dfa72a26c8bf91b5e77b11042f671f39ebac35865") - endif() - else() - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bee1b7be308792aa60fc95a4f5274d9658cb7248002d0e333d49eb81ec88430c") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.11.0%2Bcu113.zip") - set(URL_HASH "SHA256=90159ecce3ff451f3ef3f657493b6c7c96759c3b74bbd70c1695f2ea2f81e1ad") - endif() - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-macos-1.13.0.zip") - set(URL_HASH "SHA256=a8f80050b95489b4e002547910410c2c230e9f590ffab2482e19e809afe4f7aa") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") - add_definitions(-DIOS) - else() - message(FATAL_ERROR "Unsupported System '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux', 'Darwin' or 'iOS')") - endif() - - # iOS use LibTorch from pod install - if(NOT IOS) - FetchContent_Declare(libtorch - URL ${LIBTORCH_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(libtorch) - find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS} -DC10_USE_GLOG") - endif() - - if(MSVC) - file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") - file(COPY ${TORCH_DLLS} DESTINATION ${CMAKE_BINARY_DIR}) - endif() - else() - # Change version in runtime/android/app/build.gradle. 
- file(GLOB PYTORCH_INCLUDE_DIRS "${build_DIR}/pytorch_android*.aar/headers") - file(GLOB PYTORCH_LINK_DIRS "${build_DIR}/pytorch_android*.aar/jni/${ANDROID_ABI}") - find_library(PYTORCH_LIBRARY pytorch_jni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - find_library(FBJNI_LIBRARY fbjni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - include_directories( - ${PYTORCH_INCLUDE_DIRS} - ${PYTORCH_INCLUDE_DIRS}/torch/csrc/api/include - ) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/onnx.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/onnx.cmake deleted file mode 100644 index bd55402cb2a6024620fa6ff8b5c413207041adfa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/onnx.cmake +++ /dev/null @@ -1,35 +0,0 @@ -if(ONNX) - set(ONNX_VERSION "1.12.0") - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-win-x64-${ONNX_VERSION}.zip") - set(URL_HASH "SHA256=8b5d61204989350b7904ac277f5fbccd3e6736ddbb6ec001e412723d71c9c176") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-aarch64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5820d9f343df73c63b6b2b174a1ff62575032e171c9564bcf92060f46827d0ac") - else() - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-x64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5d503ce8540358b59be26c675e42081be14a3e833a5301926f555451046929c5") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-x86_64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=09b17f712f8c6f19bb63da35d508815b443cbb473e16c6192abfaa297c02f600") - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux' or 'Darwin')") - endif() - - FetchContent_Declare(onnxruntime - URL ${ONNX_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(onnxruntime) - include_directories(${onnxruntime_SOURCE_DIR}/include) - link_directories(${onnxruntime_SOURCE_DIR}/lib) - - if(MSVC) - file(GLOB ONNX_DLLS "${onnxruntime_SOURCE_DIR}/lib/*.dll") - file(COPY ${ONNX_DLLS} DESTINATION ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}) - endif() - - add_definitions(-DUSE_ONNX) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/openfst.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/openfst.cmake deleted file mode 100644 index 490a3da6b571ec228114167fb9c0d9e9b4043bd2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/openfst.cmake +++ /dev/null @@ -1,45 +0,0 @@ -if(NOT ANDROID) - include(gflags) - # We can't build glog with gflags, unless gflags is pre-installed. - # If build glog with pre-installed gflags, there will be conflict. 
- set(WITH_GFLAGS OFF CACHE BOOL "whether build glog with gflags" FORCE) - include(glog) - - if(NOT GRAPH_TOOLS) - set(HAVE_BIN OFF CACHE BOOL "Build the fst binaries" FORCE) - set(HAVE_SCRIPT OFF CACHE BOOL "Build the fstscript" FORCE) - endif() - set(HAVE_COMPACT OFF CACHE BOOL "Build compact" FORCE) - set(HAVE_CONST OFF CACHE BOOL "Build const" FORCE) - set(HAVE_GRM OFF CACHE BOOL "Build grm" FORCE) - set(HAVE_FAR OFF CACHE BOOL "Build far" FORCE) - set(HAVE_PDT OFF CACHE BOOL "Build pdt" FORCE) - set(HAVE_MPDT OFF CACHE BOOL "Build mpdt" FORCE) - set(HAVE_LINEAR OFF CACHE BOOL "Build linear" FORCE) - set(HAVE_LOOKAHEAD OFF CACHE BOOL "Build lookahead" FORCE) - set(HAVE_NGRAM OFF CACHE BOOL "Build ngram" FORCE) - set(HAVE_SPECIAL OFF CACHE BOOL "Build special" FORCE) - - if(MSVC) - add_compile_options(/W0 /wd4244 /wd4267) - endif() - - # "OpenFST port for Windows" builds openfst with cmake for multiple platforms. - # Openfst is compiled with glog/gflags to avoid log and flag conflicts with log and flags in wenet/libtorch. - # To build openfst with gflags and glog, we comment out some vars of {flags, log}.h and flags.cc. - set(openfst_SOURCE_DIR ${fc_base}/openfst-src CACHE PATH "OpenFST source directory") - FetchContent_Declare(openfst - URL https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz - URL_HASH SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e - PATCH_COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/patch/openfst ${openfst_SOURCE_DIR} - ) - FetchContent_MakeAvailable(openfst) - add_dependencies(fst gflags glog) - target_link_libraries(fst PUBLIC gflags_nothreads_static glog) - include_directories(${openfst_SOURCE_DIR}/src/include) -else() - set(openfst_BINARY_DIR ${build_DIR}/wenet-openfst-android-1.0.2.aar/jni) - include_directories(${openfst_BINARY_DIR}/include) - link_directories(${openfst_BINARY_DIR}/${ANDROID_ABI}) - link_libraries(log gflags_nothreads glog fst) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/pybind11.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/pybind11.cmake deleted file mode 100644 index 6bdae202c1c4d94228e5f92dab051c118dba7d3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/pybind11.cmake +++ /dev/null @@ -1,7 +0,0 @@ -FetchContent_Declare(pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.zip - URL_HASH SHA256=d1646e6f70d8a3acb2ddd85ce1ed543b5dd579c68b8fb8e9638282af20edead8 -) -FetchContent_MakeAvailable(pybind11) - -add_subdirectory(${pybind11_SOURCE_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/xpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/xpu.cmake deleted file mode 100644 index 38418671b0237550cd01d4d95e8743067e113e56..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/cmake/xpu.cmake +++ /dev/null @@ -1,37 +0,0 @@ -if(NOT WIN32) - string(ASCII 27 Esc) - set(ColourReset "${Esc}[m") - set(ColourBold "${Esc}[1m") - set(Red "${Esc}[31m") - set(Green "${Esc}[32m") - set(Yellow "${Esc}[33m") - set(Blue "${Esc}[34m") - set(Magenta "${Esc}[35m") - set(Cyan "${Esc}[36m") - set(White "${Esc}[37m") - set(BoldRed "${Esc}[1;31m") - set(BoldGreen "${Esc}[1;32m") - set(BoldYellow "${Esc}[1;33m") - set(BoldBlue "${Esc}[1;34m") - set(BoldMagenta 
"${Esc}[1;35m") - set(BoldCyan "${Esc}[1;36m") - set(BoldWhite "${Esc}[1;37m") -endif() - -if(XPU) - set(RUNTIME_KUNLUN_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - message(STATUS "RUNTIME_KUNLUN_PATH is ${RUNTIME_KUNLUN_PATH} .\n") - set(KUNLUN_XPU_PATH ${RUNTIME_KUNLUN_PATH}/xpu) - if(NOT DEFINED ENV{XPU_API_PATH}) - message(FATAL_ERROR "${BoldRed}NO ENV{XPU_API_PATH} in your env. Please set XPU_API_PATH.${ColourReset}\n") - else() - set(XPU_API_PATH $ENV{XPU_API_PATH}) - message("set XPU_API_PATH from env_var. Val is $ENV{XPU_API_PATH}.") - endif() - - include_directories(${RUNTIME_KUNLUN_PATH} ${KUNLUN_XPU_PATH}/ - ${XPU_API_PATH}/output/include ${XPU_API_PATH}/../runtime/include) - link_directories(${XPU_API_PATH}/output/so/ ${XPU_API_PATH}/../runtime/output/so/) - - add_definitions(-DUSE_XPU) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/compile.sh b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/compile.sh deleted file mode 100644 index d64a6f050184e6fa3c4fdd71177b26a24649c7b9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/compile.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash -set -e - -usage() { - echo "Usage:" - echo "bash compile.sh [-r] [-d] [-c]" - echo "Description:" - echo "-r, build release." - echo "-d, build debug." - echo "-c, remove cmakecache or build dir, then build." - echo "Example 1:" - echo " ./compile.sh -r " - echo " means: remove cache files in build dir, then build release." - echo "Example 2:" - echo " ./compile.sh -d -c all " - echo " means: remove all files in build dir, then build debug." - exit -1 -} - -if [ -z $CXX ]; then - echo -e "\033[31m [WARNING]: NO CXX in your env. Suggest setting CXX variable to support C++14. \033[0m" - sleep 2 -fi - -build_type='Release' -clean_type='cache' - -while getopts 'rdc:h' OPT; do - case $OPT in - r) build_type="Release";; - d) build_type="Debug";; - c) clean_type="$OPTARG";; - h) usage;; - ?) usage;; - esac -done - -if [ ! -d ./build ];then - mkdir build -fi - -if [ "$clean_type" = "all" ];then - pushd build - rm -rf ./* - popd -else - pushd build - rm -rf CMakeFiles/ cmake_install.cmake CMakeCache.txt CPackSourceConfig.cmake - popd -fi - -build_cmd="cd build && cmake -DINTTYPES_FORMAT:STRING=C99 " - -if [ "$build_type" = "Release" ];then - build_cmd="${build_cmd} -DCMAKE_BUILD_TYPE=Release .. && cmake --build ./ " -else - build_cmd="${build_cmd} -DCMAKE_BUILD_TYPE=Debug .. 
&& cmake --build ./ " -fi - -echo "build command is ${build_cmd}" - -eval ${build_cmd} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/CMakeLists.txt deleted file mode 100644 index fe03efb288eb1c7ae3d05e896e95855e5865472f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -set(decoder_srcs - asr_decoder.cc - asr_model.cc - context_graph.cc - ctc_prefix_beam_search.cc - ctc_wfst_beam_search.cc - ctc_endpoint.cc -) - -if(NOT TORCH AND NOT ONNX AND NOT XPU AND NOT IOS AND NOT BPU) - message(FATAL_ERROR "Please build with TORCH or ONNX or XPU or IOS or BPU!!!") -endif() -if(TORCH OR IOS) - list(APPEND decoder_srcs torch_asr_model.cc) -endif() -if(ONNX) - list(APPEND decoder_srcs onnx_asr_model.cc) -endif() - -add_library(decoder STATIC ${decoder_srcs}) -target_link_libraries(decoder PUBLIC kaldi-decoder frontend - post_processor utils) - -if(ANDROID) - target_link_libraries(decoder PUBLIC ${PYTORCH_LIBRARY} ${FBJNI_LIBRARY}) -else() - if(TORCH) - target_link_libraries(decoder PUBLIC ${TORCH_LIBRARIES}) - endif() - if(ONNX) - target_link_libraries(decoder PUBLIC onnxruntime) - endif() - if(BPU) - target_link_libraries(decoder PUBLIC bpu_asr_model) - endif() - if(XPU) - target_link_libraries(decoder PUBLIC xpu_conformer) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_decoder.cc deleted file mode 100644 index 34de7550ea287b37d2cb707e148f5d6853b3d804..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_decoder.cc +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/asr_decoder.h" - -#include - -#include -#include -#include - -#include "utils/timer.h" - -namespace wenet { - -AsrDecoder::AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts) - : feature_pipeline_(std::move(feature_pipeline)), - // Make a copy of the model ASR model since we will change the inner - // status of the model - model_(resource->model->Copy()), - post_processor_(resource->post_processor), - symbol_table_(resource->symbol_table), - fst_(resource->fst), - unit_table_(resource->unit_table), - opts_(opts), - ctc_endpointer_(new CtcEndpoint(opts.ctc_endpoint_config)) { - if (opts_.reverse_weight > 0) { - // Check if model has a right to left decoder - CHECK(model_->is_bidirectional_decoder()); - } - if (nullptr == fst_) { - searcher_.reset(new CtcPrefixBeamSearch(opts.ctc_prefix_search_opts, - resource->context_graph)); - } else { - searcher_.reset(new CtcWfstBeamSearch(*fst_, opts.ctc_wfst_search_opts, - resource->context_graph)); - } - ctc_endpointer_->frame_shift_in_ms(frame_shift_in_ms()); -} - -void AsrDecoder::Reset() { - start_ = false; - result_.clear(); - num_frames_ = 0; - global_frame_offset_ = 0; - model_->Reset(); - searcher_->Reset(); - feature_pipeline_->Reset(); - ctc_endpointer_->Reset(); -} - -void AsrDecoder::ResetContinuousDecoding() { - global_frame_offset_ = num_frames_; - start_ = false; - result_.clear(); - model_->Reset(); - searcher_->Reset(); - ctc_endpointer_->Reset(); -} - -DecodeState AsrDecoder::Decode(bool block) { - return this->AdvanceDecoding(block); -} - -void AsrDecoder::Rescoring() { - // Do attention rescoring - Timer timer; - AttentionRescoring(); - VLOG(2) << "Rescoring cost latency: " << timer.Elapsed() << "ms."; -} - -DecodeState AsrDecoder::AdvanceDecoding(bool block) { - DecodeState state = DecodeState::kEndBatch; - model_->set_chunk_size(opts_.chunk_size); - model_->set_num_left_chunks(opts_.num_left_chunks); - int num_required_frames = model_->num_frames_for_chunk(start_); - std::vector> chunk_feats; - // Return immediately if we do not want to block - if (!block && !feature_pipeline_->input_finished() && - feature_pipeline_->NumQueuedFrames() < num_required_frames) { - return DecodeState::kWaitFeats; - } - // If not okay, that means we reach the end of the input - if (!feature_pipeline_->Read(num_required_frames, &chunk_feats)) { - state = DecodeState::kEndFeats; - } - - num_frames_ += chunk_feats.size(); - VLOG(2) << "Required " << num_required_frames << " get " - << chunk_feats.size(); - Timer timer; - std::vector> ctc_log_probs; - model_->ForwardEncoder(chunk_feats, &ctc_log_probs); - int forward_time = timer.Elapsed(); - if (opts_.ctc_wfst_search_opts.blank_scale != 1.0) { - for (int i = 0; i < ctc_log_probs.size(); i++) { - ctc_log_probs[i][0] = ctc_log_probs[i][0] - + std::log(opts_.ctc_wfst_search_opts.blank_scale); - } - } - timer.Reset(); - searcher_->Search(ctc_log_probs); - int search_time = timer.Elapsed(); - VLOG(3) << "forward takes " << forward_time << " ms, search takes " - << search_time << " ms"; - UpdateResult(); - - if (state != DecodeState::kEndFeats) { - if (ctc_endpointer_->IsEndpoint(ctc_log_probs, DecodedSomething())) { - VLOG(1) << "Endpoint is detected at " << num_frames_; - state = DecodeState::kEndpoint; - } - } - - start_ = true; - return state; -} - -void AsrDecoder::UpdateResult(bool finish) { - const auto& hypotheses = searcher_->Outputs(); - const auto& inputs = searcher_->Inputs(); - const auto& likelihood = 
searcher_->Likelihood(); - const auto& times = searcher_->Times(); - result_.clear(); - - CHECK_EQ(hypotheses.size(), likelihood.size()); - for (size_t i = 0; i < hypotheses.size(); i++) { - const std::vector& hypothesis = hypotheses[i]; - - DecodeResult path; - path.score = likelihood[i]; - int offset = global_frame_offset_ * feature_frame_shift_in_ms(); - for (size_t j = 0; j < hypothesis.size(); j++) { - std::string word = symbol_table_->Find(hypothesis[j]); - // A detailed explanation of this if-else branch can be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - if (searcher_->Type() == kWfstBeamSearch) { - path.sentence += (' ' + word); - } else { - path.sentence += (word); - } - } - - // TimeStamp is only supported in final result - // TimeStamp of the output of CtcWfstBeamSearch may be inaccurate due to - // various FST operations when building the decoding graph. So here we use - // time stamp of the input(e2e model unit), which is more accurate, and it - // requires the symbol table of the e2e model used in training. - if (unit_table_ != nullptr && finish) { - const std::vector& input = inputs[i]; - const std::vector& time_stamp = times[i]; - CHECK_EQ(input.size(), time_stamp.size()); - for (size_t j = 0; j < input.size(); j++) { - std::string word = unit_table_->Find(input[j]); - int start = time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ > 0 - ? time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ - : 0; - if (j > 0) { - start = (time_stamp[j] - time_stamp[j - 1]) * frame_shift_in_ms() < - time_stamp_gap_ - ? (time_stamp[j - 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : start; - } - int end = time_stamp[j] * frame_shift_in_ms(); - if (j < input.size() - 1) { - end = (time_stamp[j + 1] - time_stamp[j]) * frame_shift_in_ms() < - time_stamp_gap_ - ? 
(time_stamp[j + 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : end; - } - WordPiece word_piece(word, offset + start, offset + end); - path.word_pieces.emplace_back(word_piece); - } - } - - if (post_processor_ != nullptr) { - path.sentence = post_processor_->Process(path.sentence, finish); - } - result_.emplace_back(path); - } - - if (DecodedSomething()) { - VLOG(1) << "Partial CTC result " << result_[0].sentence; - } -} - -void AsrDecoder::AttentionRescoring() { - searcher_->FinalizeSearch(); - UpdateResult(true); - // No need to do rescoring - if (0.0 == opts_.rescoring_weight) { - return; - } - // Inputs() returns N-best input ids, which is the basic unit for rescoring - // In CtcPrefixBeamSearch, inputs are the same to outputs - const auto& hypotheses = searcher_->Inputs(); - int num_hyps = hypotheses.size(); - if (num_hyps <= 0) { - return; - } - - std::vector rescoring_score; - model_->AttentionRescoring(hypotheses, opts_.reverse_weight, - &rescoring_score); - - // Combine ctc score and rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - result_[i].score = opts_.rescoring_weight * rescoring_score[i] + - opts_.ctc_weight * result_[i].score; - } - std::sort(result_.begin(), result_.end(), DecodeResult::CompareFunc); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_decoder.h deleted file mode 100644 index df71f5b7bad7b2ffdc69bbd7ab11f576bed464d2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_decoder.h +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_ASR_DECODER_H_ -#define DECODER_ASR_DECODER_H_ - -#include -#include -#include -#include - -#include "fst/fstlib.h" -#include "fst/symbol-table.h" - -#include "decoder/asr_model.h" -#include "decoder/context_graph.h" -#include "decoder/ctc_endpoint.h" -#include "decoder/ctc_prefix_beam_search.h" -#include "decoder/ctc_wfst_beam_search.h" -#include "decoder/search_interface.h" -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/utils.h" - -namespace wenet { - -struct DecodeOptions { - // chunk_size is the frame number of one chunk after subsampling. - // e.g. if subsample rate is 4 and chunk_size = 16, the frames in - // one chunk are 64 = 16*4 - int chunk_size = 16; - int num_left_chunks = -1; - - // final_score = rescoring_weight * rescoring_score + ctc_weight * ctc_score; - // rescoring_score = left_to_right_score * (1 - reverse_weight) + - // right_to_left_score * reverse_weight - // Please note the concept of ctc_scores in the following two search - // methods are different. 
- // For CtcPrefixBeamSearch, it's a sum(prefix) score + context score - // For CtcWfstBeamSearch, it's a max(viterbi) path score + context score - // So we should carefully set ctc_weight according to the search methods. - float ctc_weight = 0.5; - float rescoring_weight = 1.0; - float reverse_weight = 0.0; - CtcEndpointConfig ctc_endpoint_config; - CtcPrefixBeamSearchOptions ctc_prefix_search_opts; - CtcWfstBeamSearchOptions ctc_wfst_search_opts; -}; - -struct WordPiece { - std::string word; - int start = -1; - int end = -1; - - WordPiece(std::string word, int start, int end) - : word(std::move(word)), start(start), end(end) {} -}; - -struct DecodeResult { - float score = -kFloatMax; - std::string sentence; - std::vector word_pieces; - - static bool CompareFunc(const DecodeResult& a, const DecodeResult& b) { - return a.score > b.score; - } -}; - -enum DecodeState { - kEndBatch = 0x00, // End of current decoding batch, normal case - kEndpoint = 0x01, // Endpoint is detected - kEndFeats = 0x02, // All feature is decoded - kWaitFeats = 0x03 // Feat is not enough for one chunk inference, wait -}; - -// DecodeResource is thread safe, which can be shared for multiple -// decoding threads -struct DecodeResource { - std::shared_ptr model = nullptr; - std::shared_ptr symbol_table = nullptr; - std::shared_ptr> fst = nullptr; - std::shared_ptr unit_table = nullptr; - std::shared_ptr context_graph = nullptr; - std::shared_ptr post_processor = nullptr; -}; - -// Torch ASR decoder -class AsrDecoder { - public: - AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts); - // @param block: if true, block when feature is not enough for one chunk - // inference. Otherwise, return kWaitFeats. - DecodeState Decode(bool block = true); - void Rescoring(); - void Reset(); - void ResetContinuousDecoding(); - bool DecodedSomething() const { - return !result_.empty() && !result_[0].sentence.empty(); - } - - // This method is used for time benchmark - int num_frames_in_current_chunk() const { - return num_frames_in_current_chunk_; - } - int frame_shift_in_ms() const { - return model_->subsampling_rate() * - feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - int feature_frame_shift_in_ms() const { - return feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - const std::vector& result() const { return result_; } - - private: - DecodeState AdvanceDecoding(bool block = true); - void AttentionRescoring(); - - void UpdateResult(bool finish = false); - - std::shared_ptr feature_pipeline_; - std::shared_ptr model_; - std::shared_ptr post_processor_; - - std::shared_ptr> fst_ = nullptr; - // output symbol table - std::shared_ptr symbol_table_; - // e2e unit symbol table - std::shared_ptr unit_table_ = nullptr; - const DecodeOptions& opts_; - // cache feature - bool start_ = false; - // For continuous decoding - int num_frames_ = 0; - int global_frame_offset_ = 0; - const int time_stamp_gap_ = 100; // timestamp gap between words in a sentence - - std::unique_ptr searcher_; - std::unique_ptr ctc_endpointer_; - - int num_frames_in_current_chunk_ = 0; - std::vector result_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(AsrDecoder); -}; - -} // namespace wenet - -#endif // DECODER_ASR_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_model.cc deleted 
file mode 100644 index 8c7b0fb1195cf07bac6c3ff1bb8cb0e187e977da..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_model.cc +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#include "decoder/asr_model.h" - -#include -#include - -namespace wenet { - -int AsrModel::num_frames_for_chunk(bool start) const { - int num_required_frames = 0; - if (chunk_size_ > 0) { - if (!start) { // First batch - int context = right_context_ + 1; // Add current frame - num_required_frames = (chunk_size_ - 1) * subsampling_rate_ + context; - } else { - num_required_frames = chunk_size_ * subsampling_rate_; - } - } else { - num_required_frames = std::numeric_limits::max(); - } - return num_required_frames; -} - -void AsrModel::CacheFeature( - const std::vector>& chunk_feats) { - // Cache feature for next chunk - const int cached_feature_size = 1 + right_context_ - subsampling_rate_; - if (chunk_feats.size() >= cached_feature_size) { - // TODO(Binbin Zhang): Only deal the case when - // chunk_feats.size() > cached_feature_size here, and it's consistent - // with our current model, refine it later if we have new model or - // new requirements - cached_feature_.resize(cached_feature_size); - for (int i = 0; i < cached_feature_size; ++i) { - cached_feature_[i] = - chunk_feats[chunk_feats.size() - cached_feature_size + i]; - } - } -} - -void AsrModel::ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) { - ctc_prob->clear(); - int num_frames = cached_feature_.size() + chunk_feats.size(); - if (num_frames >= right_context_ + 1) { - this->ForwardEncoderFunc(chunk_feats, ctc_prob); - this->CacheFeature(chunk_feats); - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_model.h deleted file mode 100644 index d100dd818551014fa4769c1766bc3b1b626e8453..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/asr_model.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#ifndef DECODER_ASR_MODEL_H_ -#define DECODER_ASR_MODEL_H_ - -#include -#include -#include -#include - -#include "utils/timer.h" -#include "utils/utils.h" - -namespace wenet { - -class AsrModel { - public: - virtual int right_context() const { return right_context_; } - virtual int subsampling_rate() const { return subsampling_rate_; } - virtual int sos() const { return sos_; } - virtual int eos() const { return eos_; } - virtual bool is_bidirectional_decoder() const { - return is_bidirectional_decoder_; - } - virtual int offset() const { return offset_; } - - // If chunk_size > 0, streaming case. 
Otherwise, none streaming case - virtual void set_chunk_size(int chunk_size) { chunk_size_ = chunk_size; } - virtual void set_num_left_chunks(int num_left_chunks) { - num_left_chunks_ = num_left_chunks; - } - // start: if it is the start chunk of one sentence - virtual int num_frames_for_chunk(bool start) const; - - virtual void Reset() = 0; - - virtual void ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob); - - virtual void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) = 0; - - virtual std::shared_ptr Copy() const = 0; - - protected: - virtual void ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) = 0; - virtual void CacheFeature(const std::vector>& chunk_feats); - - int right_context_ = 1; - int subsampling_rate_ = 1; - int sos_ = 0; - int eos_ = 0; - bool is_bidirectional_decoder_ = false; - int chunk_size_ = 16; - int num_left_chunks_ = -1; // -1 means all left chunks - int offset_ = 0; - - std::vector> cached_feature_; -}; - -} // namespace wenet - -#endif // DECODER_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/context_graph.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/context_graph.cc deleted file mode 100644 index adc59c506de2afa7087815887295e4d8735d2a35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/context_graph.cc +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/context_graph.h" - -#include - -#include "fst/determinize.h" - -#include "utils/string.h" -#include "utils/utils.h" - -namespace wenet { - -ContextGraph::ContextGraph(ContextConfig config) : config_(config) {} - -void ContextGraph::BuildContextGraph( - const std::vector& query_contexts, - const std::shared_ptr& symbol_table) { - CHECK(symbol_table != nullptr) << "Symbols table should not be nullptr!"; - start_tag_id_ = symbol_table->AddSymbol(""); - end_tag_id_ = symbol_table->AddSymbol(""); - symbol_table_ = symbol_table; - if (query_contexts.empty()) { - if (graph_ != nullptr) graph_.reset(); - return; - } - - std::unique_ptr ofst(new fst::StdVectorFst()); - // State 0 is the start state and the final state. - int start_state = ofst->AddState(); - ofst->SetStart(start_state); - ofst->SetFinal(start_state, fst::StdArc::Weight::One()); - - LOG(INFO) << "Contexts count size: " << query_contexts.size(); - int count = 0; - for (const auto& context : query_contexts) { - if (context.size() > config_.max_context_length) { - LOG(INFO) << "Skip long context: " << context; - continue; - } - if (++count > config_.max_contexts) break; - - std::vector words; - // Split context to words by symbol table, and build the context graph. 
- bool no_oov = SplitUTF8StringToWords(Trim(context), symbol_table, &words); - if (!no_oov) { - LOG(WARNING) << "Ignore unknown word found during compilation."; - continue; - } - - int prev_state = start_state; - int next_state = start_state; - float escape_score = 0; - for (size_t i = 0; i < words.size(); ++i) { - int word_id = symbol_table_->Find(words[i]); - float score = (i * config_.incremental_context_score - + config_.context_score) * UTF8StringLength(words[i]); - next_state = (i < words.size() - 1) ? ofst->AddState() : start_state; - ofst->AddArc(prev_state, - fst::StdArc(word_id, word_id, score, next_state)); - // Add escape arc to clean the previous context score. - if (i > 0) { - // ilabel and olabel of the escape arc is 0 (). - ofst->AddArc(prev_state, fst::StdArc(0, 0, -escape_score, start_state)); - } - prev_state = next_state; - escape_score += score; - } - } - std::unique_ptr det_fst(new fst::StdVectorFst()); - fst::Determinize(*ofst, det_fst.get()); - graph_ = std::move(det_fst); -} - -int ContextGraph::GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary) { - int next_state = 0; - for (fst::ArcIterator aiter(*graph_, cur_state); !aiter.Done(); - aiter.Next()) { - const fst::StdArc& arc = aiter.Value(); - if (arc.ilabel == 0) { - // escape score, will be overwritten when ilabel equals to word id. - *score = arc.weight.Value(); - } else if (arc.ilabel == word_id) { - next_state = arc.nextstate; - *score = arc.weight.Value(); - if (cur_state == 0) { - *is_start_boundary = true; - } - if (graph_->Final(arc.nextstate) == fst::StdArc::Weight::One()) { - *is_end_boundary = true; - } - break; - } - } - return next_state; -} - -bool ContextGraph::SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - bool no_oov = true; - for (size_t start = 0; start < chars.size();) { - for (size_t end = chars.size(); end > start; --end) { - std::string word; - for (size_t i = start; i < end; i++) { - word += chars[i]; - } - // Skip space. - if (word == " ") { - start = end; - continue; - } - // Add '▁' at the beginning of English word. - if (IsAlpha(word)) { - word = kSpaceSymbol + word; - } - - if (symbol_table->Find(word) != -1) { - words->emplace_back(word); - start = end; - continue; - } - if (end == start + 1) { - ++start; - no_oov = false; - LOG(WARNING) << word << " is oov."; - } - } - } - return no_oov; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/context_graph.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/context_graph.h deleted file mode 100644 index 41b59206987cfe22d421f40506057830b6311f8e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/context_graph.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CONTEXT_GRAPH_H_ -#define DECODER_CONTEXT_GRAPH_H_ - -#include -#include -#include - -#include "fst/compose.h" -#include "fst/fst.h" -#include "fst/vector-fst.h" - -namespace wenet { - -using StateId = fst::StdArc::StateId; - -struct ContextConfig { - int max_contexts = 5000; - int max_context_length = 100; - float context_score = 3.0; - float incremental_context_score = 0.0; -}; - -class ContextGraph { - public: - explicit ContextGraph(ContextConfig config); - void BuildContextGraph(const std::vector& query_context, - const std::shared_ptr& symbol_table); - int GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary); - - int start_tag_id() { return start_tag_id_; } - int end_tag_id() { return end_tag_id_; } - - private: - bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - - int start_tag_id_ = -1; - int end_tag_id_ = -1; - ContextConfig config_; - std::shared_ptr symbol_table_ = nullptr; - std::unique_ptr graph_ = nullptr; - DISALLOW_COPY_AND_ASSIGN(ContextGraph); -}; - -} // namespace wenet - -#endif // DECODER_CONTEXT_GRAPH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_endpoint.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_endpoint.cc deleted file mode 100644 index 4a64dd048f32401ab0dca468836cfac8be943d26..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_endpoint.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_endpoint.h" - -#include - -#include -#include - -#include "utils/log.h" - -namespace wenet { - -CtcEndpoint::CtcEndpoint(const CtcEndpointConfig& config) : config_(config) { - Reset(); -} - -void CtcEndpoint::Reset() { - num_frames_decoded_ = 0; - num_frames_trailing_blank_ = 0; -} - -static bool RuleActivated(const CtcEndpointRule& rule, - const std::string& rule_name, bool decoded_sth, - int trailing_silence, int utterance_length) { - bool ans = (decoded_sth || !rule.must_decoded_sth) && - trailing_silence >= rule.min_trailing_silence && - utterance_length >= rule.min_utterance_length; - if (ans) { - VLOG(2) << "Endpointing rule " << rule_name - << " activated: " << (decoded_sth ? 
"true" : "false") << ',' - << trailing_silence << ',' << utterance_length; - } - return ans; -} - -bool CtcEndpoint::IsEndpoint( - const std::vector>& ctc_log_probs, - bool decoded_something) { - for (int t = 0; t < ctc_log_probs.size(); ++t) { - const auto& logp_t = ctc_log_probs[t]; - float blank_prob = expf(logp_t[config_.blank]); - - num_frames_decoded_++; - if (blank_prob > config_.blank_threshold) { - num_frames_trailing_blank_++; - } else { - num_frames_trailing_blank_ = 0; - } - } - CHECK_GE(num_frames_decoded_, num_frames_trailing_blank_); - CHECK_GT(frame_shift_in_ms_, 0); - int utterance_length = num_frames_decoded_ * frame_shift_in_ms_; - int trailing_silence = num_frames_trailing_blank_ * frame_shift_in_ms_; - if (RuleActivated(config_.rule1, "rule1", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule2, "rule2", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule3, "rule3", decoded_something, trailing_silence, - utterance_length)) - return true; - return false; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_endpoint.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_endpoint.h deleted file mode 100644 index 56d9e08e7d3fab5562028e956f7b1d6ebac7b9e4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_endpoint.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_ENDPOINT_H_ -#define DECODER_CTC_ENDPOINT_H_ - -#include - -namespace wenet { - -struct CtcEndpointRule { - bool must_decoded_sth; - int min_trailing_silence; - int min_utterance_length; - - CtcEndpointRule(bool must_decoded_sth = true, int min_trailing_silence = 1000, - int min_utterance_length = 0) - : must_decoded_sth(must_decoded_sth), - min_trailing_silence(min_trailing_silence), - min_utterance_length(min_utterance_length) {} -}; - -struct CtcEndpointConfig { - /// We consider blank as silence for purposes of endpointing. - int blank = 0; // blank id - float blank_threshold = 0.8; // blank threshold to be silence - /// We support three rules. We terminate decoding if ANY of these rules - /// evaluates to "true". If you want to add more rules, do it by changing this - /// code. If you want to disable a rule, you can set the silence-timeout for - /// that rule to a very large number. - - /// rule1 times out after 5000 ms of silence, even if we decoded nothing. - CtcEndpointRule rule1; - /// rule2 times out after 1000 ms of silence after decoding something. - CtcEndpointRule rule2; - /// rule3 times out after the utterance is 20000 ms long, regardless of - /// anything else. 
- CtcEndpointRule rule3; - - CtcEndpointConfig() - : rule1(false, 5000, 0), rule2(true, 1000, 0), rule3(false, 0, 20000) {} -}; - -class CtcEndpoint { - public: - explicit CtcEndpoint(const CtcEndpointConfig& config); - - void Reset(); - /// This function returns true if this set of endpointing rules thinks we - /// should terminate decoding. - bool IsEndpoint(const std::vector>& ctc_log_probs, - bool decoded_something); - - void frame_shift_in_ms(int frame_shift_in_ms) { - frame_shift_in_ms_ = frame_shift_in_ms; - } - - private: - CtcEndpointConfig config_; - int frame_shift_in_ms_ = -1; - int num_frames_decoded_ = 0; - int num_frames_trailing_blank_ = 0; -}; - -} // namespace wenet - -#endif // DECODER_CTC_ENDPOINT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_prefix_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_prefix_beam_search.cc deleted file mode 100644 index 154c8864ba98255528a33a80a35b18eee8fa5dc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_prefix_beam_search.cc +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/ctc_prefix_beam_search.h" - -#include -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -CtcPrefixBeamSearch::CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : opts_(opts), context_graph_(context_graph) { - Reset(); -} - -void CtcPrefixBeamSearch::Reset() { - hypotheses_.clear(); - likelihood_.clear(); - cur_hyps_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - outputs_.clear(); - abs_time_step_ = 0; - PrefixScore prefix_score; - prefix_score.s = 0.0; - prefix_score.ns = -kFloatMax; - prefix_score.v_s = 0.0; - prefix_score.v_ns = 0.0; - std::vector empty; - cur_hyps_[empty] = prefix_score; - outputs_.emplace_back(empty); - hypotheses_.emplace_back(empty); - likelihood_.emplace_back(prefix_score.total_score()); - times_.emplace_back(empty); -} - -static bool PrefixScoreCompare( - const std::pair, PrefixScore>& a, - const std::pair, PrefixScore>& b) { - return a.second.total_score() > b.second.total_score(); -} - -void CtcPrefixBeamSearch::UpdateOutputs( - const std::pair, PrefixScore>& prefix) { - const std::vector& input = prefix.first; - const std::vector& start_boundaries = prefix.second.start_boundaries; - const std::vector& end_boundaries = prefix.second.end_boundaries; - - std::vector output; - int s = 0; - int e = 0; - for (int i = 0; i < input.size(); ++i) { - if (s < start_boundaries.size() && i == start_boundaries[s]) { - output.emplace_back(context_graph_->start_tag_id()); - ++s; - } - output.emplace_back(input[i]); - if (e < end_boundaries.size() && i == end_boundaries[e]) { - output.emplace_back(context_graph_->end_tag_id()); - ++e; - } - } - outputs_.emplace_back(output); -} - -void CtcPrefixBeamSearch::UpdateHypotheses( - const std::vector, PrefixScore>>& hpys) { - cur_hyps_.clear(); - outputs_.clear(); - hypotheses_.clear(); - likelihood_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - for (auto& item : hpys) { - cur_hyps_[item.first] = item.second; - UpdateOutputs(item); - hypotheses_.emplace_back(std::move(item.first)); - likelihood_.emplace_back(item.second.total_score()); - viterbi_likelihood_.emplace_back(item.second.viterbi_score()); - times_.emplace_back(item.second.times()); - } -} - -// Please refer https://robin1001.github.io/2020/12/11/ctc-search -// for how CTC prefix beam search works, and there is a simple graph demo in -// it. -void CtcPrefixBeamSearch::Search(const std::vector>& logp) { - if (logp.size() == 0) return; - int first_beam_size = - std::min(static_cast(logp[0].size()), opts_.first_beam_size); - for (int t = 0; t < logp.size(); ++t, ++abs_time_step_) { - const std::vector& logp_t = logp[t]; - std::unordered_map, PrefixScore, PrefixHash> next_hyps; - // 1. First beam prune, only select topk candidates - std::vector topk_score; - std::vector topk_index; - TopK(logp_t, first_beam_size, &topk_score, &topk_index); - - // 2. Token passing - for (int i = 0; i < topk_index.size(); ++i) { - int id = topk_index[i]; - auto prob = topk_score[i]; - for (const auto& it : cur_hyps_) { - const std::vector& prefix = it.first; - const PrefixScore& prefix_score = it.second; - // If prefix doesn't exist in next_hyps, next_hyps[prefix] will insert - // PrefixScore(-inf, -inf) by default, since the default constructor - // of PrefixScore will set fields s(blank ending score) and - // ns(none blank ending score) to -inf, respectively. 
- if (id == opts_.blank) { - // Case 0: *a + ε => *a - PrefixScore& next_score = next_hyps[prefix]; - next_score.s = LogAdd(next_score.s, prefix_score.score() + prob); - next_score.v_s = prefix_score.viterbi_score() + prob; - next_score.times_s = prefix_score.times(); - // Prefix not changed, copy the context from prefix. - if (context_graph_ && !next_score.has_context) { - next_score.CopyContext(prefix_score); - next_score.has_context = true; - } - } else if (!prefix.empty() && id == prefix.back()) { - // Case 1: *a + a => *a - PrefixScore& next_score1 = next_hyps[prefix]; - next_score1.ns = LogAdd(next_score1.ns, prefix_score.ns + prob); - if (next_score1.v_ns < prefix_score.v_ns + prob) { - next_score1.v_ns = prefix_score.v_ns + prob; - if (next_score1.cur_token_prob < prob) { - next_score1.cur_token_prob = prob; - next_score1.times_ns = prefix_score.times_ns; - CHECK_GT(next_score1.times_ns.size(), 0); - next_score1.times_ns.back() = abs_time_step_; - } - } - if (context_graph_ && !next_score1.has_context) { - next_score1.CopyContext(prefix_score); - next_score1.has_context = true; - } - - // Case 2: *aε + a => *aa - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score2 = next_hyps[new_prefix]; - next_score2.ns = LogAdd(next_score2.ns, prefix_score.s + prob); - if (next_score2.v_ns < prefix_score.v_s + prob) { - next_score2.v_ns = prefix_score.v_s + prob; - next_score2.cur_token_prob = prob; - next_score2.times_ns = prefix_score.times_s; - next_score2.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score2.has_context) { - // Prefix changed, calculate the context score. - next_score2.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score2.has_context = true; - } - } else { - // Case 3: *a + b => *ab, *aε + b => *ab - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score = next_hyps[new_prefix]; - next_score.ns = LogAdd(next_score.ns, prefix_score.score() + prob); - if (next_score.v_ns < prefix_score.viterbi_score() + prob) { - next_score.v_ns = prefix_score.viterbi_score() + prob; - next_score.cur_token_prob = prob; - next_score.times_ns = prefix_score.times(); - next_score.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score.has_context) { - // Calculate the context score. - next_score.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score.has_context = true; - } - } - } - } - - // 3. Second beam prune, only keep top n best paths - std::vector, PrefixScore>> arr(next_hyps.begin(), - next_hyps.end()); - int second_beam_size = - std::min(static_cast(arr.size()), opts_.second_beam_size); - std::nth_element(arr.begin(), arr.begin() + second_beam_size, arr.end(), - PrefixScoreCompare); - arr.resize(second_beam_size); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // 4. Update cur_hyps_ and get new result - UpdateHypotheses(arr); - } -} - -void CtcPrefixBeamSearch::FinalizeSearch() { UpdateFinalContext(); } - -void CtcPrefixBeamSearch::UpdateFinalContext() { - if (context_graph_ == nullptr) return; - CHECK_EQ(hypotheses_.size(), cur_hyps_.size()); - CHECK_EQ(hypotheses_.size(), likelihood_.size()); - // We should backoff the context score/state when the context is - // not fully matched at the last time. 
- for (const auto& prefix : hypotheses_) { - PrefixScore& prefix_score = cur_hyps_[prefix]; - if (prefix_score.context_state != 0) { - prefix_score.UpdateContext(context_graph_, prefix_score, 0, - prefix.size()); - } - } - std::vector, PrefixScore>> arr(cur_hyps_.begin(), - cur_hyps_.end()); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // Update cur_hyps_ and get new result - UpdateHypotheses(arr); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_prefix_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_prefix_beam_search.h deleted file mode 100644 index f44ec23c37af517c9e45140f89ef7346768f5d35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_prefix_beam_search.h +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_PREFIX_BEAM_SEARCH_H_ -#define DECODER_CTC_PREFIX_BEAM_SEARCH_H_ - -#include -#include -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "utils/utils.h" - -namespace wenet { - -struct CtcPrefixBeamSearchOptions { - int blank = 0; // blank id - int first_beam_size = 10; - int second_beam_size = 10; -}; - -struct PrefixScore { - float s = -kFloatMax; // blank ending score - float ns = -kFloatMax; // none blank ending score - float v_s = -kFloatMax; // viterbi blank ending score - float v_ns = -kFloatMax; // viterbi none blank ending score - float cur_token_prob = -kFloatMax; // prob of current token - std::vector times_s; // times of viterbi blank path - std::vector times_ns; // times of viterbi none blank path - - float score() const { return LogAdd(s, ns); } - float viterbi_score() const { return v_s > v_ns ? v_s : v_ns; } - const std::vector& times() const { - return v_s > v_ns ? 
times_s : times_ns; - } - - bool has_context = false; - int context_state = 0; - float context_score = 0; - std::vector start_boundaries; - std::vector end_boundaries; - - void CopyContext(const PrefixScore& prefix_score) { - context_state = prefix_score.context_state; - context_score = prefix_score.context_score; - start_boundaries = prefix_score.start_boundaries; - end_boundaries = prefix_score.end_boundaries; - } - - void UpdateContext(const std::shared_ptr& context_graph, - const PrefixScore& prefix_score, int word_id, - int prefix_len) { - this->CopyContext(prefix_score); - - float score = 0; - bool is_start_boundary = false; - bool is_end_boundary = false; - - context_state = - context_graph->GetNextState(prefix_score.context_state, word_id, &score, - &is_start_boundary, &is_end_boundary); - context_score += score; - if (is_start_boundary) start_boundaries.emplace_back(prefix_len); - if (is_end_boundary) end_boundaries.emplace_back(prefix_len); - } - - float total_score() const { return score() + context_score; } -}; - -struct PrefixHash { - size_t operator()(const std::vector& prefix) const { - size_t hash_code = 0; - // here we use KB&DR hash code - for (int id : prefix) { - hash_code = id + 31 * hash_code; - } - return hash_code; - } -}; - -class CtcPrefixBeamSearch : public SearchInterface { - public: - explicit CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph = nullptr); - - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kPrefixBeamSearch; } - void UpdateOutputs(const std::pair, PrefixScore>& prefix); - void UpdateHypotheses( - const std::vector, PrefixScore>>& hpys); - void UpdateFinalContext(); - - const std::vector& viterbi_likelihood() const { - return viterbi_likelihood_; - } - const std::vector>& Inputs() const override { - return hypotheses_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - int abs_time_step_ = 0; - - // N-best list and corresponding likelihood_, in sorted order - std::vector> hypotheses_; - std::vector likelihood_; - std::vector viterbi_likelihood_; - std::vector> times_; - - std::unordered_map, PrefixScore, PrefixHash> cur_hyps_; - std::shared_ptr context_graph_ = nullptr; - // Outputs contain the hypotheses_ and tags like: and - std::vector> outputs_; - const CtcPrefixBeamSearchOptions& opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(CtcPrefixBeamSearch); -}; - -} // namespace wenet - -#endif // DECODER_CTC_PREFIX_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_wfst_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_wfst_beam_search.cc deleted file mode 100644 index 10e93f387e87b5f16fb7784d7060c50f227bf58e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_wfst_beam_search.cc +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_wfst_beam_search.h" - -#include - -namespace wenet { - -void DecodableTensorScaled::Reset() { - num_frames_ready_ = 0; - done_ = false; - // Give an empty initialization, will throw error when - // AcceptLoglikes is not called - logp_.clear(); -} - -void DecodableTensorScaled::AcceptLoglikes(const std::vector& logp) { - ++num_frames_ready_; - // TODO(Binbin Zhang): Avoid copy here - logp_ = logp; -} - -float DecodableTensorScaled::LogLikelihood(int32 frame, int32 index) { - CHECK_GT(index, 0); - CHECK_LT(frame, num_frames_ready_); - return scale_ * logp_[index - 1]; -} - -bool DecodableTensorScaled::IsLastFrame(int32 frame) const { - CHECK_LT(frame, num_frames_ready_); - return done_ && (frame == num_frames_ready_ - 1); -} - -int32 DecodableTensorScaled::NumIndices() const { - LOG(FATAL) << "Not implement"; - return 0; -} - -CtcWfstBeamSearch::CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : decodable_(opts.acoustic_scale), - decoder_(fst, opts, context_graph), - context_graph_(context_graph), - opts_(opts) { - Reset(); -} - -void CtcWfstBeamSearch::Reset() { - num_frames_ = 0; - decoded_frames_mapping_.clear(); - is_last_frame_blank_ = false; - last_best_ = 0; - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - decodable_.Reset(); - decoder_.InitDecoding(); -} - -void CtcWfstBeamSearch::Search(const std::vector>& logp) { - if (0 == logp.size()) { - return; - } - // Every time we get the log posterior, we decode it all before return - for (int i = 0; i < logp.size(); i++) { - float blank_score = std::exp(logp[i][0]); - if (blank_score > opts_.blank_skip_thresh * opts_.blank_scale) { - VLOG(3) << "skipping frame " << num_frames_ << " score " << blank_score; - is_last_frame_blank_ = true; - last_frame_prob_ = logp[i]; - } else { - // Get the best symbol - int cur_best = - std::max_element(logp[i].begin(), logp[i].end()) - logp[i].begin(); - // Optional, adding one blank frame if we has skipped it in two same - // symbols - if (cur_best != 0 && is_last_frame_blank_ && cur_best == last_best_) { - decodable_.AcceptLoglikes(last_frame_prob_); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_ - 1); - VLOG(2) << "Adding blank frame at symbol " << cur_best; - } - last_best_ = cur_best; - - decodable_.AcceptLoglikes(logp[i]); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_); - is_last_frame_blank_ = false; - } - num_frames_++; - } - // Get the best path - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - if (decoded_frames_mapping_.size() > 0) { - inputs_.resize(1); - outputs_.resize(1); - likelihood_.resize(1); - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, false); - std::vector alignment; - kaldi::LatticeWeight weight; - fst::GetLinearSymbolSequence(lat, &alignment, &outputs_[0], &weight); - ConvertToInputs(alignment, &inputs_[0]); - RemoveContinuousTags(&outputs_[0]); - VLOG(3) << weight.Value1() << " " << weight.Value2(); - likelihood_[0] = 
-(weight.Value1() + weight.Value2()); - } -} - -void CtcWfstBeamSearch::FinalizeSearch() { - decodable_.SetFinish(); - decoder_.FinalizeDecoding(); - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - if (decoded_frames_mapping_.size() > 0) { - std::vector nbest_lats; - if (opts_.nbest == 1) { - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, true); - nbest_lats.push_back(std::move(lat)); - } else { - // Get N-best path by lattice(CompactLattice) - kaldi::CompactLattice clat; - decoder_.GetLattice(&clat, true); - kaldi::Lattice lat, nbest_lat; - fst::ConvertLattice(clat, &lat); - // TODO(Binbin Zhang): it's n-best word lists here, not character n-best - fst::ShortestPath(lat, &nbest_lat, opts_.nbest); - fst::ConvertNbestToVector(nbest_lat, &nbest_lats); - } - int nbest = nbest_lats.size(); - inputs_.resize(nbest); - outputs_.resize(nbest); - likelihood_.resize(nbest); - times_.resize(nbest); - for (int i = 0; i < nbest; i++) { - kaldi::LatticeWeight weight; - std::vector alignment; - fst::GetLinearSymbolSequence(nbest_lats[i], &alignment, &outputs_[i], - &weight); - ConvertToInputs(alignment, &inputs_[i], ×_[i]); - RemoveContinuousTags(&outputs_[i]); - likelihood_[i] = -(weight.Value1() + weight.Value2()); - } - } -} - -void CtcWfstBeamSearch::ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time) { - input->clear(); - if (time != nullptr) time->clear(); - for (int cur = 0; cur < alignment.size(); ++cur) { - // ignore blank - if (alignment[cur] - 1 == 0) continue; - // merge continuous same label - if (cur > 0 && alignment[cur] == alignment[cur - 1]) continue; - - input->push_back(alignment[cur] - 1); - if (time != nullptr) { - time->push_back(decoded_frames_mapping_[cur]); - } - } -} - -void CtcWfstBeamSearch::RemoveContinuousTags(std::vector* output) { - if (context_graph_) { - for (auto it = output->begin(); it != output->end();) { - if (*it == context_graph_->start_tag_id() || - *it == context_graph_->end_tag_id()) { - if (it + 1 != output->end() && *it == *(it + 1)) { - it = output->erase(it); - continue; - } - } - ++it; - } - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_wfst_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_wfst_beam_search.h deleted file mode 100644 index 204a0c8db1254035b7e3bd4a6e02b65d66b756f3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/ctc_wfst_beam_search.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#ifndef DECODER_CTC_WFST_BEAM_SEARCH_H_ -#define DECODER_CTC_WFST_BEAM_SEARCH_H_ - -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "kaldi/decoder/lattice-faster-online-decoder.h" -#include "utils/utils.h" - -namespace wenet { - -class DecodableTensorScaled : public kaldi::DecodableInterface { - public: - explicit DecodableTensorScaled(float scale = 1.0) : scale_(scale) { Reset(); } - - void Reset(); - int32 NumFramesReady() const override { return num_frames_ready_; } - bool IsLastFrame(int32 frame) const override; - float LogLikelihood(int32 frame, int32 index) override; - int32 NumIndices() const override; - void AcceptLoglikes(const std::vector& logp); - void SetFinish() { done_ = true; } - - private: - int num_frames_ready_ = 0; - float scale_ = 1.0; - bool done_ = false; - std::vector logp_; -}; - -// LatticeFasterDecoderConfig has the following key members -// beam: decoding beam -// max_active: Decoder max active states -// lattice_beam: Lattice generation beam -struct CtcWfstBeamSearchOptions : public kaldi::LatticeFasterDecoderConfig { - float acoustic_scale = 1.0; - float nbest = 10; - // When blank score is greater than this thresh, skip the frame in viterbi - // search - float blank_skip_thresh = 0.98; - float blank_scale = 1.0; -}; - -class CtcWfstBeamSearch : public SearchInterface { - public: - explicit CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph); - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kWfstBeamSearch; } - // For CTC prefix beam search, both inputs and outputs are hypotheses_ - const std::vector>& Inputs() const override { - return inputs_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - // Sub one and remove - void ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time = nullptr); - void RemoveContinuousTags(std::vector* output); - - int num_frames_ = 0; - std::vector decoded_frames_mapping_; - - int last_best_ = 0; // last none blank best id - std::vector last_frame_prob_; - bool is_last_frame_blank_ = false; - std::vector> inputs_, outputs_; - std::vector likelihood_; - std::vector> times_; - DecodableTensorScaled decodable_; - kaldi::LatticeFasterOnlineDecoder decoder_; - std::shared_ptr context_graph_; - const CtcWfstBeamSearchOptions& opts_; -}; - -} // namespace wenet - -#endif // DECODER_CTC_WFST_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/onnx_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/onnx_asr_model.cc deleted file mode 100644 index fc7afc704febbde3b7e350e392dc46763c453e74..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/onnx_asr_model.cc +++ /dev/null @@ -1,430 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/onnx_asr_model.h" - -#include -#include -#include - -#include "utils/string.h" - -namespace wenet { - -Ort::Env OnnxAsrModel::env_ = Ort::Env(ORT_LOGGING_LEVEL_WARNING, ""); -Ort::SessionOptions OnnxAsrModel::session_options_ = Ort::SessionOptions(); - -void OnnxAsrModel::InitEngineThreads(int num_threads) { - session_options_.SetIntraOpNumThreads(num_threads); -} - -void OnnxAsrModel::GetInputOutputInfo( - const std::shared_ptr& session, - std::vector* in_names, std::vector* out_names) { - Ort::AllocatorWithDefaultOptions allocator; - // Input info - int num_nodes = session->GetInputCount(); - in_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetInputName(i, allocator); - Ort::TypeInfo type_info = session->GetInputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tInput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*in_names)[i] = name; - } - // Output info - num_nodes = session->GetOutputCount(); - out_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetOutputName(i, allocator); - Ort::TypeInfo type_info = session->GetOutputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tOutput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*out_names)[i] = name; - } -} - -void OnnxAsrModel::Read(const std::string& model_dir) { - std::string encoder_onnx_path = model_dir + "/encoder.onnx"; - std::string rescore_onnx_path = model_dir + "/decoder.onnx"; - std::string ctc_onnx_path = model_dir + "/ctc.onnx"; - - // 1. Load sessions - try { -#ifdef _MSC_VER - encoder_session_ = std::make_shared( - env_, ToWString(encoder_onnx_path).c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, ToWString(rescore_onnx_path).c_str(), session_options_); - ctc_session_ = std::make_shared( - env_, ToWString(ctc_onnx_path).c_str(), session_options_); -#else - encoder_session_ = std::make_shared( - env_, encoder_onnx_path.c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, rescore_onnx_path.c_str(), session_options_); - ctc_session_ = std::make_shared(env_, ctc_onnx_path.c_str(), - session_options_); -#endif - } catch (std::exception const& e) { - LOG(ERROR) << "error when load onnx model: " << e.what(); - exit(0); - } - - // 2. 
Read metadata - auto model_metadata = encoder_session_->GetModelMetadata(); - - Ort::AllocatorWithDefaultOptions allocator; - encoder_output_size_ = - atoi(model_metadata.LookupCustomMetadataMap("output_size", allocator)); - num_blocks_ = - atoi(model_metadata.LookupCustomMetadataMap("num_blocks", allocator)); - head_ = atoi(model_metadata.LookupCustomMetadataMap("head", allocator)); - cnn_module_kernel_ = atoi( - model_metadata.LookupCustomMetadataMap("cnn_module_kernel", allocator)); - subsampling_rate_ = atoi( - model_metadata.LookupCustomMetadataMap("subsampling_rate", allocator)); - right_context_ = - atoi(model_metadata.LookupCustomMetadataMap("right_context", allocator)); - sos_ = atoi(model_metadata.LookupCustomMetadataMap("sos_symbol", allocator)); - eos_ = atoi(model_metadata.LookupCustomMetadataMap("eos_symbol", allocator)); - is_bidirectional_decoder_ = atoi(model_metadata.LookupCustomMetadataMap( - "is_bidirectional_decoder", allocator)); - chunk_size_ = - atoi(model_metadata.LookupCustomMetadataMap("chunk_size", allocator)); - num_left_chunks_ = - atoi(model_metadata.LookupCustomMetadataMap("left_chunks", allocator)); - - LOG(INFO) << "Onnx Model Info:"; - LOG(INFO) << "\tencoder_output_size " << encoder_output_size_; - LOG(INFO) << "\tnum_blocks " << num_blocks_; - LOG(INFO) << "\thead " << head_; - LOG(INFO) << "\tcnn_module_kernel " << cnn_module_kernel_; - LOG(INFO) << "\tsubsampling_rate " << subsampling_rate_; - LOG(INFO) << "\tright_context " << right_context_; - LOG(INFO) << "\tsos " << sos_; - LOG(INFO) << "\teos " << eos_; - LOG(INFO) << "\tis bidirectional decoder " << is_bidirectional_decoder_; - LOG(INFO) << "\tchunk_size " << chunk_size_; - LOG(INFO) << "\tnum_left_chunks " << num_left_chunks_; - - // 3. Read model nodes - LOG(INFO) << "Onnx Encoder:"; - GetInputOutputInfo(encoder_session_, &encoder_in_names_, &encoder_out_names_); - LOG(INFO) << "Onnx CTC:"; - GetInputOutputInfo(ctc_session_, &ctc_in_names_, &ctc_out_names_); - LOG(INFO) << "Onnx Rescore:"; - GetInputOutputInfo(rescore_session_, &rescore_in_names_, &rescore_out_names_); -} - -OnnxAsrModel::OnnxAsrModel(const OnnxAsrModel& other) { - // metadatas - encoder_output_size_ = other.encoder_output_size_; - num_blocks_ = other.num_blocks_; - head_ = other.head_; - cnn_module_kernel_ = other.cnn_module_kernel_; - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - - // sessions - encoder_session_ = other.encoder_session_; - ctc_session_ = other.ctc_session_; - rescore_session_ = other.rescore_session_; - - // node names - encoder_in_names_ = other.encoder_in_names_; - encoder_out_names_ = other.encoder_out_names_; - ctc_in_names_ = other.ctc_in_names_; - ctc_out_names_ = other.ctc_out_names_; - rescore_in_names_ = other.rescore_in_names_; - rescore_out_names_ = other.rescore_out_names_; -} - -std::shared_ptr OnnxAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void OnnxAsrModel::Reset() { - offset_ = 0; - encoder_outs_.clear(); - cached_feature_.clear(); - // Reset att_cache - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - if (num_left_chunks_ > 0) { - int required_cache_size = chunk_size_ * 
num_left_chunks_; - offset_ = required_cache_size; - att_cache_.resize(num_blocks_ * head_ * required_cache_size * - encoder_output_size_ / head_ * 2, - 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, required_cache_size, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } else { - att_cache_.resize(0, 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, 0, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } - - // Reset cnn_cache - cnn_cache_.resize( - num_blocks_ * encoder_output_size_ * (cnn_module_kernel_ - 1), 0.0); - const int64_t cnn_cache_shape[] = {num_blocks_, 1, encoder_output_size_, - cnn_module_kernel_ - 1}; - cnn_cache_ort_ = Ort::Value::CreateTensor( - memory_info, cnn_cache_.data(), cnn_cache_.size(), cnn_cache_shape, 4); -} - -void OnnxAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - // 1. Prepare onnx required data, splice cached_feature_ and chunk_feats - // chunk - int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - std::vector feats; - for (size_t i = 0; i < cached_feature_.size(); ++i) { - feats.insert(feats.end(), cached_feature_[i].begin(), - cached_feature_[i].end()); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - feats.insert(feats.end(), chunk_feats[i].begin(), chunk_feats[i].end()); - } - const int64_t feats_shape[3] = {1, num_frames, feature_dim}; - Ort::Value feats_ort = Ort::Value::CreateTensor( - memory_info, feats.data(), feats.size(), feats_shape, 3); - // offset - int64_t offset_int64 = static_cast(offset_); - Ort::Value offset_ort = Ort::Value::CreateTensor( - memory_info, &offset_int64, 1, std::vector{}.data(), 0); - // required_cache_size - int64_t required_cache_size = chunk_size_ * num_left_chunks_; - Ort::Value required_cache_size_ort = Ort::Value::CreateTensor( - memory_info, &required_cache_size, 1, std::vector{}.data(), 0); - // att_mask - Ort::Value att_mask_ort{nullptr}; - std::vector att_mask(required_cache_size + chunk_size_, 1); - if (num_left_chunks_ > 0) { - int chunk_idx = offset_ / chunk_size_ - num_left_chunks_; - if (chunk_idx < num_left_chunks_) { - for (int i = 0; i < (num_left_chunks_ - chunk_idx) * chunk_size_; ++i) { - att_mask[i] = 0; - } - } - const int64_t att_mask_shape[] = {1, 1, required_cache_size + chunk_size_}; - att_mask_ort = Ort::Value::CreateTensor( - memory_info, reinterpret_cast(att_mask.data()), att_mask.size(), - att_mask_shape, 3); - } - - // 2. 
Encoder chunk forward - std::vector inputs; - for (auto name : encoder_in_names_) { - if (!strcmp(name, "chunk")) { - inputs.emplace_back(std::move(feats_ort)); - } else if (!strcmp(name, "offset")) { - inputs.emplace_back(std::move(offset_ort)); - } else if (!strcmp(name, "required_cache_size")) { - inputs.emplace_back(std::move(required_cache_size_ort)); - } else if (!strcmp(name, "att_cache")) { - inputs.emplace_back(std::move(att_cache_ort_)); - } else if (!strcmp(name, "cnn_cache")) { - inputs.emplace_back(std::move(cnn_cache_ort_)); - } else if (!strcmp(name, "att_mask")) { - inputs.emplace_back(std::move(att_mask_ort)); - } - } - - std::vector ort_outputs = encoder_session_->Run( - Ort::RunOptions{nullptr}, encoder_in_names_.data(), inputs.data(), - inputs.size(), encoder_out_names_.data(), encoder_out_names_.size()); - - offset_ += static_cast( - ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[1]); - att_cache_ort_ = std::move(ort_outputs[1]); - cnn_cache_ort_ = std::move(ort_outputs[2]); - - std::vector ctc_inputs; - ctc_inputs.emplace_back(std::move(ort_outputs[0])); - - std::vector ctc_ort_outputs = ctc_session_->Run( - Ort::RunOptions{nullptr}, ctc_in_names_.data(), ctc_inputs.data(), - ctc_inputs.size(), ctc_out_names_.data(), ctc_out_names_.size()); - encoder_outs_.push_back(std::move(ctc_inputs[0])); - - float* logp_data = ctc_ort_outputs[0].GetTensorMutableData(); - auto type_info = ctc_ort_outputs[0].GetTensorTypeAndShapeInfo(); - - int num_outputs = type_info.GetShape()[1]; - int output_dim = type_info.GetShape()[2]; - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), logp_data + i * output_dim, - sizeof(float) * output_dim); - } -} - -float OnnxAsrModel::ComputeAttentionScore(const float* prob, - const std::vector& hyp, int eos, - int decode_out_len) { - float score = 0.0f; - for (size_t j = 0; j < hyp.size(); ++j) { - score += *(prob + j * decode_out_len + hyp[j]); - } - score += *(prob + hyp.size() * decode_out_len + eos); - return score; -} - -void OnnxAsrModel::AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - std::vector hyps_lens; - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_lens.emplace_back(static_cast(length)); - } - - std::vector rescore_input; - int encoder_len = 0; - for (int i = 0; i < encoder_outs_.size(); i++) { - float* encoder_outs_data = encoder_outs_[i].GetTensorMutableData(); - auto type_info = encoder_outs_[i].GetTensorTypeAndShapeInfo(); - for (int j = 0; j < type_info.GetElementCount(); j++) { - rescore_input.emplace_back(encoder_outs_data[j]); - } - encoder_len += type_info.GetShape()[1]; - } - - const int64_t decode_input_shape[] = {1, encoder_len, encoder_output_size_}; - - std::vector hyps_pad; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_pad.emplace_back(sos_); - size_t j = 0; - for (; j < hyp.size(); ++j) { - hyps_pad.emplace_back(hyp[j]); - } - if (j == max_hyps_len - 1) { - continue; - } - for (; j < max_hyps_len - 1; ++j) { 
- hyps_pad.emplace_back(0); - } - } - - const int64_t hyps_pad_shape[] = {num_hyps, max_hyps_len}; - - const int64_t hyps_lens_shape[] = {num_hyps}; - - Ort::Value decode_input_tensor_ = Ort::Value::CreateTensor( - memory_info, rescore_input.data(), rescore_input.size(), - decode_input_shape, 3); - Ort::Value hyps_pad_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_pad.data(), hyps_pad.size(), hyps_pad_shape, 2); - Ort::Value hyps_lens_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_lens.data(), hyps_lens.size(), hyps_lens_shape, 1); - - std::vector rescore_inputs; - - rescore_inputs.emplace_back(std::move(hyps_pad_tensor_)); - rescore_inputs.emplace_back(std::move(hyps_lens_tensor_)); - rescore_inputs.emplace_back(std::move(decode_input_tensor_)); - - std::vector rescore_outputs = rescore_session_->Run( - Ort::RunOptions{nullptr}, rescore_in_names_.data(), rescore_inputs.data(), - rescore_inputs.size(), rescore_out_names_.data(), - rescore_out_names_.size()); - - float* decoder_outs_data = rescore_outputs[0].GetTensorMutableData(); - float* r_decoder_outs_data = rescore_outputs[1].GetTensorMutableData(); - - auto type_info = rescore_outputs[0].GetTensorTypeAndShapeInfo(); - int decode_out_len = type_info.GetShape()[2]; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left to right decoder score - score = ComputeAttentionScore( - decoder_outs_data + max_hyps_len * decode_out_len * i, hyp, eos_, - decode_out_len); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore( - r_decoder_outs_data + max_hyps_len * decode_out_len * i, r_hyp, eos_, - decode_out_len); - } - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/onnx_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/onnx_asr_model.h deleted file mode 100644 index f5d9e9a0c61d728f2fb6d45d1428234abae98c90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/onnx_asr_model.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_ONNX_ASR_MODEL_H_ -#define DECODER_ONNX_ASR_MODEL_H_ - -#include -#include -#include - -#include "onnxruntime_cxx_api.h" // NOLINT - -#include "decoder/asr_model.h" -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -class OnnxAsrModel : public AsrModel { - public: - static void InitEngineThreads(int num_threads = 1); - - public: - OnnxAsrModel() = default; - OnnxAsrModel(const OnnxAsrModel& other); - void Read(const std::string& model_dir); - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - void GetInputOutputInfo(const std::shared_ptr& session, - std::vector* in_names, - std::vector* out_names); - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const float* prob, const std::vector& hyp, - int eos, int decode_out_len); - - private: - int encoder_output_size_ = 0; - int num_blocks_ = 0; - int cnn_module_kernel_ = 0; - int head_ = 0; - - // sessions - // NOTE(Mddct): The Env holds the logging state used by all other objects. - // One Env must be created before using any other Onnxruntime functionality. - static Ort::Env env_; // shared environment across threads. - static Ort::SessionOptions session_options_; - std::shared_ptr encoder_session_ = nullptr; - std::shared_ptr rescore_session_ = nullptr; - std::shared_ptr ctc_session_ = nullptr; - - // node names - std::vector encoder_in_names_, encoder_out_names_; - std::vector ctc_in_names_, ctc_out_names_; - std::vector rescore_in_names_, rescore_out_names_; - - // caches - Ort::Value att_cache_ort_{nullptr}; - Ort::Value cnn_cache_ort_{nullptr}; - std::vector encoder_outs_; - // NOTE: Instead of making a copy of the xx_cache, ONNX only maintains - // its data pointer when initializing xx_cache_ort (see https://github.com/ - // microsoft/onnxruntime/blob/master/onnxruntime/core/framework - // /tensor.cc#L102-L129), so we need the following variables to keep - // our data "alive" during the lifetime of decoder. - std::vector att_cache_; - std::vector cnn_cache_; -}; - -} // namespace wenet - -#endif // DECODER_ONNX_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/params.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/params.h deleted file mode 100644 index 3edc877f1bb6d876ca087cab8e4ed00d42e97e63..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/params.h +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_PARAMS_H_ -#define DECODER_PARAMS_H_ - -#include -#include -#include -#include - -#include "decoder/asr_decoder.h" -#ifdef USE_ONNX -#include "decoder/onnx_asr_model.h" -#endif -#ifdef USE_TORCH -#include "decoder/torch_asr_model.h" -#endif -#ifdef USE_XPU -#include "xpu/xpu_asr_model.h" -#endif -#ifdef USE_BPU -#include "bpu/bpu_asr_model.h" -#endif -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/flags.h" -#include "utils/string.h" - -DEFINE_int32(device_id, 0, "set XPU DeviceID for ASR model"); - -// TorchAsrModel flags -DEFINE_string(model_path, "", "pytorch exported model path"); -// OnnxAsrModel flags -DEFINE_string(onnx_dir, "", "directory where the onnx model is saved"); -// XPUAsrModel flags -DEFINE_string(xpu_model_dir, "", - "directory where the XPU model and weights is saved"); -// BPUAsrModel flags -DEFINE_string(bpu_model_dir, "", - "directory where the HORIZON BPU model is saved"); - -// FeaturePipelineConfig flags -DEFINE_int32(num_bins, 80, "num mel bins for fbank feature"); -DEFINE_int32(sample_rate, 16000, "sample rate for audio"); - -// TLG fst -DEFINE_string(fst_path, "", "TLG fst path"); - -// DecodeOptions flags -DEFINE_int32(chunk_size, 16, "decoding chunk size"); -DEFINE_int32(num_left_chunks, -1, "left chunks in decoding"); -DEFINE_double(ctc_weight, 0.5, - "ctc weight when combining ctc score and rescoring score"); -DEFINE_double(rescoring_weight, 1.0, - "rescoring weight when combining ctc score and rescoring score"); -DEFINE_double(reverse_weight, 0.0, - "used for bitransformer rescoring. it must be 0.0 if decoder is" - "conventional transformer decoder, and only reverse_weight > 0.0" - "dose the right to left decoder will be calculated and used"); -DEFINE_int32(max_active, 7000, "max active states in ctc wfst search"); -DEFINE_int32(min_active, 200, "min active states in ctc wfst search"); -DEFINE_double(beam, 16.0, "beam in ctc wfst search"); -DEFINE_double(lattice_beam, 10.0, "lattice beam in ctc wfst search"); -DEFINE_double(acoustic_scale, 1.0, "acoustic scale for ctc wfst search"); -DEFINE_double(blank_skip_thresh, 1.0, - "blank skip thresh for ctc wfst search, 1.0 means no skip"); -DEFINE_double(blank_scale, 1.0, "blank scale for ctc wfst search"); -DEFINE_double(length_penalty, 0.0, - "length penalty ctc wfst search, will not" - "apply on self-loop arc, for balancing the del/ins ratio, " - "suggest set to -3.0"); -DEFINE_int32(nbest, 10, "nbest for ctc wfst or prefix search"); - -// SymbolTable flags -DEFINE_string(dict_path, "", - "dict symbol table path, required when LM is enabled"); -DEFINE_string(unit_path, "", - "e2e model unit symbol table, it is used in both " - "with/without LM scenarios for context/timestamp"); - -// Context flags -DEFINE_string(context_path, "", "context path, is used to build context graph"); -DEFINE_double(context_score, 3.0, "is used to rescore the decoded result"); - -// PostProcessOptions flags -DEFINE_int32(language_type, 0, - "remove spaces according to language type" - "0x00 = kMandarinEnglish, " - "0x01 = kIndoEuropean"); -DEFINE_bool(lowercase, true, "lowercase final result if needed"); - -namespace wenet { -std::shared_ptr InitFeaturePipelineConfigFromFlags() { - auto feature_config = std::make_shared( - FLAGS_num_bins, FLAGS_sample_rate); - return feature_config; -} - -std::shared_ptr InitDecodeOptionsFromFlags() { - auto decode_config = std::make_shared(); - decode_config->chunk_size = FLAGS_chunk_size; - decode_config->num_left_chunks = 
FLAGS_num_left_chunks; - decode_config->ctc_weight = FLAGS_ctc_weight; - decode_config->reverse_weight = FLAGS_reverse_weight; - decode_config->rescoring_weight = FLAGS_rescoring_weight; - decode_config->ctc_wfst_search_opts.max_active = FLAGS_max_active; - decode_config->ctc_wfst_search_opts.min_active = FLAGS_min_active; - decode_config->ctc_wfst_search_opts.beam = FLAGS_beam; - decode_config->ctc_wfst_search_opts.lattice_beam = FLAGS_lattice_beam; - decode_config->ctc_wfst_search_opts.acoustic_scale = FLAGS_acoustic_scale; - decode_config->ctc_wfst_search_opts.blank_skip_thresh = - FLAGS_blank_skip_thresh; - decode_config->ctc_wfst_search_opts.blank_scale = FLAGS_blank_scale; - decode_config->ctc_wfst_search_opts.length_penalty = FLAGS_length_penalty; - decode_config->ctc_wfst_search_opts.nbest = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.first_beam_size = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.second_beam_size = FLAGS_nbest; - return decode_config; -} - -std::shared_ptr InitDecodeResourceFromFlags() { - auto resource = std::make_shared(); - const int kNumGemmThreads = 1; - if (!FLAGS_onnx_dir.empty()) { -#ifdef USE_ONNX - LOG(INFO) << "Reading onnx model "; - OnnxAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_onnx_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DONNX=ON'."; -#endif - } else if (!FLAGS_model_path.empty()) { -#ifdef USE_TORCH - LOG(INFO) << "Reading torch model " << FLAGS_model_path; - TorchAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_model_path); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DTORCH=ON'."; -#endif - } else if (!FLAGS_xpu_model_dir.empty()) { -#ifdef USE_XPU - LOG(INFO) << "Reading XPU WeNet model weight from " << FLAGS_xpu_model_dir; - auto model = std::make_shared(); - model->SetEngineThreads(kNumGemmThreads); - model->SetDeviceId(FLAGS_device_id); - model->Read(FLAGS_xpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DXPU=ON'."; -#endif - } else if (!FLAGS_bpu_model_dir.empty()) { -#ifdef USE_BPU - LOG(INFO) << "Reading Horizon BPU model from " << FLAGS_bpu_model_dir; - auto model = std::make_shared(); - model->Read(FLAGS_bpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DBPU=ON'."; -#endif - } else { - LOG(FATAL) << "Please set ONNX, TORCH, XPU or BPU model path!!!"; - } - - LOG(INFO) << "Reading unit table " << FLAGS_unit_path; - auto unit_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_unit_path)); - CHECK(unit_table != nullptr); - resource->unit_table = unit_table; - - if (!FLAGS_fst_path.empty()) { // With LM - CHECK(!FLAGS_dict_path.empty()); - LOG(INFO) << "Reading fst " << FLAGS_fst_path; - auto fst = std::shared_ptr>( - fst::Fst::Read(FLAGS_fst_path)); - CHECK(fst != nullptr); - resource->fst = fst; - - LOG(INFO) << "Reading symbol table " << FLAGS_dict_path; - auto symbol_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_dict_path)); - CHECK(symbol_table != nullptr); - resource->symbol_table = symbol_table; - } else { // Without LM, symbol_table is the same as unit_table - resource->symbol_table = unit_table; - } - - if (!FLAGS_context_path.empty()) { - LOG(INFO) << "Reading context " << FLAGS_context_path; - std::vector contexts; - std::ifstream infile(FLAGS_context_path); - std::string context; - 
-    while (getline(infile, context)) {
-      contexts.emplace_back(Trim(context));
-    }
-    ContextConfig config;
-    config.context_score = FLAGS_context_score;
-    resource->context_graph = std::make_shared<ContextGraph>(config);
-    resource->context_graph->BuildContextGraph(contexts,
-                                               resource->symbol_table);
-  }
-
-  PostProcessOptions post_process_opts;
-  post_process_opts.language_type =
-      FLAGS_language_type == 0 ? kMandarinEnglish : kIndoEuropean;
-  post_process_opts.lowercase = FLAGS_lowercase;
-  resource->post_processor =
-      std::make_shared<PostProcessor>(std::move(post_process_opts));
-  return resource;
-}
-
-}  // namespace wenet
-
-#endif  // DECODER_PARAMS_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/search_interface.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/search_interface.h
deleted file mode 100644
index 25bad26705f8be44561d2c686f50a63035b14bbf..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/search_interface.h
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef DECODER_SEARCH_INTERFACE_H_
-#define DECODER_SEARCH_INTERFACE_H_
-
-namespace wenet {
-
-#include <vector>
-
-enum SearchType {
-  kPrefixBeamSearch = 0x00,
-  kWfstBeamSearch = 0x01,
-};
-
-class SearchInterface {
- public:
-  virtual ~SearchInterface() {}
-  virtual void Search(const std::vector<std::vector<float>>& logp) = 0;
-  virtual void Reset() = 0;
-  virtual void FinalizeSearch() = 0;
-
-  virtual SearchType Type() const = 0;
-  // N-best inputs id
-  virtual const std::vector<std::vector<int>>& Inputs() const = 0;
-  // N-best outputs id
-  virtual const std::vector<std::vector<int>>& Outputs() const = 0;
-  // N-best likelihood
-  virtual const std::vector<float>& Likelihood() const = 0;
-  // N-best timestamp
-  virtual const std::vector<std::vector<int>>& Times() const = 0;
-};
-
-}  // namespace wenet
-
-#endif  // DECODER_SEARCH_INTERFACE_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/torch_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/torch_asr_model.cc
deleted file mode 100644
index 3abca283e12f5c173c9511707229ea82b31f26d8..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/torch_asr_model.cc
+++ /dev/null
@@ -1,278 +0,0 @@
-// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu)
-//               2022 Binbin Zhang (binbzha@qq.com)
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/torch_asr_model.h" - -#include -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -namespace wenet { - -#ifndef IOS -void TorchAsrModel::InitEngineThreads(int num_threads) { - // For multi-thread performance - at::set_num_threads(num_threads); - VLOG(1) << "Num intra-op threads: " << at::get_num_threads(); -} -#endif - -void TorchAsrModel::Read(const std::string& model_path) { - torch::DeviceType device = at::kCPU; -#ifdef USE_GPU - if (!torch::cuda::is_available()) { - VLOG(1) << "CUDA is not available! Please check your GPU settings"; - throw std::runtime_error("CUDA is not available!"); - } else { - VLOG(1) << "CUDA available! Running on GPU"; - device = at::kCUDA; - } -#endif - torch::jit::script::Module model = torch::jit::load(model_path, device); - model_ = std::make_shared(std::move(model)); - torch::NoGradGuard no_grad; - model_->eval(); - torch::jit::IValue o1 = model_->run_method("subsampling_rate"); - CHECK_EQ(o1.isInt(), true); - subsampling_rate_ = o1.toInt(); - torch::jit::IValue o2 = model_->run_method("right_context"); - CHECK_EQ(o2.isInt(), true); - right_context_ = o2.toInt(); - torch::jit::IValue o3 = model_->run_method("sos_symbol"); - CHECK_EQ(o3.isInt(), true); - sos_ = o3.toInt(); - torch::jit::IValue o4 = model_->run_method("eos_symbol"); - CHECK_EQ(o4.isInt(), true); - eos_ = o4.toInt(); - torch::jit::IValue o5 = model_->run_method("is_bidirectional_decoder"); - CHECK_EQ(o5.isBool(), true); - is_bidirectional_decoder_ = o5.toBool(); - - VLOG(1) << "Torch Model Info:"; - VLOG(1) << "\tsubsampling_rate " << subsampling_rate_; - VLOG(1) << "\tright context " << right_context_; - VLOG(1) << "\tsos " << sos_; - VLOG(1) << "\teos " << eos_; - VLOG(1) << "\tis bidirectional decoder " << is_bidirectional_decoder_; -} - -TorchAsrModel::TorchAsrModel(const TorchAsrModel& other) { - // 1. Init the model info - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - // 2. Model copy, just copy the model ptr since: - // PyTorch allows using multiple CPU threads during TorchScript model - // inference, please see https://pytorch.org/docs/stable/notes/cpu_ - // threading_torchscript_inference.html - model_ = other.model_; - - // NOTE(Binbin Zhang): - // inner states for forward are not copied here. -} - -std::shared_ptr TorchAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void TorchAsrModel::Reset() { - offset_ = 0; - att_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - cnn_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - encoder_outs_.clear(); - cached_feature_.clear(); -} - -void TorchAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - // 1. Prepare libtorch required data, splice cached_feature_ and chunk_feats - // The first dimension is for batchsize, which is 1. 
- int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - torch::Tensor feats = - torch::zeros({1, num_frames, feature_dim}, torch::kFloat); - for (size_t i = 0; i < cached_feature_.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(cached_feature_[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][i] = std::move(row); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(chunk_feats[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][cached_feature_.size() + i] = std::move(row); - } - - // 2. Encoder chunk forward -#ifdef USE_GPU - feats = feats.to(at::kCUDA); - att_cache_ = att_cache_.to(at::kCUDA); - cnn_cache_ = cnn_cache_.to(at::kCUDA); -#endif - int required_cache_size = chunk_size_ * num_left_chunks_; - torch::NoGradGuard no_grad; - std::vector inputs = {feats, offset_, required_cache_size, - att_cache_, cnn_cache_}; - - // Refer interfaces in wenet/transformer/asr_model.py - auto outputs = - model_->get_method("forward_encoder_chunk")(inputs).toTuple()->elements(); - CHECK_EQ(outputs.size(), 3); -#ifdef USE_GPU - torch::Tensor chunk_out = outputs[0].toTensor().to(at::kCPU); - att_cache_ = outputs[1].toTensor().to(at::kCPU); - cnn_cache_ = outputs[2].toTensor().to(at::kCPU); -#else - torch::Tensor chunk_out = outputs[0].toTensor(); - att_cache_ = outputs[1].toTensor(); - cnn_cache_ = outputs[2].toTensor(); -#endif - offset_ += chunk_out.size(1); - - // The first dimension of returned value is for batchsize, which is 1 -#ifdef USE_GPU - chunk_out = chunk_out.to(at::kCUDA); - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor(); - ctc_log_probs = ctc_log_probs.to(at::kCPU)[0]; - encoder_outs_.push_back(std::move(chunk_out.to(at::kCPU))); -#else - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor()[0]; - encoder_outs_.push_back(std::move(chunk_out)); -#endif - - // Copy to output - int num_outputs = ctc_log_probs.size(0); - int output_dim = ctc_log_probs.size(1); - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), ctc_log_probs[i].data_ptr(), - sizeof(float) * output_dim); - } -} - -float TorchAsrModel::ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, - int eos) { - float score = 0.0f; - auto accessor = prob.accessor(); - for (size_t j = 0; j < hyp.size(); ++j) { - score += accessor[j][hyp[j]]; - } - score += accessor[hyp.size()][eos]; - return score; -} - -void TorchAsrModel::AttentionRescoring( - const std::vector>& hyps, float reverse_weight, - std::vector* rescoring_score) { - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - torch::NoGradGuard no_grad; - // Step 1: Prepare input for libtorch - torch::Tensor hyps_length = torch::zeros({num_hyps}, torch::kLong); - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_length[i] = static_cast(length); - } - torch::Tensor hyps_tensor = - torch::zeros({num_hyps, max_hyps_len}, torch::kLong); - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_tensor[i][0] = sos_; - for (size_t j = 0; j < 
hyp.size(); ++j) { - hyps_tensor[i][j + 1] = hyp[j]; - } - } - - // Step 2: Forward attention decoder by hyps and corresponding encoder_outs_ - torch::Tensor encoder_out = torch::cat(encoder_outs_, 1); -#ifdef USE_GPU - hyps_tensor = hyps_tensor.to(at::kCUDA); - hyps_length = hyps_length.to(at::kCUDA); - encoder_out = encoder_out.to(at::kCUDA); -#endif - auto outputs = model_ - ->run_method("forward_attention_decoder", hyps_tensor, - hyps_length, encoder_out, reverse_weight) - .toTuple() - ->elements(); -#ifdef USE_GPU - auto probs = outputs[0].toTensor().to(at::kCPU); - auto r_probs = outputs[1].toTensor().to(at::kCPU); -#else - auto probs = outputs[0].toTensor(); - auto r_probs = outputs[1].toTensor(); -#endif - CHECK_EQ(probs.size(0), num_hyps); - CHECK_EQ(probs.size(1), max_hyps_len); - - // Step 3: Compute rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left-to-right decoder score - score = ComputeAttentionScore(probs[i], hyp, eos_); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - // right-to-left score - CHECK_EQ(r_probs.size(0), num_hyps); - CHECK_EQ(r_probs.size(1), max_hyps_len); - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore(r_probs[i], r_hyp, eos_); - } - - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/torch_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/torch_asr_model.h deleted file mode 100644 index a3cebe08798f1cad60ca4cd73c7b2488173b6114..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/decoder/torch_asr_model.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_TORCH_ASR_MODEL_H_ -#define DECODER_TORCH_ASR_MODEL_H_ - -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -#include "decoder/asr_model.h" -#include "utils/utils.h" - -namespace wenet { - -class TorchAsrModel : public AsrModel { - public: -#ifndef IOS - static void InitEngineThreads(int num_threads = 1); -#endif - - public: - using TorchModule = torch::jit::script::Module; - TorchAsrModel() = default; - TorchAsrModel(const TorchAsrModel& other); - void Read(const std::string& model_path); - std::shared_ptr torch_model() const { return model_; } - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, int eos); - - private: - std::shared_ptr model_ = nullptr; - std::vector encoder_outs_; - // transformer/conformer attention cache - torch::Tensor att_cache_ = torch::zeros({0, 0, 0, 0}); - // conformer-only conv_module cache - torch::Tensor cnn_cache_ = torch::zeros({0, 0, 0, 0}); -}; - -} // namespace wenet - -#endif // DECODER_TORCH_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/CMakeLists.txt deleted file mode 100644 index 78872257e43bb9a6ffcedaae977bf0173817ae50..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library(frontend STATIC - feature_pipeline.cc - fft.cc -) -target_link_libraries(frontend PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/fbank.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/fbank.h deleted file mode 100644 index 5a650dc035b8e244388cc1f2e0b9512654de7fda..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/fbank.h +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef FRONTEND_FBANK_H_ -#define FRONTEND_FBANK_H_ - -#include -#include -#include -#include -#include - -#include "frontend/fft.h" -#include "utils/log.h" - -namespace wenet { - -// This code is based on kaldi Fbank implementation, please see -// https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.cc -class Fbank { - public: - Fbank(int num_bins, int sample_rate, int frame_length, int frame_shift) - : num_bins_(num_bins), - sample_rate_(sample_rate), - frame_length_(frame_length), - frame_shift_(frame_shift), - use_log_(true), - remove_dc_offset_(true), - generator_(0), - distribution_(0, 1.0), - dither_(0.0) { - fft_points_ = UpperPowerOfTwo(frame_length_); - // generate bit reversal table and trigonometric function table - const int fft_points_4 = fft_points_ / 4; - bitrev_.resize(fft_points_); - sintbl_.resize(fft_points_ + fft_points_4); - make_sintbl(fft_points_, sintbl_.data()); - make_bitrev(fft_points_, bitrev_.data()); - - int num_fft_bins = fft_points_ / 2; - float fft_bin_width = static_cast(sample_rate_) / fft_points_; - int low_freq = 20, high_freq = sample_rate_ / 2; - float mel_low_freq = MelScale(low_freq); - float mel_high_freq = MelScale(high_freq); - float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1); - bins_.resize(num_bins_); - center_freqs_.resize(num_bins_); - for (int bin = 0; bin < num_bins; ++bin) { - float left_mel = mel_low_freq + bin * mel_freq_delta, - center_mel = mel_low_freq + (bin + 1) * mel_freq_delta, - right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; - center_freqs_[bin] = InverseMelScale(center_mel); - std::vector this_bin(num_fft_bins); - int first_index = -1, last_index = -1; - for (int i = 0; i < num_fft_bins; ++i) { - float freq = (fft_bin_width * i); // Center frequency of this fft - // bin. 
- float mel = MelScale(freq); - if (mel > left_mel && mel < right_mel) { - float weight; - if (mel <= center_mel) - weight = (mel - left_mel) / (center_mel - left_mel); - else - weight = (right_mel - mel) / (right_mel - center_mel); - this_bin[i] = weight; - if (first_index == -1) first_index = i; - last_index = i; - } - } - CHECK(first_index != -1 && last_index >= first_index); - bins_[bin].first = first_index; - int size = last_index + 1 - first_index; - bins_[bin].second.resize(size); - for (int i = 0; i < size; ++i) { - bins_[bin].second[i] = this_bin[first_index + i]; - } - } - - // povey window - povey_window_.resize(frame_length_); - double a = M_2PI / (frame_length - 1); - for (int i = 0; i < frame_length; ++i) { - povey_window_[i] = pow(0.5 - 0.5 * cos(a * i), 0.85); - } - } - - void set_use_log(bool use_log) { use_log_ = use_log; } - - void set_remove_dc_offset(bool remove_dc_offset) { - remove_dc_offset_ = remove_dc_offset; - } - - void set_dither(float dither) { dither_ = dither; } - - int num_bins() const { return num_bins_; } - - static inline float InverseMelScale(float mel_freq) { - return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f); - } - - static inline float MelScale(float freq) { - return 1127.0f * logf(1.0f + freq / 700.0f); - } - - static int UpperPowerOfTwo(int n) { - return static_cast(pow(2, ceil(log(n) / log(2)))); - } - - // pre emphasis - void PreEmphasis(float coeff, std::vector* data) const { - if (coeff == 0.0) return; - for (int i = data->size() - 1; i > 0; i--) - (*data)[i] -= coeff * (*data)[i - 1]; - (*data)[0] -= coeff * (*data)[0]; - } - - // Apply povey window on data in place - void Povey(std::vector* data) const { - CHECK_GE(data->size(), povey_window_.size()); - for (size_t i = 0; i < povey_window_.size(); ++i) { - (*data)[i] *= povey_window_[i]; - } - } - - // Compute fbank feat, return num frames - int Compute(const std::vector& wave, - std::vector>* feat) { - int num_samples = wave.size(); - if (num_samples < frame_length_) return 0; - int num_frames = 1 + ((num_samples - frame_length_) / frame_shift_); - feat->resize(num_frames); - std::vector fft_real(fft_points_, 0), fft_img(fft_points_, 0); - std::vector power(fft_points_ / 2); - for (int i = 0; i < num_frames; ++i) { - std::vector data(wave.data() + i * frame_shift_, - wave.data() + i * frame_shift_ + frame_length_); - // optional add noise - if (dither_ != 0.0) { - for (size_t j = 0; j < data.size(); ++j) - data[j] += dither_ * distribution_(generator_); - } - // optinal remove dc offset - if (remove_dc_offset_) { - float mean = 0.0; - for (size_t j = 0; j < data.size(); ++j) mean += data[j]; - mean /= data.size(); - for (size_t j = 0; j < data.size(); ++j) data[j] -= mean; - } - - PreEmphasis(0.97, &data); - Povey(&data); - // copy data to fft_real - memset(fft_img.data(), 0, sizeof(float) * fft_points_); - memset(fft_real.data() + frame_length_, 0, - sizeof(float) * (fft_points_ - frame_length_)); - memcpy(fft_real.data(), data.data(), sizeof(float) * frame_length_); - fft(bitrev_.data(), sintbl_.data(), fft_real.data(), fft_img.data(), - fft_points_); - // power - for (int j = 0; j < fft_points_ / 2; ++j) { - power[j] = fft_real[j] * fft_real[j] + fft_img[j] * fft_img[j]; - } - - (*feat)[i].resize(num_bins_); - // cepstral coefficients, triangle filter array - for (int j = 0; j < num_bins_; ++j) { - float mel_energy = 0.0; - int s = bins_[j].first; - for (size_t k = 0; k < bins_[j].second.size(); ++k) { - mel_energy += bins_[j].second[k] * power[s + k]; - } - // optional use log - if 
(use_log_) { - if (mel_energy < std::numeric_limits::epsilon()) - mel_energy = std::numeric_limits::epsilon(); - mel_energy = logf(mel_energy); - } - - (*feat)[i][j] = mel_energy; - } - } - return num_frames; - } - - private: - int num_bins_; - int sample_rate_; - int frame_length_, frame_shift_; - int fft_points_; - bool use_log_; - bool remove_dc_offset_; - std::vector center_freqs_; - std::vector>> bins_; - std::vector povey_window_; - std::default_random_engine generator_; - std::normal_distribution distribution_; - float dither_; - - // bit reversal table - std::vector bitrev_; - // trigonometric function table - std::vector sintbl_; -}; - -} // namespace wenet - -#endif // FRONTEND_FBANK_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/feature_pipeline.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/feature_pipeline.cc deleted file mode 100644 index ab450b15cd35ebd8101a3bcdec4f963a73bed10c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/feature_pipeline.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "frontend/feature_pipeline.h" - -#include -#include - -namespace wenet { - -FeaturePipeline::FeaturePipeline(const FeaturePipelineConfig& config) - : config_(config), - feature_dim_(config.num_bins), - fbank_(config.num_bins, config.sample_rate, config.frame_length, - config.frame_shift), - num_frames_(0), - input_finished_(false) {} - -void FeaturePipeline::AcceptWaveform(const float* pcm, const int size) { - std::vector> feats; - std::vector waves; - waves.insert(waves.end(), remained_wav_.begin(), remained_wav_.end()); - waves.insert(waves.end(), pcm, pcm + size); - int num_frames = fbank_.Compute(waves, &feats); - feature_queue_.Push(std::move(feats)); - num_frames_ += num_frames; - - int left_samples = waves.size() - config_.frame_shift * num_frames; - remained_wav_.resize(left_samples); - std::copy(waves.begin() + config_.frame_shift * num_frames, waves.end(), - remained_wav_.begin()); - // We are still adding wave, notify input is not finished - finish_condition_.notify_one(); -} - -void FeaturePipeline::AcceptWaveform(const int16_t* pcm, const int size) { - auto* float_pcm = new float[size]; - for (size_t i = 0; i < size; i++) { - float_pcm[i] = static_cast(pcm[i]); - } - this->AcceptWaveform(float_pcm, size); - delete[] float_pcm; -} - -void FeaturePipeline::set_input_finished() { - CHECK(!input_finished_); - { - std::lock_guard lock(mutex_); - input_finished_ = true; - } - finish_condition_.notify_one(); -} - -bool FeaturePipeline::ReadOne(std::vector* feat) { - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or 
set_input_finished() - finish_condition_.wait(lock); - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - return false; - } - } -} - -bool FeaturePipeline::Read(int num_frames, - std::vector>* feats) { - feats->clear(); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or set_input_finished() - finish_condition_.wait(lock); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - *feats = std::move(feature_queue_.Pop(feature_queue_.Size())); - return false; - } - } -} - -void FeaturePipeline::Reset() { - input_finished_ = false; - num_frames_ = 0; - remained_wav_.clear(); - feature_queue_.Clear(); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/feature_pipeline.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/feature_pipeline.h deleted file mode 100644 index 9918d6b573255795e0e665f0a9598c44be625c19..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/feature_pipeline.h +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef FRONTEND_FEATURE_PIPELINE_H_ -#define FRONTEND_FEATURE_PIPELINE_H_ - -#include -#include -#include -#include - -#include "frontend/fbank.h" -#include "utils/blocking_queue.h" -#include "utils/log.h" - -namespace wenet { - -struct FeaturePipelineConfig { - int num_bins; - int sample_rate; - int frame_length; - int frame_shift; - FeaturePipelineConfig(int num_bins, int sample_rate) - : num_bins(num_bins), // 80 dim fbank - sample_rate(sample_rate) { // 16k sample rate - frame_length = sample_rate / 1000 * 25; // frame length 25ms - frame_shift = sample_rate / 1000 * 10; // frame shift 10ms - } - - void Info() const { - LOG(INFO) << "feature pipeline config" - << " num_bins " << num_bins << " frame_length " << frame_length - << " frame_shift " << frame_shift; - } -}; - -// Typically, FeaturePipeline is used in two threads: one thread A calls -// AcceptWaveform() to add raw wav data and set_input_finished() to notice -// the end of input wav, another thread B (decoder thread) calls Read() to -// consume features.So a BlockingQueue is used to make this class thread safe. 
- -// The Read() is designed as a blocking method when there is no feature -// in feature_queue_ and the input is not finished. - -// See bin/decoder_main.cc, websocket/websocket_server.cc and -// decoder/torch_asr_decoder.cc for usage - -class FeaturePipeline { - public: - explicit FeaturePipeline(const FeaturePipelineConfig& config); - - // The feature extraction is done in AcceptWaveform(). - void AcceptWaveform(const float* pcm, const int size); - void AcceptWaveform(const int16_t* pcm, const int size); - - // Current extracted frames number. - int num_frames() const { return num_frames_; } - int feature_dim() const { return feature_dim_; } - const FeaturePipelineConfig& config() const { return config_; } - - // The caller should call this method when speech input is end. - // Never call AcceptWaveform() after calling set_input_finished() ! - void set_input_finished(); - bool input_finished() const { return input_finished_; } - - // Return False if input is finished and no feature could be read. - // Return True if a feature is read. - // This function is a blocking method. It will block the thread when - // there is no feature in feature_queue_ and the input is not finished. - bool ReadOne(std::vector* feat); - - // Read #num_frames frame features. - // Return False if less than #num_frames features are read and the - // input is finished. - // Return True if #num_frames features are read. - // This function is a blocking method when there is no feature - // in feature_queue_ and the input is not finished. - bool Read(int num_frames, std::vector>* feats); - - void Reset(); - bool IsLastFrame(int frame) const { - return input_finished_ && (frame == num_frames_ - 1); - } - - int NumQueuedFrames() const { return feature_queue_.Size(); } - - private: - const FeaturePipelineConfig& config_; - int feature_dim_; - Fbank fbank_; - - BlockingQueue> feature_queue_; - int num_frames_; - bool input_finished_; - - // The feature extraction is done in AcceptWaveform(). - // This waveform sample points are consumed by frame size. - // The residual waveform sample points after framing are - // kept to be used in next AcceptWaveform() calling. - std::vector remained_wav_; - - // Used to block the Read when there is no feature in feature_queue_ - // and the input is not finished. - mutable std::mutex mutex_; - std::condition_variable finish_condition_; -}; - -} // namespace wenet - -#endif // FRONTEND_FEATURE_PIPELINE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/fft.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/fft.cc deleted file mode 100644 index 9e05f854e79ea733d0411045385e924c2670b7f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/fft.cc +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include -#include -#include - -#include "frontend/fft.h" - -namespace wenet { - -void make_sintbl(int n, float* sintbl) { - int i, n2, n4, n8; - float c, s, dc, ds, t; - - n2 = n / 2; - n4 = n / 4; - n8 = n / 8; - t = sin(M_PI / n); - dc = 2 * t * t; - ds = sqrt(dc * (2 - dc)); - t = 2 * dc; - c = sintbl[n4] = 1; - s = sintbl[0] = 0; - for (i = 1; i < n8; ++i) { - c -= dc; - dc += t * c; - s += ds; - ds -= t * s; - sintbl[i] = s; - sintbl[n4 - i] = c; - } - if (n8 != 0) sintbl[n8] = sqrt(0.5); - for (i = 0; i < n4; ++i) sintbl[n2 - i] = sintbl[i]; - for (i = 0; i < n2 + n4; ++i) sintbl[i + n2] = -sintbl[i]; -} - -void make_bitrev(int n, int* bitrev) { - int i, j, k, n2; - - n2 = n / 2; - i = j = 0; - for (;;) { - bitrev[i] = j; - if (++i >= n) break; - k = n2; - while (k <= j) { - j -= k; - k /= 2; - } - j += k; - } -} - -// bitrev: bit reversal table -// sintbl: trigonometric function table -// x:real part -// y:image part -// n: fft length -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n) { - int i, j, k, ik, h, d, k2, n4, inverse; - float t, s, c, dx, dy; - - /* preparation */ - if (n < 0) { - n = -n; - inverse = 1; /* inverse transform */ - } else { - inverse = 0; - } - n4 = n / 4; - if (n == 0) { - return 0; - } - - /* bit reversal */ - for (i = 0; i < n; ++i) { - j = bitrev[i]; - if (i < j) { - t = x[i]; - x[i] = x[j]; - x[j] = t; - t = y[i]; - y[i] = y[j]; - y[j] = t; - } - } - - /* transformation */ - for (k = 1; k < n; k = k2) { - h = 0; - k2 = k + k; - d = n / k2; - for (j = 0; j < k; ++j) { - c = sintbl[h + n4]; - if (inverse) - s = -sintbl[h]; - else - s = sintbl[h]; - for (i = j; i < n; i += k2) { - ik = i + k; - dx = s * y[ik] + c * x[ik]; - dy = c * y[ik] - s * x[ik]; - x[ik] = x[i] - dx; - x[i] += dx; - y[ik] = y[i] - dy; - y[i] += dy; - } - h += d; - } - } - if (inverse) { - /* divide by n in case of the inverse transformation */ - for (i = 0; i < n; ++i) { - x[i] /= n; - y[i] /= n; - } - } - return 0; /* finished successfully */ -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/fft.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/fft.h deleted file mode 100644 index 6b92e406c44b4768eaee6e734f55bb39cd9af28b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/fft.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#ifndef FRONTEND_FFT_H_ -#define FRONTEND_FFT_H_ - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -namespace wenet { - -// Fast Fourier Transform - -void make_sintbl(int n, float* sintbl); - -void make_bitrev(int n, int* bitrev); - -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n); - -} // namespace wenet - -#endif // FRONTEND_FFT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/wav.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/wav.h deleted file mode 100644 index 688a049a940ebbdc83f24e59134fff22b7b09bfd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/frontend/wav.h +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright (c) 2016 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef FRONTEND_WAV_H_ -#define FRONTEND_WAV_H_ - -#include -#include -#include -#include -#include - -#include - -#include "utils/log.h" - -namespace wenet { - -struct WavHeader { - char riff[4] = {'R', 'I', 'F', 'F'}; - unsigned int size = 0; - char wav[4] = {'W', 'A', 'V', 'E'}; - char fmt[4] = {'f', 'm', 't', ' '}; - unsigned int fmt_size = 16; - uint16_t format = 1; - uint16_t channels = 0; - unsigned int sample_rate = 0; - unsigned int bytes_per_second = 0; - uint16_t block_size = 0; - uint16_t bit = 0; - char data[4] = {'d', 'a', 't', 'a'}; - unsigned int data_size = 0; - - WavHeader() {} - - WavHeader(int num_samples, int num_channel, int sample_rate, - int bits_per_sample) { - data_size = num_samples * num_channel * (bits_per_sample / 8); - size = sizeof(WavHeader) - 8 + data_size; - channels = num_channel; - this->sample_rate = sample_rate; - bytes_per_second = sample_rate * num_channel * (bits_per_sample / 8); - block_size = num_channel * (bits_per_sample / 8); - bit = bits_per_sample; - } -}; - -class WavReader { - public: - WavReader() : data_(nullptr) {} - explicit WavReader(const std::string& filename) { Open(filename); } - - bool Open(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "rb"); - if (NULL == fp) { - LOG(WARNING) << "Error in read " << filename; - return false; - } - - WavHeader header; - fread(&header, 1, sizeof(header), fp); - if (header.fmt_size < 16) { - fprintf(stderr, - "WaveData: expect PCM format data " - "to have fmt chunk of at least size 16.\n"); - return false; - } else if (header.fmt_size > 16) { - int offset = 44 - 8 + header.fmt_size - 16; - fseek(fp, offset, SEEK_SET); - fread(header.data, 8, sizeof(char), fp); - } - // check "RIFF" "WAVE" "fmt " "data" - - // Skip any sub-chunks between "fmt" and "data". Usually there will - // be a single "fact" sub chunk, but on Windows there can also be a - // "list" sub chunk. - while (0 != strncmp(header.data, "data", 4)) { - // We will just ignore the data in these chunks. 
- fseek(fp, header.data_size, SEEK_CUR); - // read next sub chunk - fread(header.data, 8, sizeof(char), fp); - } - - num_channel_ = header.channels; - sample_rate_ = header.sample_rate; - bits_per_sample_ = header.bit; - int num_data = header.data_size / (bits_per_sample_ / 8); - data_ = new float[num_data]; - num_samples_ = num_data / num_channel_; - - for (int i = 0; i < num_data; ++i) { - switch (bits_per_sample_) { - case 8: { - char sample; - fread(&sample, 1, sizeof(char), fp); - data_[i] = static_cast(sample); - break; - } - case 16: { - int16_t sample; - fread(&sample, 1, sizeof(int16_t), fp); - data_[i] = static_cast(sample); - break; - } - case 32: { - int sample; - fread(&sample, 1, sizeof(int), fp); - data_[i] = static_cast(sample); - break; - } - default: - fprintf(stderr, "unsupported quantization bits"); - exit(1); - } - } - fclose(fp); - return true; - } - - int num_channel() const { return num_channel_; } - int sample_rate() const { return sample_rate_; } - int bits_per_sample() const { return bits_per_sample_; } - int num_samples() const { return num_samples_; } - - ~WavReader() { - delete[] data_; - } - - const float* data() const { return data_; } - - private: - int num_channel_; - int sample_rate_; - int bits_per_sample_; - int num_samples_; // sample points per channel - float* data_; -}; - -class WavWriter { - public: - WavWriter(const float* data, int num_samples, int num_channel, - int sample_rate, int bits_per_sample) - : data_(data), - num_samples_(num_samples), - num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample) {} - - void Write(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "wb"); - WavHeader header(num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fwrite(&header, 1, sizeof(header), fp); - - for (int i = 0; i < num_samples_; ++i) { - for (int j = 0; j < num_channel_; ++j) { - switch (bits_per_sample_) { - case 8: { - char sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 16: { - int16_t sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 32: { - int sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - } - } - } - fclose(fp); - } - - private: - const float* data_; - int num_samples_; // total float points in data_ - int num_channel_; - int sample_rate_; - int bits_per_sample_; -}; - -class StreamWavWriter { - public: - StreamWavWriter(int num_channel, int sample_rate, int bits_per_sample) - : num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample), - total_num_samples_(0) {} - - StreamWavWriter(const std::string& filename, int num_channel, - int sample_rate, int bits_per_sample) - : StreamWavWriter(num_channel, sample_rate, bits_per_sample) { - Open(filename); - } - - void Open(const std::string& filename) { - fp_ = fopen(filename.c_str(), "wb"); - fseek(fp_, sizeof(WavHeader), SEEK_SET); - } - - void Write(const int16_t* sample_data, size_t num_samples) { - fwrite(sample_data, sizeof(int16_t), num_samples, fp_); - total_num_samples_ += num_samples; - } - - void Close() { - WavHeader header(total_num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fseek(fp_, 0L, SEEK_SET); - fwrite(&header, 1, sizeof(header), fp_); - fclose(fp_); - } - - private: - FILE* fp_; - int num_channel_; - int sample_rate_; - int bits_per_sample_; - size_t total_num_samples_; -}; - -} 
// namespace wenet - -#endif // FRONTEND_WAV_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/CMakeLists.txt deleted file mode 100644 index 2a152dd0d38cdc17d2758d7dbd542cd974d5f0c6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -# compile wenet.proto -set(PROTO_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -add_custom_command( - OUTPUT ${PROTO_DIR}/wenet.pb.cc - ${PROTO_DIR}/wenet.pb.h - ${PROTO_DIR}/wenet.grpc.pb.cc - ${PROTO_DIR}/wenet.grpc.pb.h - COMMAND ${protobuf_BINARY_DIR}/protoc - ARGS --grpc_out "${PROTO_DIR}" - --cpp_out "${PROTO_DIR}" - -I "${PROTO_DIR}" - --plugin=protoc-gen-grpc=${grpc_BINARY_DIR}/grpc_cpp_plugin - wenet.proto) - -# grpc_server/client -link_directories(${protobuf_BINARY_DIR}/lib) -add_library(wenet_grpc STATIC - grpc_client.cc - grpc_server.cc - wenet.pb.cc - wenet.grpc.pb.cc -) -target_link_libraries(wenet_grpc PUBLIC grpc++ grpc++_reflection decoder) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_client.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_client.cc deleted file mode 100644 index 7a2e3f6f384980b6566468213d3eead43a404070..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_client.cc +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "grpc/grpc_client.h" - -#include "utils/log.h" - -namespace wenet { -using grpc::Channel; -using grpc::ClientContext; -using grpc::ClientReaderWriter; -using grpc::Status; -using wenet::Request; -using wenet::Response; - -GrpcClient::GrpcClient(const std::string& host, int port, int nbest, - bool continuous_decoding) - : host_(host), - port_(port), - nbest_(nbest), - continuous_decoding_(continuous_decoding) { - Connect(); - t_.reset(new std::thread(&GrpcClient::ReadLoopFunc, this)); -} - -void GrpcClient::Connect() { - channel_ = grpc::CreateChannel(host_ + ":" + std::to_string(port_), - grpc::InsecureChannelCredentials()); - stub_ = ASR::NewStub(channel_); - context_ = std::make_shared(); - stream_ = stub_->Recognize(context_.get()); - request_ = std::make_shared(); - response_ = std::make_shared(); - request_->mutable_decode_config()->set_nbest_config(nbest_); - request_->mutable_decode_config()->set_continuous_decoding_config( - continuous_decoding_); - stream_->Write(*request_); -} - -void GrpcClient::SendBinaryData(const void* data, size_t size) { - const int16_t* pdata = reinterpret_cast(data); - request_->set_audio_data(pdata, size); - stream_->Write(*request_); -} - -void GrpcClient::ReadLoopFunc() { - try { - while (stream_->Read(response_.get())) { - for (int i = 0; i < response_->nbest_size(); i++) { - // you can also traverse wordpieces like demonstrated above - LOG(INFO) << i + 1 << "best " << response_->nbest(i).sentence(); - } - if (response_->status() != Response_Status_ok) { - break; - } - if (response_->type() == Response_Type_speech_end) { - done_ = true; - break; - } - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void GrpcClient::Join() { - stream_->WritesDone(); - t_->join(); - Status status = stream_->Finish(); - if (!status.ok()) { - LOG(INFO) << "Recognize rpc failed."; - } -} -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_client.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_client.h deleted file mode 100644 index 36e36a0f5f5ec5bbb818009fe931e863eaa7fd60..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_client.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef GRPC_GRPC_CLIENT_H_ -#define GRPC_GRPC_CLIENT_H_ - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "grpc/wenet.grpc.pb.h" -#include "utils/utils.h" - -namespace wenet { - -using grpc::Channel; -using grpc::ClientContext; -using grpc::ClientReaderWriter; -using wenet::ASR; -using wenet::Request; -using wenet::Response; - -class GrpcClient { - public: - GrpcClient(const std::string& host, int port, int nbest, - bool continuous_decoding); - - void SendBinaryData(const void* data, size_t size); - void ReadLoopFunc(); - void Join(); - bool done() const { return done_; } - - private: - void Connect(); - std::string host_; - int port_; - std::shared_ptr channel_{nullptr}; - std::unique_ptr stub_{nullptr}; - std::shared_ptr context_{nullptr}; - std::unique_ptr> stream_{nullptr}; - std::shared_ptr request_{nullptr}; - std::shared_ptr response_{nullptr}; - int nbest_ = 1; - bool continuous_decoding_ = false; - bool done_ = false; - std::unique_ptr t_{nullptr}; - - WENET_DISALLOW_COPY_AND_ASSIGN(GrpcClient); -}; - -} // namespace wenet - -#endif // GRPC_GRPC_CLIENT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_server.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_server.cc deleted file mode 100644 index 26268bc02a2f2ea56bb24a1eb379a565f693429a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_server.cc +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "grpc/grpc_server.h" - -namespace wenet { - -using grpc::ServerReaderWriter; -using wenet::Request; -using wenet::Response; - -GrpcConnectionHandler::GrpcConnectionHandler( - ServerReaderWriter* stream, - std::shared_ptr request, std::shared_ptr response, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : stream_(std::move(stream)), - request_(std::move(request)), - response_(std::move(response)), - feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - -void GrpcConnectionHandler::OnSpeechStart() { - LOG(INFO) << "Received speech start signal, start reading speech"; - got_start_tag_ = true; - response_->set_status(Response::ok); - response_->set_type(Response::server_ready); - stream_->Write(*response_); - feature_pipeline_ = std::make_shared(*feature_config_); - decoder_ = std::make_shared(feature_pipeline_, decode_resource_, - *decode_config_); - // Start decoder thread - decode_thread_ = std::make_shared( - &GrpcConnectionHandler::DecodeThreadFunc, this); -} - -void GrpcConnectionHandler::OnSpeechEnd() { - LOG(INFO) << "Received speech end signal"; - CHECK(feature_pipeline_ != nullptr); - feature_pipeline_->set_input_finished(); - got_end_tag_ = true; -} - -void GrpcConnectionHandler::OnPartialResult() { - LOG(INFO) << "Partial result"; - response_->set_status(Response::ok); - response_->set_type(Response::partial_result); - stream_->Write(*response_); -} - -void GrpcConnectionHandler::OnFinalResult() { - LOG(INFO) << "Final result"; - response_->set_status(Response::ok); - response_->set_type(Response::final_result); - stream_->Write(*response_); -} - -void GrpcConnectionHandler::OnFinish() { - // Send finish tag - response_->set_status(Response::ok); - response_->set_type(Response::speech_end); - stream_->Write(*response_); -} - -void GrpcConnectionHandler::OnSpeechData() { - // Read binary PCM data - const int16_t* pcm_data = - reinterpret_cast(request_->audio_data().c_str()); - int num_samples = request_->audio_data().length() / sizeof(int16_t); - VLOG(2) << "Received " << num_samples << " samples"; - CHECK(feature_pipeline_ != nullptr); - CHECK(decoder_ != nullptr); - feature_pipeline_->AcceptWaveform(pcm_data, num_samples); -} - -void GrpcConnectionHandler::SerializeResult(bool finish) { - for (const DecodeResult& path : decoder_->result()) { - Response_OneBest* one_best_ = response_->add_nbest(); - one_best_->set_sentence(path.sentence); - if (finish) { - for (const WordPiece& word_piece : path.word_pieces) { - Response_OnePiece* one_piece_ = one_best_->add_wordpieces(); - one_piece_->set_word(word_piece.word); - one_piece_->set_start(word_piece.start); - one_piece_->set_end(word_piece.end); - } - } - if (response_->nbest_size() == nbest_) { - break; - } - } - return; -} - -void GrpcConnectionHandler::DecodeThreadFunc() { - while (true) { - DecodeState state = decoder_->Decode(); - response_->clear_status(); - response_->clear_type(); - response_->clear_nbest(); - if (state == DecodeState::kEndFeats) { - decoder_->Rescoring(); - SerializeResult(true); - OnFinalResult(); - OnFinish(); - stop_recognition_ = true; - break; - } else if (state == DecodeState::kEndpoint) { - decoder_->Rescoring(); - SerializeResult(true); - OnFinalResult(); - // If it's not continuous decoding, continue to do next recognition - // otherwise stop the recognition - if (continuous_decoding_) { - decoder_->ResetContinuousDecoding(); - } else { - 
OnFinish(); - stop_recognition_ = true; - break; - } - } else { - if (decoder_->DecodedSomething()) { - SerializeResult(false); - OnPartialResult(); - } - } - } -} - -void GrpcConnectionHandler::operator()() { - try { - while (stream_->Read(request_.get())) { - if (!got_start_tag_) { - nbest_ = request_->decode_config().nbest_config(); - continuous_decoding_ = - request_->decode_config().continuous_decoding_config(); - OnSpeechStart(); - } else { - OnSpeechData(); - } - } - OnSpeechEnd(); - LOG(INFO) << "Read all pcm data, wait for decoding thread"; - if (decode_thread_ != nullptr) { - decode_thread_->join(); - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -Status GrpcServer::Recognize(ServerContext* context, - ServerReaderWriter* stream) { - LOG(INFO) << "Get Recognize request" << std::endl; - auto request = std::make_shared(); - auto response = std::make_shared(); - GrpcConnectionHandler handler(stream, request, response, feature_config_, - decode_config_, decode_resource_); - std::thread t(std::move(handler)); - t.join(); - return Status::OK; -} -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_server.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_server.h deleted file mode 100644 index 3ab47ce5b15897c2a596d8ef27f2e7c4f8d26a3f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/grpc_server.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef GRPC_GRPC_SERVER_H_ -#define GRPC_GRPC_SERVER_H_ - -#include -#include -#include -#include -#include -#include - -#include "decoder/asr_decoder.h" -#include "frontend/feature_pipeline.h" -#include "utils/log.h" - -#include "grpc/wenet.grpc.pb.h" - -namespace wenet { - -using grpc::ServerContext; -using grpc::ServerReaderWriter; -using grpc::Status; -using wenet::ASR; -using wenet::Request; -using wenet::Response; - -class GrpcConnectionHandler { - public: - GrpcConnectionHandler(ServerReaderWriter* stream, - std::shared_ptr request, - std::shared_ptr response, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource); - void operator()(); - - private: - void OnSpeechStart(); - void OnSpeechEnd(); - void OnFinish(); - void OnSpeechData(); - void OnPartialResult(); - void OnFinalResult(); - void DecodeThreadFunc(); - void SerializeResult(bool finish); - - bool continuous_decoding_ = false; - int nbest_ = 1; - ServerReaderWriter* stream_; - std::shared_ptr request_; - std::shared_ptr response_; - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - - bool got_start_tag_ = false; - bool got_end_tag_ = false; - // When endpoint is detected, stop recognition, and stop receiving data. 
- bool stop_recognition_ = false; - std::shared_ptr feature_pipeline_ = nullptr; - std::shared_ptr decoder_ = nullptr; - std::shared_ptr decode_thread_ = nullptr; -}; - -class GrpcServer final : public ASR::Service { - public: - GrpcServer(std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - Status Recognize(ServerContext* context, - ServerReaderWriter* reader) override; - - private: - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - DISALLOW_COPY_AND_ASSIGN(GrpcServer); -}; - -} // namespace wenet - -#endif // GRPC_GRPC_SERVER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/wenet.proto b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/wenet.proto deleted file mode 100644 index 4c3033c034c513611c9159ff9db42b225be2cc98..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/grpc/wenet.proto +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-syntax = "proto3"; - -option java_package = "ex.grpc"; -option objc_class_prefix = "wenet"; - -package wenet; - -service ASR { - rpc Recognize (stream Request) returns (stream Response) {} -} - -message Request { - - message DecodeConfig { - int32 nbest_config = 1; - bool continuous_decoding_config = 2; - } - - oneof RequestPayload { - DecodeConfig decode_config = 1; - bytes audio_data = 2; - } -} - -message Response { - - message OneBest { - string sentence = 1; - repeated OnePiece wordpieces = 2; - } - - message OnePiece { - string word = 1; - int32 start = 2; - int32 end = 3; - } - - enum Status { - ok = 0; - failed = 1; - } - - enum Type { - server_ready = 0; - partial_result = 1; - final_result = 2; - speech_end = 3; - } - - Status status = 1; - Type type = 2; - repeated OneBest nbest = 3; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/CMakeLists.txt deleted file mode 100644 index b072309e44b90dcee44ea31e9bcbc1741e73f151..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -cmake_minimum_required(VERSION 3.10 FATAL_ERROR) - -project(kaldi) - -# include_directories() is called in the root CMakeLists.txt - -add_library(kaldi-util - base/kaldi-error.cc - base/kaldi-math.cc - util/kaldi-io.cc - util/parse-options.cc - util/simple-io-funcs.cc - util/text-utils.cc -) -target_link_libraries(kaldi-util PUBLIC utils) - -add_library(kaldi-decoder - lat/determinize-lattice-pruned.cc - lat/lattice-functions.cc - decoder/lattice-faster-decoder.cc - decoder/lattice-faster-online-decoder.cc -) -target_link_libraries(kaldi-decoder PUBLIC kaldi-util) - -if(GRAPH_TOOLS) - # Arpa binary - add_executable(arpa2fst - lm/arpa-file-parser.cc - lm/arpa-lm-compiler.cc - lmbin/arpa2fst.cc - ) - target_link_libraries(arpa2fst PUBLIC kaldi-util) - - # FST tools binary - set(FST_BINS - fstaddselfloops - fstdeterminizestar - fstisstochastic - fstminimizeencoded - fsttablecompose - ) - - if(NOT MSVC) - # dl is for dynamic linking, otherwise there is a linking error on linux - link_libraries(dl) - endif() - foreach(name IN LISTS FST_BINS) - add_executable(${name} - fstbin/${name}.cc - fstext/kaldi-fst-io.cc - ) - target_link_libraries(${name} PUBLIC kaldi-util) - endforeach() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/README.md deleted file mode 100644 index 4eb9c9173b747686f00b658afc5e1e0dfdc17e68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/README.md +++ /dev/null @@ -1,21 +0,0 @@ -We use Kaldi decoder to implement TLG based language model integration, -so we copied related files to this directory. -The main changes are: - -1. To minimize the change, we use the same directories tree as Kaldi. - -2. We replace Kaldi log system with glog in the following way. - -``` c++ -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_INFO \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) -``` - -3. 
We lint all the files to satisfy the lint in WeNet. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/io-funcs-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/io-funcs-inl.h deleted file mode 100644 index 9397400833676b323492321183c989cec2f41c3f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/io-funcs-inl.h +++ /dev/null @@ -1,329 +0,0 @@ -// base/io-funcs-inl.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian; -// Johns Hopkins University (Author: Daniel Povey) -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_INL_H_ -#define KALDI_BASE_IO_FUNCS_INL_H_ 1 - -// Do not include this file directly. It is included by base/io-funcs.h - -#include -#include -#include - -namespace kaldi { - -// Template that covers integers. -template -void WriteBasicType(std::ostream &os, bool binary, T t) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char len_c = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(t)); - os.put(len_c); - os.write(reinterpret_cast(&t), sizeof(t)); - } else { - if (sizeof(t) == 1) - os << static_cast(t) << " "; - else - os << t << " "; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteBasicType."; - } -} - -// Template that covers integers. -template -inline void ReadBasicType(std::istream &is, bool binary, T *t) { - KALDI_PARANOID_ASSERT(t != NULL); - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - int len_c_in = is.get(); - if (len_c_in == -1) - KALDI_ERR << "ReadBasicType: encountered end of stream."; - char len_c = static_cast(len_c_in), - len_c_expected = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(*t)); - if (len_c != len_c_expected) { - KALDI_ERR << "ReadBasicType: did not get expected integer type, " - << static_cast(len_c) << " vs. " - << static_cast(len_c_expected) - << ". You can change this code to successfully" - << " read it later, if needed."; - // insert code here to read "wrong" type. Might have a switch statement. - } - is.read(reinterpret_cast(t), sizeof(*t)); - } else { - if (sizeof(*t) == 1) { - int16 i; - is >> i; - *t = i; - } else { - is >> *t; - } - } - if (is.fail()) { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << is.peek(); - } -} - -// Template that covers integers. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v) { - // Compile time assertion that this is not called with a wrong type. 
- KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. - os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz * 2); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector >::const_iterator iter = v.begin(), - end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(iter->first) << ',' - << static_cast(iter->second) << ' '; - else - os << iter->first << ',' << iter->second << ' '; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerPairVector."; - } -} - -// Template that covers integers. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerPairVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz * 2); - } - } else { - std::vector > tmp_v; // use temporary so v doesn't use - // extra memory due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. - int16 next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::make_pair((T)next_t1, (T)next_t2)); - } else { - T next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::pair(next_t1, next_t2)); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. - } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerPairVector: read failure at file position " - << is.tellg(); -} - -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. 
- os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(*iter) << " "; - else - os << *iter << " "; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerVector."; - } -} - -template -inline void ReadIntegerVector(std::istream &is, bool binary, - std::vector *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz); - } - } else { - std::vector tmp_v; // use temporary so v doesn't use extra memory - // due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. - int16 next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back((T)next_t); - } else { - T next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(next_t); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. - } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerVector: read failure at file position " - << is.tellg(); -} - -// Initialize an opened stream for writing by writing an optional binary -// header and modifying the floating-point precision. -inline void InitKaldiOutputStream(std::ostream &os, bool binary) { - // This does not throw exceptions (does not check for errors). - if (binary) { - os.put('\0'); - os.put('B'); - } - // Note, in non-binary mode we may at some point want to mess with - // the precision a bit. - // 7 is a bit more than the precision of float.. - if (os.precision() < 7) os.precision(7); -} - -/// Initialize an opened stream for reading by detecting the binary header and -// setting the "binary" value appropriately. -inline bool InitKaldiInputStream(std::istream &is, bool *binary) { - // Sets the 'binary' variable. - // Throws exception in the very unusual situation that stream - // starts with '\0' but not then 'B'. - - if (is.peek() == '\0') { // seems to be binary - is.get(); - if (is.peek() != 'B') { - return false; - } - is.get(); - *binary = true; - return true; - } else { - *binary = false; - return true; - } -} - -} // end namespace kaldi. 
- -#endif // KALDI_BASE_IO_FUNCS_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/io-funcs.cc deleted file mode 100644 index bd6c350780d1096ff8c452fd00864aa07a30ac65..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/io-funcs.cc +++ /dev/null @@ -1,215 +0,0 @@ -// base/io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" - -namespace kaldi { - -template <> -void WriteBasicType(std::ostream &os, bool binary, bool b) { - os << (b ? "T" : "F"); - if (!binary) os << " "; - if (os.fail()) KALDI_ERR << "Write failure in WriteBasicType"; -} - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b) { - KALDI_PARANOID_ASSERT(b != NULL); - if (!binary) is >> std::ws; // eat up whitespace. - char c = is.peek(); - if (c == 'T') { - *b = true; - is.get(); - } else if (c == 'F') { - *b = false; - is.get(); - } else { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << CharToString(c); - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, float f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f) { - KALDI_PARANOID_ASSERT(f != NULL); - if (binary) { - double d; - int c = is.peek(); - if (c == sizeof(*f)) { - is.get(); - is.read(reinterpret_cast(f), sizeof(*f)); - } else if (c == sizeof(d)) { - ReadBasicType(is, binary, &d); - *f = d; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *f; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at file position " - << is.tellg(); - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, double *d) { - KALDI_PARANOID_ASSERT(d != NULL); - if (binary) { - float f; - int c = is.peek(); - if (c == sizeof(*d)) { - is.get(); - is.read(reinterpret_cast(d), sizeof(*d)); - } else if (c == sizeof(f)) { - ReadBasicType(is, binary, &f); - *d = f; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *d; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at file position " - << is.tellg(); - } -} - -void 
CheckToken(const char *token) { - if (*token == '\0') KALDI_ERR << "Token is empty (not a valid token)"; - const char *orig_token = token; - while (*token != '\0') { - if (::isspace(*token)) - KALDI_ERR << "Token is not a valid token (contains space): '" - << orig_token << "'"; - token++; - } -} - -void WriteToken(std::ostream &os, bool binary, const char *token) { - // binary mode is ignored; - // we use space as termination character in either case. - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - os << token << " "; - if (os.fail()) { - KALDI_ERR << "Write failure in WriteToken."; - } -} - -int Peek(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // eat up whitespace. - return is.peek(); -} - -void WriteToken(std::ostream &os, bool binary, const std::string &token) { - WriteToken(os, binary, token.c_str()); -} - -void ReadToken(std::istream &is, bool binary, std::string *str) { - KALDI_ASSERT(str != NULL); - if (!binary) is >> std::ws; // consume whitespace. - is >> *str; - if (is.fail()) { - KALDI_ERR << "ReadToken, failed to read token at file position " - << is.tellg(); - } - if (!isspace(is.peek())) { - KALDI_ERR << "ReadToken, expected space after token, saw instead " - << CharToString(static_cast(is.peek())) - << ", at file position " << is.tellg(); - } - is.get(); // consume the space. -} - -int PeekToken(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // consume whitespace. - bool read_bracket; - if (static_cast(is.peek()) == '<') { - read_bracket = true; - is.get(); - } else { - read_bracket = false; - } - int ans = is.peek(); - if (read_bracket) { - if (!is.unget()) { - // Clear the bad bit. This code can be (and is in fact) reached, since the - // C++ standard does not guarantee that a call to unget() must succeed. - is.clear(); - } - } - return ans; -} - -void ExpectToken(std::istream &is, bool binary, const char *token) { - int pos_at_start = is.tellg(); - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - if (!binary) is >> std::ws; // consume whitespace. - std::string str; - is >> str; - is.get(); // consume the space. - if (is.fail()) { - KALDI_ERR << "Failed to read token [started at file position " - << pos_at_start << "], expected " << token; - } - // The second half of the '&&' expression below is so that if we're expecting - // "", we will accept "Foo>" instead. This is so that the model-reading - // code will tolerate errors in PeekToken where is.unget() failed; search for - // is.clear() in PeekToken() for an explanation. 
- if (strcmp(str.c_str(), token) != 0 && - !(token[0] == '<' && strcmp(str.c_str(), token + 1) == 0)) { - KALDI_ERR << "Expected token \"" << token << "\", got instead \"" << str - << "\"."; - } -} - -void ExpectToken(std::istream &is, bool binary, const std::string &token) { - ExpectToken(is, binary, token.c_str()); -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/io-funcs.h deleted file mode 100644 index 06ad1e3d2d8dc8385886a7c6653f620642c7c05a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/io-funcs.h +++ /dev/null @@ -1,246 +0,0 @@ -// base/io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_H_ -#define KALDI_BASE_IO_FUNCS_H_ - -// This header only contains some relatively low-level I/O functions. -// The full Kaldi I/O declarations are in ../util/kaldi-io.h -// and ../util/kaldi-table.h -// They were put in util/ in order to avoid making the Matrix library -// dependent on them. - -#include -#include -#include -#include - -#include "base/io-funcs-inl.h" -#include "base/kaldi-common.h" - -namespace kaldi { - -/* - This comment describes the Kaldi approach to I/O. All objects can be written - and read in two modes: binary and text. In addition we want to make the I/O - work if we redefine the typedef "BaseFloat" between floats and doubles. - We also want to have control over whitespace in text mode without affecting - the meaning of the file, for pretty-printing purposes. - - Errors are handled by throwing a KaldiFatalError exception. - - For integer and floating-point types (and boolean values): - - WriteBasicType(std::ostream &, bool binary, const T&); - ReadBasicType(std::istream &, bool binary, T*); - - and we expect these functions to be defined in such a way that they work when - the type T changes between float and double, so you can read float into double - and vice versa]. Note that for efficiency and space-saving reasons, the - Vector and Matrix classes do not use these functions [but they preserve the - type interchangeability in their own way] - - For a class (or struct) C: - class C { - .. - Write(std::ostream &, bool binary, [possibly extra optional args for - specific classes]) const; Read(std::istream &, bool binary, [possibly extra - optional args for specific classes]); - .. - } - NOTE: The only actual optional args we used are the "add" arguments in - Vector/Matrix classes, which specify whether we should sum the data already - in the class with the data being read. 
- - For types which are typedef's involving stl classes, I/O is as follows: - typedef std::vector > MyTypedefName; - - The user should define something like: - - WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t); - ReadMyTypedefName(std::ostream &, bool binary, MyTypedefName *t); - - The user would have to write these functions. - - For a type std::vector: - - void WriteIntegerVector(std::ostream &os, bool binary, const std::vector - &v); void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - - For other types, e.g. vectors of pairs, the user should create a routine of - the type WriteMyTypedefName. This is to avoid introducing confusing templated - functions; we could easily create templated functions to handle most of these - cases but they would have to share the same name. - - It also often happens that the user needs to write/read special tokens as part - of a file. These might be class headers, or separators/identifiers in the - class. We provide special functions for manipulating these. These special - tokens must be nonempty and must not contain any whitespace. - - void WriteToken(std::ostream &os, bool binary, const char*); - void WriteToken(std::ostream &os, bool binary, const std::string & token); - int Peek(std::istream &is, bool binary); - void ReadToken(std::istream &is, bool binary, std::string *str); - void PeekToken(std::istream &is, bool binary, std::string *str); - - WriteToken writes the token and one space (whether in binary or text mode). - - Peek returns the first character of the next token, by consuming whitespace - (in text mode) and then returning the peek() character. It returns -1 at EOF; - it doesn't throw. It's useful if a class can have various forms based on - typedefs and virtual classes, and wants to know which version to read. - - ReadToken allows the caller to obtain the next token. PeekToken works just - like ReadToken, but seeks back to the beginning of the token. A subsequent - call to ReadToken will read the same token again. This is useful when - different object types are written to the same file; using PeekToken one can - decide which of the objects to read. - - There is currently no special functionality for writing/reading strings (where - the strings contain data rather than "special tokens" that are whitespace-free - and nonempty). This is because Kaldi is structured in such a way that strings - don't appear, except as OpenFst symbol table entries (and these have their own - format). - - - NOTE: you should not call ReadIntegerType and WriteIntegerType with types, - such as int and size_t, that are machine-independent -- at least not - if you want your file formats to port between machines. Use int32 and - int64 where necessary. There is no way to detect this using compile-time - assertions because C++ only keeps track of the internal representation of - the type. -*/ - -/// \addtogroup io_funcs_basic -/// @{ - -/// WriteBasicType is the name of the write function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void WriteBasicType(std::ostream &os, bool binary, T t); - -/// ReadBasicType is the name of the read function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void ReadBasicType(std::istream &is, bool binary, T *t); - -// Declare specialization for bool. 
-template <> -void WriteBasicType(std::ostream &os, bool binary, bool b); - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b); - -// Declare specializations for float and double. -template <> -void WriteBasicType(std::ostream &os, bool binary, float f); - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f); - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f); - -template <> -void ReadBasicType(std::istream &is, bool binary, double *f); - -// Define ReadBasicType that accepts an "add" parameter to add to -// the destination. Caution: if used in Read functions, be careful -// to initialize the parameters concerned to zero in the default -// constructor. -template -inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) { - if (!add) { - ReadBasicType(is, binary, t); - } else { - T tmp = T(0); - ReadBasicType(is, binary, &tmp); - *t += tmp; - } -} - -/// Function for writing STL vectors of integer types. -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v); - -/// Function for reading STL vector of integer types. -template -inline void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - -/// Function for writing STL vectors of pairs of integer types. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v); - -/// Function for reading STL vector of pairs of integer types. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v); - -/// The WriteToken functions are for writing nonempty sequences of non-space -/// characters. They are not for general strings. -void WriteToken(std::ostream &os, bool binary, const char *token); -void WriteToken(std::ostream &os, bool binary, const std::string &token); - -/// Peek consumes whitespace (if binary == false) and then returns the peek() -/// value of the stream. -int Peek(std::istream &is, bool binary); - -/// ReadToken gets the next token and puts it in str (exception on failure). If -/// PeekToken() had been previously called, it is possible that the stream had -/// failed to unget the starting '<' character. In this case ReadToken() returns -/// the token string without the leading '<'. You must be prepared to handle -/// this case. ExpectToken() handles this internally, and is not affected. -void ReadToken(std::istream &is, bool binary, std::string *token); - -/// PeekToken will return the first character of the next token, or -1 if end of -/// file. It's the same as Peek(), except if the first character is '<' it will -/// skip over it and will return the next character. It will attempt to unget -/// the '<' so the stream is where it was before you did PeekToken(), however, -/// this is not guaranteed (see ReadToken()). -int PeekToken(std::istream &is, bool binary); - -/// ExpectToken tries to read in the given token, and throws an exception -/// on failure. -void ExpectToken(std::istream &is, bool binary, const char *token); -void ExpectToken(std::istream &is, bool binary, const std::string &token); - -/// ExpectPretty attempts to read the text in "token", but only in non-binary -/// mode. Throws exception on failure. It expects an exact match except that -/// arbitrary whitespace matches arbitrary whitespace. 
-void ExpectPretty(std::istream &is, bool binary, const char *token); -void ExpectPretty(std::istream &is, bool binary, const std::string &token); - -/// @} end "addtogroup io_funcs_basic" - -/// InitKaldiOutputStream initializes an opened stream for writing by writing an -/// optional binary header and modifying the floating-point precision; it will -/// typically not be called by users directly. -inline void InitKaldiOutputStream(std::ostream &os, bool binary); - -/// InitKaldiInputStream initializes an opened stream for reading by detecting -/// the binary header and setting the "binary" value appropriately; -/// It will typically not be called by users directly. -inline bool InitKaldiInputStream(std::istream &is, bool *binary); - -} // end namespace kaldi. -#endif // KALDI_BASE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-common.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-common.h deleted file mode 100644 index eee5f34d7234e7c029e6bb59584d3ee65ff5a875..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-common.h +++ /dev/null @@ -1,41 +0,0 @@ -// base/kaldi-common.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_COMMON_H_ -#define KALDI_BASE_KALDI_COMMON_H_ 1 - -#include -#include -#include // C string stuff like strcpy -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-utils.h" -#include "base/kaldi-error.h" -#include "base/kaldi-types.h" -// #include "base/io-funcs.h" -#include "base/kaldi-math.h" -// #include "base/timer.h" - -#endif // KALDI_BASE_KALDI_COMMON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-error.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-error.cc deleted file mode 100644 index 77edc6af6e56bb8fa3431d519e58fda9ee0bac6a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-error.cc +++ /dev/null @@ -1,42 +0,0 @@ -// base/kaldi-error.cc - -// Copyright 2019 LAIX (Yi Sun) -// Copyright 2019 SmartAction LLC (kkm) -// Copyright 2016 Brno University of Technology (author: Karel Vesely) -// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-error.h" - -#include - -namespace kaldi { - -/***** GLOBAL VARIABLES FOR LOGGING *****/ - -int32 g_kaldi_verbose_level = 0; -static std::string program_name; // NOLINT - -void SetProgramName(const char *basename) { - // Using the 'static std::string' for the program name is mostly harmless, - // because (a) Kaldi logging is undefined before main(), and (b) no stdc++ - // string implementation has been found in the wild that would not be just - // an empty string when zero-initialized but not yet constructed. - program_name = basename; -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-error.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-error.h deleted file mode 100644 index 0f65db372b5f05a8017433eed7c95badc819a0a6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-error.h +++ /dev/null @@ -1,57 +0,0 @@ -// base/kaldi-error.h - -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_ERROR_H_ -#define KALDI_BASE_KALDI_ERROR_H_ 1 - -#include "utils/log.h" - -namespace kaldi { - -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_LOG \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) - - -/***** PROGRAM NAME AND VERBOSITY LEVEL *****/ - -/// Called by ParseOptions to set base name (no directory) of the executing -/// program. The name is printed in logging code along with every message, -/// because in our scripts, we often mix together the stderr of many programs. -/// This function is very thread-unsafe. -void SetProgramName(const char *basename); - -/// This is set by util/parse-options.{h,cc} if you set --verbose=? option. -/// Do not use directly, prefer {Get,Set}VerboseLevel(). -extern int32 g_kaldi_verbose_level; - -/// Get verbosity level, usually set via command line '--verbose=' switch. -inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; } - -/// This should be rarely used, except by programs using Kaldi as library; -/// command-line programs set the verbose level automatically from ParseOptions. 
-inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; } - -} // namespace kaldi - -#endif // KALDI_BASE_KALDI_ERROR_H_ - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-math.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-math.cc deleted file mode 100644 index 175d9f49b6c5216645e90e146f4e2eab5572c342..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-math.cc +++ /dev/null @@ -1,164 +0,0 @@ -// base/kaldi-math.cc - -// Copyright 2009-2011 Microsoft Corporation; Yanmin Qian; -// Saarland University; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-math.h" -#ifndef _MSC_VER -#include -#include -#endif -#include -#include - -namespace kaldi { -// These routines are tested in matrix/matrix-test.cc - -int32 RoundUpToNearestPowerOfTwo(int32 n) { - KALDI_ASSERT(n > 0); - n--; - n |= n >> 1; - n |= n >> 2; - n |= n >> 4; - n |= n >> 8; - n |= n >> 16; - return n+1; -} - -static std::mutex _RandMutex; - -int Rand(struct RandomState* state) { -#if !defined(_POSIX_THREAD_SAFE_FUNCTIONS) - // On Windows and Cygwin, just call Rand() - return rand(); -#else - if (state) { - return rand_r(&(state->seed)); - } else { - std::lock_guard lock(_RandMutex); - return rand(); - } -#endif -} - -RandomState::RandomState() { - // we initialize it as Rand() + 27437 instead of just Rand(), because on some - // systems, e.g. at the very least Mac OSX Yosemite and later, it seems to be - // the case that rand_r when initialized with rand() will give you the exact - // same sequence of numbers that rand() will give if you keep calling rand() - // after that initial call. This can cause problems with repeated sequences. - // For example if you initialize two RandomState structs one after the other - // without calling rand() in between, they would give you the same sequence - // offset by one (if we didn't have the "+ 27437" in the code). 27437 is just - // a randomly chosen prime number. - seed = unsigned(Rand()) + 27437; -} - -bool WithProb(BaseFloat prob, struct RandomState* state) { - KALDI_ASSERT(prob >= 0 && prob <= 1.1); // prob should be <= 1.0, - // but we allow slightly larger values that could arise from roundoff in - // previous calculations. - KALDI_COMPILE_TIME_ASSERT(RAND_MAX > 128 * 128); - if (prob == 0) { - return false; - } else if (prob == 1.0) { - return true; - } else if (prob * RAND_MAX < 128.0) { - // prob is very small but nonzero, and the "main algorithm" - // wouldn't work that well. So: with probability 1/128, we - // return WithProb (prob * 128), else return false. - if (Rand(state) < RAND_MAX / 128) { // with probability 128... - // Note: we know that prob * 128.0 < 1.0, because - // we asserted RAND_MAX > 128 * 128. 
- return WithProb(prob * 128.0); - } else { - return false; - } - } else { - return (Rand(state) < ((RAND_MAX + static_cast(1.0)) * prob)); - } -} - -int32 RandInt(int32 min_val, int32 max_val, struct RandomState* state) { - // This is not exact. - KALDI_ASSERT(max_val >= min_val); - if (max_val == min_val) return min_val; - -#ifdef _MSC_VER - // RAND_MAX is quite small on Windows -> may need to handle larger numbers. - if (RAND_MAX > (max_val-min_val)*8) { - // *8 to avoid large inaccuracies in probability, from the modulus... - return min_val + - ((unsigned int)Rand(state) % (unsigned int)(max_val+1-min_val)); - } else { - if ((unsigned int)(RAND_MAX*RAND_MAX) > - (unsigned int)((max_val+1-min_val)*8)) { - // *8 to avoid inaccuracies in probability, from the modulus... - return min_val + ( (unsigned int)( (Rand(state)+RAND_MAX*Rand(state))) - % (unsigned int)(max_val+1-min_val)); - } else { - KALDI_ERR << "rand_int failed because we do not support such large " - "random numbers. (Extend this function)."; - } - } -#else - return min_val + - (static_cast(Rand(state)) % static_cast(max_val+1-min_val)); -#endif -} - -// Returns poisson-distributed random number. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state) { - // Knuth's algorithm. - KALDI_ASSERT(lambda >= 0); - float L = expf(-lambda), p = 1.0; - int32 k = 0; - do { - k++; - float u = RandUniform(state); - p *= u; - } while (p > L); - return k-1; -} - -void RandGauss2(float *a, float *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float u1 = RandUniform(state); - float u2 = RandUniform(state); - u1 = sqrtf(-2.0f * logf(u1)); - u2 = 2.0f * M_PI * u2; - *a = u1 * cosf(u2); - *b = u1 * sinf(u2); -} - -void RandGauss2(double *a, double *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float a_float, b_float; - // Just because we're using doubles doesn't mean we need super-high-quality - // random numbers, so we just use the floating-point version internally. - RandGauss2(&a_float, &b_float, state); - *a = a_float; - *b = b_float; -} - - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-math.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-math.h deleted file mode 100644 index 93c265ee96e704893da26b9083a44a9e60c6c192..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-math.h +++ /dev/null @@ -1,363 +0,0 @@ -// base/kaldi-math.h - -// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Yanmin Qian; -// Jan Silovsky; Saarland University -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
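As a usage note for the thread-safety scheme described above (a global mutex behind the state-less `Rand()`, `rand_r` when a `RandomState` is supplied), here is a small hypothetical sketch of the intended per-thread pattern; it assumes only the `RandomState` and `RandInt` declarations from `kaldi-math.h`.

```cpp
// Sketch of the per-thread usage pattern the comments above describe
// (illustrative only). Passing a RandomState avoids the global mutex that
// the state-less Rand() call has to take.
#include <thread>
#include <vector>

#include "base/kaldi-math.h"

int main() {
  std::vector<std::thread> workers;
  for (int t = 0; t < 4; ++t) {
    workers.emplace_back([]() {
      kaldi::RandomState state;  // independently seeded (Rand() + 27437)
      int local_sum = 0;
      for (int i = 0; i < 1000; ++i)
        local_sum += kaldi::RandInt(0, 9, &state);  // thread-local, no locking
      (void)local_sum;
    });
  }
  for (auto &w : workers) w.join();
  return 0;
}
```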
- -#ifndef KALDI_BASE_KALDI_MATH_H_ -#define KALDI_BASE_KALDI_MATH_H_ 1 - -#ifdef _MSC_VER -#include -#endif - -#include -#include -#include - -#include "base/kaldi-types.h" -#include "base/kaldi-common.h" - - -#ifndef DBL_EPSILON -#define DBL_EPSILON 2.2204460492503131e-16 -#endif -#ifndef FLT_EPSILON -#define FLT_EPSILON 1.19209290e-7f -#endif - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif - -#ifndef M_SQRT2 -#define M_SQRT2 1.4142135623730950488016887 -#endif - -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -#ifndef M_SQRT1_2 -#define M_SQRT1_2 0.7071067811865475244008443621048490 -#endif - -#ifndef M_LOG_2PI -#define M_LOG_2PI 1.8378770664093454835606594728112 -#endif - -#ifndef M_LN2 -#define M_LN2 0.693147180559945309417232121458 -#endif - -#ifndef M_LN10 -#define M_LN10 2.302585092994045684017991454684 -#endif - - -#define KALDI_ISNAN std::isnan -#define KALDI_ISINF std::isinf -#define KALDI_ISFINITE(x) std::isfinite(x) - -#if !defined(KALDI_SQR) -# define KALDI_SQR(x) ((x) * (x)) -#endif - -namespace kaldi { - -#if !defined(_MSC_VER) || (_MSC_VER >= 1900) -inline double Exp(double x) { return exp(x); } -#ifndef KALDI_NO_EXPF -inline float Exp(float x) { return expf(x); } -#else -inline float Exp(float x) { return exp(static_cast(x)); } -#endif // KALDI_NO_EXPF -#else -inline double Exp(double x) { return exp(x); } -#if !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -// Microsoft CL v18.0 buggy 64-bit implementation of -// expf() incorrectly returns -inf for exp(-inf). -inline float Exp(float x) { return exp(static_cast(x)); } -#else -inline float Exp(float x) { return expf(x); } -#endif // !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) - -inline double Log(double x) { return log(x); } -inline float Log(float x) { return logf(x); } - -#if !defined(_MSC_VER) || (_MSC_VER >= 1700) -inline double Log1p(double x) { return log1p(x); } -inline float Log1p(float x) { return log1pf(x); } -#else -inline double Log1p(double x) { - const double cutoff = 1.0e-08; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} - -inline float Log1p(float x) { - const float cutoff = 1.0e-07; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} -#endif - -static const double kMinLogDiffDouble = Log(DBL_EPSILON); // negative! -static const float kMinLogDiffFloat = Log(FLT_EPSILON); // negative! - -// -infinity -const float kLogZeroFloat = -std::numeric_limits::infinity(); -const double kLogZeroDouble = -std::numeric_limits::infinity(); -const BaseFloat kLogZeroBaseFloat = -std::numeric_limits::infinity(); - -// Returns a random integer between 0 and RAND_MAX, inclusive -int Rand(struct RandomState* state = NULL); - -// State for thread-safe random number generator -struct RandomState { - RandomState(); - unsigned seed; -}; - -// Returns a random integer between first and last inclusive. -int32 RandInt(int32 first, int32 last, struct RandomState* state = NULL); - -// Returns true with probability "prob", -bool WithProb(BaseFloat prob, struct RandomState* state = NULL); -// with 0 <= prob <= 1 [we check this]. -// Internally calls Rand(). This function is carefully implemented so -// that it should work even if prob is very small. - -/// Returns a random number strictly between 0 and 1. 
-inline float RandUniform(struct RandomState* state = NULL) { - return static_cast((Rand(state) + 1.0) / (RAND_MAX+2.0)); -} - -inline float RandGauss(struct RandomState* state = NULL) { - return static_cast(sqrtf (-2 * Log(RandUniform(state))) - * cosf(2*M_PI*RandUniform(state))); -} - -// Returns poisson-distributed random number. Uses Knuth's algorithm. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state = NULL); - -// Returns a pair of gaussian random numbers. Uses Box-Muller transform -void RandGauss2(float *a, float *b, RandomState *state = NULL); -void RandGauss2(double *a, double *b, RandomState *state = NULL); - -// Also see Vector::RandCategorical(). - -// This is a randomized pruning mechanism that preserves expectations, -// that we typically use to prune posteriors. -template -inline Float RandPrune(Float post, BaseFloat prune_thresh, - struct RandomState* state = NULL) { - KALDI_ASSERT(prune_thresh >= 0.0); - if (post == 0.0 || std::abs(post) >= prune_thresh) - return post; - return (post >= 0 ? 1.0 : -1.0) * - (RandUniform(state) <= fabs(post)/prune_thresh ? prune_thresh : 0.0); -} - -// returns log(exp(x) + exp(y)). -inline double LogAdd(double x, double y) { - double diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffDouble) { - double res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) + exp(y)). -inline float LogAdd(float x, float y) { - float diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffFloat) { - float res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) - exp(y)). -inline double LogSub(double x, double y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - double diff = y - x; // Will be negative. - double res = x + Log(1.0 - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroDouble; - return res; -} - - -// returns log(exp(x) - exp(y)). -inline float LogSub(float x, float y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - float diff = y - x; // Will be negative. - float res = x + Log(1.0f - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroFloat; - return res; -} - -/// return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)). -static inline bool ApproxEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. - if (a == b) return true; - float diff = std::abs(a-b); - if (diff == std::numeric_limits::infinity() - || diff != diff) return false; // diff is +inf or nan. - return (diff <= relative_tolerance*(std::abs(a)+std::abs(b))); -} - -/// assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b)) -static inline void AssertEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. 
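The `LogAdd` overloads above implement the standard log-sum-exp trick. The following stand-alone sketch (independent of these headers, with an invented reference function) restates the identity and shows why it stays finite where the naive formula overflows.

```cpp
// Self-contained sketch of the log-sum-exp identity that LogAdd relies on:
//   log(exp(x) + exp(y)) = x + log1p(exp(y - x))   for x >= y,
// which never exponentiates a large positive number and so avoids overflow.
#include <algorithm>
#include <cmath>
#include <cstdio>

double LogAddRef(double x, double y) {
  double hi = std::max(x, y), lo = std::min(x, y);
  if (std::isinf(hi) && hi < 0) return hi;       // both inputs are log(0)
  return hi + std::log1p(std::exp(lo - hi));     // lo - hi <= 0, exp() is safe
}

int main() {
  // Small values: agrees with the naive formula.
  std::printf("%f vs %f\n", LogAddRef(0.5, 1.5),
              std::log(std::exp(0.5) + std::exp(1.5)));
  // Large values: the naive formula overflows, the stable one does not.
  std::printf("%f\n", LogAddRef(1000.0, 1001.0));  // ~1001.313262
  return 0;
}
```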
- KALDI_ASSERT(ApproxEqual(a, b, relative_tolerance)); -} - - -// RoundUpToNearestPowerOfTwo does the obvious thing. It crashes if n <= 0. -int32 RoundUpToNearestPowerOfTwo(int32 n); - -/// Returns a / b, rounding towards negative infinity in all cases. -static inline int32 DivideRoundingDown(int32 a, int32 b) { - KALDI_ASSERT(b != 0); - if (a * b >= 0) - return a / b; - else if (a < 0) - return (a - b + 1) / b; - else - return (a - b - 1) / b; -} - -template I Gcd(I m, I n) { - if (m == 0 || n == 0) { - if (m == 0 && n == 0) { // gcd not defined, as all integers are divisors. - KALDI_ERR << "Undefined GCD since m = 0, n = 0."; - } - return (m == 0 ? (n > 0 ? n : -n) : ( m > 0 ? m : -m)); - // return absolute value of whichever is nonzero - } - // could use compile-time assertion - // but involves messing with complex template stuff. - KALDI_ASSERT(std::numeric_limits::is_integer); - while (1) { - m %= n; - if (m == 0) return (n > 0 ? n : -n); - n %= m; - if (n == 0) return (m > 0 ? m : -m); - } -} - -/// Returns the least common multiple of two integers. Will -/// crash unless the inputs are positive. -template I Lcm(I m, I n) { - KALDI_ASSERT(m > 0 && n > 0); - I gcd = Gcd(m, n); - return gcd * (m/gcd) * (n/gcd); -} - - -template void Factorize(I m, std::vector *factors) { - // Splits a number into its prime factors, in sorted order from - // least to greatest, with duplication. A very inefficient - // algorithm, which is mainly intended for use in the - // mixed-radix FFT computation (where we assume most factors - // are small). - KALDI_ASSERT(factors != NULL); - KALDI_ASSERT(m >= 1); // Doesn't work for zero or negative numbers. - factors->clear(); - I small_factors[10] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 }; - - // First try small factors. - for (I i = 0; i < 10; i++) { - if (m == 1) return; // We're done. - while (m % small_factors[i] == 0) { - m /= small_factors[i]; - factors->push_back(small_factors[i]); - } - } - // Next try all odd numbers starting from 31. - for (I j = 31;; j += 2) { - if (m == 1) return; - while (m % j == 0) { - m /= j; - factors->push_back(j); - } - } -} - -inline double Hypot(double x, double y) { return hypot(x, y); } -inline float Hypot(float x, float y) { return hypotf(x, y); } - - - - -} // namespace kaldi - - -#endif // KALDI_BASE_KALDI_MATH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-types.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-types.h deleted file mode 100644 index 7ebf4f85386192a65e176d8f0ecde9bb348af4a0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-types.h +++ /dev/null @@ -1,75 +0,0 @@ -// base/kaldi-types.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_TYPES_H_ -#define KALDI_BASE_KALDI_TYPES_H_ 1 - -namespace kaldi { -// TYPEDEFS .................................................................. -#if (KALDI_DOUBLEPRECISION != 0) -typedef double BaseFloat; -#else -typedef float BaseFloat; -#endif -} - -#ifdef _MSC_VER -#include -#define ssize_t SSIZE_T -#endif - -// we can do this a different way if some platform -// we find in the future lacks stdint.h -#include - -// for discussion on what to do if you need compile kaldi -// without OpenFST, see the bottom of this this file -#include - -namespace kaldi { - using ::int16; - using ::int32; - using ::int64; - using ::uint16; - using ::uint32; - using ::uint64; - typedef float float32; - typedef double double64; -} // end namespace kaldi - -// In a theoretical case you decide compile Kaldi without the OpenFST -// comment the previous namespace statement and uncomment the following -/* -namespace kaldi { - typedef int8_t int8; - typedef int16_t int16; - typedef int32_t int32; - typedef int64_t int64; - - typedef uint8_t uint8; - typedef uint16_t uint16; - typedef uint32_t uint32; - typedef uint64_t uint64; - typedef float float32; - typedef double double64; -} // end namespace kaldi -*/ - -#endif // KALDI_BASE_KALDI_TYPES_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-utils.h deleted file mode 100644 index bd434d09ed92ec94bc4208f53a4416f941edfdb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/base/kaldi-utils.h +++ /dev/null @@ -1,155 +0,0 @@ -// base/kaldi-utils.h - -// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; -// Saarland University; Karel Vesely; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef KALDI_BASE_KALDI_UTILS_H_ -#define KALDI_BASE_KALDI_UTILS_H_ 1 - -#if defined(_MSC_VER) -# define WIN32_LEAN_AND_MEAN -# define NOMINMAX -# include -#endif - -#ifdef _MSC_VER -#include -#define unlink _unlink -#else -#include -#endif - -#include -#include - -#if defined(_MSC_VER) -#pragma warning(disable: 4244 4056 4305 4800 4267 4996 4756 4661) -#if _MSC_VER < 1400 -#define __restrict__ -#else -#define __restrict__ __restrict -#endif -#endif - -#if defined(_MSC_VER) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = _aligned_malloc(size, align)) -# define KALDI_MEMALIGN_FREE(x) _aligned_free(x) -#elif defined(__CYGWIN__) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = aligned_alloc(align, size)) -# define KALDI_MEMALIGN_FREE(x) free(x) -#else -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (!posix_memalign(pp_orig, align, size) ? *(pp_orig) : NULL) -# define KALDI_MEMALIGN_FREE(x) free(x) -#endif - -#ifdef __ICC -#pragma warning(disable: 383) // ICPC remark we don't want. -#pragma warning(disable: 810) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. -#pragma warning(disable: 1418) // ICPC remark we don't want. -#pragma warning(disable: 444) // ICPC remark we don't want. -#pragma warning(disable: 869) // ICPC remark we don't want. -#pragma warning(disable: 1287) // ICPC remark we don't want. -#pragma warning(disable: 279) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. -#endif - - -namespace kaldi { - - -// CharToString prints the character in a human-readable form, for debugging. -std::string CharToString(const char &c); - - -inline int MachineIsLittleEndian() { - int check = 1; - return (*reinterpret_cast(&check) != 0); -} - -// This function kaldi::Sleep() provides a portable way -// to sleep for a possibly fractional -// number of seconds. On Windows it's only accurate to microseconds. -void Sleep(float seconds); -} // namespace kaldi - -#define KALDI_SWAP8(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[7];\ - (reinterpret_cast(&a))[7] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[6];\ - (reinterpret_cast(&a))[6] = t;\ - t = (reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=(reinterpret_cast(&a))[5];\ - (reinterpret_cast(&a))[5] = t;\ - t = (reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3]=(reinterpret_cast(&a))[4];\ - (reinterpret_cast(&a))[4] = t;} while (0) -#define KALDI_SWAP4(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=t;} while (0) -#define KALDI_SWAP2(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1] = t;} while (0) - - -// Makes copy constructor and operator= private. 
-#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \ - type(const type&); \ - void operator = (const type&) - -template class KaldiCompileTimeAssert { }; -template<> class KaldiCompileTimeAssert { - public: - static inline void Check() { } -}; - -#define KALDI_COMPILE_TIME_ASSERT(b) KaldiCompileTimeAssert<(b)>::Check() - -#define KALDI_ASSERT_IS_INTEGER_TYPE(I) \ - KaldiCompileTimeAssert::is_specialized \ - && std::numeric_limits::is_integer>::Check() - -#define KALDI_ASSERT_IS_FLOATING_TYPE(F) \ - KaldiCompileTimeAssert::is_specialized \ - && !std::numeric_limits::is_integer>::Check() - -#if defined(_MSC_VER) -#define KALDI_STRCASECMP _stricmp -#elif defined(__CYGWIN__) -#include -#define KALDI_STRCASECMP strcasecmp -#else -#define KALDI_STRCASECMP strcasecmp -#endif -#ifdef _MSC_VER -# define KALDI_STRTOLL(cur_cstr, end_cstr) _strtoi64(cur_cstr, end_cstr, 10); -#else -# define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10); -#endif - -#endif // KALDI_BASE_KALDI_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-decoder.cc deleted file mode 100644 index 06f77557fa49a23f6a44d07c327a1b3b081c6dec..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-decoder.cc +++ /dev/null @@ -1,1101 +0,0 @@ -// decoder/lattice-faster-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2018 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "decoder/lattice-faster-decoder.h" -// #include "lat/lattice-functions.h" - -namespace kaldi { - -// instantiate this class once for each thing you have to decode. -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : fst_(&fst), - delete_fst_(false), - config_(config), - num_toks_(0), - context_graph_(context_graph) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. -} - -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const LatticeFasterDecoderConfig &config, FST *fst) - : fst_(fst), delete_fst_(true), config_(config), num_toks_(0) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. 
-} - -template -LatticeFasterDecoderTpl::~LatticeFasterDecoderTpl() { - DeleteElems(toks_.Clear()); - ClearActiveTokens(); - if (delete_fst_) delete fst_; -} - -template -void LatticeFasterDecoderTpl::InitDecoding() { - // clean up from last time: - DeleteElems(toks_.Clear()); - cost_offsets_.clear(); - ClearActiveTokens(); - warned_ = false; - num_toks_ = 0; - decoding_finalized_ = false; - final_costs_.clear(); - StateId start_state = fst_->Start(); - KALDI_ASSERT(start_state != fst::kNoStateId); - active_toks_.resize(1); - Token *start_tok = new Token(0.0, 0.0, NULL, NULL, NULL); - active_toks_[0].toks = start_tok; - toks_.Insert(start_state, start_tok); - num_toks_++; - ProcessNonemitting(config_.beam); -} - -// Returns true if any kind of traceback is available (not necessarily from -// a final state). It should only very rarely return false; this indicates -// an unusual search error. -template -bool LatticeFasterDecoderTpl::Decode( - DecodableInterface *decodable) { - InitDecoding(); - // We use 1-based indexing for frames in this decoder (if you view it in - // terms of features), but note that the decodable object uses zero-based - // numbering, which we have to correct for when we call it. - AdvanceDecoding(decodable); - FinalizeDecoding(); - - // Returns true if we have any kind of traceback available (not necessarily - // to the end state; query ReachedFinal() for that). - return !active_toks_.empty() && active_toks_.back().toks != NULL; -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - Lattice raw_lat; - GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, olat); - return (olat->NumStates() != 0); -} - -// Outputs an FST corresponding to the raw, state-level lattice -template -bool LatticeFasterDecoderTpl::GetRawLattice( - Lattice *ofst, bool use_final_probs) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (decoding_finalized_ ? final_costs_ : final_costs_local); - if (!decoding_finalized_ && use_final_probs) - ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - const int32 bucket_count = num_toks_ / 2 + 3; - unordered_map tok_map(bucket_count); - // First create all states. - std::vector token_list; - for (int32 f = 0; f <= num_frames; f++) { - if (active_toks_[f].toks == NULL) { - KALDI_WARN << "GetRawLattice: no tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - TopSortTokens(active_toks_[f].toks, &token_list); - for (size_t i = 0; i < token_list.size(); i++) - if (token_list[i] != NULL) tok_map[token_list[i]] = ofst->AddState(); - } - // The next statement sets the start state of the output FST. Because we - // topologically sorted the tokens, state zero must be the start-state. 
- ofst->SetStart(0); - - KALDI_VLOG(4) << "init:" << num_toks_ / 2 + 3 - << " buckets:" << tok_map.bucket_count() - << " load:" << tok_map.load_factor() - << " max:" << tok_map.max_load_factor(); - // Now create all arcs. - for (int32 f = 0; f <= num_frames; f++) { - for (Token *tok = active_toks_[f].toks; tok != NULL; tok = tok->next) { - StateId cur_state = tok_map[tok]; - for (ForwardLinkT *l = tok->links; l != NULL; l = l->next) { - typename unordered_map::const_iterator iter = - tok_map.find(l->next_tok); - StateId nextstate = iter->second; - KALDI_ASSERT(iter != tok_map.end()); - BaseFloat cost_offset = 0.0; - if (l->ilabel != 0) { // emitting.. - KALDI_ASSERT(f >= 0 && f < cost_offsets_.size()); - cost_offset = cost_offsets_[f]; - } - - StateId state = cur_state; - if (l->is_start_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->start_tag_id(), Weight(0, 0), tmp); - ofst->AddArc(state, arc); - state = tmp; - } - if (l->is_end_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->end_tag_id(), Weight(0, 0), nextstate); - ofst->AddArc(tmp, arc); - nextstate = tmp; - } - - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(state, arc); - } - if (f == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - } - - fst::TopSort(ofst); - return (ofst->NumStates() > 0); -} - -// This function is now deprecated, since now we do determinization from outside -// the LatticeFasterDecoder class. Outputs an FST corresponding to the -// lattice-determinized lattice (one path per word sequence). -template -bool LatticeFasterDecoderTpl::GetLattice( - CompactLattice *ofst, bool use_final_probs) const { - Lattice raw_fst; - GetRawLattice(&raw_fst, use_final_probs); - Invert(&raw_fst); // make it so word labels are on the input. - // (in phase where we get backward-costs). - fst::ILabelCompare ilabel_comp; - ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes - // lattice-determinization more efficient. - - fst::DeterminizeLatticePrunedOptions lat_opts; - lat_opts.max_mem = config_.det_opts.max_mem; - - DeterminizeLatticePruned(raw_fst, config_.lattice_beam, ofst, lat_opts); - raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed. - Connect(ofst); // Remove unreachable states... there might be - // a small number of these, in some cases. - // Note: if something went wrong and the raw lattice was empty, - // we should still get to this point in the code without warnings or failures. - return (ofst->NumStates() != 0); -} - -template -void LatticeFasterDecoderTpl::PossiblyResizeHash(size_t num_toks) { - size_t new_sz = static_cast(static_cast(num_toks) * - config_.hash_ratio); - if (new_sz > toks_.Size()) { - toks_.SetSize(new_sz); - } -} - -/* - A note on the definition of extra_cost. - - extra_cost is used in pruning tokens, to save memory. - - extra_cost can be thought of as a beta (backward) cost assuming - we had set the betas on currently-active tokens to all be the negative - of the alphas for those tokens. (So all currently active tokens would - be on (tied) best paths). - - We can use the extra_cost to accurately prune away tokens that we know will - never appear in the lattice. 
If the extra_cost is greater than the desired - lattice beam, the token would provably never appear in the lattice, so we can - prune away the token. - - (Note: we don't update all the extra_costs every time we update a frame; we - only do it every 'config_.prune_interval' frames). - */ - -// FindOrAddToken either locates a token in hash of toks_, -// or if necessary inserts a new, empty token (i.e. with no forward links) -// for the current frame. [note: it's inserted if necessary into hash toks_ -// and also into the singly linked list of tokens active on this frame -// (whose head is at active_toks_[frame]). -template -inline typename LatticeFasterDecoderTpl::Elem * -LatticeFasterDecoderTpl::FindOrAddToken(StateId state, - int32 frame_plus_one, - BaseFloat tot_cost, - Token *backpointer, - bool *changed) { - // Returns the Token pointer. Sets "changed" (if non-NULL) to true - // if the token was newly created or the cost changed. - KALDI_ASSERT(frame_plus_one < active_toks_.size()); - Token *&toks = active_toks_[frame_plus_one].toks; - Elem *e_found = toks_.Insert(state, NULL); - if (e_found->val == NULL) { // no such token presently. - const BaseFloat extra_cost = 0.0; - // tokens on the currently final frame have zero extra_cost - // as any of them could end up - // on the winning path. - Token *new_tok = new Token(tot_cost, extra_cost, NULL, toks, backpointer); - // NULL: no forward links yet - toks = new_tok; - num_toks_++; - e_found->val = new_tok; - if (changed) *changed = true; - return e_found; - } else { - Token *tok = e_found->val; // There is an existing Token for this state. - if (tok->tot_cost > tot_cost) { // replace old token - tok->tot_cost = tot_cost; - // SetBackpointer() just does tok->backpointer = backpointer in - // the case where Token == BackpointerToken, else nothing. - tok->SetBackpointer(backpointer); - // we don't allocate a new token, the old stays linked in active_toks_ - // we only replace the tot_cost - // in the current frame, there are no forward links (and no extra_cost) - // only in ProcessNonemitting we have to delete forward links - // in case we visit a state for the second time - // those forward links, that lead to this replaced token before: - // they remain and will hopefully be pruned later (PruneForwardLinks...) - if (changed) *changed = true; - } else { - if (changed) *changed = false; - } - return e_found; - } -} - -// prunes outgoing links for all tokens in active_toks_[frame] -// it's called by PruneActiveTokens -// all links, that have link_extra_cost > lattice_beam are pruned -template -void LatticeFasterDecoderTpl::PruneForwardLinks( - int32 frame_plus_one, bool *extra_costs_changed, bool *links_pruned, - BaseFloat delta) { - // delta is the amount by which the extra_costs must change - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - - *extra_costs_changed = false; - *links_pruned = false; - KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size()); - if (active_toks_[frame_plus_one].toks == - NULL) { // empty list; should not happen. - if (!warned_) { - KALDI_WARN << "No tokens alive [doing pruning].. warning first " - "time only for each utterance\n"; - warned_ = true; - } - } - - // We have to iterate until there is no more change, because the links - // are not guaranteed to be in topological order. 
- bool changed = true; // difference new minus old extra cost >= delta ? - while (changed) { - changed = false; - for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL; - tok = tok->next) { - ForwardLinkT *link, *prev_link = NULL; - // will recompute tok_extra_cost for tok. - BaseFloat tok_extra_cost = std::numeric_limits::infinity(); - // tok_extra_cost is the best (min) of link_extra_cost of outgoing links - for (link = tok->links; link != NULL;) { - // See if we need to excise this link... - Token *next_tok = link->next_tok; - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); // difference in brackets is >= 0 - // link_exta_cost is the difference in score between the best paths - // through link source state and through link destination state - KALDI_ASSERT(link_extra_cost == link_extra_cost); // check for NaN - // the graph_cost contatins the context score - // if it's the score of the backoff arc, it should be removed. - if (link->context_score < 0) { - link_extra_cost += link->context_score; - } - if (link_extra_cost > config_.lattice_beam) { // excise link - ForwardLinkT *next_link = link->next; - if (prev_link != NULL) - prev_link->next = next_link; - else - tok->links = next_link; - delete link; - link = next_link; // advance link but leave prev_link the same. - *links_pruned = true; - } else { // keep the link and update the tok_extra_cost if needed. - if (link_extra_cost < 0.0) { // this is just a precaution. - // if (link_extra_cost < -0.01) - // KALDI_WARN << "Negative extra_cost: " << link_extra_cost; - link_extra_cost = 0.0; - } - if (link_extra_cost < tok_extra_cost) - tok_extra_cost = link_extra_cost; - prev_link = link; // move to next link - link = link->next; - } - } // for all outgoing links - if (fabs(tok_extra_cost - tok->extra_cost) > delta) - changed = true; // difference new minus old is bigger than delta - tok->extra_cost = tok_extra_cost; - // will be +infinity or <= lattice_beam_. - // infinity indicates, that no forward link survived pruning - } // for all Token on active_toks_[frame] - if (changed) *extra_costs_changed = true; - - // Note: it's theoretically possible that aggressive compiler - // optimizations could cause an infinite loop here for small delta and - // high-dynamic-range scores. - } // while changed -} - -// PruneForwardLinksFinal is a version of PruneForwardLinks that we call -// on the final frame. If there are final tokens active, it uses -// the final-probs for pruning, otherwise it treats all tokens as final. -template -void LatticeFasterDecoderTpl::PruneForwardLinksFinal() { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame_plus_one = active_toks_.size() - 1; - - if (active_toks_[frame_plus_one].toks == - NULL) // empty list; should not happen. - KALDI_WARN << "No tokens alive at end of file"; - - typedef typename unordered_map::const_iterator IterType; - ComputeFinalCosts(&final_costs_, &final_relative_cost_, &final_best_cost_); - decoding_finalized_ = true; - // We call DeleteElems() as a nicety, not because it's really necessary; - // otherwise there would be a time, after calling PruneTokensForFrame() on the - // final frame, when toks_.GetList() or toks_.Clear() would contain pointers - // to nonexistent tokens. - DeleteElems(toks_.Clear()); - - // Now go through tokens on this frame, pruning forward links... may have to - // iterate a few times until there is no more change, because the list is not - // in topological order. 
This is a modified version of the code in - // PruneForwardLinks, but here we also take account of the final-probs. - bool changed = true; - BaseFloat delta = 1.0e-05; - while (changed) { - changed = false; - for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL; - tok = tok->next) { - ForwardLinkT *link, *prev_link = NULL; - // will recompute tok_extra_cost. It has a term in it that corresponds - // to the "final-prob", so instead of initializing tok_extra_cost to - // infinity below we set it to the difference between the - // (score+final_prob) of this token, and the best such (score+final_prob). - BaseFloat final_cost; - if (final_costs_.empty()) { - final_cost = 0.0; - } else { - IterType iter = final_costs_.find(tok); - if (iter != final_costs_.end()) - final_cost = iter->second; - else - final_cost = std::numeric_limits::infinity(); - } - BaseFloat tok_extra_cost = tok->tot_cost + final_cost - final_best_cost_; - // tok_extra_cost will be a "min" over either directly being final, or - // being indirectly final through other links, and the loop below may - // decrease its value: - for (link = tok->links; link != NULL;) { - // See if we need to excise this link... - Token *next_tok = link->next_tok; - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); - if (link_extra_cost > config_.lattice_beam) { // excise link - ForwardLinkT *next_link = link->next; - if (prev_link != NULL) - prev_link->next = next_link; - else - tok->links = next_link; - delete link; - link = next_link; // advance link but leave prev_link the same. - } else { // keep the link and update the tok_extra_cost if needed. - if (link_extra_cost < 0.0) { // this is just a precaution. - // if (link_extra_cost < -0.01) - // KALDI_WARN << "Negative extra_cost: " << link_extra_cost; - link_extra_cost = 0.0; - } - if (link_extra_cost < tok_extra_cost) - tok_extra_cost = link_extra_cost; - prev_link = link; - link = link->next; - } - } - // prune away tokens worse than lattice_beam above best path. This step - // was not necessary in the non-final case because then, this case - // showed up as having no forward links. Here, the tok_extra_cost has - // an extra component relating to the final-prob. - if (tok_extra_cost > config_.lattice_beam) - tok_extra_cost = std::numeric_limits::infinity(); - // to be pruned in PruneTokensForFrame - - if (!ApproxEqual(tok->extra_cost, tok_extra_cost, delta)) changed = true; - tok->extra_cost = - tok_extra_cost; // will be +infinity or <= lattice_beam_. - } - } // while changed -} - -template -BaseFloat LatticeFasterDecoderTpl::FinalRelativeCost() const { - if (!decoding_finalized_) { - BaseFloat relative_cost; - ComputeFinalCosts(NULL, &relative_cost, NULL); - return relative_cost; - } else { - // we're not allowed to call that function if FinalizeDecoding() has - // been called; return a cached value. - return final_relative_cost_; - } -} - -// Prune away any tokens on this frame that have no forward links. -// [we don't do this in PruneForwardLinks because it would give us -// a problem with dangling pointers]. 
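To make the pruning test in `PruneForwardLinks` / `PruneForwardLinksFinal` above concrete, here is a tiny stand-alone sketch with invented numbers (the helper function and all values are hypothetical); it simply re-evaluates the `link_extra_cost` expression and compares it to the lattice beam.

```cpp
// Illustrative sketch of the pruning criterion used above: a forward link
// survives only if the best path through it is within lattice_beam of the
// best path overall.
#include <cstdio>

// Mirrors the expression computed inside PruneForwardLinks:
//   link_extra_cost = next_extra_cost
//                     + (tok_cost + acoustic_cost + graph_cost - next_cost)
double LinkExtraCost(double tok_cost, double acoustic_cost, double graph_cost,
                     double next_cost, double next_extra_cost) {
  return next_extra_cost + (tok_cost + acoustic_cost + graph_cost - next_cost);
}

int main() {
  const double lattice_beam = 8.0;  // hypothetical config_.lattice_beam
  // The destination token is on a path 2.0 worse than the best (extra_cost),
  // and taking this link costs 1.5 more than the best way to reach it.
  double cost = LinkExtraCost(/*tok_cost=*/10.0, /*acoustic_cost=*/3.0,
                              /*graph_cost=*/0.5, /*next_cost=*/12.0,
                              /*next_extra_cost=*/2.0);
  std::printf("link_extra_cost = %.1f -> %s\n", cost,
              cost > lattice_beam ? "prune" : "keep");  // 3.5 -> keep
  return 0;
}
```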
-// It's called by PruneActiveTokens if any forward links have been pruned -template -void LatticeFasterDecoderTpl::PruneTokensForFrame( - int32 frame_plus_one) { - KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size()); - Token *&toks = active_toks_[frame_plus_one].toks; - if (toks == NULL) KALDI_WARN << "No tokens alive [doing pruning]"; - Token *tok, *next_tok, *prev_tok = NULL; - for (tok = toks; tok != NULL; tok = next_tok) { - next_tok = tok->next; - if (tok->extra_cost == std::numeric_limits::infinity()) { - // token is unreachable from end of graph; (no forward links survived) - // excise tok from list and delete tok. - if (prev_tok != NULL) - prev_tok->next = tok->next; - else - toks = tok->next; - delete tok; - num_toks_--; - } else { // fetch next Token - prev_tok = tok; - } - } -} - -// Go backwards through still-alive tokens, pruning them, starting not from -// the current frame (where we want to keep all tokens) but from the frame -// before that. We go backwards through the frames and stop when we reach a -// point where the delta-costs are not changing (and the delta controls when we -// consider a cost to have "not changed"). -template -void LatticeFasterDecoderTpl::PruneActiveTokens(BaseFloat delta) { - int32 cur_frame_plus_one = NumFramesDecoded(); - int32 num_toks_begin = num_toks_; - // The index "f" below represents a "frame plus one", i.e. you'd have to - // subtract one to get the corresponding index for the decodable object. - for (int32 f = cur_frame_plus_one - 1; f >= 0; f--) { - // Reason why we need to prune forward links in this situation: - // (1) we have never pruned them (new TokenList) - // (2) we have not yet pruned the forward links to the next f, - // after any of those tokens have changed their extra_cost. 
- if (active_toks_[f].must_prune_forward_links) { - bool extra_costs_changed = false, links_pruned = false; - PruneForwardLinks(f, &extra_costs_changed, &links_pruned, delta); - if (extra_costs_changed && f > 0) // any token has changed extra_cost - active_toks_[f - 1].must_prune_forward_links = true; - if (links_pruned) // any link was pruned - active_toks_[f].must_prune_tokens = true; - active_toks_[f].must_prune_forward_links = false; // job done - } - if (f + 1 < cur_frame_plus_one && // except for last f (no forward links) - active_toks_[f + 1].must_prune_tokens) { - PruneTokensForFrame(f + 1); - active_toks_[f + 1].must_prune_tokens = false; - } - } - KALDI_VLOG(4) << "PruneActiveTokens: pruned tokens from " << num_toks_begin - << " to " << num_toks_; -} - -template -void LatticeFasterDecoderTpl::ComputeFinalCosts( - unordered_map *final_costs, - BaseFloat *final_relative_cost, BaseFloat *final_best_cost) const { - KALDI_ASSERT(!decoding_finalized_); - if (final_costs != NULL) final_costs->clear(); - const Elem *final_toks = toks_.GetList(); - BaseFloat infinity = std::numeric_limits::infinity(); - BaseFloat best_cost = infinity, best_cost_with_final = infinity; - - while (final_toks != NULL) { - StateId state = final_toks->key; - Token *tok = final_toks->val; - const Elem *next = final_toks->tail; - BaseFloat final_cost = fst_->Final(state).Value(); - BaseFloat cost = tok->tot_cost, cost_with_final = cost + final_cost; - best_cost = std::min(cost, best_cost); - best_cost_with_final = std::min(cost_with_final, best_cost_with_final); - if (final_costs != NULL && final_cost != infinity) - (*final_costs)[tok] = final_cost; - final_toks = next; - } - if (final_relative_cost != NULL) { - if (best_cost == infinity && best_cost_with_final == infinity) { - // Likely this will only happen if there are no tokens surviving. - // This seems the least bad way to handle it. - *final_relative_cost = infinity; - } else { - *final_relative_cost = best_cost_with_final - best_cost; - } - } - if (final_best_cost != NULL) { - if (best_cost_with_final != infinity) { // final-state exists. - *final_best_cost = best_cost_with_final; - } else { // no final-state exists. - *final_best_cost = best_cost; - } - } -} - -template -void LatticeFasterDecoderTpl::AdvanceDecoding( - DecodableInterface *decodable, int32 max_num_frames) { - if (std::is_same >::value) { - // if the type 'FST' is the FST base-class, then see if the FST type of fst_ - // is actually VectorFst or ConstFst. If so, call the AdvanceDecoding() - // function after casting *this to the more specific type. - if (fst_->Type() == "const") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } else if (fst_->Type() == "vector") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } - } - - KALDI_ASSERT(!active_toks_.empty() && !decoding_finalized_ && - "You must call InitDecoding() before AdvanceDecoding"); - int32 num_frames_ready = decodable->NumFramesReady(); - // num_frames_ready must be >= num_frames_decoded, or else - // the number of frames ready must have decreased (which doesn't - // make sense) or the decodable object changed between calls - // (which isn't allowed). 
- KALDI_ASSERT(num_frames_ready >= NumFramesDecoded()); - int32 target_frames_decoded = num_frames_ready; - if (max_num_frames >= 0) - target_frames_decoded = - std::min(target_frames_decoded, NumFramesDecoded() + max_num_frames); - while (NumFramesDecoded() < target_frames_decoded) { - if (NumFramesDecoded() % config_.prune_interval == 0) { - PruneActiveTokens(config_.lattice_beam * config_.prune_scale); - } - BaseFloat cost_cutoff = ProcessEmitting(decodable); - ProcessNonemitting(cost_cutoff); - } -} - -// FinalizeDecoding() is a version of PruneActiveTokens that we call -// (optionally) on the final frame. Takes into account the final-prob of -// tokens. This function used to be called PruneActiveTokensFinal(). -template -void LatticeFasterDecoderTpl::FinalizeDecoding() { - int32 final_frame_plus_one = NumFramesDecoded(); - int32 num_toks_begin = num_toks_; - // PruneForwardLinksFinal() prunes final frame (with final-probs), and - // sets decoding_finalized_. - PruneForwardLinksFinal(); - for (int32 f = final_frame_plus_one - 1; f >= 0; f--) { - bool b1, b2; // values not used. - BaseFloat dontcare = 0.0; // delta of zero means we must always update - PruneForwardLinks(f, &b1, &b2, dontcare); - PruneTokensForFrame(f + 1); - } - PruneTokensForFrame(0); - KALDI_VLOG(4) << "pruned tokens from " << num_toks_begin << " to " - << num_toks_; -} - -/// Gets the weight cutoff. Also counts the active tokens. -template -BaseFloat LatticeFasterDecoderTpl::GetCutoff( - Elem *list_head, size_t *tok_count, BaseFloat *adaptive_beam, - Elem **best_elem) { - BaseFloat best_weight = std::numeric_limits::infinity(); - // positive == high cost == bad. - size_t count = 0; - if (config_.max_active == std::numeric_limits::max() && - config_.min_active == 0) { - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = static_cast(e->val->tot_cost); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - if (adaptive_beam != NULL) *adaptive_beam = config_.beam; - return best_weight + config_.beam; - } else { - tmp_array_.clear(); - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = e->val->tot_cost; - tmp_array_.push_back(w); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - - BaseFloat beam_cutoff = best_weight + config_.beam, - min_active_cutoff = std::numeric_limits::infinity(), - max_active_cutoff = std::numeric_limits::infinity(); - - KALDI_VLOG(6) << "Number of tokens active on frame " << NumFramesDecoded() - << " is " << tmp_array_.size(); - - if (tmp_array_.size() > static_cast(config_.max_active)) { - std::nth_element(tmp_array_.begin(), - tmp_array_.begin() + config_.max_active, - tmp_array_.end()); - max_active_cutoff = tmp_array_[config_.max_active]; - } - if (max_active_cutoff < beam_cutoff) { // max_active is tighter than beam. - if (adaptive_beam) - *adaptive_beam = max_active_cutoff - best_weight + config_.beam_delta; - return max_active_cutoff; - } - if (tmp_array_.size() > static_cast(config_.min_active)) { - if (config_.min_active == 0) { - min_active_cutoff = best_weight; - } else { - std::nth_element( - tmp_array_.begin(), tmp_array_.begin() + config_.min_active, - tmp_array_.size() > static_cast(config_.max_active) - ? 
tmp_array_.begin() + config_.max_active - : tmp_array_.end()); - min_active_cutoff = tmp_array_[config_.min_active]; - } - } - if (min_active_cutoff > beam_cutoff) { // min_active is looser than beam. - if (adaptive_beam) - *adaptive_beam = min_active_cutoff - best_weight + config_.beam_delta; - return min_active_cutoff; - } else { - *adaptive_beam = config_.beam; - return beam_cutoff; - } - } -} - -template -BaseFloat LatticeFasterDecoderTpl::ProcessEmitting( - DecodableInterface *decodable) { - KALDI_ASSERT(active_toks_.size() > 0); - int32 frame = - active_toks_.size() - 1; // frame is the frame-index - // (zero-based) used to get likelihoods - // from the decodable object. - active_toks_.resize(active_toks_.size() + 1); - - Elem *final_toks = - toks_.Clear(); // analogous to swapping prev_toks_ / cur_toks_ - // in simple-decoder.h. Removes the Elems from - // being indexed in the hash in toks_. - Elem *best_elem = NULL; - BaseFloat adaptive_beam; - size_t tok_cnt; - BaseFloat cur_cutoff = - GetCutoff(final_toks, &tok_cnt, &adaptive_beam, &best_elem); - KALDI_VLOG(6) << "Adaptive beam on frame " << NumFramesDecoded() << " is " - << adaptive_beam; - - PossiblyResizeHash( - tok_cnt); // This makes sure the hash is always big enough. - - BaseFloat next_cutoff = std::numeric_limits::infinity(); - // pruning "online" before having seen all tokens - - BaseFloat cost_offset = 0.0; // Used to keep probabilities in a good - // dynamic range. - - // First process the best token to get a hopefully - // reasonably tight bound on the next cutoff. The only - // products of the next block are "next_cutoff" and "cost_offset". - if (best_elem) { - StateId state = best_elem->key; - Token *tok = best_elem->val; - cost_offset = -tok->tot_cost; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. - BaseFloat new_weight = arc.weight.Value() + cost_offset - - decodable->LogLikelihood(frame, arc.ilabel) + - tok->tot_cost; - if (state != arc.nextstate) { - new_weight += config_.length_penalty; - } - if (new_weight + adaptive_beam < next_cutoff) - next_cutoff = new_weight + adaptive_beam; - } - } - } - - // Store the offset on the acoustic likelihoods that we're applying. - // Could just do cost_offsets_.push_back(cost_offset), but we - // do it this way as it's more robust to future code changes. - cost_offsets_.resize(frame + 1, 0.0); - cost_offsets_[frame] = cost_offset; - - // the tokens are now owned here, in final_toks, and the hash is empty. - // 'owned' is a complex thing here; the point is we need to call DeleteElem - // on each elem 'e' to let toks_ know we're done with them. - for (Elem *e = final_toks, *e_tail; e != NULL; e = e_tail) { - // loop this way because we delete "e" as we go. - StateId state = e->key; - Token *tok = e->val; - if (tok->tot_cost <= cur_cutoff) { - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. 
- BaseFloat ac_cost = cost_offset - - decodable->LogLikelihood(frame, arc.ilabel), - graph_cost = arc.weight.Value(); - if (state != arc.nextstate) { - graph_cost += config_.length_penalty; - } - BaseFloat cur_cost = tok->tot_cost, - tot_cost = cur_cost + ac_cost + graph_cost; - if (tot_cost >= next_cutoff) - continue; - else if (tot_cost + adaptive_beam < next_cutoff) - next_cutoff = - tot_cost + adaptive_beam; // prune by best current token - // Note: the frame indexes into active_toks_ are one-based, - // hence the + 1. - Elem *e_next = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, NULL); - // NULL: no change indicator needed - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_next->val->context_state = tok->context_state; - } else { - e_next->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - // Add ForwardLink from tok to next_tok (put on head of list - // tok->links) - tok->links = new ForwardLinkT(e_next->val, arc.ilabel, arc.olabel, - graph_cost, ac_cost, is_start_boundary, - is_end_boundary, tok->links); - tok->links->context_score = context_score; - } - } // for all arcs - } - e_tail = e->tail; - toks_.Delete(e); // delete Elem - } - return next_cutoff; -} - -// static inline -template -void LatticeFasterDecoderTpl::DeleteForwardLinks(Token *tok) { - ForwardLinkT *l = tok->links, *m; - while (l != NULL) { - m = l->next; - delete l; - l = m; - } - tok->links = NULL; -} - -template -void LatticeFasterDecoderTpl::ProcessNonemitting(BaseFloat cutoff) { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame = static_cast(active_toks_.size()) - 2; - // Note: "frame" is the time-index we just processed, or -1 if - // we are processing the nonemitting transitions before the - // first frame (called from InitDecoding()). - - // Processes nonemitting arcs for one frame. Propagates within toks_. - // Note-- this queue structure is not very optimal as - // it may cause us to process states unnecessarily (e.g. more than once), - // but in the baseline code, turning this vector into a set to fix this - // problem did not improve overall speed. - - KALDI_ASSERT(queue_.empty()); - - if (toks_.GetList() == NULL) { - if (!warned_) { - KALDI_WARN << "Error, no surviving tokens: frame is " << frame; - warned_ = true; - } - } - - int before = 0, after = 0; - for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail) { - StateId state = e->key; - if (fst_->NumInputEpsilons(state) != 0) queue_.push_back(e); - ++before; - } - - while (!queue_.empty()) { - ++after; - const Elem *e = queue_.back(); - queue_.pop_back(); - - StateId state = e->key; - Token *tok = - e->val; // would segfault if e is a NULL pointer but this can't happen. - BaseFloat cur_cost = tok->tot_cost; - if (cur_cost >= cutoff) // Don't bother processing successors. - continue; - // If "tok" has any existing forward links, delete them, - // because we're about to regenerate them. This is a kind - // of non-optimality (remember, this is the simple decoder), - // but since most states are emitting it's not a huge issue. - DeleteForwardLinks(tok); // necessary when re-visiting - tok->links = NULL; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel == 0) { // propagate nonemitting only... 
- BaseFloat graph_cost = arc.weight.Value(), - tot_cost = cur_cost + graph_cost; - if (tot_cost < cutoff) { - bool changed; - - Elem *e_new = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, &changed); - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_new->val->context_state = tok->context_state; - } else { - e_new->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - - tok->links = - new ForwardLinkT(e_new->val, 0, arc.olabel, graph_cost, 0, - is_start_boundary, is_end_boundary, tok->links); - tok->links->context_score = context_score; - - // "changed" tells us whether the new token has a different - // cost from before, or is new [if so, add into queue]. - if (changed && fst_->NumInputEpsilons(arc.nextstate) != 0) - queue_.push_back(e_new); - } - } - } // for all arcs - } // while queue not empty - KALDI_VLOG(3) << "ProcessNonemitting " << before << " " << after; -} - -template -void LatticeFasterDecoderTpl::DeleteElems(Elem *list) { - for (Elem *e = list, *e_tail; e != NULL; e = e_tail) { - e_tail = e->tail; - toks_.Delete(e); - } -} - -template -void LatticeFasterDecoderTpl< - FST, Token>::ClearActiveTokens() { // a cleanup routine, at utt end/begin - for (size_t i = 0; i < active_toks_.size(); i++) { - // Delete all tokens alive on this frame, and any forward - // links they may have. - for (Token *tok = active_toks_[i].toks; tok != NULL;) { - DeleteForwardLinks(tok); - Token *next_tok = tok->next; - delete tok; - num_toks_--; - tok = next_tok; - } - } - active_toks_.clear(); - KALDI_ASSERT(num_toks_ == 0); -} - -// static -template -void LatticeFasterDecoderTpl::TopSortTokens( - Token *tok_list, std::vector *topsorted_list) { - unordered_map token2pos; - using std::unordered_set; - typedef typename unordered_map::iterator IterType; - int32 num_toks = 0; - for (Token *tok = tok_list; tok != NULL; tok = tok->next) num_toks++; - int32 cur_pos = 0; - // We assign the tokens numbers num_toks - 1, ... , 2, 1, 0. - // This is likely to be in closer to topological order than - // if we had given them ascending order, because of the way - // new tokens are put at the front of the list. - for (Token *tok = tok_list; tok != NULL; tok = tok->next) - token2pos[tok] = num_toks - ++cur_pos; - - unordered_set reprocess; - - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) { - Token *tok = iter->first; - int32 pos = iter->second; - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - // We only need to consider epsilon links, since non-epsilon links - // transition between frames and this function only needs to sort a list - // of tokens from a single frame. - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { // another token on this - // frame, so must consider it. - int32 next_pos = following_iter->second; - if (next_pos < pos) { // reassign the position of the next Token. - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - // In case we had previously assigned this token to be reprocessed, we can - // erase it from that set because it's "happy now" (we just processed it). - reprocess.erase(tok); - } - - size_t max_loop = 1000000, - loop_count; // max_loop is to detect epsilon cycles. 
- for (loop_count = 0; !reprocess.empty() && loop_count < max_loop; - ++loop_count) { - std::vector reprocess_vec; - for (typename unordered_set::iterator iter = reprocess.begin(); - iter != reprocess.end(); ++iter) - reprocess_vec.push_back(*iter); - reprocess.clear(); - for (typename std::vector::iterator iter = reprocess_vec.begin(); - iter != reprocess_vec.end(); ++iter) { - Token *tok = *iter; - int32 pos = token2pos[tok]; - // Repeat the processing we did above (for comments, see above). - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { - int32 next_pos = following_iter->second; - if (next_pos < pos) { - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - } - } - KALDI_ASSERT(loop_count < max_loop && - "Epsilon loops exist in your decoding " - "graph (this is not allowed!)"); - - topsorted_list->clear(); - topsorted_list->resize(cur_pos, - NULL); // create a list with NULLs in between. - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) - (*topsorted_list)[iter->second] = iter->first; -} - -// Instantiate the template for the combination of token types and FST types -// that we'll need. -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; - -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-decoder.h deleted file mode 100644 index 0152b85447e354b770745b748d266b1ca2d57024..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-decoder.h +++ /dev/null @@ -1,558 +0,0 @@ -// decoder/lattice-faster-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
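PruneTokensForFrame in the deleted lattice-faster-decoder.cc above walks a singly-linked per-frame token list and excises every token whose extra_cost has become infinite (no surviving forward links). A minimal standalone sketch of that list-surgery pattern follows; the `Tok` type and `PruneUnreachable` name are illustrative stand-ins, not Kaldi's actual Token/ForwardLink machinery.

```cpp
#include <cstdio>
#include <limits>

// Simplified stand-in for a per-frame token: only the fields the pruning
// pattern needs (extra_cost and the intrusive "next" pointer).
struct Tok {
  double extra_cost;
  Tok *next;
};

// Remove every token whose extra_cost is +infinity from the singly-linked
// list rooted at *head, deleting the removed nodes. Mirrors the
// prev_tok/next_tok walk in PruneTokensForFrame.
int PruneUnreachable(Tok **head) {
  int num_deleted = 0;
  Tok *prev = nullptr;
  for (Tok *tok = *head, *next = nullptr; tok != nullptr; tok = next) {
    next = tok->next;
    if (tok->extra_cost == std::numeric_limits<double>::infinity()) {
      if (prev != nullptr) prev->next = next;   // excise from middle/end
      else *head = next;                        // excise the list head
      delete tok;
      ++num_deleted;
    } else {
      prev = tok;                               // keep; advance prev
    }
  }
  return num_deleted;
}

int main() {
  const double inf = std::numeric_limits<double>::infinity();
  // Build a small list by pushing to the front: 0.5 -> inf -> 1.2 -> inf.
  Tok *head = nullptr;
  for (double c : {inf, 1.2, inf, 0.5}) head = new Tok{c, head};
  int removed = PruneUnreachable(&head);
  std::printf("removed %d tokens\n", removed);   // expect 2
  for (Tok *t = head; t != nullptr; t = t->next)
    std::printf("surviving extra_cost = %.1f\n", t->extra_cost);
  // Free the survivors.
  for (Tok *t = head, *n = nullptr; t != nullptr; t = n) { n = t->next; delete t; }
  return 0;
}
```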
- -#ifndef KALDI_DECODER_LATTICE_FASTER_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_DECODER_H_ - -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "decoder/context_graph.h" -#include "fst/fstlib.h" -#include "fstext/fstext-lib.h" -#include "itf/decodable-itf.h" -#include "lat/determinize-lattice-pruned.h" -#include "lat/kaldi-lattice.h" -#include "util/hash-list.h" - -namespace kaldi { - -struct LatticeFasterDecoderConfig { - BaseFloat beam; - int32 max_active; - int32 min_active; - BaseFloat lattice_beam; - int32 prune_interval; - bool determinize_lattice; // not inspected by this class... used in - // command-line program. - BaseFloat beam_delta; - BaseFloat hash_ratio; - // Note: we don't make prune_scale configurable on the command line, it's not - // a very important parameter. It affects the algorithm that prunes the - // tokens as we go. - BaseFloat prune_scale; - BaseFloat length_penalty; // for balancing the del/ins ratio, suggested -3.0 - - // Most of the options inside det_opts are not actually queried by the - // LatticeFasterDecoder class itself, but by the code that calls it, for - // example in the function DecodeUtteranceLatticeFaster. - fst::DeterminizeLatticePhonePrunedOptions det_opts; - - LatticeFasterDecoderConfig() - : beam(16.0), - max_active(std::numeric_limits::max()), - min_active(200), - lattice_beam(10.0), - prune_interval(25), - determinize_lattice(true), - beam_delta(0.5), - hash_ratio(2.0), - prune_scale(0.1), - length_penalty(0.0) {} - void Register(OptionsItf *opts) { - det_opts.Register(opts); - opts->Register("beam", &beam, - "Decoding beam. Larger->slower, more accurate."); - opts->Register("max-active", &max_active, - "Decoder max active states. Larger->slower; " - "more accurate"); - opts->Register("min-active", &min_active, - "Decoder minimum #active states."); - opts->Register("lattice-beam", &lattice_beam, - "Lattice generation beam. Larger->slower, " - "and deeper lattices"); - opts->Register("prune-interval", &prune_interval, - "Interval (in frames) at " - "which to prune tokens"); - opts->Register( - "determinize-lattice", &determinize_lattice, - "If true, " - "determinize the lattice (lattice-determinization, keeping only " - "best pdf-sequence for each word-sequence)."); - opts->Register( - "beam-delta", &beam_delta, - "Increment used in decoding-- this " - "parameter is obscure and relates to a speedup in the way the " - "max-active constraint is applied. Larger is more accurate."); - opts->Register("hash-ratio", &hash_ratio, - "Setting used in decoder to " - "control hash behavior"); - } - void Check() const { - KALDI_ASSERT(beam > 0.0 && max_active > 1 && lattice_beam > 0.0 && - min_active <= max_active && prune_interval > 0 && - beam_delta > 0.0 && hash_ratio >= 1.0 && prune_scale > 0.0 && - prune_scale < 1.0); - } -}; - -namespace decoder { -// We will template the decoder on the token type as well as the FST type; this -// is a mechanism so that we can use the same underlying decoder code for -// versions of the decoder that support quickly getting the best path -// (LatticeFasterOnlineDecoder, see lattice-faster-online-decoder.h) and also -// those that do not (LatticeFasterDecoder). - -// ForwardLinks are the links from a token to a token on the next frame. -// or sometimes on the current frame (for input-epsilon links). 
-template -struct ForwardLink { - using Label = fst::StdArc::Label; - - Token *next_tok; // the next token [or NULL if represents final-state] - Label ilabel; // ilabel on arc - Label olabel; // olabel on arc - BaseFloat graph_cost; // graph cost of traversing arc (contains LM, etc.) - BaseFloat acoustic_cost; // acoustic cost (pre-scaled) of traversing arc - bool is_start_boundary; - bool is_end_boundary; - float context_score; - ForwardLink *next; // next in singly-linked list of forward arcs (arcs - // in the state-level lattice) from a token. - inline ForwardLink(Token *next_tok, Label ilabel, Label olabel, - BaseFloat graph_cost, BaseFloat acoustic_cost, - bool is_start_boundary, bool is_end_boundary, - ForwardLink *next) - : next_tok(next_tok), - ilabel(ilabel), - olabel(olabel), - graph_cost(graph_cost), - acoustic_cost(acoustic_cost), - is_start_boundary(is_start_boundary), - is_end_boundary(is_end_boundary), - context_score(0), - next(next) {} -}; - -struct StdToken { - using ForwardLinkT = ForwardLink; - using Token = StdToken; - - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. After calling PruneForwardLinks, this equals the - // minimum difference between the cost of the best path that this link is a - // part of, and the cost of the absolute best path, under the assumption that - // any of the currently active states at the decoding front may eventually - // succeed (e.g. if you were to take the currently active states one by one - // and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - Token *next; - - // This function does nothing and should be optimized out; it's needed - // so we can share the regular LatticeFasterDecoderTpl code and the code - // for LatticeFasterOnlineDecoder that supports fast traceback. - inline void SetBackpointer(Token *backpointer) {} - - // This constructor just ignores the 'backpointer' argument. That argument is - // needed so that we can use the same decoder code for LatticeFasterDecoderTpl - // and LatticeFasterOnlineDecoderTpl (which needs backpointers to support a - // fast way to obtain the best path). - inline StdToken(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLinkT *links, - Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - context_state(0), - next(next) {} -}; - -struct BackpointerToken { - using ForwardLinkT = ForwardLink; - using Token = BackpointerToken; - - // BackpointerToken is like Token but also - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. 
After calling PruneForwardLinks, this equals - // the minimum difference between the cost of the best path, and the cost of - // this is on, and the cost of the absolute best path, under the assumption - // that any of the currently active states at the decoding front may - // eventually succeed (e.g. if you were to take the currently active states - // one by one and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - BackpointerToken *next; - - // Best preceding BackpointerToken (could be a on this frame, connected to - // this via an epsilon transition, or on a previous frame). This is only - // required for an efficient GetBestPath function in - // LatticeFasterOnlineDecoderTpl; it plays no part in the lattice generation - // (the "links" list is what stores the forward links, for that). - Token *backpointer; - - inline void SetBackpointer(Token *backpointer) { - this->backpointer = backpointer; - } - - inline BackpointerToken(BaseFloat tot_cost, BaseFloat extra_cost, - ForwardLinkT *links, Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - next(next), - backpointer(backpointer), - context_state(0) {} -}; - -} // namespace decoder - -/** This is the "normal" lattice-generating decoder. - See \ref lattices_generation \ref decoders_faster and \ref decoders_simple - for more information. - - The decoder is templated on the FST type and the token type. The token type - will normally be StdToken, but also may be BackpointerToken which is to - support quick lookup of the current best path (see - lattice-faster-online-decoder.h) - - The FST you invoke this decoder which is expected to equal - Fst::Fst, a.k.a. StdFst, or GrammarFst. If you invoke it with - FST == StdFst and it notices that the actual FST type is - fst::VectorFst or fst::ConstFst, the decoder object - will internally cast itself to one that is templated on those more specific - types; this is an optimization for speed. - */ -template -class LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph); - - // This version of the constructor takes ownership of the fst, and will delete - // it when this object is destroyed. - LatticeFasterDecoderTpl(const LatticeFasterDecoderConfig &config, FST *fst); - - void SetOptions(const LatticeFasterDecoderConfig &config) { - config_ = config; - } - - const LatticeFasterDecoderConfig &GetOptions() const { return config_; } - - ~LatticeFasterDecoderTpl(); - - /// Decodes until there are no more frames left in the "decodable" object.. - /// note, this may block waiting for input if the "decodable" object blocks. - /// Returns true if any kind of traceback is available (not necessarily from a - /// final state). - bool Decode(DecodableInterface *decodable); - - /// says whether a final-state was active on the last frame. 
If it was not, - /// the lattice (or traceback) will end with states that are not final-states. - bool ReachedFinal() const { - return FinalRelativeCost() != std::numeric_limits::infinity(); - } - - /// Outputs an FST corresponding to the single best path through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. Note: this just calls - /// GetRawLattice() and figures out the shortest path. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// Outputs an FST corresponding to the raw, state-level - /// tracebacks. Returns true if result is nonempty. - /// If "use_final_probs" is true AND we reached the final-state - /// of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - /// The raw lattice will be topologically sorted. - /// - /// See also GetRawLatticePruned in lattice-faster-online-decoder.h, - /// which also supports a pruning beam, in case for some reason - /// you want it pruned tighter than the regular lattice beam. - /// We could put that here in future needed. - bool GetRawLattice(Lattice *ofst, bool use_final_probs = true) const; - - /// [Deprecated, users should now use GetRawLattice and determinize it - /// themselves, e.g. using DeterminizeLatticePhonePrunedWrapper]. - /// Outputs an FST corresponding to the lattice-determinized - /// lattice (one path per word sequence). Returns true if result is - /// nonempty. If "use_final_probs" is true AND we reached the final-state of - /// the graph then it will include those as final-probs, else it will treat - /// all final-probs as one. - bool GetLattice(CompactLattice *ofst, bool use_final_probs = true) const; - - /// InitDecoding initializes the decoding, and should only be used if you - /// intend to call AdvanceDecoding(). If you call Decode(), you don't need to - /// call this. You can also call InitDecoding if you have already decoded an - /// utterance and want to start with a new utterance. - void InitDecoding(); - - /// This will decode until there are no more frames ready in the decodable - /// object. You can keep calling it each time more frames become available. - /// If max_num_frames is specified, it specifies the maximum number of frames - /// the function will decode before returning. - void AdvanceDecoding(DecodableInterface *decodable, - int32 max_num_frames = -1); - - /// This function may be optionally called after AdvanceDecoding(), when you - /// do not plan to decode any further. It does an extra pruning step that - /// will help to prune the lattices output by GetLattice and (particularly) - /// GetRawLattice more completely, particularly toward the end of the - /// utterance. If you call this, you cannot call AdvanceDecoding again (it - /// will fail), and you cannot call GetLattice() and related functions with - /// use_final_probs = false. Used to be called PruneActiveTokensFinal(). - void FinalizeDecoding(); - - /// FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives - /// more information. It returns the difference between the best (final-cost - /// plus cost) of any token on the final frame, and the best cost of any token - /// on the final frame. If it is infinity it means no final-states were - /// present on the final frame. It will usually be nonnegative. 
If it not - /// too positive (e.g. < 5 is my first guess, but this is not tested) you can - /// take it as a good indication that we reached the final-state with - /// reasonable likelihood. - BaseFloat FinalRelativeCost() const; - - // Returns the number of frames decoded so far. The value returned changes - // whenever we call ProcessEmitting(). - inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; } - - protected: - // we make things protected instead of private, as code in - // LatticeFasterOnlineDecoderTpl, which inherits from this, also uses the - // internals. - - // Deletes the elements of the singly linked list tok->links. - inline static void DeleteForwardLinks(Token *tok); - - // head of per-frame list of Tokens (list is in topological order), - // and something saying whether we ever pruned it using PruneForwardLinks. - struct TokenList { - Token *toks; - bool must_prune_forward_links; - bool must_prune_tokens; - TokenList() - : toks(NULL), must_prune_forward_links(true), must_prune_tokens(true) {} - }; - - using Elem = typename HashList::Elem; - // Equivalent to: - // struct Elem { - // StateId key; - // Token *val; - // Elem *tail; - // }; - - void PossiblyResizeHash(size_t num_toks); - - // FindOrAddToken either locates a token in hash of toks_, or if necessary - // inserts a new, empty token (i.e. with no forward links) for the current - // frame. [note: it's inserted if necessary into hash toks_ and also into the - // singly linked list of tokens active on this frame (whose head is at - // active_toks_[frame]). The frame_plus_one argument is the acoustic frame - // index plus one, which is used to index into the active_toks_ array. - // Returns the Token pointer. Sets "changed" (if non-NULL) to true if the - // token was newly created or the cost changed. - // If Token == StdToken, the 'backpointer' argument has no purpose (and will - // hopefully be optimized out). - inline Elem *FindOrAddToken(StateId state, int32 frame_plus_one, - BaseFloat tot_cost, Token *backpointer, - bool *changed); - - // prunes outgoing links for all tokens in active_toks_[frame] - // it's called by PruneActiveTokens - // all links, that have link_extra_cost > lattice_beam are pruned - // delta is the amount by which the extra_costs must change - // before we set *extra_costs_changed = true. - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - void PruneForwardLinks(int32 frame_plus_one, bool *extra_costs_changed, - bool *links_pruned, BaseFloat delta); - - // This function computes the final-costs for tokens active on the final - // frame. It outputs to final-costs, if non-NULL, a map from the Token* - // pointer to the final-prob of the corresponding state, for all Tokens - // that correspond to states that have final-probs. This map will be - // empty if there were no final-probs. It outputs to - // final_relative_cost, if non-NULL, the difference between the best - // forward-cost including the final-prob cost, and the best forward-cost - // without including the final-prob cost (this will usually be positive), or - // infinity if there were no final-probs. [c.f. FinalRelativeCost(), which - // outputs this quanitity]. 
It outputs to final_best_cost, if - // non-NULL, the lowest for any token t active on the final frame, of - // forward-cost[t] + final-cost[t], where final-cost[t] is the final-cost in - // the graph of the state corresponding to token t, or the best of - // forward-cost[t] if there were no final-probs active on the final frame. - // You cannot call this after FinalizeDecoding() has been called; in that - // case you should get the answer from class-member variables. - void ComputeFinalCosts(unordered_map *final_costs, - BaseFloat *final_relative_cost, - BaseFloat *final_best_cost) const; - - // PruneForwardLinksFinal is a version of PruneForwardLinks that we call - // on the final frame. If there are final tokens active, it uses - // the final-probs for pruning, otherwise it treats all tokens as final. - void PruneForwardLinksFinal(); - - // Prune away any tokens on this frame that have no forward links. - // [we don't do this in PruneForwardLinks because it would give us - // a problem with dangling pointers]. - // It's called by PruneActiveTokens if any forward links have been pruned - void PruneTokensForFrame(int32 frame_plus_one); - - // Go backwards through still-alive tokens, pruning them if the - // forward+backward cost is more than lat_beam away from the best path. It's - // possible to prove that this is "correct" in the sense that we won't lose - // anything outside of lat_beam, regardless of what happens in the future. - // delta controls when it considers a cost to have changed enough to continue - // going backward and propagating the change. larger delta -> will recurse - // less far. - void PruneActiveTokens(BaseFloat delta); - - /// Gets the weight cutoff. Also counts the active tokens. - BaseFloat GetCutoff(Elem *list_head, size_t *tok_count, - BaseFloat *adaptive_beam, Elem **best_elem); - - /// Processes emitting arcs for one frame. Propagates from prev_toks_ to - /// cur_toks_. Returns the cost cutoff for subsequent ProcessNonemitting() to - /// use. - BaseFloat ProcessEmitting(DecodableInterface *decodable); - - /// Processes nonemitting (epsilon) arcs for one frame. Called after - /// ProcessEmitting() on each frame. The cost cutoff is computed by the - /// preceding ProcessEmitting(). - void ProcessNonemitting(BaseFloat cost_cutoff); - - // HashList defined in ../util/hash-list.h. It actually allows us to maintain - // more than one list (e.g. for current and previous frames), but only one of - // them at a time can be indexed by StateId. It is indexed by frame-index - // plus one, where the frame-index is zero-based, as used in decodable object. - // That is, the emitting probs of frame t are accounted for in tokens at - // toks_[t+1]. The zeroth frame is for nonemitting transition at the start of - // the graph. - HashList toks_; - - std::vector active_toks_; // Lists of tokens, indexed by - // frame (members of TokenList are toks, must_prune_forward_links, - // must_prune_tokens). - std::vector - queue_; // temp variable used in ProcessNonemitting, - std::vector tmp_array_; // used in GetCutoff. - - // fst_ is a pointer to the FST we are decoding from. - const FST *fst_; - // delete_fst_ is true if the pointer fst_ needs to be deleted when this - // object is destroyed. - bool delete_fst_; - - std::vector cost_offsets_; // This contains, for each - // frame, an offset that was added to the acoustic log-likelihoods on that - // frame in order to keep everything in a nice dynamic range i.e. close to - // zero, to reduce roundoff errors. 
- LatticeFasterDecoderConfig config_; - int32 num_toks_; // current total #toks allocated... - bool warned_; - - /// decoding_finalized_ is true if someone called FinalizeDecoding(). [note, - /// calling this is optional]. If true, it's forbidden to decode more. Also, - /// if this is set, then the output of ComputeFinalCosts() is in the next - /// three variables. The reason we need to do this is that after - /// FinalizeDecoding() calls PruneTokensForFrame() for the final frame, some - /// of the tokens on the last frame are freed, so we free the list from toks_ - /// to avoid having dangling pointers hanging around. - bool decoding_finalized_; - /// For the meaning of the next 3 variables, see the comment for - /// decoding_finalized_ above., and ComputeFinalCosts(). - unordered_map final_costs_; - BaseFloat final_relative_cost_; - BaseFloat final_best_cost_; - - std::shared_ptr context_graph_ = nullptr; - - // There are various cleanup tasks... the toks_ structure contains - // singly linked lists of Token pointers, where Elem is the list type. - // It also indexes them in a hash, indexed by state (this hash is only - // maintained for the most recent frame). toks_.Clear() - // deletes them from the hash and returns the list of Elems. The - // function DeleteElems calls toks_.Delete(elem) for each elem in - // the list, which returns ownership of the Elem to the toks_ structure - // for reuse, but does not delete the Token pointer. The Token pointers - // are reference-counted and are ultimately deleted in PruneTokensForFrame, - // but are also linked together on each frame by their own linked-list, - // using the "next" pointer. We delete them manually. - void DeleteElems(Elem *list); - - // This function takes a singly linked list of tokens for a single frame, and - // outputs a list of them in topological order (it will crash if no such order - // can be found, which will typically be due to decoding graphs with epsilon - // cycles, which are not allowed). Note: the output list may contain NULLs, - // which the caller should pass over; it just happens to be more efficient for - // the algorithm to output a list that contains NULLs. - static void TopSortTokens(Token *tok_list, - std::vector *topsorted_list); - - void ClearActiveTokens(); - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterDecoderTpl); -}; - -typedef LatticeFasterDecoderTpl - LatticeFasterDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-online-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-online-decoder.cc deleted file mode 100644 index 2345b4d129ff905784762e973bad279f2fb55d31..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-online-decoder.cc +++ /dev/null @@ -1,278 +0,0 @@ -// decoder/lattice-faster-online-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2014 IMSL, PKU-HKUST (author: Wei Shi) -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
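The GetCutoff member described in the header above tightens the pruning beam whenever more than max_active tokens are alive, using std::nth_element to take the max_active-th smallest cost as the cutoff. A rough self-contained sketch of that idea, with the min_active branch omitted and all names (`GetCutoffSketch`, its parameters) chosen here purely for illustration:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// Compute a pruning cutoff from per-token costs: best_cost + beam, tightened
// to the max_active-th smallest cost when too many tokens are active.
// adaptive_beam is reported back so the emitting pass can prune consistently.
double GetCutoffSketch(std::vector<double> costs, int max_active,
                       double beam, double beam_delta, double *adaptive_beam) {
  double best = *std::min_element(costs.begin(), costs.end());
  double beam_cutoff = best + beam;
  if (static_cast<int>(costs.size()) > max_active) {
    // Partially sort so costs[max_active] is the (max_active+1)-th smallest.
    std::nth_element(costs.begin(), costs.begin() + max_active, costs.end());
    double max_active_cutoff = costs[max_active];
    if (max_active_cutoff < beam_cutoff) {  // max_active is tighter than beam
      *adaptive_beam = max_active_cutoff - best + beam_delta;
      return max_active_cutoff;
    }
  }
  *adaptive_beam = beam;
  return beam_cutoff;
}

int main() {
  std::vector<double> costs = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
  double adaptive_beam = 0.0;
  double cutoff = GetCutoffSketch(costs, /*max_active=*/3, /*beam=*/16.0,
                                  /*beam_delta=*/0.5, &adaptive_beam);
  std::printf("cutoff=%.1f adaptive_beam=%.1f\n", cutoff, adaptive_beam);
  // With these numbers the max_active constraint wins: cutoff 4.0, beam 3.5.
  return 0;
}
```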
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -// see note at the top of lattice-faster-decoder.cc, about how to maintain this -// file in sync with lattice-faster-decoder.cc - -#include -#include -#include -#include - -#include "decoder/lattice-faster-online-decoder.h" - -namespace kaldi { - -template -bool LatticeFasterOnlineDecoderTpl::TestGetBestPath( - bool use_final_probs) const { - Lattice lat1; - { - Lattice raw_lat; - this->GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, &lat1); - } - Lattice lat2; - GetBestPath(&lat2, use_final_probs); - BaseFloat delta = 0.1; - int32 num_paths = 1; - if (!fst::RandEquivalent(lat1, lat2, num_paths, delta, rand())) { - KALDI_WARN << "Best-path test failed"; - return false; - } else { - return true; - } -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterOnlineDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - olat->DeleteStates(); - BaseFloat final_graph_cost; - BestPathIterator iter = BestPathEnd(use_final_probs, &final_graph_cost); - if (iter.Done()) return false; // would have printed warning. - StateId state = olat->AddState(); - olat->SetFinal(state, LatticeWeight(final_graph_cost, 0.0)); - while (!iter.Done()) { - LatticeArc arc; - iter = TraceBackBestPath(iter, &arc); - arc.nextstate = state; - StateId new_state = olat->AddState(); - olat->AddArc(new_state, arc); - state = new_state; - } - olat->SetStart(state); - return true; -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::BestPathEnd( - bool use_final_probs, BaseFloat *final_cost_out) const { - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "BestPathEnd() with use_final_probs == false"; - KALDI_ASSERT(this->NumFramesDecoded() > 0 && - "You cannot call BestPathEnd if no frames were decoded."); - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - // Singly linked list of tokens on last frame (access list through "next" - // pointer). - BaseFloat best_cost = std::numeric_limits::infinity(); - BaseFloat best_final_cost = 0; - Token *best_tok = NULL; - for (Token *tok = this->active_toks_.back().toks; tok != NULL; - tok = tok->next) { - BaseFloat cost = tok->tot_cost, final_cost = 0.0; - if (use_final_probs && !final_costs.empty()) { - // if we are instructed to use final-probs, and any final tokens were - // active on final frame, include the final-prob in the cost of the token. 
- typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) { - final_cost = iter->second; - cost += final_cost; - } else { - cost = std::numeric_limits::infinity(); - } - } - if (cost < best_cost) { - best_cost = cost; - best_tok = tok; - best_final_cost = final_cost; - } - } - if (best_tok == - NULL) { // this should not happen, and is likely a code error or - // caused by infinities in likelihoods, but I'm not making - // it a fatal error for now. - KALDI_WARN << "No final token found."; - } - if (final_cost_out) *final_cost_out = best_final_cost; - return BestPathIterator(best_tok, this->NumFramesDecoded() - 1); -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::TraceBackBestPath(BestPathIterator iter, - LatticeArc *oarc) const { - KALDI_ASSERT(!iter.Done() && oarc != NULL); - Token *tok = static_cast(iter.tok); - int32 cur_t = iter.frame, step_t = 0; - if (tok->backpointer != NULL) { - // retrieve the correct forward link(with the best link cost) - BaseFloat best_cost = std::numeric_limits::infinity(); - ForwardLinkT *link; - for (link = tok->backpointer->links; link != NULL; link = link->next) { - if (link->next_tok == tok) { // this is a link to "tok" - BaseFloat graph_cost = link->graph_cost, - acoustic_cost = link->acoustic_cost; - BaseFloat cost = graph_cost + acoustic_cost; - if (cost < best_cost) { - oarc->ilabel = link->ilabel; - oarc->olabel = link->olabel; - if (link->ilabel != 0) { - KALDI_ASSERT(static_cast(cur_t) < - this->cost_offsets_.size()); - acoustic_cost -= this->cost_offsets_[cur_t]; - step_t = -1; - } else { - step_t = 0; - } - oarc->weight = LatticeWeight(graph_cost, acoustic_cost); - best_cost = cost; - } - } - } - if (link == NULL && - best_cost == - std::numeric_limits::infinity()) { // Did not find - // correct link. - KALDI_ERR << "Error tracing best-path back (likely " - << "bug in token-pruning algorithm)"; - } - } else { - oarc->ilabel = 0; - oarc->olabel = 0; - oarc->weight = LatticeWeight::One(); // zero costs. - } - return BestPathIterator(tok->backpointer, cur_t + step_t); -} - -template -bool LatticeFasterOnlineDecoderTpl::GetRawLatticePruned( - Lattice *ofst, bool use_final_probs, BaseFloat beam) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = this->active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - for (int32 f = 0; f <= num_frames; f++) { - if (this->active_toks_[f].toks == NULL) { - KALDI_WARN << "No tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - } - unordered_map tok_map; - std::queue > tok_queue; - // First initialize the queue and states. 
Put the initial state on the queue; - // this is the last token in the list active_toks_[0].toks. - for (Token *tok = this->active_toks_[0].toks; tok != NULL; tok = tok->next) { - if (tok->next == NULL) { - tok_map[tok] = ofst->AddState(); - ofst->SetStart(tok_map[tok]); - std::pair tok_pair(tok, 0); // #frame = 0 - tok_queue.push(tok_pair); - } - } - - // Next create states for "good" tokens - while (!tok_queue.empty()) { - std::pair cur_tok_pair = tok_queue.front(); - tok_queue.pop(); - Token *cur_tok = cur_tok_pair.first; - int32 cur_frame = cur_tok_pair.second; - KALDI_ASSERT(cur_frame >= 0 && cur_frame <= this->cost_offsets_.size()); - - typename unordered_map::const_iterator iter = - tok_map.find(cur_tok); - KALDI_ASSERT(iter != tok_map.end()); - StateId cur_state = iter->second; - - for (ForwardLinkT *l = cur_tok->links; l != NULL; l = l->next) { - Token *next_tok = l->next_tok; - if (next_tok->extra_cost < beam) { - // so both the current and the next token are good; create the arc - int32 next_frame = l->ilabel == 0 ? cur_frame : cur_frame + 1; - StateId nextstate; - if (tok_map.find(next_tok) == tok_map.end()) { - nextstate = tok_map[next_tok] = ofst->AddState(); - tok_queue.push(std::pair(next_tok, next_frame)); - } else { - nextstate = tok_map[next_tok]; - } - BaseFloat cost_offset = - (l->ilabel != 0 ? this->cost_offsets_[cur_frame] : 0); - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(cur_state, arc); - } - } - if (cur_frame == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(cur_tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - return (ofst->NumStates() != 0); -} - -// Instantiate the template for the FST types that we'll need. -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-online-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-online-decoder.h deleted file mode 100644 index dc50cfa73e6574e9625eda9045c47f674fcbc1e3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/decoder/lattice-faster-online-decoder.h +++ /dev/null @@ -1,131 +0,0 @@ -// decoder/lattice-faster-online-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
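The online-decoder code removed above recovers the best path by picking the cheapest token on the final frame (adding its final-prob when one exists) and then walking backpointers. A simplified standalone sketch of that backward walk; `BPTok` and `BestPathLabels` are hypothetical stand-ins for BackpointerToken and the BestPathEnd/TraceBackBestPath pair, and final-probs are folded into tot_cost for brevity.

```cpp
#include <algorithm>
#include <cstdio>
#include <limits>
#include <vector>

// Minimal stand-in for a backpointer token: total cost, the output label on
// the arc that reached it, and the best predecessor token.
struct BPTok {
  double tot_cost;
  int olabel;          // 0 means epsilon (no word emitted on the way in)
  BPTok *backpointer;  // NULL for the start token
};

// Pick the cheapest token on the final frame, then follow backpointers,
// collecting non-epsilon output labels; reverse to get utterance order.
std::vector<int> BestPathLabels(const std::vector<BPTok *> &final_frame) {
  BPTok *best = nullptr;
  double best_cost = std::numeric_limits<double>::infinity();
  for (BPTok *t : final_frame)
    if (t->tot_cost < best_cost) { best_cost = t->tot_cost; best = t; }
  std::vector<int> labels;
  for (BPTok *t = best; t != nullptr; t = t->backpointer)
    if (t->olabel != 0) labels.push_back(t->olabel);
  std::reverse(labels.begin(), labels.end());
  return labels;
}

int main() {
  // Chain start -> "7" -> eps -> "3", with two candidate end tokens.
  BPTok start{0.0, 0, nullptr}, a{1.0, 7, &start}, b{1.5, 0, &a};
  BPTok end1{2.0, 3, &b}, end2{9.0, 5, &b};
  std::vector<int> labels = BestPathLabels({&end1, &end2});
  for (int l : labels) std::printf("%d ", l);  // prints: 7 3
  std::printf("\n");
  return 0;
}
```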
- -// see note at the top of lattice-faster-decoder.h, about how to maintain this -// file in sync with lattice-faster-decoder.h - -#ifndef KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ - -#include "decoder/lattice-faster-decoder.h" - -#include - -namespace kaldi { - -/** LatticeFasterOnlineDecoderTpl is as LatticeFasterDecoderTpl but also - supports an efficient way to get the best path (see the function - BestPathEnd()), which is useful in endpointing and in situations where you - might want to frequently access the best path. - - This is only templated on the FST type, since the Token type is required to - be BackpointerToken. Actually it only makes sense to instantiate - LatticeFasterDecoderTpl with Token == BackpointerToken if you do so - indirectly via this child class. - */ -template -class LatticeFasterOnlineDecoderTpl - : public LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using Token = decoder::BackpointerToken; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterOnlineDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : LatticeFasterDecoderTpl(fst, config, context_graph) {} - - // This version of the initializer takes ownership of 'fst', and will delete - // it when this object is destroyed. - LatticeFasterOnlineDecoderTpl(const LatticeFasterDecoderConfig &config, - FST *fst) - : LatticeFasterDecoderTpl(config, fst) {} - - struct BestPathIterator { - void *tok; - int32 frame; - // note, "frame" is the frame-index of the frame you'll get the - // transition-id for next time, if you call TraceBackBestPath on this - // iterator (assuming it's not an epsilon transition). Note that this - // is one less than you might reasonably expect, e.g. it's -1 for - // the nonemitting transitions before the first frame. - BestPathIterator(void *t, int32 f) : tok(t), frame(f) {} - bool Done() const { return tok == NULL; } - }; - - /// Outputs an FST corresponding to the single best path through the lattice. - /// This is quite efficient because it doesn't get the entire raw lattice and - /// find the best path through it; instead, it uses the BestPathEnd and - /// BestPathIterator so it basically traces it back through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// This function does a self-test of GetBestPath(). Returns true on - /// success; returns false and prints a warning on failure. - bool TestGetBestPath(bool use_final_probs = true) const; - - /// This function returns an iterator that can be used to trace back - /// the best path. If use_final_probs == true and at least one final state - /// survived till the end, it will use the final-probs in working out the best - /// final Token, and will output the final cost to *final_cost (if non-NULL), - /// else it will use only the forward likelihood, and will put zero in - /// *final_cost (if non-NULL). 
- /// Requires that NumFramesDecoded() > 0. - BestPathIterator BestPathEnd(bool use_final_probs, - BaseFloat *final_cost = NULL) const; - - /// This function can be used in conjunction with BestPathEnd() to trace back - /// the best path one link at a time (e.g. this can be useful in endpoint - /// detection). By "link" we mean a link in the graph; not all links cross - /// frame boundaries, but each time you see a nonzero ilabel you can interpret - /// that as a frame. The return value is the updated iterator. It outputs - /// the ilabel and olabel, and the (graph and acoustic) weight to the "arc" - /// pointer, while leaving its "nextstate" variable unchanged. - BestPathIterator TraceBackBestPath(BestPathIterator iter, - LatticeArc *arc) const; - - /// Behaves the same as GetRawLattice but only processes tokens whose - /// extra_cost is smaller than the best-cost plus the specified beam. - /// It is only worthwhile to call this function if beam is less than - /// the lattice_beam specified in the config; otherwise, it would - /// return essentially the same thing as GetRawLattice, but more slowly. - bool GetRawLatticePruned(Lattice *ofst, bool use_final_probs, - BaseFloat beam) const; - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterOnlineDecoderTpl); -}; - -typedef LatticeFasterOnlineDecoderTpl LatticeFasterOnlineDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstaddselfloops.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstaddselfloops.cc deleted file mode 100644 index 145bf006f2324136c5fea4a8d0012a7a4126c646..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstaddselfloops.cc +++ /dev/null @@ -1,100 +0,0 @@ -// fstbin/fstaddselfloops.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
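GetRawLatticePruned, declared just above, only expands tokens whose extra_cost beats the supplied beam, growing the output lattice breadth-first from the start token. A simplified sketch of that expansion pattern on toy types (`PTok`/`PLink` and `CountPrunedStates` are illustrative only; the sketch merely counts the lattice states such an expansion would create, rather than building an FST):

```cpp
#include <cstdio>
#include <queue>
#include <unordered_map>
#include <vector>

// Toy stand-ins for a token and its forward links.
struct PTok;
struct PLink { PTok *next_tok; double cost; };
struct PTok  { double extra_cost; std::vector<PLink> links; };

// Start from the initial token and only follow links into tokens whose
// extra_cost is below the beam, visiting each "good" token once (BFS).
int CountPrunedStates(PTok *start, double beam) {
  std::unordered_map<PTok *, int> state_of;  // token -> assigned state id
  std::queue<PTok *> q;
  state_of[start] = 0;
  q.push(start);
  while (!q.empty()) {
    PTok *tok = q.front(); q.pop();
    for (const PLink &l : tok->links) {
      PTok *next = l.next_tok;
      if (next->extra_cost < beam && !state_of.count(next)) {
        int id = static_cast<int>(state_of.size());
        state_of[next] = id;
        q.push(next);
      }
    }
  }
  return static_cast<int>(state_of.size());
}

int main() {
  PTok a{0.0, {}}, b{0.2, {}}, c{5.0, {}}, d{0.1, {}};
  a.links = {{&b, 1.0}, {&c, 2.0}};  // c falls outside a beam of 1.0
  b.links = {{&d, 1.0}};
  std::printf("states kept with beam 1.0: %d\n", CountPrunedStates(&a, 1.0));
  // expect 3 (a, b, d); c's extra_cost of 5.0 exceeds the beam
  return 0;
}
```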
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#include "util/simple-io-funcs.h" - -/* some test examples: - pushd ~/tmpdir - ( echo 3; echo 4) > in.list - ( echo 5; echo 6) > out.list - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstaddselfloops in.list out.list - | fstprint ( echo "0 1 0 1"; echo " 0 2 1 0"; echo "1 0"; echo "2 0"; ) | - fstcompile | fstaddselfloops in.list out.list | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Adds self-loops to states of an FST to propagate disambiguation " - "symbols through it\n" - "They are added on each final state and each state with non-epsilon " - "output symbols\n" - "on at least one arc out of the state. Useful in conjunction with " - "predeterminize\n" - "\n" - "Usage: fstaddselfloops in-disambig-list out-disambig-list [in.fst " - "[out.fst] ]\n" - "E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst\n" - "in.list and out.list are lists of integers, one per line, of the\n" - "same length.\n"; - - ParseOptions po(usage); - po.Read(argc, argv); - - if (po.NumArgs() < 2 || po.NumArgs() > 4) { - po.PrintUsage(); - exit(1); - } - - std::string disambig_in_rxfilename = po.GetArg(1), - disambig_out_rxfilename = po.GetArg(2), - fst_in_filename = po.GetOptArg(3), - fst_out_filename = po.GetOptArg(4); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - std::vector disambig_in; - if (!ReadIntegerVectorSimple(disambig_in_rxfilename, &disambig_in)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_in_rxfilename); - - std::vector disambig_out; - if (!ReadIntegerVectorSimple(disambig_out_rxfilename, &disambig_out)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_out_rxfilename); - - if (disambig_in.size() != disambig_out.size()) - KALDI_ERR - << "fstaddselfloops: mismatch in size of disambiguation symbols"; - - AddSelfLoops(fst, disambig_in, disambig_out); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstdeterminizestar.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstdeterminizestar.cc deleted file mode 100644 index e818143025c0fd5d389c28c77715d65711fe63f1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstdeterminizestar.cc +++ /dev/null @@ -1,114 +0,0 @@ -// fstbin/fstdeterminizestar.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#if !defined(_MSC_VER) && !defined(__APPLE__) -#include // Comment this line and the call to signal below if -// it causes compilation problems. It is only to enable a debugging procedure -// when determinization does not terminate. We are disabling this code if -// compiling on Windows because signal.h is not available there, and on -// MacOS due to a problem with in the initial release of Sierra. -#endif - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 1 1 0"; echo "0 0" ) | fstcompile | - fstdeterminizestar | fstprint # this last one fails [correctly]: ( echo "0 0 0 - 1"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - - cd ~/tmpdir - while true; do - fstrand > 1.fst - fstpredeterminize out.lst 1.fst | fstdeterminizestar | fstrmsymbols out.lst - > 2.fst fstequivalent --random=true 1.fst 2.fst || echo "Test failed" echo -n - "." done - - Test of debugging [with non-determinizable input]: - ( echo " 0 0 1 0 1.0"; echo "0 1 1 0"; echo "1 1 1 0 0"; echo "0 2 2 0"; echo - "2"; echo "1" ) | fstcompile | fstdeterminizestar kill -SIGUSR1 [the process-id - of fstdeterminizestar] # prints out a bunch of debugging output showing the - mess it got itself into. -*/ - -bool debug_location = false; -void signal_handler(int) { debug_location = true; } - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Removes epsilons and determinizes in one step\n" - "\n" - "Usage: fstdeterminizestar [in.fst [out.fst] ]\n" - "\n" - "See also: fstdeterminizelog, lattice-determinize\n"; - - float delta = kDelta; - int max_states = -1; - bool use_log = false; - ParseOptions po(usage); - po.Register("use-log", &use_log, "Determinize in log semiring."); - po.Register("delta", &delta, - "Delta value used to determine equivalence of weights."); - po.Register( - "max-states", &max_states, - "Maximum number of states in determinized FST before it will abort."); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_str = po.GetOptArg(1), fst_out_str = po.GetOptArg(2); - - // This enables us to get traceback info from determinization that is - // not seeming to terminate. -#if !defined(_MSC_VER) && !defined(__APPLE__) - signal(SIGUSR1, signal_handler); -#endif - // Normal case: just files. - VectorFst *fst = ReadFstKaldi(fst_in_str); - - ArcSort(fst, ILabelCompare()); // improves speed. 
- if (use_log) { - DeterminizeStarInLog(fst, delta, &debug_location, max_states); - } else { - VectorFst det_fst; - DeterminizeStar(*fst, &det_fst, delta, &debug_location, max_states); - *fst = det_fst; // will do shallow copy and then det_fst goes - // out of scope anyway. - } - WriteFstKaldi(*fst, fst_out_str); - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstisstochastic.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstisstochastic.cc deleted file mode 100644 index 468ed0daa7d37cb9a25cf25264f86e48e137b975..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstisstochastic.cc +++ /dev/null @@ -1,91 +0,0 @@ -// fstbin/fstisstochastic.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -// e.g. of test: -// echo " 0 0" | fstcompile | fstisstochastic -// should return 0 and print "0 0" [meaning, min and -// max weight are one = exp(0)] -// echo " 0 1" | fstcompile | fstisstochastic -// should return 1, not stochastic, and print 1 1 -// (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic should return 0, stochastic; it prints "0 -// -1.78e-07" for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo -// "1 0" ) | fstcompile | fstisstochastic --test-in-log=false should return 1, -// not stochastic in tropical; it prints "0 0.693147" for me (echo "0 0 0 0 0 "; -// echo "0 1 0 0 0 "; echo "1 0" ) | fstcompile | fstisstochastic -// --test-in-log=false should return 0, stochastic in tropical; it prints "0 0" -// for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic --test-in-log=false --delta=1 returns 0 even -// though not stochastic because we gave it an absurdly large delta. 
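The shell examples above exercise the same check that this binary performs; programmatically it reduces to one call into Kaldi's fstext utilities. A rough sketch follows (the FST path is hypothetical, and the call simply mirrors the code in `main()` below rather than documenting a separate API).

```cpp
#include <iostream>
#include "fst/fstlib.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"

int main() {
  // Test stochasticity in the log semiring: per state, the log-sum of arc
  // weights plus the final weight should be ~0 (within delta).
  fst::Fst<fst::StdArc> *g = fst::ReadFstKaldiGeneric("G.fst");  // hypothetical path
  fst::StdArc::Weight min_w, max_w;
  bool stochastic = fst::IsStochasticFstInLog(*g, /*delta=*/0.01, &min_w, &max_w);
  std::cout << min_w.Value() << " " << max_w.Value() << "\n";
  delete g;
  return stochastic ? 0 : 1;  // same exit-code convention as the tool
}
```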
- -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Checks whether an FST is stochastic and exits with success if so.\n" - "Prints out maximum error (in log units).\n" - "\n" - "Usage: fstisstochastic [ in.fst ]\n"; - - float delta = 0.01; - bool test_in_log = true; - - ParseOptions po(usage); - po.Register("delta", &delta, "Maximum error to accept."); - po.Register("test-in-log", &test_in_log, - "Test stochasticity in log semiring."); - po.Read(argc, argv); - - if (po.NumArgs() > 1) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1); - - Fst *fst = ReadFstKaldiGeneric(fst_in_filename); - - bool ans; - StdArc::Weight min, max; - if (test_in_log) - ans = IsStochasticFstInLog(*fst, delta, &min, &max); - else - ans = IsStochasticFst(*fst, delta, &min, &max); - - std::cout << min.Value() << " " << max.Value() << '\n'; - delete fst; - if (ans) - return 0; // success; - else - return 1; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstminimizeencoded.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstminimizeencoded.cc deleted file mode 100644 index ae9ca6d75abe67d9a195572dd6d91ec3c7b44851..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fstminimizeencoded.cc +++ /dev/null @@ -1,74 +0,0 @@ -// fstbin/fstminimizeencoded.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstminimizeencoded | fstprint - ( echo "0 1 0 0"; echo " 0 2 0 0"; echo "1 0"; echo "2 0"; ) | fstcompile | - fstminimizeencoded | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Minimizes FST after encoding [similar to fstminimize, but no " - "weight-pushing]\n" - "\n" - "Usage: fstminimizeencoded [in.fst [out.fst] ]\n"; - - float delta = kDelta; - ParseOptions po(usage); - po.Register("delta", &delta, - "Delta likelihood used for quantization of weights"); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1), - fst_out_filename = po.GetOptArg(2); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - MinimizeEncoded(fst, delta); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fsttablecompose.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fsttablecompose.cc deleted file mode 100644 index bdd476da78b8cb8823c60abf33b5278e05bfd92c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstbin/fsttablecompose.cc +++ /dev/null @@ -1,133 +0,0 @@ -// fstbin/fsttablecompose.cc - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "fstext/table-matcher.h" -#include "util/parse-options.h" - -/* - cd ~/tmpdir - while true; do - fstrand | fstarcsort --sort_type=olabel > 1.fst; fstrand | fstarcsort - > 2.fst fstcompose 1.fst 2.fst > 3a.fst fsttablecompose 1.fst 2.fst > 3b.fst - fstequivalent --random=true 3a.fst 3b.fst || echo "Test failed" - echo -n "." - done - -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - /* - fsttablecompose should always give equivalent results to compose, - but it is more efficient for certain kinds of inputs. 
- In particular, it is useful when, say, the left FST has states - that typically either have epsilon olabels, or - one transition out for each of the possible symbols (as the - olabel). The same with the input symbols of the right-hand FST - is possible. - */ - - const char *usage = - "Composition algorithm [between two FSTs of standard type, in " - "tropical\n" - "semiring] that is more efficient for certain cases-- in particular,\n" - "where one of the FSTs (the left one, if --match-side=left) has large\n" - "out-degree\n" - "\n" - "Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) " - "(fst2-rxfilename|fst2-rspecifier) [(out-rxfilename|out-rspecifier)]\n"; - - ParseOptions po(usage); - - TableComposeOptions opts; - std::string match_side = "left"; - std::string compose_filter = "sequence"; - - po.Register("connect", &opts.connect, "If true, trim FST before output."); - po.Register("match-side", &match_side, - "Side of composition to do table " - "match, one of: \"left\" or \"right\"."); - po.Register("compose-filter", &compose_filter, - "Composition filter to use, " - "one of: \"alt_sequence\", \"auto\", \"match\", \"sequence\""); - - po.Read(argc, argv); - - if (match_side == "left") { - opts.table_match_type = MATCH_OUTPUT; - } else if (match_side == "right") { - opts.table_match_type = MATCH_INPUT; - } else { - KALDI_ERR << "Invalid match-side option: " << match_side; - } - - if (compose_filter == "alt_sequence") { - opts.filter_type = ALT_SEQUENCE_FILTER; - } else if (compose_filter == "auto") { - opts.filter_type = AUTO_FILTER; - } else if (compose_filter == "match") { - opts.filter_type = MATCH_FILTER; - } else if (compose_filter == "sequence") { - opts.filter_type = SEQUENCE_FILTER; - } else { - KALDI_ERR << "Invalid compose-filter option: " << compose_filter; - } - - if (po.NumArgs() < 2 || po.NumArgs() > 3) { - po.PrintUsage(); - exit(1); - } - - std::string fst1_in_str = po.GetArg(1), fst2_in_str = po.GetArg(2), - fst_out_str = po.GetOptArg(3); - - VectorFst *fst1 = ReadFstKaldi(fst1_in_str); - - VectorFst *fst2 = ReadFstKaldi(fst2_in_str); - - // Checks if is olabel sorted and is ilabel sorted. - if (fst1->Properties(fst::kOLabelSorted, true) == 0) { - KALDI_WARN << "The first FST is not olabel sorted."; - } - if (fst2->Properties(fst::kILabelSorted, true) == 0) { - KALDI_WARN << "The second FST is not ilabel sorted."; - } - - VectorFst composed_fst; - - TableCompose(*fst1, *fst2, &composed_fst, opts); - - delete fst1; - delete fst2; - - WriteFstKaldi(composed_fst, fst_out_str); - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstext/determinize-lattice-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstext/determinize-lattice-inl.h deleted file mode 100644 index 0bfbc8f41c7e439b1fac037f60490e04fdcbdd8b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/fstext/determinize-lattice-inl.h +++ /dev/null @@ -1,1357 +0,0 @@ -// fstext/determinize-lattice-inl.h - -// Copyright 2009-2012 Microsoft Corporation -// 2012-2013 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
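To make the efficiency note above concrete, here is roughly how the table-driven composition is invoked from C++. This is a sketch only: the file names are hypothetical, and it mirrors the tool's default `--match-side=left` behaviour (table match on the left FST's output labels).

```cpp
#include "fst/fstlib.h"
#include "fstext/kaldi-fst-io.h"
#include "fstext/table-matcher.h"

int main() {
  // Left FST must be olabel-sorted, right FST ilabel-sorted (as the tool warns).
  fst::VectorFst<fst::StdArc> *fst1 = fst::ReadFstKaldi("left.fst");   // hypothetical
  fst::VectorFst<fst::StdArc> *fst2 = fst::ReadFstKaldi("right.fst");  // hypothetical
  fst::ArcSort(fst1, fst::OLabelCompare<fst::StdArc>());
  fst::ArcSort(fst2, fst::ILabelCompare<fst::StdArc>());

  fst::TableComposeOptions opts;
  opts.table_match_type = fst::MATCH_OUTPUT;  // table-match on the left side
  fst::VectorFst<fst::StdArc> composed;
  fst::TableCompose(*fst1, *fst2, &composed, opts);

  fst::WriteFstKaldi(composed, "composed.fst");
  delete fst1;
  delete fst2;
  return 0;
}
```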
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -#define KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -// Do not include this file directly. It is included by determinize-lattice.h - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fst { - -// This class maps back and forth from/to integer id's to sequences of strings. -// used in determinization algorithm. It is constructed in such a way that -// finding the string-id of the successor of (string, next-label) has constant -// time. - -// Note: class IntType, typically int32, is the type of the element in the -// string (typically a template argument of the CompactLatticeWeightTpl). - -template -class LatticeStringRepository { - public: - struct Entry { - const Entry *parent; // NULL for empty string. - IntType i; - inline bool operator==(const Entry &other) const { - return (parent == other.parent && i == other.i); - } - Entry() {} - Entry(const Entry &e) : parent(e.parent), i(e.i) {} - }; - // Note: all Entry* pointers returned in function calls are - // owned by the repository itself, not by the caller! - - // Interface guarantees empty string is NULL. - inline const Entry *EmptyString() { return NULL; } - - // Returns string of "parent" with i appended. Pointer - // owned by repository - const Entry *Successor(const Entry *parent, IntType i) { - new_entry_->parent = parent; - new_entry_->i = i; - - std::pair pr = set_.insert(new_entry_); - if (pr.second) { // Was successfully inserted (was not there). We need to - // replace the element we inserted, which resides on the - // stack, with one from the heap. - const Entry *ans = new_entry_; - new_entry_ = new Entry(); - return ans; - } else { // Was not inserted because an equivalent Entry already - // existed. - return *pr.first; - } - } - - const Entry *Concatenate(const Entry *a, const Entry *b) { - if (a == NULL) - return b; - else if (b == NULL) - return a; - std::vector v; - ConvertToVector(b, &v); - const Entry *ans = a; - for (size_t i = 0; i < v.size(); i++) ans = Successor(ans, v[i]); - return ans; - } - const Entry *CommonPrefix(const Entry *a, const Entry *b) { - std::vector a_vec, b_vec; - ConvertToVector(a, &a_vec); - ConvertToVector(b, &b_vec); - const Entry *ans = NULL; - for (size_t i = 0; - i < a_vec.size() && i < b_vec.size() && a_vec[i] == b_vec[i]; i++) - ans = Successor(ans, a_vec[i]); - return ans; - } - - // removes any elements from b that are not part of - // a common prefix with a. - void ReduceToCommonPrefix(const Entry *a, std::vector *b) { - size_t a_size = Size(a), b_size = b->size(); - while (a_size > b_size) { - a = a->parent; - a_size--; - } - if (b_size > a_size) b_size = a_size; - typename std::vector::iterator b_begin = b->begin(); - while (a_size != 0) { - if (a->i != *(b_begin + a_size - 1)) b_size = a_size - 1; - a = a->parent; - a_size--; - } - if (b_size != b->size()) b->resize(b_size); - } - - // removes the first n elements of a. 
- const Entry *RemovePrefix(const Entry *a, size_t n) { - if (n == 0) return a; - std::vector a_vec; - ConvertToVector(a, &a_vec); - assert(a_vec.size() >= n); - const Entry *ans = NULL; - for (size_t i = n; i < a_vec.size(); i++) ans = Successor(ans, a_vec[i]); - return ans; - } - - // Returns true if a is a prefix of b. If a is prefix of b, - // time taken is |b| - |a|. Else, time taken is |b|. - bool IsPrefixOf(const Entry *a, const Entry *b) const { - if (a == NULL) return true; // empty string prefix of all. - if (a == b) return true; - if (b == NULL) return false; - return IsPrefixOf(a, b->parent); - } - - inline size_t Size(const Entry *entry) const { - size_t ans = 0; - while (entry != NULL) { - ans++; - entry = entry->parent; - } - return ans; - } - - void ConvertToVector(const Entry *entry, std::vector *out) const { - size_t length = Size(entry); - out->resize(length); - if (entry != NULL) { - typename std::vector::reverse_iterator iter = out->rbegin(); - while (entry != NULL) { - *iter = entry->i; - entry = entry->parent; - ++iter; - } - } - } - - const Entry *ConvertFromVector(const std::vector &vec) { - const Entry *e = NULL; - for (size_t i = 0; i < vec.size(); i++) e = Successor(e, vec[i]); - return e; - } - - LatticeStringRepository() { new_entry_ = new Entry; } - - void Destroy() { - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) - delete *iter; - SetType tmp; - tmp.swap(set_); - if (new_entry_) { - delete new_entry_; - new_entry_ = NULL; - } - } - - // Rebuild will rebuild this object, guaranteeing only - // to preserve the Entry values that are in the vector pointed - // to (this list does not have to be unique). The point of - // this is to save memory. - void Rebuild(const std::vector &to_keep) { - SetType tmp_set; - for (typename std::vector::const_iterator iter = - to_keep.begin(); - iter != to_keep.end(); ++iter) - RebuildHelper(*iter, &tmp_set); - // Now delete all elems not in tmp_set. - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) { - if (tmp_set.count(*iter) == 0) - delete (*iter); // delete the Entry; not needed. - } - set_.swap(tmp_set); - } - - ~LatticeStringRepository() { Destroy(); } - int32 MemSize() const { - return set_.size() * sizeof(Entry) * 2; // this is a lower bound - // on the size this structure might take. - } - - private: - class EntryKey { // Hash function object. - public: - inline size_t operator()(const Entry *entry) const { - size_t prime = 49109; - return static_cast(entry->i) + - prime * reinterpret_cast(entry->parent); - } - }; - class EntryEqual { - public: - inline bool operator()(const Entry *e1, const Entry *e2) const { - return (*e1 == *e2); - } - }; - typedef std::unordered_set SetType; - - void RebuildHelper(const Entry *to_add, SetType *tmp_set) { - while (true) { - if (to_add == NULL) return; - typename SetType::iterator iter = tmp_set->find(to_add); - if (iter == tmp_set->end()) { // not in tmp_set. - tmp_set->insert(to_add); - to_add = to_add->parent; // and loop. - } else { - return; - } - } - } - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeStringRepository); - Entry *new_entry_; // We always have a pre-allocated Entry ready to use, - // to avoid unnecessary news and deletes. - SetType set_; -}; - -// class LatticeDeterminizer is templated on the same types that -// CompactLatticeWeight is templated on: the base weight (Weight), typically -// LatticeWeightTpl etc. but could also be e.g. 
TropicalWeight, and the -// IntType, typically int32, used for the output symbols in the compact -// representation of strings [note: the output symbols would usually be -// p.d.f. id's in the anticipated use of this code] It has a special requirement -// on the Weight type: that there should be a Compare function on the weights -// such that Compare(w1, w2) returns -1 if w1 < w2, 0 if w1 == w2, and +1 if w1 -// > w2. This requires that there be a total order on the weights. - -template -class LatticeDeterminizer { - public: - // Output to Gallic acceptor (so the strings go on weights, and there is a 1-1 - // correspondence between our states and the states in ofst. If destroy == - // true, release memory as we go (but we cannot output again). - - typedef CompactLatticeWeightTpl CompactWeight; - typedef ArcTpl - CompactArc; // arc in compact, acceptor form of lattice - typedef ArcTpl Arc; // arc in non-compact version of lattice - - // Output to standard FST with CompactWeightTpl as its weight type - // (the weight stores the original output-symbol strings). If destroy == - // true, release memory as we go (but we cannot output again). - void Output(MutableFst *ofst, bool destroy = true) { - assert(determinized_); - typedef typename Arc::StateId StateId; - StateId nStates = static_cast(output_arcs_.size()); - if (destroy) FreeMostMemory(); - ofst->DeleteStates(); - ofst->SetStart(kNoStateId); - if (nStates == 0) { - return; - } - for (StateId s = 0; s < nStates; s++) { - OutputStateId news = ofst->AddState(); - assert(news == s); - } - ofst->SetStart(0); - // now process transitions. - for (StateId this_state = 0; this_state < nStates; this_state++) { - std::vector &this_vec(output_arcs_[this_state]); - typename std::vector::const_iterator iter = this_vec.begin(), - end = this_vec.end(); - - for (; iter != end; ++iter) { - const TempArc &temp_arc(*iter); - CompactArc new_arc; - std::vector is not treated as epsilon, create a common end state for - // all transitions accepting the , since they do not back off. This small - // optimization saves about 2% states in an average grammar. - if (sub_eps_ == 0) { - eos_state_ = fst_->AddState(); - fst_->SetFinal(eos_state_, 0); - } -} - -template -void ArpaLmCompilerImpl::ConsumeNGram(const NGram& ngram, - bool is_highest) { - // Generally, we do the following. Suppose we are adding an n-gram "A B - // C". Then find the node for "A B", add a new node for "A B C", and connect - // them with the arc accepting "C" with the specified weight. Also, add a - // backoff arc from the new "A B C" node to its backoff state "B C". - // - // Two notable exceptions are the highest order n-grams, and final n-grams. - // - // When adding a highest order n-gram (e. g., our "A B C" is in a 3-gram LM), - // the following optimization is performed. There is no point adding a node - // for "A B C" with a "C" arc from "A B", since there will be no other - // arcs ingoing to this node, and an epsilon backoff arc into the backoff - // model "B C", with the weight of \bar{1}. To save a node, create an arc - // accepting "C" directly from "A B" to "B C". This saves as many nodes - // as there are the highest order n-grams, which is typically about half - // the size of a large 3-gram model. - // - // Indeed, this does not apply to n-grams ending in EOS, since they do not - // back off. These are special, as they do not have a back-off state, and - // the node for "(..anything..) " is always final. 
These are handled - // in one of the two possible ways, If symbols and are being - // replaced by epsilons, neither node nor arc is created, and the logprob - // of the n-gram is applied to its source node as final weight. If and - // are preserved, then a special final node for is allocated and - // used as the destination of the "" acceptor arc. - HistKey heads(ngram.words.begin(), ngram.words.end() - 1); - typename HistoryMap::iterator source_it = history_.find(heads); - if (source_it == history_.end()) { - // There was no "A B", therefore the probability of "A B C" is zero. - // Print a warning and discard current n-gram. - if (parent_->ShouldWarn()) - KALDI_WARN << parent_->LineReference() - << " skipped: no parent (n-1)-gram exists"; - return; - } - - StateId source = source_it->second; - StateId dest; - Symbol sym = ngram.words.back(); - float weight = -ngram.logprob; - if (sym == sub_eps_ || sym == 0) { - KALDI_ERR << " or disambiguation symbol " << sym - << "found in the ARPA file. "; - } - if (sym == eos_symbol_) { - if (sub_eps_ == 0) { - // Keep as a real symbol when not substituting. - dest = eos_state_; - } else { - // Treat as if it was epsilon: mark source final, with the weight - // of the n-gram. - fst_->SetFinal(source, weight); - return; - } - } else { - // For the highest order n-gram, this may find an existing state, for - // non-highest, will create one (unless there are duplicate n-grams - // in the grammar, which cannot be reliably detected if highest order, - // so we better do not do that at all). - dest = AddStateWithBackoff( - HistKey(ngram.words.begin() + (is_highest ? 1 : 0), ngram.words.end()), - -ngram.backoff); - } - - if (sym == bos_symbol_) { - weight = 0; // Accepting is always free. - if (sub_eps_ == 0) { - // is as a real symbol, only accepted in the start state. - source = fst_->AddState(); - fst_->SetStart(source); - } else { - // The new state for unigram history *is* the start state. - fst_->SetStart(dest); - return; - } - } - - // Add arc from source to dest, whichever way it was found. - fst_->AddArc(source, fst::StdArc(sym, sym, weight, dest)); - return; -} - -// Find or create a new state for n-gram defined by key, and ensure it has a -// backoff transition. The key is either the current n-gram for all but -// highest orders, or the tails of the n-gram for the highest order. The -// latter arises from the chain-collapsing optimization described above. -template -StateId ArpaLmCompilerImpl::AddStateWithBackoff(HistKey key, - float backoff) { - typename HistoryMap::iterator dest_it = history_.find(key); - if (dest_it != history_.end()) { - // Found an existing state in the history map. Invariant: if the state in - // the map, then its backoff arc is in the FST. We are done. - return dest_it->second; - } - // Otherwise create a new state and its backoff arc, and register in the map. - StateId dest = fst_->AddState(); - history_[key] = dest; - CreateBackoff(key.Tails(), dest, backoff); - return dest; -} - -// Create a backoff arc for a state. Key is a backoff destination that may or -// may not exist. When the destination is not found, naturally fall back to -// the lower order model, and all the way down until one is found (since the -// 0-gram model is always present, the search is guaranteed to terminate). 
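As a concrete illustration of the construction described in the comments above (the highest-order n-gram collapsed onto its back-off state, plus an epsilon/#0 back-off arc), a single hypothetical 3-gram entry "A B C" would yield something like the fragment below. All state ids, label ids and costs here are invented for illustration; the real compiler derives them from `history_` and the ARPA log-probabilities.

```cpp
#include "fst/fstlib.h"

int main() {
  fst::StdVectorFst g;
  // Hypothetical word id for "C" and costs (-logprob / -backoff from the ARPA file).
  const fst::StdArc::Label kC = 3;
  const float kCostABC = 1.7f, kBackoffBC = 0.4f;

  fst::StdArc::StateId ab = g.AddState();  // history state for "A B"
  fst::StdArc::StateId bc = g.AddState();  // back-off state for "B C"
  fst::StdArc::StateId b  = g.AddState();  // lower-order history "B"

  // Highest-order optimization: accept "C" directly from "A B" into "B C";
  // no dedicated "A B C" state is created.
  g.AddArc(ab, fst::StdArc(kC, kC, kCostABC, bc));
  // Back-off arc (epsilon, or #0 on the input side when --disambig-symbol is used).
  g.AddArc(bc, fst::StdArc(0, 0, kBackoffBC, b));
  return 0;
}
```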
-template -inline void ArpaLmCompilerImpl::CreateBackoff(HistKey key, - StateId state, - float weight) { - typename HistoryMap::iterator dest_it = history_.find(key); - while (dest_it == history_.end()) { - key = key.Tails(); - dest_it = history_.find(key); - } - - // The arc should transduce either or #0 to , depending on the - // epsilon substitution mode. This is the only case when input and output - // label may differ. - fst_->AddArc(state, fst::StdArc(sub_eps_, 0, weight, dest_it->second)); -} - -ArpaLmCompiler::~ArpaLmCompiler() { - if (impl_ != NULL) delete impl_; -} - -void ArpaLmCompiler::HeaderAvailable() { - KALDI_ASSERT(impl_ == NULL); - // Use optimized implementation if the grammar is 4-gram or less, and the - // maximum attained symbol id will fit into the optimized range. - int64 max_symbol = 0; - if (Symbols() != NULL) max_symbol = Symbols()->AvailableKey() - 1; - // If augmenting the symbol table, assume the worst case when all words in - // the model being read are novel. - if (Options().oov_handling == ArpaParseOptions::kAddToSymbols) - max_symbol += NgramCounts()[0]; - - if (NgramCounts().size() <= 4 && max_symbol < OptimizedHistKey::kMaxData) { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - } else { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - KALDI_LOG << "Reverting to slower state tracking because model is large: " - << NgramCounts().size() << "-gram with symbols up to " - << max_symbol; - } -} - -void ArpaLmCompiler::ConsumeNGram(const NGram& ngram) { - // is invalid in tails, in heads of an n-gram. - for (int i = 0; i < ngram.words.size(); ++i) { - if ((i > 0 && ngram.words[i] == Options().bos_symbol) || - (i + 1 < ngram.words.size() && - ngram.words[i] == Options().eos_symbol)) { - if (ShouldWarn()) - KALDI_WARN << LineReference() - << " skipped: n-gram has invalid BOS/EOS placement"; - return; - } - } - - bool is_highest = ngram.words.size() == NgramCounts().size(); - impl_->ConsumeNGram(ngram, is_highest); -} - -void ArpaLmCompiler::RemoveRedundantStates() { - fst::StdArc::Label backoff_symbol = sub_eps_; - if (backoff_symbol == 0) { - // The method of removing redundant states implemented in this function - // leads to slow determinization of L o G when people use the older style of - // usage of arpa2fst where the --disambig-symbol option was not specified. - // The issue seems to be that it creates a non-deterministic FST, while G is - // supposed to be deterministic. By 'return'ing below, we just disable this - // method if people were using an older script. This method isn't really - // that consequential anyway, and people will move to the newer-style - // scripts (see current utils/format_lm.sh), so this isn't much of a - // problem. - return; - } - - fst::StdArc::StateId num_states = fst_.NumStates(); - - // replace the #0 symbols on the input of arcs out of redundant states (states - // that are not final and have only a backoff arc leaving them), with . 
- for (fst::StdArc::StateId state = 0; state < num_states; state++) { - if (fst_.NumArcs(state) == 1 && - fst_.Final(state) == fst::TropicalWeight::Zero()) { - fst::MutableArcIterator iter(&fst_, state); - fst::StdArc arc = iter.Value(); - if (arc.ilabel == backoff_symbol) { - arc.ilabel = 0; - iter.SetValue(arc); - } - } - } - - // we could call fst::RemoveEps, and it would have the same effect in normal - // cases, where backoff_symbol != 0 and there are no epsilons in unexpected - // places, but RemoveEpsLocal is a bit safer in case something weird is going - // on; it guarantees not to blow up the FST. - fst::RemoveEpsLocal(&fst_); - KALDI_LOG << "Reduced num-states from " << num_states << " to " - << fst_.NumStates(); -} - -void ArpaLmCompiler::Check() const { - if (fst_.Start() == fst::kNoStateId) { - KALDI_ERR << "Arpa file did not contain the beginning-of-sentence symbol " - << Symbols()->Find(Options().bos_symbol) << "."; - } -} - -void ArpaLmCompiler::ReadComplete() { - fst_.SetInputSymbols(Symbols()); - fst_.SetOutputSymbols(Symbols()); - RemoveRedundantStates(); - Check(); -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/lm/arpa-lm-compiler.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/lm/arpa-lm-compiler.h deleted file mode 100644 index 069c71bd0e6f5acf0b9521ec1ef46796eb31fe4d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/lm/arpa-lm-compiler.h +++ /dev/null @@ -1,63 +0,0 @@ -// lm/arpa-lm-compiler.h - -// Copyright 2009-2011 Gilles Boulianne -// Copyright 2016 Smart Action LLC (kkm) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_LM_ARPA_LM_COMPILER_H_ -#define KALDI_LM_ARPA_LM_COMPILER_H_ - -#include - -#include "lm/arpa-file-parser.h" - -namespace kaldi { - -class ArpaLmCompilerImplInterface; - -class ArpaLmCompiler : public ArpaFileParser { - public: - ArpaLmCompiler(const ArpaParseOptions& options, int sub_eps, - fst::SymbolTable* symbols) - : ArpaFileParser(options, symbols), sub_eps_(sub_eps), impl_(NULL) {} - ~ArpaLmCompiler(); - - const fst::StdVectorFst& Fst() const { return fst_; } - fst::StdVectorFst* MutableFst() { return &fst_; } - - protected: - // ArpaFileParser overrides. - virtual void HeaderAvailable(); - virtual void ConsumeNGram(const NGram& ngram); - virtual void ReadComplete(); - - private: - // this function removes states that only have a backoff arc coming - // out of them. - void RemoveRedundantStates(); - void Check() const; - - int sub_eps_; - ArpaLmCompilerImplInterface* impl_; // Owned. 
- fst::StdVectorFst fst_; - template - friend class ArpaLmCompilerImpl; -}; - -} // namespace kaldi - -#endif // KALDI_LM_ARPA_LM_COMPILER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/lmbin/arpa2fst.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/lmbin/arpa2fst.cc deleted file mode 100644 index 881a45c5b37810247ea38dae56237f59b5554a9c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/lmbin/arpa2fst.cc +++ /dev/null @@ -1,145 +0,0 @@ -// bin/arpa2fst.cc -// -// Copyright 2009-2011 Gilles Boulianne. -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABILITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "lm/arpa-lm-compiler.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; // NOLINT - try { - const char *usage = - "Convert an ARPA format language model into an FST\n" - "Usage: arpa2fst [opts] \n" - " e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table=" - "data/lang/words.txt lm/input.arpa G.fst\n\n" - "Note: When called without switches, the output G.fst will contain\n" - "an embedded symbol table. This is compatible with the way a previous\n" - "version of arpa2fst worked.\n"; - - ParseOptions po(usage); - - ArpaParseOptions options; - options.Register(&po); - - // Option flags. - std::string bos_symbol = ""; - std::string eos_symbol = ""; - std::string disambig_symbol; - std::string read_syms_filename; - std::string write_syms_filename; - bool keep_symbols = false; - bool ilabel_sort = true; - - po.Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol"); - po.Register("eos-symbol", &eos_symbol, "End of sentence symbol"); - po.Register("disambig-symbol", &disambig_symbol, - "Disambiguator. If provided (e. g. #0), used on input side of " - "backoff links, and and are replaced with epsilons"); - po.Register("read-symbol-table", &read_syms_filename, - "Use existing symbol table"); - po.Register("write-symbol-table", &write_syms_filename, - "Write generated symbol table to a file"); - po.Register("keep-symbols", &keep_symbols, - "Store symbol table with FST. Symbols always saved to FST if " - "symbol tables are neither read or written (otherwise symbols " - "would be lost entirely)"); - po.Register("ilabel-sort", &ilabel_sort, "Ilabel-sort the output FST"); - - po.Read(argc, argv); - - if (po.NumArgs() != 1 && po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - std::string arpa_rxfilename = po.GetArg(1), - fst_wxfilename = po.GetOptArg(2); - - int64 disambig_symbol_id = 0; - - fst::SymbolTable *symbols; - if (!read_syms_filename.empty()) { - // Use existing symbols. Required symbols must be in the table. 
- kaldi::Input kisym(read_syms_filename); - symbols = fst::SymbolTable::ReadText( - kisym.Stream(), PrintableWxfilename(read_syms_filename)); - if (symbols == NULL) - KALDI_ERR << "Could not read symbol table from file " - << read_syms_filename; - - options.oov_handling = ArpaParseOptions::kSkipNGram; - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->Find(disambig_symbol); - if (disambig_symbol_id == -1) // fst::kNoSymbol - KALDI_ERR << "Symbol table " << read_syms_filename - << " has no symbol for " << disambig_symbol; - } - } else { - // Create a new symbol table and populate it from ARPA file. - symbols = new fst::SymbolTable(PrintableWxfilename(fst_wxfilename)); - options.oov_handling = ArpaParseOptions::kAddToSymbols; - symbols->AddSymbol("", 0); - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->AddSymbol(disambig_symbol); - } - } - - // Add or use existing BOS and EOS. - options.bos_symbol = symbols->AddSymbol(bos_symbol); - options.eos_symbol = symbols->AddSymbol(eos_symbol); - - // If producing new (not reading existing) symbols and not saving them, - // need to keep symbols with FST, otherwise they would be lost. - if (read_syms_filename.empty() && write_syms_filename.empty()) - keep_symbols = true; - - // Actually compile LM. - KALDI_ASSERT(symbols != NULL); - ArpaLmCompiler lm_compiler(options, disambig_symbol_id, symbols); - { - Input ki(arpa_rxfilename); - lm_compiler.Read(ki.Stream()); - } - - // Sort the FST in-place if requested by options. - if (ilabel_sort) { - fst::ArcSort(lm_compiler.MutableFst(), fst::StdILabelCompare()); - } - - // Write symbols if requested. - if (!write_syms_filename.empty()) { - kaldi::Output kosym(write_syms_filename, false); - symbols->WriteText(kosym.Stream()); - } - - // Write LM FST. - bool write_binary = true, write_header = false; - kaldi::Output kofst(fst_wxfilename, write_binary, write_header); - fst::FstWriteOptions wopts(PrintableWxfilename(fst_wxfilename)); - wopts.write_isymbols = wopts.write_osymbols = keep_symbols; - lm_compiler.Fst().Write(kofst.Stream(), wopts); - - delete symbols; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/basic-filebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/basic-filebuf.h deleted file mode 100644 index 22ec891064d5955c8b1d255e0d34781a9f505a38..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/basic-filebuf.h +++ /dev/null @@ -1,952 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// This is a modified version of the std::basic_filebuf from libc++ -// Copyright 20XX LLVM -// (http://libcxx.llvm.org/). -// It allows one to create basic_filebuf from an existing FILE* handle or file -// descriptor. -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source License licenses. See LICENSE.TXT for details (included at the -// bottom). 
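The point of this modified filebuf, per the header comment above, is that it can adopt an already-open `FILE*` (or file descriptor). A minimal usage sketch, assuming the header's path within this tree and a hypothetical input file; note that `close()` syncs and then `fclose`s the adopted handle, as in the implementation below.

```cpp
#include <cstdio>
#include <istream>
#include <string>
#include "util/basic-filebuf.h"

int main() {
  FILE *fp = std::fopen("data.txt", "rb");  // hypothetical file
  if (fp == nullptr) return 1;

  kaldi::basic_filebuf<char> buf;
  buf.open(fp, std::ios_base::in | std::ios_base::binary);  // adopt the handle

  std::istream is(&buf);  // read it through an ordinary istream
  std::string line;
  while (std::getline(is, line)) {
    // ... process line ...
  }
  buf.close();  // flushes and fcloses the adopted FILE*
  return 0;
}
```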
-/////////////////////////////////////////////////////////////////////////////// -#ifndef KALDI_UTIL_BASIC_FILEBUF_H_ -#define KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include - -/////////////////////////////////////////////////////////////////////////////// -namespace kaldi { -/////////////////////////////////////////////////////////////////////////////// -template > -class basic_filebuf : public std::basic_streambuf { - public: - typedef CharT char_type; - typedef Traits traits_type; - typedef typename traits_type::int_type int_type; - typedef typename traits_type::pos_type pos_type; - typedef typename traits_type::off_type off_type; - typedef typename traits_type::state_type state_type; - - basic_filebuf(); - basic_filebuf(basic_filebuf&& rhs); - virtual ~basic_filebuf(); - - basic_filebuf& operator=(basic_filebuf&& rhs); - void swap(basic_filebuf& rhs); - - bool is_open() const; - basic_filebuf* open(const char* s, std::ios_base::openmode mode); - basic_filebuf* open(const std::string& s, std::ios_base::openmode mode); - basic_filebuf* open(int fd, std::ios_base::openmode mode); - basic_filebuf* open(FILE* f, std::ios_base::openmode mode); - basic_filebuf* close(); - - FILE* file() { return this->_M_file; } - int fd() { return fileno(this->_M_file); } - - protected: - int_type underflow() override; - int_type pbackfail(int_type c = traits_type::eof()) override; - int_type overflow(int_type c = traits_type::eof()) override; - std::basic_streambuf* setbuf( - char_type* s, std::streamsize n) override; - pos_type seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - pos_type seekpos(pos_type sp, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - int sync() override; - void imbue(const std::locale& loc) override; - - protected: - char* _M_extbuf; - const char* _M_extbufnext; - const char* _M_extbufend; - char _M_extbuf_min[8]; - size_t _M_ebs; - char_type* _M_intbuf; - size_t _M_ibs; - FILE* _M_file; - const std::codecvt* _M_cv; - state_type _M_st; - state_type _M_st_last; - std::ios_base::openmode _M_om; - std::ios_base::openmode _M_cm; - bool _M_owns_eb; - bool _M_owns_ib; - bool _M_always_noconv; - - const char* _M_get_mode(std::ios_base::openmode mode); - bool _M_read_mode(); - void _M_write_mode(); -}; - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf() - : _M_extbuf(nullptr), - _M_extbufnext(nullptr), - _M_extbufend(nullptr), - _M_ebs(0), - _M_intbuf(nullptr), - _M_ibs(0), - _M_file(nullptr), - _M_cv(nullptr), - _M_st(), - _M_st_last(), - _M_om(std::ios_base::openmode(0)), - _M_cm(std::ios_base::openmode(0)), - _M_owns_eb(false), - _M_owns_ib(false), - _M_always_noconv(false) { - if (std::has_facet >( - this->getloc())) { - _M_cv = &std::use_facet >( - this->getloc()); - _M_always_noconv = _M_cv->always_noconv(); - } - setbuf(0, 4096); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf(basic_filebuf&& rhs) - : std::basic_streambuf(rhs) { - if (rhs._M_extbuf == rhs._M_extbuf_min) { - _M_extbuf = _M_extbuf_min; - _M_extbufnext = _M_extbuf + (rhs._M_extbufnext - rhs._M_extbuf); - _M_extbufend = _M_extbuf + (rhs._M_extbufend - rhs._M_extbuf); - } else { - _M_extbuf = rhs._M_extbuf; - _M_extbufnext = 
rhs._M_extbufnext; - _M_extbufend = rhs._M_extbufend; - } - _M_ebs = rhs._M_ebs; - _M_intbuf = rhs._M_intbuf; - _M_ibs = rhs._M_ibs; - _M_file = rhs._M_file; - _M_cv = rhs._M_cv; - _M_st = rhs._M_st; - _M_st_last = rhs._M_st_last; - _M_om = rhs._M_om; - _M_cm = rhs._M_cm; - _M_owns_eb = rhs._M_owns_eb; - _M_owns_ib = rhs._M_owns_ib; - _M_always_noconv = rhs._M_always_noconv; - if (rhs.pbase()) { - if (rhs.pbase() == rhs._M_intbuf) - this->setp(_M_intbuf, _M_intbuf + (rhs.epptr() - rhs.pbase())); - else - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + - (rhs.epptr() - rhs.pbase())); - this->pbump(rhs.pptr() - rhs.pbase()); - } else if (rhs.eback()) { - if (rhs.eback() == rhs._M_intbuf) - this->setg(_M_intbuf, _M_intbuf + (rhs.gptr() - rhs.eback()), - _M_intbuf + (rhs.egptr() - rhs.eback())); - else - this->setg( - reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (rhs.gptr() - rhs.eback()), - reinterpret_cast(_M_extbuf) + - (rhs.egptr() - rhs.eback())); - } - rhs._M_extbuf = nullptr; - rhs._M_extbufnext = nullptr; - rhs._M_extbufend = nullptr; - rhs._M_ebs = 0; - rhs._M_intbuf = nullptr; - rhs._M_ibs = 0; - rhs._M_file = nullptr; - rhs._M_st = state_type(); - rhs._M_st_last = state_type(); - rhs._M_om = std::ios_base::openmode(0); - rhs._M_cm = std::ios_base::openmode(0); - rhs._M_owns_eb = false; - rhs._M_owns_ib = false; - rhs.setg(0, 0, 0); - rhs.setp(0, 0); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf& basic_filebuf::operator=( - basic_filebuf&& rhs) { - close(); - swap(rhs); - return *this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::~basic_filebuf() { - // try - // { - // close(); - // } - // catch (...) 
- // { - // } - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::swap(basic_filebuf& rhs) { - std::basic_streambuf::swap(rhs); - if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - std::swap(_M_extbuf, rhs._M_extbuf); - std::swap(_M_extbufnext, rhs._M_extbufnext); - std::swap(_M_extbufend, rhs._M_extbufend); - } else { - ptrdiff_t ln = _M_extbufnext - _M_extbuf; - ptrdiff_t le = _M_extbufend - _M_extbuf; - ptrdiff_t rn = rhs._M_extbufnext - rhs._M_extbuf; - ptrdiff_t re = rhs._M_extbufend - rhs._M_extbuf; - if (_M_extbuf == _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - _M_extbuf = rhs._M_extbuf; - rhs._M_extbuf = rhs._M_extbuf_min; - } else if (_M_extbuf != _M_extbuf_min && - rhs._M_extbuf == rhs._M_extbuf_min) { - rhs._M_extbuf = _M_extbuf; - _M_extbuf = _M_extbuf_min; - } - _M_extbufnext = _M_extbuf + rn; - _M_extbufend = _M_extbuf + re; - rhs._M_extbufnext = rhs._M_extbuf + ln; - rhs._M_extbufend = rhs._M_extbuf + le; - } - std::swap(_M_ebs, rhs._M_ebs); - std::swap(_M_intbuf, rhs._M_intbuf); - std::swap(_M_ibs, rhs._M_ibs); - std::swap(_M_file, rhs._M_file); - std::swap(_M_cv, rhs._M_cv); - std::swap(_M_st, rhs._M_st); - std::swap(_M_st_last, rhs._M_st_last); - std::swap(_M_om, rhs._M_om); - std::swap(_M_cm, rhs._M_cm); - std::swap(_M_owns_eb, rhs._M_owns_eb); - std::swap(_M_owns_ib, rhs._M_owns_ib); - std::swap(_M_always_noconv, rhs._M_always_noconv); - if (this->eback() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->gptr() - this->eback(); - ptrdiff_t e = this->egptr() - this->eback(); - this->setg(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + n, - reinterpret_cast(_M_extbuf_min) + e); - } else if (this->pbase() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->pptr() - this->pbase(); - ptrdiff_t e = this->epptr() - this->pbase(); - this->setp(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + e); - this->pbump(n); - } - if (rhs.eback() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.gptr() - rhs.eback(); - ptrdiff_t e = rhs.egptr() - rhs.eback(); - rhs.setg(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + n, - reinterpret_cast(rhs._M_extbuf_min) + e); - } else if (rhs.pbase() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.pptr() - rhs.pbase(); - ptrdiff_t e = rhs.epptr() - rhs.pbase(); - rhs.setp(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + e); - rhs.pbump(n); - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline void swap(basic_filebuf& x, - basic_filebuf& y) { - x.swap(y); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline bool basic_filebuf::is_open() const { - return _M_file != nullptr; -} - -/////////////////////////////////////////////////////////////////////////////// -template -const char* basic_filebuf::_M_get_mode( - std::ios_base::openmode mode) { - switch ((mode & ~std::ios_base::ate) | 0) { - case std::ios_base::out: - case std::ios_base::out | std::ios_base::trunc: - return "w"; - case std::ios_base::out | std::ios_base::app: - case std::ios_base::app: - return "a"; - break; - case std::ios_base::in: - return "r"; - case std::ios_base::in | std::ios_base::out: - return "r+"; - case std::ios_base::in | std::ios_base::out | 
std::ios_base::trunc: - return "w+"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app: - case std::ios_base::in | std::ios_base::app: - return "a+"; - case std::ios_base::out | std::ios_base::binary: - case std::ios_base::out | std::ios_base::trunc | std::ios_base::binary: - return "wb"; - case std::ios_base::out | std::ios_base::app | std::ios_base::binary: - case std::ios_base::app | std::ios_base::binary: - return "ab"; - case std::ios_base::in | std::ios_base::binary: - return "rb"; - case std::ios_base::in | std::ios_base::out | std::ios_base::binary: - return "r+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::trunc | - std::ios_base::binary: - return "w+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app | - std::ios_base::binary: - case std::ios_base::in | std::ios_base::app | std::ios_base::binary: - return "a+b"; - default: - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - const char* s, std::ios_base::openmode mode) { - basic_filebuf* rt = nullptr; - if (_M_file == nullptr) { - const char* md = _M_get_mode(mode); - if (md) { - _M_file = fopen(s, md); - if (_M_file) { - rt = this; - _M_om = mode; - if (mode & std::ios_base::ate) { - if (fseek(_M_file, 0, SEEK_END)) { - fclose(_M_file); - _M_file = nullptr; - rt = nullptr; - } - } - } - } - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf* basic_filebuf::open( - const std::string& s, std::ios_base::openmode mode) { - return open(s.c_str(), mode); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - int fd, std::ios_base::openmode mode) { - const char* md = this->_M_get_mode(mode); - if (md) { - this->_M_file = fdopen(fd, md); - this->_M_om = mode; - return this; - } else { - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - FILE* f, std::ios_base::openmode mode) { - this->_M_file = f; - this->_M_om = mode; - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::close() { - basic_filebuf* rt = nullptr; - if (_M_file) { - rt = this; - std::unique_ptr h(_M_file, fclose); - if (sync()) rt = nullptr; - if (fclose(h.release()) == 0) - _M_file = nullptr; - else - rt = nullptr; - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::underflow() { - if (_M_file == nullptr) return traits_type::eof(); - bool initial = _M_read_mode(); - char_type buf; - if (this->gptr() == nullptr) this->setg(&buf, &buf + 1, &buf + 1); - const size_t unget_sz = - initial ? 
0 : std::min((this->egptr() - this->eback()) / 2, 4); - int_type c = traits_type::eof(); - if (this->gptr() == this->egptr()) { - memmove(this->eback(), this->egptr() - unget_sz, - unget_sz * sizeof(char_type)); - if (_M_always_noconv) { - size_t nmemb = - static_cast(this->egptr() - this->eback() - unget_sz); - nmemb = fread(this->eback() + unget_sz, 1, nmemb, _M_file); - if (nmemb != 0) { - this->setg(this->eback(), this->eback() + unget_sz, - this->eback() + unget_sz + nmemb); - c = traits_type::to_int_type(*this->gptr()); - } - } else { - memmove(_M_extbuf, _M_extbufnext, _M_extbufend - _M_extbufnext); - _M_extbufnext = _M_extbuf + (_M_extbufend - _M_extbufnext); - _M_extbufend = - _M_extbuf + - (_M_extbuf == _M_extbuf_min ? sizeof(_M_extbuf_min) : _M_ebs); - size_t nmemb = - std::min(static_cast(_M_ibs - unget_sz), - static_cast(_M_extbufend - _M_extbufnext)); - std::codecvt_base::result r; - _M_st_last = _M_st; - size_t nr = - fread(reinterpret_cast(const_cast(_M_extbufnext)), - 1, nmemb, _M_file); - if (nr != 0) { - if (!_M_cv) throw std::bad_cast(); - _M_extbufend = _M_extbufnext + nr; - char_type* inext; - r = _M_cv->in(_M_st, _M_extbuf, _M_extbufend, _M_extbufnext, - this->eback() + unget_sz, this->eback() + _M_ibs, inext); - if (r == std::codecvt_base::noconv) { - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf), - const_cast(_M_extbufend)); - c = traits_type::to_int_type(*this->gptr()); - } else if (inext != this->eback() + unget_sz) { - this->setg(this->eback(), this->eback() + unget_sz, inext); - c = traits_type::to_int_type(*this->gptr()); - } - } - } - } else { - c = traits_type::to_int_type(*this->gptr()); - } - if (this->eback() == &buf) this->setg(0, 0, 0); - return c; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::pbackfail(int_type c) { - if (_M_file && this->eback() < this->gptr()) { - if (traits_type::eq_int_type(c, traits_type::eof())) { - this->gbump(-1); - return traits_type::not_eof(c); - } - if ((_M_om & std::ios_base::out) || - traits_type::eq(traits_type::to_char_type(c), this->gptr()[-1])) { - this->gbump(-1); - *this->gptr() = traits_type::to_char_type(c); - return c; - } - } - return traits_type::eof(); -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::overflow(int_type c) { - if (_M_file == nullptr) return traits_type::eof(); - _M_write_mode(); - char_type buf; - char_type* pb_save = this->pbase(); - char_type* epb_save = this->epptr(); - if (!traits_type::eq_int_type(c, traits_type::eof())) { - if (this->pptr() == nullptr) this->setp(&buf, &buf + 1); - *this->pptr() = traits_type::to_char_type(c); - this->pbump(1); - } - if (this->pptr() != this->pbase()) { - if (_M_always_noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), sizeof(char_type), nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else { - char* extbe = _M_extbuf; - std::codecvt_base::result r; - do { - if (!_M_cv) throw std::bad_cast(); - const char_type* e; - r = _M_cv->out(_M_st, this->pbase(), this->pptr(), e, _M_extbuf, - _M_extbuf + _M_ebs, extbe); - if (e == this->pbase()) return traits_type::eof(); - if (r == std::codecvt_base::noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else if (r == std::codecvt_base::ok 
|| - r == std::codecvt_base::partial) { - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - if (r == std::codecvt_base::partial) { - this->setp(const_cast(e), this->pptr()); - this->pbump(this->epptr() - this->pbase()); - } - } else { - return traits_type::eof(); - } - } while (r == std::codecvt_base::partial); - } - this->setp(pb_save, epb_save); - } - return traits_type::not_eof(c); -} - -/////////////////////////////////////////////////////////////////////////////// -template -std::basic_streambuf* basic_filebuf::setbuf( - char_type* s, std::streamsize n) { - this->setg(0, 0, 0); - this->setp(0, 0); - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; - _M_ebs = n; - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv && s) { - _M_extbuf = reinterpret_cast(s); - _M_owns_eb = false; - } else { - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } - } else { - _M_extbuf = _M_extbuf_min; - _M_ebs = sizeof(_M_extbuf_min); - _M_owns_eb = false; - } - if (!_M_always_noconv) { - _M_ibs = std::max(n, sizeof(_M_extbuf_min)); - if (s && _M_ibs >= sizeof(_M_extbuf_min)) { - _M_intbuf = s; - _M_owns_ib = false; - } else { - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } else { - _M_ibs = 0; - _M_intbuf = 0; - _M_owns_ib = false; - } - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode) { - if (!_M_cv) throw std::bad_cast(); - int width = _M_cv->encoding(); - if (_M_file == nullptr || (width <= 0 && off != 0) || sync()) - return pos_type(off_type(-1)); - // width > 0 || off == 0 - int whence; - switch (way) { - case std::ios_base::beg: - whence = SEEK_SET; - break; - case std::ios_base::cur: - whence = SEEK_CUR; - break; - case std::ios_base::end: - whence = SEEK_END; - break; - default: - return pos_type(off_type(-1)); - } -#if _WIN32 - if (fseek(_M_file, width > 0 ? width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftell(_M_file); -#else - if (fseeko(_M_file, width > 0 ? 
width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftello(_M_file); -#endif - r.state(_M_st); - return r; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekpos(pos_type sp, std::ios_base::openmode) { - if (_M_file == nullptr || sync()) return pos_type(off_type(-1)); -#if _WIN32 - if (fseek(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#else - if (fseeko(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#endif - _M_st = sp.state(); - return sp; -} - -/////////////////////////////////////////////////////////////////////////////// -template -int basic_filebuf::sync() { - if (_M_file == nullptr) return 0; - if (!_M_cv) throw std::bad_cast(); - if (_M_cm & std::ios_base::out) { - if (this->pptr() != this->pbase()) - if (overflow() == traits_type::eof()) return -1; - std::codecvt_base::result r; - do { - char* extbe; - r = _M_cv->unshift(_M_st, _M_extbuf, _M_extbuf + _M_ebs, extbe); - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) return -1; - } while (r == std::codecvt_base::partial); - if (r == std::codecvt_base::error) return -1; - if (fflush(_M_file)) return -1; - } else if (_M_cm & std::ios_base::in) { - off_type c; - state_type state = _M_st_last; - bool update_st = false; - if (_M_always_noconv) { - c = this->egptr() - this->gptr(); - } else { - int width = _M_cv->encoding(); - c = _M_extbufend - _M_extbufnext; - if (width > 0) { - c += width * (this->egptr() - this->gptr()); - } else { - if (this->gptr() != this->egptr()) { - const int off = _M_cv->length(state, _M_extbuf, _M_extbufnext, - this->gptr() - this->eback()); - c += _M_extbufnext - _M_extbuf - off; - update_st = true; - } - } - } -#if _WIN32 - if (fseek(_M_file_, -c, SEEK_CUR)) return -1; -#else - if (fseeko(_M_file, -c, SEEK_CUR)) return -1; -#endif - if (update_st) _M_st = state; - _M_extbufnext = _M_extbufend = _M_extbuf; - this->setg(0, 0, 0); - _M_cm = std::ios_base::openmode(0); - } - return 0; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::imbue(const std::locale& loc) { - sync(); - _M_cv = &std::use_facet >(loc); - bool old_anc = _M_always_noconv; - _M_always_noconv = _M_cv->always_noconv(); - if (old_anc != _M_always_noconv) { - this->setg(0, 0, 0); - this->setp(0, 0); - // invariant, char_type is char, else we couldn't get here - // need to dump _M_intbuf - if (_M_always_noconv) { - if (_M_owns_eb) delete[] _M_extbuf; - _M_owns_eb = _M_owns_ib; - _M_ebs = _M_ibs; - _M_extbuf = reinterpret_cast(_M_intbuf); - _M_ibs = 0; - _M_intbuf = nullptr; - _M_owns_ib = false; - } else { // need to obtain an _M_intbuf. 
- // If _M_extbuf is user-supplied, use it, else new _M_intbuf - if (!_M_owns_eb && _M_extbuf != _M_extbuf_min) { - _M_ibs = _M_ebs; - _M_intbuf = reinterpret_cast(_M_extbuf); - _M_owns_ib = false; - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } else { - _M_ibs = _M_ebs; - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -bool basic_filebuf::_M_read_mode() { - if (!(_M_cm & std::ios_base::in)) { - this->setp(0, 0); - if (_M_always_noconv) - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + _M_ebs, - reinterpret_cast(_M_extbuf) + _M_ebs); - else - this->setg(_M_intbuf, _M_intbuf + _M_ibs, _M_intbuf + _M_ibs); - _M_cm = std::ios_base::in; - return true; - } - return false; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::_M_write_mode() { - if (!(_M_cm & std::ios_base::out)) { - this->setg(0, 0, 0); - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv) - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (_M_ebs - 1)); - else - this->setp(_M_intbuf, _M_intbuf + (_M_ibs - 1)); - } else { - this->setp(0, 0); - } - _M_cm = std::ios_base::out; - } -} - -/////////////////////////////////////////////////////////////////////////////// -} // namespace kaldi - -/////////////////////////////////////////////////////////////////////////////// -#endif // KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// - -/* - * ============================================================================ - * libc++ License - * ============================================================================ - * - * The libc++ library is dual licensed under both the University of Illinois - * "BSD-Like" license and the MIT license. As a user of this code you may - * choose to use it under either license. As a contributor, you agree to allow - * your code to be used under both. - * - * Full text of the relevant licenses is included below. - * - * ============================================================================ - * - * University of Illinois/NCSA - * Open Source License - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * All rights reserved. - * - * Developed by: - * - * LLVM Team - * - * University of Illinois at Urbana-Champaign - * - * http://llvm.org - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * with the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimers in the - * documentation and/or other materials provided with the distribution. 
- * - * * Neither the names of the LLVM Team, University of Illinois at - * Urbana-Champaign, nor the names of its contributors may be used to - * endorse or promote products derived from this Software without specific - * prior written permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH - * THE SOFTWARE. - * - * ============================================================================== - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * ============================================================================== - * - * This file is a partial list of people who have contributed to the LLVM/libc++ - * project. If you have contributed a patch or made some other contribution to - * LLVM/libc++, please submit a patch to this file to add yourself, and it will - * be done! - * - * The list is sorted by surname and formatted to allow easy grepping and - * beautification by scripts. The fields are: name (N), email (E), web-address - * (W), PGP key ID and fingerprint (P), description (D), and snail-mail address - * (S). - * - * N: Saleem Abdulrasool - * E: compnerd@compnerd.org - * D: Minor patches and Linux fixes. - * - * N: Dimitry Andric - * E: dimitry@andric.com - * D: Visibility fixes, minor FreeBSD portability patches. - * - * N: Holger Arnold - * E: holgerar@gmail.com - * D: Minor fix. - * - * N: Ruben Van Boxem - * E: vanboxem dot ruben at gmail dot com - * D: Initial Windows patches. - * - * N: David Chisnall - * E: theraven at theravensnest dot org - * D: FreeBSD and Solaris ports, libcxxrt support, some atomics work. - * - * N: Marshall Clow - * E: mclow.lists@gmail.com - * E: marshall@idio.com - * D: C++14 support, patches and bug fixes. - * - * N: Bill Fisher - * E: william.w.fisher@gmail.com - * D: Regex bug fixes. - * - * N: Matthew Dempsky - * E: matthew@dempsky.org - * D: Minor patches and bug fixes. - * - * N: Google Inc. 
- * D: Copyright owner and contributor of the CityHash algorithm - * - * N: Howard Hinnant - * E: hhinnant@apple.com - * D: Architect and primary author of libc++ - * - * N: Hyeon-bin Jeong - * E: tuhertz@gmail.com - * D: Minor patches and bug fixes. - * - * N: Argyrios Kyrtzidis - * E: kyrtzidis@apple.com - * D: Bug fixes. - * - * N: Bruce Mitchener, Jr. - * E: bruce.mitchener@gmail.com - * D: Emscripten-related changes. - * - * N: Michel Morin - * E: mimomorin@gmail.com - * D: Minor patches to is_convertible. - * - * N: Andrew Morrow - * E: andrew.c.morrow@gmail.com - * D: Minor patches and Linux fixes. - * - * N: Arvid Picciani - * E: aep at exys dot org - * D: Minor patches and musl port. - * - * N: Bjorn Reese - * E: breese@users.sourceforge.net - * D: Initial regex prototype - * - * N: Nico Rieck - * E: nico.rieck@gmail.com - * D: Windows fixes - * - * N: Jonathan Sauer - * D: Minor patches, mostly related to constexpr - * - * N: Craig Silverstein - * E: csilvers@google.com - * D: Implemented Cityhash as the string hash function on 64-bit machines - * - * N: Richard Smith - * D: Minor patches. - * - * N: Joerg Sonnenberger - * E: joerg@NetBSD.org - * D: NetBSD port. - * - * N: Stephan Tolksdorf - * E: st@quanttec.com - * D: Minor fix - * - * N: Michael van der Westhuizen - * E: r1mikey at gmail dot com - * - * N: Klaas de Vries - * E: klaas at klaasgaaf dot nl - * D: Minor bug fix. - * - * N: Zhang Xiongpang - * E: zhangxiongpang@gmail.com - * D: Minor patches and bug fixes. - * - * N: Xing Xue - * E: xingxue@ca.ibm.com - * D: AIX port - * - * N: Zhihao Yuan - * E: lichray@gmail.com - * D: Standard compatibility fixes. - * - * N: Jeffrey Yasskin - * E: jyasskin@gmail.com - * E: jyasskin@google.com - * D: Linux fixes. - */ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/const-integer-set-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/const-integer-set-inl.h deleted file mode 100644 index b93846148a3e4595774507f638396ce13393ac0e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/const-integer-set-inl.h +++ /dev/null @@ -1,87 +0,0 @@ -// util/const-integer-set-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_INL_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_INL_H_ - -// Do not include this file directly. It is included by const-integer-set.h - -namespace kaldi { - -template -void ConstIntegerSet::InitInternal() { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - quick_set_.clear(); // just in case we previously had data. 
- if (slow_set_.size() == 0) { - lowest_member_ = (I)1; - highest_member_ = (I)0; - contiguous_ = false; - quick_ = false; - } else { - lowest_member_ = slow_set_.front(); - highest_member_ = slow_set_.back(); - size_t range = highest_member_ + 1 - lowest_member_; - if (range == slow_set_.size()) { - contiguous_ = true; - quick_ = false; - } else { - contiguous_ = false; - // If it would be more compact to store as bool - if (range < slow_set_.size() * 8 * sizeof(I)) { - // (assuming 1 bit per element)... - quick_set_.resize(range, false); - for (size_t i = 0; i < slow_set_.size(); i++) - quick_set_[slow_set_[i] - lowest_member_] = true; - quick_ = true; - } else { - quick_ = false; - } - } - } -} - -template -int ConstIntegerSet::count(I i) const { - if (i < lowest_member_ || i > highest_member_) { - return 0; - } else { - if (contiguous_) return true; - if (quick_) { - return (quick_set_[i - lowest_member_] ? 1 : 0); - } else { - bool ans = std::binary_search(slow_set_.begin(), slow_set_.end(), i); - return (ans ? 1 : 0); - } - } -} - -template -void ConstIntegerSet::Write(std::ostream &os, bool binary) const { - WriteIntegerVector(os, binary, slow_set_); -} - -template -void ConstIntegerSet::Read(std::istream &is, bool binary) { - ReadIntegerVector(is, binary, &slow_set_); - InitInternal(); -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_CONST_INTEGER_SET_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/const-integer-set.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/const-integer-set.h deleted file mode 100644 index 809a56a7c83804bfaa4badb5e28059734bfcad1e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/const-integer-set.h +++ /dev/null @@ -1,96 +0,0 @@ -// util/const-integer-set.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_H_ -#include -#include -#include -#include -#include -#include "util/stl-utils.h" - -/* ConstIntegerSet is a way to efficiently test whether something is in a - supplied set of integers. It can be initialized from a vector or set, but - never changed after that. It either uses a sorted vector or an array of - bool, depending on the input. It behaves like a const version of an STL set, - with only a subset of the functionality, except all the member functions are - upper-case. - - Note that we could get rid of the member slow_set_, but we'd have to - do more work to implement an iterator type. This would save memory. 
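(Editor's aside, not part of the patch: the comment above describes the removed `kaldi::ConstIntegerSet` container. Below is a minimal usage sketch based only on the interface declared in the deleted `util/const-integer-set.h`; the integer values are illustrative.)

```cpp
#include <vector>
#include "util/const-integer-set.h"  // the header being removed by this patch

void Example() {
  // Input need not be sorted or unique; the constructor sorts and de-duplicates,
  // then chooses between a sorted vector and a bit-array representation.
  std::vector<int> phones = {7, 1, 3, 104, 3};
  kaldi::ConstIntegerSet<int> silence_set(phones);

  if (silence_set.count(3)) {  // count() returns 1 or 0
    // 3 is a member
  }
  for (kaldi::ConstIntegerSet<int>::iterator it = silence_set.begin();
       it != silence_set.end(); ++it) {
    // iterates the sorted, de-duplicated members: 1, 3, 7, 104
  }
}
```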
-*/ - -namespace kaldi { - -template -class ConstIntegerSet { - public: - ConstIntegerSet() : lowest_member_(1), highest_member_(0) {} - - void Init(const std::vector &input) { - slow_set_ = input; - SortAndUniq(&slow_set_); - InitInternal(); - } - - void Init(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - - explicit ConstIntegerSet(const std::vector &input) : slow_set_(input) { - SortAndUniq(&slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const ConstIntegerSet &other) - : slow_set_(other.slow_set_) { - InitInternal(); - } - - int count(I i) const; // returns 1 or 0. - - typedef typename std::vector::const_iterator iterator; - iterator begin() const { return slow_set_.begin(); } - iterator end() const { return slow_set_.end(); } - size_t size() const { return slow_set_.size(); } - bool empty() const { return slow_set_.empty(); } - - void Write(std::ostream &os, bool binary) const; - void Read(std::istream &is, bool binary); - - private: - I lowest_member_; - I highest_member_; - bool contiguous_; - bool quick_; - std::vector quick_set_; - std::vector slow_set_; - void InitInternal(); -}; - -} // end namespace kaldi - -#include "util/const-integer-set-inl.h" - -#endif // KALDI_UTIL_CONST_INTEGER_SET_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/hash-list-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/hash-list-inl.h deleted file mode 100644 index 063fa7131ec618f0aae9dc30f4edd26c9dcce7fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/hash-list-inl.h +++ /dev/null @@ -1,193 +0,0 @@ -// util/hash-list-inl.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_INL_H_ -#define KALDI_UTIL_HASH_LIST_INL_H_ - -// Do not include this file directly. It is included by fast-hash.h - -namespace kaldi { - -template -HashList::HashList() { - list_head_ = NULL; - bucket_list_tail_ = static_cast(-1); // invalid. - hash_size_ = 0; - freed_head_ = NULL; -} - -template -void HashList::SetSize(size_t size) { - hash_size_ = size; - KALDI_ASSERT(list_head_ == NULL && - bucket_list_tail_ == - static_cast(-1)); // make sure empty. - if (size > buckets_.size()) buckets_.resize(size, HashBucket(0, NULL)); -} - -template -typename HashList::Elem *HashList::Clear() { - // Clears the hashtable and gives ownership of the currently contained list - // to the user. 
- for (size_t cur_bucket = bucket_list_tail_; - cur_bucket != static_cast(-1); - cur_bucket = buckets_[cur_bucket].prev_bucket) { - buckets_[cur_bucket].last_elem = NULL; // this is how we indicate "empty". - } - bucket_list_tail_ = static_cast(-1); - Elem *ans = list_head_; - list_head_ = NULL; - return ans; -} - -template -const typename HashList::Elem *HashList::GetList() const { - return list_head_; -} - -template -inline void HashList::Delete(Elem *e) { - e->tail = freed_head_; - freed_head_ = e; -} - -template -inline typename HashList::Elem *HashList::Find(I key) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - if (bucket.last_elem == NULL) { - return NULL; // empty bucket. - } else { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - return NULL; // Not found. - } -} - -template -inline typename HashList::Elem *HashList::New() { - if (freed_head_) { - Elem *ans = freed_head_; - freed_head_ = freed_head_->tail; - return ans; - } else { - Elem *tmp = new Elem[allocate_block_size_]; - for (size_t i = 0; i + 1 < allocate_block_size_; i++) - tmp[i].tail = tmp + i + 1; - tmp[allocate_block_size_ - 1].tail = NULL; - freed_head_ = tmp; - allocated_.push_back(tmp); - return this->New(); - } -} - -template -HashList::~HashList() { - // First test whether we had any memory leak within the - // HashList, i.e. things for which the user did not call Delete(). - size_t num_in_list = 0, num_allocated = 0; - for (Elem *e = freed_head_; e != NULL; e = e->tail) num_in_list++; - for (size_t i = 0; i < allocated_.size(); i++) { - num_allocated += allocate_block_size_; - delete[] allocated_[i]; - } - if (num_in_list != num_allocated) { - KALDI_WARN << "Possible memory leak: " << num_in_list - << " != " << num_allocated - << ": you might have forgotten to call Delete on " - << "some Elems"; - } -} - -template -inline typename HashList::Elem *HashList::Insert(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - // Check the element is existing or not. - if (bucket.last_elem != NULL) { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - } - - // This is a new element. Insert it. - Elem *elem = New(); - elem->key = key; - elem->val = val; - if (bucket.last_elem == NULL) { // Unoccupied bucket. Insert at - // head of bucket list (which is tail of regular list, they go in - // opposite directions). - if (bucket_list_tail_ == static_cast(-1)) { - // list was empty so this is the first elem. - KALDI_ASSERT(list_head_ == NULL); - list_head_ = elem; - } else { - // link in to the chain of Elems - buckets_[bucket_list_tail_].last_elem->tail = elem; - } - elem->tail = NULL; - bucket.last_elem = elem; - bucket.prev_bucket = bucket_list_tail_; - bucket_list_tail_ = index; - } else { - // Already-occupied bucket. Insert at tail of list of elements within - // the bucket. 
- elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - } - return elem; -} - -template -void HashList::InsertMore(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - Elem *elem = New(); - elem->key = key; - elem->val = val; - - KALDI_ASSERT(bucket.last_elem != NULL); // assume one element is already here - if (bucket.last_elem->key == key) { // standard behavior: add as last element - elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - return; - } - Elem *e = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail); - // find place to insert in linked list - while (e != bucket.last_elem->tail && e->key != key) e = e->tail; - KALDI_ASSERT(e->key == key); // not found? - should not happen - elem->tail = e->tail; - e->tail = elem; -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_HASH_LIST_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/hash-list.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/hash-list.h deleted file mode 100644 index 31cc9bdc4870773475f8c5139539e320746bf5fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/hash-list.h +++ /dev/null @@ -1,146 +0,0 @@ -// util/hash-list.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_H_ -#define KALDI_UTIL_HASH_LIST_H_ - -#include -#include -#include -#include -#include - -#include "base/kaldi-error.h" - -/* This header provides utilities for a structure that's used in a decoder (but - is quite generic in nature so we implement and test it separately). - Basically it's a singly-linked list, but implemented in such a way that we - can quickly search for elements in the list. We give it a slightly richer - interface than just a hash and a list. The idea is that we want to separate - the hash part and the list part: basically, in the decoder, we want to have a - single hash for the current frame and the next frame, because by the time we - need to access the hash for the next frame we no longer need the hash for the - previous frame. So we have an operation that clears the hash but leaves the - list structure intact. We also control memory management inside this object, - to avoid repeated new's/deletes. - - See hash-list-test.cc for an example of how to use this object. -*/ - -namespace kaldi { - -template -class HashList { - public: - struct Elem { - I key; - T val; - Elem *tail; - }; - - /// Constructor takes no arguments. - /// Call SetSize to inform it of the likely size. 
- HashList(); - - /// Clears the hash and gives the head of the current list to the user; - /// ownership is transferred to the user (the user must call Delete() - /// for each element in the list, at his/her leisure). - Elem *Clear(); - - /// Gives the head of the current list to the user. Ownership retained in the - /// class. Caution: in December 2013 the return type was changed to const - /// Elem* and this function was made const. You may need to change some types - /// of local Elem* variables to const if this produces compilation errors. - const Elem *GetList() const; - - /// Think of this like delete(). It is to be called for each Elem in turn - /// after you "obtained ownership" by doing Clear(). This is not the opposite - /// of. Insert, it is the opposite of New. It's really a memory operation. - inline void Delete(Elem *e); - - /// This should probably not be needed to be called directly by the user. - /// Think of it as opposite - /// to Delete(); - inline Elem *New(); - - /// Find tries to find this element in the current list using the hashtable. - /// It returns NULL if not present. The Elem it returns is not owned by the - /// user, it is part of the internal list owned by this object, but the user - /// is free to modify the "val" element. - inline Elem *Find(I key); - - /// Insert inserts a new element into the hashtable/stored list. - /// Because element keys in a hashtable are unique, this operation checks - /// whether each inserted element has a key equivalent to the one of an - /// element already in the hashtable. If so, the element is not inserted, - /// returning an pointer to this existing element. - inline Elem *Insert(I key, T val); - - /// Insert inserts another element with same key into the hashtable/ - /// stored list. - /// By calling this, the user asserts that one element with that key is - /// already present. - /// We insert it that way, that all elements with the same key - /// follow each other. - /// Find() will return the first one of the elements with the same key. - inline void InsertMore(I key, T val); - - /// SetSize tells the object how many hash buckets to allocate (should - /// typically be at least twice the number of objects we expect to go in the - /// structure, for fastest performance). It must be called while the hash - /// is empty (e.g. after Clear() or after initializing the object, but before - /// adding anything to the hash. - void SetSize(size_t sz); - - /// Returns current number of hash buckets. - inline size_t Size() { return hash_size_; } - - ~HashList(); - - private: - struct HashBucket { - size_t prev_bucket; // index to next bucket (-1 if list tail). Note: - // list of buckets goes in opposite direction to list of Elems. - Elem *last_elem; // pointer to last element in this bucket (NULL if empty) - inline HashBucket(size_t i, Elem *e) : prev_bucket(i), last_elem(e) {} - }; - - Elem *list_head_; // head of currently stored list. - size_t bucket_list_tail_; // tail of list of active hash buckets. - - size_t hash_size_; // number of hash buckets. - - std::vector buckets_; - - Elem *freed_head_; // head of list of currently freed elements. [ready for - // allocation] - - std::vector allocated_; // list of allocated blocks. - - static const size_t allocate_block_size_ = 1024; // Number of Elements to - // allocate in one block. Must be largish so storing allocated_ doesn't - // become a problem. 
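(Editor's aside, not part of the patch: the doc comments above spell out the intended Insert/Find/Clear/Delete life cycle of the removed `kaldi::HashList`. A minimal sketch of that pattern follows; `StateId` and `Token` are illustrative stand-ins for the decoder types that normally instantiate it.)

```cpp
#include <algorithm>
#include "util/hash-list.h"  // the header being removed by this patch

typedef int StateId;
typedef double Token;
typedef kaldi::HashList<StateId, Token> TokenHash;

void ProcessFrame(TokenHash *toks) {
  toks->SetSize(2048);               // only legal while the hash is empty
  toks->Insert(42, 1.5);             // new key: allocates (or reuses) an Elem
  if (TokenHash::Elem *e = toks->Find(42))
    e->val = std::min(e->val, 0.7);  // vals may be updated in place

  // End of frame: Clear() hands the whole list back to the caller, who must
  // Delete() every Elem (this just returns the storage to a freelist).
  for (TokenHash::Elem *head = toks->Clear(); head != nullptr; ) {
    TokenHash::Elem *next = head->tail;
    toks->Delete(head);
    head = next;
  }
}
```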
-}; - -} // end namespace kaldi - -#include "util/hash-list-inl.h" - -#endif // KALDI_UTIL_HASH_LIST_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-io-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-io-inl.h deleted file mode 100644 index 8b0c92131c4af2113eb33da6f3cfa9dc4dee83e1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-io-inl.h +++ /dev/null @@ -1,40 +0,0 @@ -// util/kaldi-io-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_KALDI_IO_INL_H_ -#define KALDI_UTIL_KALDI_IO_INL_H_ - -#include - -namespace kaldi { - -bool Input::Open(const std::string &rxfilename, bool *binary) { - return OpenInternal(rxfilename, true, binary); -} - -bool Input::OpenTextMode(const std::string &rxfilename) { - return OpenInternal(rxfilename, false, NULL); -} - -bool Input::IsOpen() { return impl_ != NULL; } - -bool Output::IsOpen() { return impl_ != NULL; } - -} // end namespace kaldi. - -#endif // KALDI_UTIL_KALDI_IO_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-io.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-io.cc deleted file mode 100644 index 5f8ec4870138df32f6aca9c12383cf3885411741..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-io.cc +++ /dev/null @@ -1,898 +0,0 @@ -// util/kaldi-io.cc - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
-#include "util/kaldi-io.h" - -#include -#include -#include - -#include - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" -#include "util/kaldi-pipebuf.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -#ifdef KALDI_CYGWIN_COMPAT -#include "util/kaldi-cygwin-io-inl.h" -#define MapOsPath(x) MapCygwinPath(x) -#else // KALDI_CYGWIN_COMPAT -#define MapOsPath(x) x -#endif // KALDI_CYGWIN_COMPAT - -#if defined(_MSC_VER) -static FILE *popen(const char *command, const char *mode) { -#ifdef KALDI_CYGWIN_COMPAT - return kaldi::CygwinCompatPopen(command, mode); -#else // KALDI_CYGWIN_COMPAT - return _popen(command, mode); -#endif // KALDI_CYGWIN_COMPAT -} -#endif // _MSC_VER - -namespace kaldi { - -#ifndef _MSC_VER // on VS, we don't need this type. -// could replace basic_pipebuf with stdio_filebuf on some platforms. -// Would mean we could use less of our own code. -typedef basic_pipebuf PipebufType; -#endif -} // namespace kaldi - -namespace kaldi { - -std::string PrintableRxfilename(const std::string &rxfilename) { - if (rxfilename == "" || rxfilename == "-") { - return "standard input"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return rxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(rxfilename); - } -} - -std::string PrintableWxfilename(const std::string &wxfilename) { - if (wxfilename == "" || wxfilename == "-") { - return "standard output"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return wxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(wxfilename); - } -} - -OutputType ClassifyWxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardOutput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardOutput; - } else if (first_char == '|') { - return kPipeOutput; // An output pipe like "|blah". - } else if (isspace(first_char) || isspace(last_char) || last_char == '|') { - return kNoOutput; // Leading or trailing space: can't interpret this. - // Final '|' would represent an input pipe, not an - // output pipe. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoOutput; - } else if (isdigit(last_char)) { - // This could be a file, but we have to see if it's an offset into a file - // (like foo.ark:4314328), which is not allowed for writing (but is - // allowed for reaching). 
This eliminates some things which would be - // valid UNIX filenames but are not allowed by Kaldi. (Even if we allowed - // such filenames for writing, we woudln't be able to correctly read them). - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') return kNoOutput; - // else it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but we - // check for internal '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify wxfilename with pipe symbol in the" - " wrong place (pipe without | at the beginning?): " - << filename; - return kNoOutput; - } - return kFileOutput; // It matched no other pattern: assume it's a filename. -} - -InputType ClassifyRxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardInput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardInput; - } else if (first_char == '|') { - return kNoInput; // An output pipe like "|blah": not - // valid for input. - } else if (last_char == '|') { - return kPipeInput; - } else if (isspace(first_char) || isspace(last_char)) { - return kNoInput; // We don't allow leading or trailing space in a filename. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoInput; - } else if (isdigit(last_char)) { - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') - return kOffsetFileInput; // Filename is like - // some_file:12345 - // otherwise it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but - // we check for '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified in this case. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify rxfilename with pipe symbol in the" - " wrong place (pipe without | at the end?): " - << filename; - return kNoInput; - } - return kFileInput; // It matched no other pattern: assume it's a filename. -} - -class OutputImplBase { - public: - // Open will open it as a file (no header), and return true - // on success. It cannot be called on an already open stream. 
- virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::ostream &Stream() = 0; - virtual bool Close() = 0; - virtual ~OutputImplBase() {} -}; - -class FileOutputImpl : public OutputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (os_.is_open()) - KALDI_ERR << "FileOutputImpl::Open(), " - << "open called on already open file."; - filename_ = filename; - os_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out); - return os_.is_open(); - } - - virtual std::ostream &Stream() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return os_; - } - - virtual bool Close() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - os_.close(); - return !(os_.fail()); - } - virtual ~FileOutputImpl() { - if (os_.is_open()) { - os_.close(); - if (os_.fail()) KALDI_ERR << "Error closing output file " << filename_; - } - } - - private: - std::string filename_; - std::ofstream os_; -}; - -class StandardOutputImpl : public OutputImplBase { - public: - StandardOutputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardOutputImpl::Open(), " - "open called on already open file."; -#ifdef _MSC_VER - _setmode(_fileno(stdout), binary ? _O_BINARY : _O_TEXT); -#endif - is_open_ = std::cout.good(); - return is_open_; - } - - virtual std::ostream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return std::cout; - } - - virtual bool Close() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Close(), file is not open."; - is_open_ = false; - std::cout << std::flush; - return !(std::cout.fail()); - } - virtual ~StandardOutputImpl() { - if (is_open_) { - std::cout << std::flush; - if (std::cout.fail()) KALDI_ERR << "Error writing to standard output"; - } - } - - private: - bool is_open_; -}; - -class PipeOutputImpl : public OutputImplBase { - public: - PipeOutputImpl() : f_(NULL), os_(NULL) {} - - virtual bool Open(const std::string &wxfilename, bool binary) { - filename_ = wxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(wxfilename.length() != 0 && wxfilename[0] == '|'); // should - // start with '|' - std::string cmd_name(wxfilename, 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "wb" : "w")); -#else - f_ = popen(cmd_name.c_str(), "w"); -#endif - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for writing, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't make the - // destructor try to close the stream when - // we're done. - (binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - os_ = new std::ostream(fb_); -#else - os_ = new std::ofstream(f_); -#endif - return os_->good(); - } - } - - virtual std::ostream &Stream() { - if (os_ == NULL) - KALDI_ERR << "PipeOutputImpl::Stream()," - " object not initialized."; - // I believe this error can only arise from coding error. 
- return *os_; - } - - virtual bool Close() { - if (os_ == NULL) KALDI_ERR << "PipeOutputImpl::Close(), file is not open."; - bool ok = true; - os_->flush(); - if (os_->fail()) ok = false; - delete os_; - os_ = NULL; - int status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return ok; - } - virtual ~PipeOutputImpl() { - if (os_) { - if (!Close()) - KALDI_ERR << "Error writing to pipe " << PrintableWxfilename(filename_); - } - } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::ostream *os_; -}; - -class InputImplBase { - public: - // Open will open it as a file, and return true on success. - // May be called twice only for kOffsetFileInput (otherwise, - // if called twice, we just create a new Input object, to avoid - // having to deal with the extra hassle of reopening with the - // same object. - // Note that we will to call Open with true (binary) for - // for text-mode Kaldi files; the only actual text-mode input - // is for non-Kaldi files. - virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::istream &Stream() = 0; - virtual int32 Close() = 0; // We only need to check failure in the case of - // kPipeInput. - // on close for input streams. - virtual InputType MyType() = 0; // Because if it's kOffsetFileInput, we may - // call Open twice - // (has efficiency benefits). - - virtual ~InputImplBase() {} -}; - -class FileInputImpl : public InputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (is_.is_open()) - KALDI_ERR << "FileInputImpl::Open(), " - << "open called on already open file."; - is_.open( - MapOsPath(filename).c_str(), - binary ? std::ios_base::in | std::ios_base::binary : std::ios_base::in); - return is_.is_open(); - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kFileInput; } - - virtual ~FileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::ifstream is_; -}; - -class StandardInputImpl : public InputImplBase { - public: - StandardInputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardInputImpl::Open(), " - "open called on already open file."; - is_open_ = true; -#ifdef _MSC_VER - _setmode(_fileno(stdin), binary ? _O_BINARY : _O_TEXT); -#endif - return true; // Don't check good() because would be false if - // eof, which may be valid input. - } - - virtual std::istream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. 
- return std::cin; - } - - virtual InputType MyType() { return kStandardInput; } - - virtual int32 Close() { - if (!is_open_) KALDI_ERR << "StandardInputImpl::Close(), file is not open."; - is_open_ = false; - return 0; - } - virtual ~StandardInputImpl() {} - - private: - bool is_open_; -}; - -class PipeInputImpl : public InputImplBase { - public: - PipeInputImpl() : f_(NULL), is_(NULL) {} - - virtual bool Open(const std::string &rxfilename, bool binary) { - filename_ = rxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(rxfilename.length() != 0 && - rxfilename[rxfilename.length() - 1] == - '|'); // should end with '|' - std::string cmd_name(rxfilename, 0, rxfilename.length() - 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "rb" : "r")); -#else - f_ = popen(cmd_name.c_str(), "r"); -#endif - - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for reading, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't lead the - // destructor to close the stream. - (binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - is_ = new std::istream(fb_); -#else - is_ = new std::ifstream(f_); -#endif - if (is_->fail() || is_->bad()) return false; - if (is_->eof()) { - KALDI_WARN << "Pipe opened with command " - << PrintableRxfilename(rxfilename) << " is empty."; - // don't return false: empty may be valid. - } - return true; - } - } - - virtual std::istream &Stream() { - if (is_ == NULL) - KALDI_ERR << "PipeInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return *is_; - } - - virtual int32 Close() { - if (is_ == NULL) KALDI_ERR << "PipeInputImpl::Close(), file is not open."; - delete is_; - is_ = NULL; - int32 status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return status; - } - virtual ~PipeInputImpl() { - if (is_) Close(); - } - virtual InputType MyType() { return kPipeInput; } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::istream *is_; -}; - -/* -#else - -// Just have an empty implementation of the pipe input that crashes if -// called. -class PipeInputImpl: public InputImplBase { - public: - PipeInputImpl() { KALDI_ASSERT(0 && "Pipe input not yet supported on this - platform."); } - virtual bool Open(const std::string, bool) { return 0; } - virtual std::istream &Stream() const { return NULL; } - virtual void Close() {} - virtual InputType MyType() { return kPipeInput; } -}; - -#endif -*/ - -class OffsetFileInputImpl : public InputImplBase { - // This class is a bit more complicated than the - - public: - // splits a filename like /my/file:123 into /my/file and the - // number 123. Crashes if not this format. - static void SplitFilename(const std::string &rxfilename, - std::string *filename, size_t *offset) { - size_t pos = rxfilename.find_last_of(':'); - KALDI_ASSERT(pos != std::string::npos); // would indicate error in calling - // code, as the filename is supposed to be of the correct form at this - // point. 
- *filename = std::string(rxfilename, 0, pos); - std::string number(rxfilename, pos + 1); - bool ans = ConvertStringToInteger(number, offset); - if (!ans) - KALDI_ERR << "Cannot get offset from filename " << rxfilename - << " (possibly you compiled in 32-bit and have a >32-bit" - << " byte offset into a file; you'll have to compile 64-bit."; - } - - bool Seek(size_t offset) { - size_t cur_pos = is_.tellg(); - if (cur_pos == offset) { - return true; - } else if (cur_pos < offset && cur_pos + 100 > offset) { - // We're close enough that it may be faster to just - // read that data, rather than seek. - for (size_t i = cur_pos; i < offset; i++) is_.get(); - return (is_.tellg() == std::streampos(offset)); - } - // Try to actually seek. - is_.seekg(offset, std::ios_base::beg); - if (is_.fail()) { // failbit or badbit is set [error happened] - is_.close(); - return false; // failure. - } else { - is_.clear(); // Clear any failure bits (e.g. eof). - return true; // success. - } - } - - // This Open routine is unusual in that it is designed to work even - // if it was already open. This for efficiency when seeking multiple - // times. - virtual bool Open(const std::string &rxfilename, bool binary) { - if (is_.is_open()) { - // We are opening when we have an already-open file. - // We may have to seek within this file, or else close it and - // open a different one. - std::string tmp_filename; - size_t offset; - SplitFilename(rxfilename, &tmp_filename, &offset); - if (tmp_filename == filename_ && binary == binary_) { // Just seek - is_.clear(); // clear fail bit, etc. - return Seek(offset); - } else { - is_.close(); // don't bother checking error status of is_. - filename_ = tmp_filename; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } else { - size_t offset; - SplitFilename(rxfilename, &filename_, &offset); - binary_ = binary; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kOffsetFileInput; } - - virtual ~OffsetFileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::string filename_; // the actual filename - bool binary_; // true if was opened in binary mode. - std::ifstream is_; -}; - -Output::Output(const std::string &wxfilename, bool binary, bool write_header) - : impl_(NULL) { - if (!Open(wxfilename, binary, write_header)) { - if (impl_) { - delete impl_; - impl_ = NULL; - } - KALDI_ERR << "Error opening output stream " - << PrintableWxfilename(wxfilename); - } -} - -bool Output::Close() { - if (!impl_) { - return false; // error to call Close if not open. 
- } else { - bool ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } -} - -Output::~Output() { - if (impl_) { - bool ok = impl_->Close(); - delete impl_; - impl_ = NULL; - if (!ok) - KALDI_ERR << "Error closing output file " - << PrintableWxfilename(filename_) - << (ClassifyWxfilename(filename_) == kFileOutput - ? " (disk full?)" - : ""); - } -} - -std::ostream &Output::Stream() { // will throw if not open; else returns - // stream. - if (!impl_) KALDI_ERR << "Output::Stream() called but not open."; - return impl_->Stream(); -} - -bool Output::Open(const std::string &wxfn, bool binary, bool header) { - if (IsOpen()) { - if (!Close()) { // Throw here rather than return status, as it's an error - // about something else: if the user wanted to avoid the exception he/she - // could have called Close(). - KALDI_ERR << "Output::Open(), failed to close output stream: " - << PrintableWxfilename(filename_); - } - } - - filename_ = wxfn; - - OutputType type = ClassifyWxfilename(wxfn); - KALDI_ASSERT(impl_ == NULL); - - if (type == kFileOutput) { - impl_ = new FileOutputImpl(); - } else if (type == kStandardOutput) { - impl_ = new StandardOutputImpl(); - } else if (type == kPipeOutput) { - impl_ = new PipeOutputImpl(); - } else { // type == kNoOutput - KALDI_WARN << "Invalid output filename format " - << PrintableWxfilename(wxfn); - return false; - } - if (!impl_->Open(wxfn, binary)) { - delete impl_; - impl_ = NULL; - return false; // failed to open. - } else { // successfully opened it. - if (header) { - InitKaldiOutputStream(impl_->Stream(), binary); - bool ok = impl_->Stream().good(); // still OK? - if (!ok) { - delete impl_; - impl_ = NULL; - return false; - } - return true; - } else { - return true; - } - } -} - -Input::Input(const std::string &rxfilename, bool *binary) : impl_(NULL) { - if (!Open(rxfilename, binary)) { - KALDI_ERR << "Error opening input stream " - << PrintableRxfilename(rxfilename); - } -} - -int32 Input::Close() { - if (impl_) { - int32 ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } else { - return 0; - } -} - -bool Input::OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary) { - InputType type = ClassifyRxfilename(rxfilename); - if (IsOpen()) { - // May have to close the stream first. - if (type == kOffsetFileInput && impl_->MyType() == kOffsetFileInput) { - // We want to use the same object to Open... this is in case - // the files are the same, so we can just seek. - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always open in binary. - delete impl_; - impl_ = NULL; - return false; - } - // read the binary header, if requested. - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; - } else { - Close(); - // and fall through to code below which actually opens the file. - } - } - if (type == kFileInput) { - impl_ = new FileInputImpl(); - } else if (type == kStandardInput) { - impl_ = new StandardInputImpl(); - } else if (type == kPipeInput) { - impl_ = new PipeInputImpl(); - } else if (type == kOffsetFileInput) { - impl_ = new OffsetFileInputImpl(); - } else { // type == kNoInput - KALDI_WARN << "Invalid input filename format " - << PrintableRxfilename(rxfilename); - return false; - } - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always read in binary. 
- delete impl_; - impl_ = NULL; - return false; - } - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; -} - -Input::~Input() { - if (impl_) Close(); -} - -std::istream &Input::Stream() { - if (!IsOpen()) KALDI_ERR << "Input::Stream(), not open."; - return impl_->Stream(); -} - -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-io.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-io.h deleted file mode 100644 index 2175ca8f89ed5f3e3bade26528e924208df692c6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-io.h +++ /dev/null @@ -1,266 +0,0 @@ -// util/kaldi-io.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
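The Output and Input implementations above (and the kaldi-io.h header that follows) are driven through "extended filenames": plain paths, "-" for standard input/output, a leading or trailing '|' for pipes, and "path:offset" forms. A minimal usage sketch along the lines of the "Typical usage" comments in the header; the gzip/gunzip pipe commands, the /tmp path, and the payload text are illustrative, and it assumes the deleted util/ sources are still built and on the include path:

```cpp
// Minimal sketch: write a line through a pipe wxfilename, then read it back
// through a pipe rxfilename. Paths, commands, and payload are illustrative.
#include <iostream>
#include <string>

#include "util/kaldi-io.h"

int main() {
  {
    // "| command" is a pipe wxfilename; text mode, no Kaldi binary header.
    kaldi::Output ko("| gzip -c > /tmp/kio_demo.gz", /*binary=*/false,
                     /*write_header=*/false);
    ko.Stream() << "hello extended filenames" << '\n';
    if (!ko.Close()) std::cerr << "write failed\n";
  }
  {
    // "command |" is a pipe rxfilename.
    kaldi::Input ki("gunzip -c /tmp/kio_demo.gz |");
    std::string line;
    std::getline(ki.Stream(), line);
    std::cout << line << '\n';
    ki.Close();  // for pipes, returns the command's exit status
  }
  return 0;
}
```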
-#ifndef KALDI_UTIL_KALDI_IO_H_ -#define KALDI_UTIL_KALDI_IO_H_ - -#ifdef _MSC_VER -#include -#include -#endif -#include // For isspace. -#include -#include -#include "base/kaldi-common.h" -// #include "matrix/kaldi-matrix.h" - -namespace kaldi { - -class OutputImplBase; // Forward decl; defined in a .cc file -class InputImplBase; // Forward decl; defined in a .cc file - -/// \addtogroup io_group -/// @{ - -// The Output and Input classes handle stream-opening for "extended" filenames -// that include actual files, standard-input/standard-output, pipes, and -// offsets into actual files. They also handle reading and writing the -// binary-mode headers for Kaldi files, where applicable. The classes have -// versions of the Open routines that throw and do not throw, depending whether -// the calling code wants to catch the errors or not; there are also versions -// that write (or do not write) the Kaldi binary-mode header that says if it's -// binary mode. Generally files that contain Kaldi objects will have the header -// on, so we know upon reading them whether they have the header. So you would -// use the OpenWithHeader routines for these (or the constructor); but other -// types of objects (e.g. FSTs) would have files without a header so you would -// use OpenNoHeader. - -// We now document the types of extended filenames that we use. -// -// A "wxfilename" is an extended filename for writing. It can take three forms: -// (1) Filename: e.g. "/some/filename", "./a/b/c", "c:\Users\dpovey\My -// Documents\\boo" -// (whatever the actual file-system interprets) -// (2) Standard output: "" or "-" -// (3) A pipe: e.g. "| gzip -c > /tmp/abc.gz" -// -// -// A "rxfilename" is an extended filename for reading. It can take four forms: -// (1) An actual filename, whatever the file-system can read, e.g. "/my/file". -// (2) Standard input: "" or "-" -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) An offset into a file, e.g.: "/mnt/blah/data/1.ark:24871" -// [these are created by the Table and TableWriter classes; I may also write -// a program that creates them for arbitrary files] -// - -// Typical usage: -// ... -// bool binary; -// MyObject.Write(Output(some_filename, binary).Stream(), binary); -// -// ... more extensive example: -// { -// Output ko(some_filename, binary); -// MyObject1.Write(ko.Stream(), binary); -// MyObject2.Write(ko.Stream(), binary); -// } - -enum OutputType { kNoOutput, kFileOutput, kStandardOutput, kPipeOutput }; - -/// ClassifyWxfilename interprets filenames as follows: -/// - kNoOutput: invalid filenames (leading or trailing space, things that look -/// like wspecifiers and rspecifiers or like pipes to read from with leading -/// |. -/// - kFileOutput: Normal filenames -/// - kStandardOutput: The empty string or "-", interpreted as standard output -/// - kPipeOutput: pipes, e.g. "| gzip -c > /tmp/abc.gz" -OutputType ClassifyWxfilename(const std::string &wxfilename); - -enum InputType { - kNoInput, - kFileInput, - kStandardInput, - kOffsetFileInput, - kPipeInput -}; - -/// ClassifyRxfilenames interprets filenames for reading as follows: -/// - kNoInput: invalid filenames (leading or trailing space, things that -/// look like wspecifiers and rspecifiers or pipes to write to -/// with trailing |. -/// - kFileInput: normal filenames -/// - kStandardInput: the empty string or "-" -/// - kPipeInput: e.g. "gunzip -c /tmp/abc.gz |" -/// - kOffsetFileInput: offsets into files, e.g. 
/some/filename:12970 -InputType ClassifyRxfilename(const std::string &rxfilename); - -class Output { - public: - // The normal constructor, provided for convenience. - // Equivalent to calling with default constructor then Open() - // with these arguments. - Output(const std::string &filename, bool binary, bool write_header = true); - - Output() : impl_(NULL) {} - - /// This opens the stream, with the given mode (binary or text). It returns - /// true on success and false on failure. However, it will throw if something - /// was already open and could not be closed (to avoid this, call Close() - /// first. if write_header == true and binary == true, it writes the Kaldi - /// binary-mode header ('\0' then 'B'). You may call Open even if it is - /// already open; it will close the existing stream and reopen (however if - /// closing the old stream failed it will throw). - bool Open(const std::string &wxfilename, bool binary, bool write_header); - - inline bool IsOpen(); // return true if we have an open stream. Does not - // imply stream is good for writing. - - std::ostream &Stream(); // will throw if not open; else returns stream. - - // Close closes the stream. Calling Close is never necessary unless you - // want to avoid exceptions being thrown. There are times when calling - // Close will hurt efficiency (basically, when using offsets into files, - // and using the same Input object), - // but most of the time the user won't be doing this directly, it will - // be done in kaldi-table.{h, cc}, so you don't have to worry about it. - bool Close(); - - // This will throw if stream could not be closed (to check error status, - // call Close()). - ~Output(); - - private: - OutputImplBase *impl_; // non-NULL if open. - std::string filename_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Output); -}; - -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject.Read(ki.Stream(), binary_in); -// -// ... more extensive example: -// -// { -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject1.Read(ki.Stream(), &binary_in); -// MyObject2.Write(ki.Stream(), &binary_in); -// } -// Note that to catch errors you need to use try.. catch. -// Input communicates errors by throwing exceptions. - -// Input interprets four kinds of filenames: -// (1) Normal filenames -// (2) The empty string or "-", interpreted as standard output -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) Offsets into [real] files, e.g. "/my/filename:12049" -// The last one has no correspondence in Output. - -class Input { - public: - /// The normal constructor. Opens the stream in binary mode. - /// Equivalent to calling the default constructor followed by Open(); then, if - /// binary != NULL, it calls ReadHeader(), putting the output in "binary"; it - /// throws on error. - explicit Input(const std::string &rxfilename, bool *contents_binary = NULL); - - Input() : impl_(NULL) {} - - // Open opens the stream for reading (the mode, where relevant, is binary; use - // OpenTextMode for text-mode, we made this a separate function rather than a - // boolean argument, to avoid confusion with Kaldi's text/binary distinction, - // since reading in the file system's text mode is unusual.) If - // contents_binary != NULL, it reads the binary-mode header and puts it in the - // "binary" variable. Returns true on success. If it returns false it will - // not be open. 
You may call Open even if it is already open; it will close - // the existing stream and reopen (however if closing the old stream failed it - // will throw). - inline bool Open(const std::string &rxfilename, bool *contents_binary = NULL); - - // As Open but (if the file system has text/binary modes) opens in text mode; - // you shouldn't ever have to use this as in Kaldi we read even text files in - // binary mode (and ignore the \r). - inline bool OpenTextMode(const std::string &rxfilename); - - // Return true if currently open for reading and Stream() will - // succeed. Does not guarantee that the stream is good. - inline bool IsOpen(); - - // It is never necessary or helpful to call Close, except if - // you are concerned about to many filehandles being open. - // Close does not throw. It returns the exit code as int32 - // in the case of a pipe [kPipeInput], and always zero otherwise. - int32 Close(); - - // Returns the underlying stream. Throws if !IsOpen() - std::istream &Stream(); - - // Destructor does not throw: input streams may legitimately fail so we - // don't worry about the status when we close them. - ~Input(); - - private: - bool OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary); - InputImplBase *impl_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Input); -}; - -template -void ReadKaldiObject(const std::string &filename, C *c) { - bool binary_in; - Input ki(filename, &binary_in); - c->Read(ki.Stream(), binary_in); -} - -// Specialize the template for reading matrices, because we want to be able to -// support reading 'ranges' (row and column ranges), like foo.mat[10:20]. -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); -// -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); - -template -inline void WriteKaldiObject(const C &c, const std::string &filename, - bool binary) { - Output ko(filename, binary); - c.Write(ko.Stream(), binary); -} - -/// PrintableRxfilename turns the rxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard input". -std::string PrintableRxfilename(const std::string &rxfilename); - -/// PrintableWxfilename turns the wxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard output". -std::string PrintableWxfilename(const std::string &wxfilename); - -/// @} - -} // end namespace kaldi. - -#include "util/kaldi-io-inl.h" - -#endif // KALDI_UTIL_KALDI_IO_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-pipebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-pipebuf.h deleted file mode 100644 index bcee80ccb1a6fa8ce3195483ac144c5ff66d2f89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/kaldi-pipebuf.h +++ /dev/null @@ -1,86 +0,0 @@ -// util/kaldi-pipebuf.h - -// Copyright 2009-2011 Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -/** @file kaldi-pipebuf.h - * This is an Kaldi C++ Library header. - */ - -#ifndef KALDI_UTIL_KALDI_PIPEBUF_H_ -#define KALDI_UTIL_KALDI_PIPEBUF_H_ - -#include -#if !defined(_LIBCPP_VERSION) // libc++ -#include -#else -#include "util/basic-filebuf.h" -#endif - -namespace kaldi { -// This class provides a way to initialize a filebuf with a FILE* pointer -// directly; it will not close the file pointer when it is deleted. -// The C++ standard does not allow implementations of C++ to provide -// this constructor within basic_filebuf, which makes it hard to deal -// with pipes using completely native C++. This is a workaround - -#ifdef _MSC_VER -#elif defined(_LIBCPP_VERSION) // libc++ -template > -class basic_pipebuf : public basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : basic_filebuf() { - this->open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - } -}; // class basic_pipebuf -#else -template > -class basic_pipebuf : public std::basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : std::basic_filebuf() { - this->_M_file.sys_open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - this->_M_mode = mode; - this->_M_buf_size = BUFSIZ; - this->_M_allocate_internal_buffer(); - this->_M_reading = false; - this->_M_writing = false; - this->_M_set_buffer(-1); - } -}; // class basic_pipebuf -#endif // _MSC_VER - -} // namespace kaldi - -#endif // KALDI_UTIL_KALDI_PIPEBUF_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/parse-options.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/parse-options.cc deleted file mode 100644 index 1f2ef844d28d67ed58d2e0c9d7c7b674e8209df8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/parse-options.cc +++ /dev/null @@ -1,636 +0,0 @@ -// util/parse-options.cc - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey); -// Frantisek Skala; Arnab Ghoshal -// Copyright 2013 Tanel Alumae -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
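basic_pipebuf exists so that a FILE* returned by popen() can be wrapped in a std::istream without the buffer's destructor closing the underlying pipe; that is exactly how PipeInputImpl in the deleted kaldi-io.cc uses it. A POSIX-only sketch of the same pattern; the gunzip command and path are illustrative, and util/kaldi-pipebuf.h is assumed to be on the include path:

```cpp
// POSIX-only sketch: wrap a popen()'d FILE* in a std::istream via
// basic_pipebuf, mirroring PipeInputImpl. Command and path are illustrative.
#include <cstdio>
#include <iostream>
#include <string>

#include "util/kaldi-pipebuf.h"

int main() {
  FILE *f = popen("gunzip -c /tmp/abc.gz", "r");
  if (f == NULL) return 1;

  {
    // The constructor does not take ownership of f, so the buffer's
    // destructor leaves the pipe open for pclose() below.
    kaldi::basic_pipebuf<char> pb(f, std::ios_base::in);
    std::istream is(&pb);
    std::string line;
    while (std::getline(is, line)) std::cout << line << '\n';
  }

  return pclose(f) == 0 ? 0 : 1;
}
```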
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -namespace kaldi { - -ParseOptions::ParseOptions(const std::string &prefix, OptionsItf *other) - : print_args_(false), help_(false), usage_(""), argc_(0), argv_(NULL) { - ParseOptions *po = dynamic_cast(other); - if (po != NULL && po->other_parser_ != NULL) { - // we get here if this constructor is used twice, recursively. - other_parser_ = po->other_parser_; - } else { - other_parser_ = other; - } - if (po != NULL && po->prefix_ != "") { - prefix_ = po->prefix_ + std::string(".") + prefix; - } else { - prefix_ = prefix; - } -} - -void ParseOptions::Register(const std::string &name, bool *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, int32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, uint32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, float *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, double *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, std::string *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -// old-style, used for registering application-specific parameters -template -void ParseOptions::RegisterTmpl(const std::string &name, T *ptr, - const std::string &doc) { - if (other_parser_ == NULL) { - this->RegisterCommon(name, ptr, doc, false); - } else { - KALDI_ASSERT(prefix_ != "" && - "Cannot use empty prefix when registering with prefix."); - std::string new_name = prefix_ + '.' + name; // name becomes prefix.name - other_parser_->Register(new_name, ptr, doc); - } -} - -// does the common part of the job of registering a parameter -template -void ParseOptions::RegisterCommon(const std::string &name, T *ptr, - const std::string &doc, bool is_standard) { - KALDI_ASSERT(ptr != NULL); - std::string idx = name; - NormalizeArgName(&idx); - if (doc_map_.find(idx) != doc_map_.end()) - KALDI_WARN << "Registering option twice, ignoring second time: " << name; - this->RegisterSpecific(name, idx, ptr, doc, is_standard); -} - -// used to register standard parameters (those that are present in all of the -// applications) -template -void ParseOptions::RegisterStandard(const std::string &name, T *ptr, - const std::string &doc) { - this->RegisterCommon(name, ptr, doc, true); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, bool *b, - const std::string &doc, bool is_standard) { - bool_map_[idx] = b; - doc_map_[idx] = - DocInfo(name, doc + " (bool, default = " + ((*b) ? 
"true)" : "false)"), - is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, int32 *i, - const std::string &doc, bool is_standard) { - int_map_[idx] = i; - std::ostringstream ss; - ss << doc << " (int, default = " << *i << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, uint32 *u, - const std::string &doc, bool is_standard) { - uint_map_[idx] = u; - std::ostringstream ss; - ss << doc << " (uint, default = " << *u << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, float *f, - const std::string &doc, bool is_standard) { - float_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (float, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, double *f, - const std::string &doc, bool is_standard) { - double_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (double, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, std::string *s, - const std::string &doc, bool is_standard) { - string_map_[idx] = s; - doc_map_[idx] = - DocInfo(name, doc + " (string, default = \"" + *s + "\")", is_standard); -} -void ParseOptions::DisableOption(const std::string &name) { - if (argv_ != NULL) - KALDI_ERR << "DisableOption must not be called after calling Read()."; - if (doc_map_.erase(name) == 0) - KALDI_ERR << "Option " << name - << " was not registered so cannot be disabled: "; - bool_map_.erase(name); - int_map_.erase(name); - uint_map_.erase(name); - float_map_.erase(name); - double_map_.erase(name); - string_map_.erase(name); -} - -int ParseOptions::NumArgs() const { return positional_args_.size(); } - -std::string ParseOptions::GetArg(int i) const { - // use KALDI_ERR if code error - if (i < 1 || i > static_cast(positional_args_.size())) - KALDI_ERR << "ParseOptions::GetArg, invalid index " << i; - return positional_args_[i - 1]; -} - -// We currently do not support any other options. -enum ShellType { kBash = 0 }; - -// This can be changed in the code if it ever does need to be changed (as it's -// unlikely that one compilation of this tool-set would use both shells). -static ShellType kShellType = kBash; - -// Returns true if we need to escape a string before putting it into -// a shell (mainly thinking of bash shell, but should work for others) -// This is for the convenience of the user so command-lines that are -// printed out by ParseOptions::Read (with --print-args=true) are -// paste-able into the shell and will run. If you use a different type of -// shell, it might be necessary to change this function. -// But it's mostly a cosmetic issue as it basically affects how -// the program echoes its command-line arguments to the screen. -static bool MustBeQuoted(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - const char *c = str.c_str(); - if (*c == '\0') { - return true; // Must quote empty string - } else { - const char *ok_chars[2]; - - // These seem not to be interpreted as long as there are no other "bad" - // characters involved (e.g. 
"," would be interpreted as part of something - // like a{b,c}, but not on its own. - ok_chars[kBash] = "[]~#^_-+=:.,/"; - - // Just want to make sure that a space character doesn't get automatically - // inserted here via an automated style-checking script, like it did before. - KALDI_ASSERT(!strchr(ok_chars[kBash], ' ')); - - for (; *c != '\0'; c++) { - // For non-alphanumeric characters we have a list of characters which - // are OK. All others are forbidden (this is easier since the shell - // interprets most non-alphanumeric characters). - if (!isalnum(*c)) { - const char *d; - for (d = ok_chars[st]; *d != '\0'; d++) - if (*c == *d) break; - // If not alphanumeric or one of the "ok_chars", it must be escaped. - if (*d == '\0') return true; - } - } - return false; // The string was OK. No quoting or escaping. - } -} - -// Returns a quoted and escaped version of "str" -// which has previously been determined to need escaping. -// Our aim is to print out the command line in such a way that if it's -// pasted into a shell of ShellType "st" (only bash for now), it -// will get passed to the program in the same way. -static std::string QuoteAndEscape(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - // For now we use the following rules: - // In the normal case, we quote with single-quote "'", and to escape - // a single-quote we use the string: '\'' (interpreted as closing the - // single-quote, putting an escaped single-quote from the shell, and - // then reopening the single quote). - char quote_char = '\''; - const char *escape_str = "'\\''"; // e.g. echo 'a'\''b' returns a'b - - // If the string contains single-quotes that would need escaping this - // way, and we determine that the string could be safely double-quoted - // without requiring any escaping, then we double-quote the string. - // This is the case if the characters "`$\ do not appear in the string. - // e.g. see http://www.redhat.com/mirrors/LDP/LDP/abs/html/quotingvar.html - const char *c_str = str.c_str(); - if (strchr(c_str, '\'') && !strpbrk(c_str, "\"`$\\")) { - quote_char = '"'; - escape_str = "\\\""; // should never be accessed. - } - - char buf[2]; - buf[1] = '\0'; - - buf[0] = quote_char; - std::string ans = buf; - const char *c = str.c_str(); - for (; *c != '\0'; c++) { - if (*c == quote_char) { - ans += escape_str; - } else { - buf[0] = *c; - ans += buf; - } - } - buf[0] = quote_char; - ans += buf; - return ans; -} - -// static function -std::string ParseOptions::Escape(const std::string &str) { - return MustBeQuoted(str, kShellType) ? QuoteAndEscape(str, kShellType) : str; -} - -int ParseOptions::Read(int argc, const char *const argv[]) { - argc_ = argc; - argv_ = argv; - std::string key, value; - int i; - if (argc > 0) { - // set global "const char*" g_program_name (name of the program) - // so it can be printed out in error messages; - // it's useful because often the stderr of different programs will - // be mixed together in the same log file. -#ifdef _MSC_VER - const char *c = strrchr(argv[0], '\\'); -#else - const char *c = strrchr(argv[0], '/'); -#endif - SetProgramName(c == NULL ? 
argv[0] : c + 1); - } - // first pass: look for config parameter, look for priority - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // a lone "--" marks the end of named options - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (key.compare("config") == 0) { - ReadConfigFile(value); - } - if (key.compare("help") == 0) { - PrintUsage(); - exit(0); - } - } - } - bool double_dash_seen = false; - // second pass: add the command line options - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // A lone "--" marks the end of named options. - // Skip that option and break the processing of named options - i += 1; - double_dash_seen = true; - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << argv[i]; - } - } else { - break; - } - } - - // process remaining arguments as positional - for (; i < argc; i++) { - if ((std::strcmp(argv[i], "--") == 0) && !double_dash_seen) { - double_dash_seen = true; - } else { - positional_args_.push_back(std::string(argv[i])); - } - } - - // if the user did not suppress this with --print-args = false.... - if (print_args_) { - std::ostringstream strm; - for (int j = 0; j < argc; j++) strm << Escape(argv[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } - return i; -} - -void ParseOptions::PrintUsage(bool print_command_line) { - std::cerr << '\n' << usage_ << '\n'; - DocMapType::iterator it; - // first we print application-specific options - bool app_specific_header_printed = false; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == false) { // application-specific option - if (app_specific_header_printed == false) { // header was not yet printed - std::cerr << "Options:" << '\n'; - app_specific_header_printed = true; - } - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - if (app_specific_header_printed == true) { - std::cerr << '\n'; - } - - // then the standard options - std::cerr << "Standard options:" << '\n'; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == true) { // we have standard option - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - std::cerr << '\n'; - if (print_command_line) { - std::ostringstream strm; - strm << "Command line was: "; - for (int j = 0; j < argc_; j++) strm << Escape(argv_[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } -} - -void ParseOptions::PrintConfig(std::ostream &os) { - os << '\n' << "[[ Configuration of UI-Registered options ]]" << '\n'; - std::string key; - DocMapType::iterator it; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - key = it->first; - os << it->second.name_ << " = "; - if (bool_map_.end() != bool_map_.find(key)) { - os << (*bool_map_[key] ? 
"true" : "false"); - } else if (int_map_.end() != int_map_.find(key)) { - os << (*int_map_[key]); - } else if (uint_map_.end() != uint_map_.find(key)) { - os << (*uint_map_[key]); - } else if (float_map_.end() != float_map_.find(key)) { - os << (*float_map_[key]); - } else if (double_map_.end() != double_map_.find(key)) { - os << (*double_map_[key]); - } else if (string_map_.end() != string_map_.find(key)) { - os << "'" << *string_map_[key] << "'"; - } else { - KALDI_ERR << "PrintConfig: unrecognized option " << key << "[code error]"; - } - os << '\n'; - } - os << '\n'; -} - -void ParseOptions::ReadConfigFile(const std::string &filename) { - std::ifstream is(filename.c_str(), std::ifstream::in); - if (!is.good()) { - KALDI_ERR << "Cannot open config file: " << filename; - } - - std::string line, key, value; - int32 line_number = 0; - while (std::getline(is, line)) { - line_number++; - // trim out the comments - size_t pos; - if ((pos = line.find_first_of('#')) != std::string::npos) { - line.erase(pos); - } - // skip empty lines - Trim(&line); - if (line.length() == 0) continue; - - if (line.substr(0, 2) != "--") { - KALDI_ERR << "Reading config file " << filename << ": line " - << line_number << " does not look like a line " - << "from a Kaldi command-line program's config file: should " - << "be of the form --x=y. Note: config files intended to " - << "be sourced by shell scripts lack the '--'."; - } - - // parse option - bool has_equal_sign; - SplitLongArg(line, &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << line << " in config file " << filename; - } - } -} - -void ParseOptions::SplitLongArg(const std::string &in, std::string *key, - std::string *value, bool *has_equal_sign) { - KALDI_ASSERT(in.substr(0, 2) == "--"); // precondition. - size_t pos = in.find_first_of('=', 0); - if (pos == std::string::npos) { // we allow --option for bools - // defaults to empty. We handle this differently in different cases. - *key = in.substr(2, in.size() - 2); // 2 because starts with --. - *value = ""; - *has_equal_sign = false; - } else if (pos == 2) { // we also don't allow empty keys: --=value - PrintUsage(true); - KALDI_ERR << "Invalid option (no key): " << in; - } else { // normal case: --option=value - *key = in.substr(2, pos - 2); // 2 because starts with --. 
- *value = in.substr(pos + 1); - *has_equal_sign = true; - } -} - -void ParseOptions::NormalizeArgName(std::string *str) { - std::string out; - std::string::iterator it; - - for (it = str->begin(); it != str->end(); ++it) { - if (*it == '_') - out += '-'; // convert _ to - - else - out += std::tolower(*it); - } - *str = out; - - KALDI_ASSERT(str->length() > 0); -} - -bool ParseOptions::SetOption(const std::string &key, const std::string &value, - bool has_equal_sign) { - if (bool_map_.end() != bool_map_.find(key)) { - if (has_equal_sign && value == "") - KALDI_ERR << "Invalid option --" << key << "="; - *(bool_map_[key]) = ToBool(value); - } else if (int_map_.end() != int_map_.find(key)) { - *(int_map_[key]) = ToInt(value); - } else if (uint_map_.end() != uint_map_.find(key)) { - *(uint_map_[key]) = ToUint(value); - } else if (float_map_.end() != float_map_.find(key)) { - *(float_map_[key]) = ToFloat(value); - } else if (double_map_.end() != double_map_.find(key)) { - *(double_map_[key]) = ToDouble(value); - } else if (string_map_.end() != string_map_.find(key)) { - if (!has_equal_sign) - KALDI_ERR << "Invalid option --" << key << " (option format is --x=y)."; - *(string_map_[key]) = value; - } else { - return false; - } - return true; -} - -bool ParseOptions::ToBool(std::string str) { - std::transform(str.begin(), str.end(), str.begin(), ::tolower); - - // allow "" as a valid option for "true", so that --x is the same as --x=true - if ((str.compare("true") == 0) || (str.compare("t") == 0) || - (str.compare("1") == 0) || (str.compare("") == 0)) { - return true; - } - if ((str.compare("false") == 0) || (str.compare("f") == 0) || - (str.compare("0") == 0)) { - return false; - } - // if it is neither true nor false: - PrintUsage(true); - KALDI_ERR << "Invalid format for boolean argument [expected true or false]: " - << str; - return false; // never reached -} - -int32 ParseOptions::ToInt(const std::string &str) { - int32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -uint32 ParseOptions::ToUint(const std::string &str) { - uint32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -float ParseOptions::ToFloat(const std::string &str) { - float ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -double ParseOptions::ToDouble(const std::string &str) { - double ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -// instantiate templates -template void ParseOptions::RegisterTmpl(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, int32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, float *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, double *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterStandard(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - int32 *ptr, - const std::string &doc); 
-template void ParseOptions::RegisterStandard(const std::string &name, - uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - float *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - double *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterCommon(const std::string &name, bool *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, int32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, uint32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, float *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, double *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, - std::string *ptr, - const std::string &doc, - bool is_standard); - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/parse-options.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/parse-options.h deleted file mode 100644 index 93a060f4a411dfd63298a91bb313e0b66d337a75..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/parse-options.h +++ /dev/null @@ -1,265 +0,0 @@ -// util/parse-options.h - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Frantisek Skala; Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_PARSE_OPTIONS_H_ -#define KALDI_UTIL_PARSE_OPTIONS_H_ - -#include -#include -#include - -#include "base/kaldi-common.h" -#include "itf/options-itf.h" - -namespace kaldi { - -/// The class ParseOptions is for parsing command-line options; see -/// \ref parse_options for more documentation. -class ParseOptions : public OptionsItf { - public: - explicit ParseOptions(const char *usage) - : print_args_(true), - help_(false), - usage_(usage), - argc_(0), - argv_(NULL), - prefix_(""), - other_parser_(NULL) { -#if !defined(_MSC_VER) && \ - !defined(__CYGWIN__) // This is just a convenient place to set the stderr - // to line - setlinebuf(stderr); // buffering mode, since it's called at program start. -#endif // This helps ensure different programs' output is not mixed up. 
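The parse-options.cc just removed, together with the parse-options.h header that follows, implements Kaldi's standard command-line handling: register typed options, then Read() parses --key=value pairs (plus the built-in --config, --help and --print-args) and leaves the rest as positional arguments. A minimal driver sketch; the program name, option names, defaults and usage string are illustrative only:

```cpp
// Illustrative ParseOptions driver; option names and defaults are made up.
#include <string>

#include "util/parse-options.h"

int main(int argc, char *argv[]) {
  const char *usage =
      "Demo of ParseOptions.\n"
      "Usage: demo [options] <input-rxfilename> <output-wxfilename>\n";
  kaldi::ParseOptions po(usage);

  bool binary = true;
  kaldi::int32 beam = 10;
  std::string note;
  po.Register("binary", &binary, "Write output in binary mode");
  po.Register("beam", &beam, "Beam width (illustrative option)");
  po.Register("note", &note, "Free-form note (illustrative option)");

  po.Read(argc, argv);  // also handles --config=<file>, --help, --print-args

  if (po.NumArgs() != 2) {
    po.PrintUsage();
    return 1;
  }
  const std::string input_rxfilename = po.GetArg(1);
  const std::string output_wxfilename = po.GetArg(2);
  // ... use the options and positional arguments here ...
  return 0;
}
```

Invoked as, for example, `demo --beam=8 --binary=false in.ark out.ark`, which matches the --x=y form that SplitLongArg() and SetOption() above expect.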
- RegisterStandard("config", &config_, - "Configuration file to read (this " - "option may be repeated)"); - RegisterStandard("print-args", &print_args_, - "Print the command line arguments (to stderr)"); - RegisterStandard("help", &help_, "Print out usage message"); - RegisterStandard("verbose", &g_kaldi_verbose_level, - "Verbose level (higher->more logging)"); - } - - /** - This is a constructor for the special case where some options are - registered with a prefix to avoid conflicts. The object thus created will - only be used temporarily to register an options class with the original - options parser (which is passed as the *other pointer) using the given - prefix. It should not be used for any other purpose, and the prefix must - not be the empty string. It seems to be the least bad way of implementing - options with prefixes at this point. - Example of usage is: - ParseOptions po; // original ParseOptions object - ParseOptions po_mfcc("mfcc", &po); // object with prefix. - MfccOptions mfcc_opts; - mfcc_opts.Register(&po_mfcc); - The options will now get registered as, e.g., --mfcc.frame-shift=10.0 - instead of just --frame-shift=10.0 - */ - ParseOptions(const std::string &prefix, OptionsItf *other); - - ~ParseOptions() {} - - // Methods from the interface - void Register(const std::string &name, bool *ptr, const std::string &doc); - void Register(const std::string &name, int32 *ptr, const std::string &doc); - void Register(const std::string &name, uint32 *ptr, const std::string &doc); - void Register(const std::string &name, float *ptr, const std::string &doc); - void Register(const std::string &name, double *ptr, const std::string &doc); - void Register(const std::string &name, std::string *ptr, - const std::string &doc); - - /// If called after registering an option and before calling - /// Read(), disables that option from being used. Will crash - /// at runtime if that option had not been registered. - void DisableOption(const std::string &name); - - /// This one is used for registering standard parameters of all the programs - template - void RegisterStandard(const std::string &name, T *ptr, - const std::string &doc); - - /** - Parses the command line options and fills the ParseOptions-registered - variables. This must be called after all the variables were registered!!! - - Initially the variables have implicit values, - then the config file values are set-up, - finally the command line values given. - Returns the first position in argv that was not used. - [typically not useful: use NumParams() and GetParam(). ] - */ - int Read(int argc, const char *const *argv); - - /// Prints the usage documentation [provided in the constructor]. - void PrintUsage(bool print_command_line = false); - /// Prints the actual configuration of all the registered variables - void PrintConfig(std::ostream &os); - - /// Reads the options values from a config file. Must be called after - /// registering all options. This is usually used internally after the - /// standard --config option is used, but it may also be called from a - /// program. - void ReadConfigFile(const std::string &filename); - - /// Number of positional parameters (c.f. argc-1). - int NumArgs() const; - - /// Returns one of the positional parameters; 1-based indexing for argc/argv - /// compatibility. Will crash if param is not >=1 and <=NumArgs(). - std::string GetArg(int param) const; - - std::string GetOptArg(int param) const { - return (param <= NumArgs() ? 
GetArg(param) : ""); - } - - /// The following function will return a possibly quoted and escaped - /// version of "str", according to the current shell. Currently - /// this is just hardwired to bash. It's useful for debug output. - static std::string Escape(const std::string &str); - - private: - /// Template to register various variable types, - /// used for program-specific parameters - template - void RegisterTmpl(const std::string &name, T *ptr, const std::string &doc); - - // Following functions do just the datatype-specific part of the job - /// Register boolean variable - void RegisterSpecific(const std::string &name, const std::string &idx, - bool *b, const std::string &doc, bool is_standard); - /// Register int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - int32 *i, const std::string &doc, bool is_standard); - /// Register unsinged int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - uint32 *u, const std::string &doc, bool is_standard); - /// Register float variable - void RegisterSpecific(const std::string &name, const std::string &idx, - float *f, const std::string &doc, bool is_standard); - /// Register double variable [useful as we change BaseFloat type]. - void RegisterSpecific(const std::string &name, const std::string &idx, - double *f, const std::string &doc, bool is_standard); - /// Register string variable - void RegisterSpecific(const std::string &name, const std::string &idx, - std::string *s, const std::string &doc, - bool is_standard); - - /// Does the actual job for both kinds of parameters - /// Does the common part of the job for all datatypes, - /// then calls RegisterSpecific - template - void RegisterCommon(const std::string &name, T *ptr, const std::string &doc, - bool is_standard); - - /// Set option with name "key" to "value"; will crash if can't do it. - /// "has_equal_sign" is used to allow --x for a boolean option x, - /// and --y=, for a string option y. - bool SetOption(const std::string &key, const std::string &value, - bool has_equal_sign); - - bool ToBool(std::string str); - int32 ToInt(const std::string &str); - uint32 ToUint(const std::string &str); - float ToFloat(const std::string &str); - double ToDouble(const std::string &str); - - // maps for option variables - std::map bool_map_; - std::map int_map_; - std::map uint_map_; - std::map float_map_; - std::map double_map_; - std::map string_map_; - - /** - Structure for options' documentation - */ - struct DocInfo { - DocInfo() {} - DocInfo(const std::string &name, const std::string &usemsg) - : name_(name), use_msg_(usemsg), is_standard_(false) {} - DocInfo(const std::string &name, const std::string &usemsg, - bool is_standard) - : name_(name), use_msg_(usemsg), is_standard_(is_standard) {} - - std::string name_; - std::string use_msg_; - bool is_standard_; - }; - typedef std::map DocMapType; - DocMapType doc_map_; ///< map for the documentation - - bool print_args_; ///< variable for the implicit --print-args parameter - bool help_; ///< variable for the implicit --help parameter - std::string config_; ///< variable for the implicit --config parameter - std::vector positional_args_; - const char *usage_; - int argc_; - const char *const *argv_; - - /// These members are not normally used. 
They are only used when the object - /// is constructed with a prefix - std::string prefix_; - OptionsItf *other_parser_; - - protected: - /// SplitLongArg parses an argument of the form --a=b, --a=, or --a, - /// and sets "has_equal_sign" to true if an equals-sign was parsed.. - /// this is needed in order to correctly allow --x for a boolean option - /// x, and --y= for a string option y, and to disallow --x= and --y. - void SplitLongArg(const std::string &in, std::string *key, std::string *value, - bool *has_equal_sign); - - void NormalizeArgName(std::string *str); -}; - -/// This template is provided for convenience in reading config classes from -/// files; this is not the standard way to read configuration options, but may -/// occasionally be needed. This function assumes the config has a function -/// "void Register(OptionsItf *opts)" which it can call to register the -/// ParseOptions object. -template -void ReadConfigFromFile(const std::string &config_filename, C *c) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << config_filename << "'"; - ParseOptions po(usage_str.str().c_str()); - c->Register(&po); - po.ReadConfigFile(config_filename); -} - -/// This variant of the template ReadConfigFromFile is for if you need to read -/// two config classes from the same file. -template -void ReadConfigsFromFile(const std::string &conf, C1 *c1, C2 *c2) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << conf << "'"; - ParseOptions po(usage_str.str().c_str()); - c1->Register(&po); - c2->Register(&po); - po.ReadConfigFile(conf); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_PARSE_OPTIONS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/simple-io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/simple-io-funcs.cc deleted file mode 100644 index 5ace601b6a2bb186dec78b0b25cb5a3227c48bc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/simple-io-funcs.cc +++ /dev/null @@ -1,80 +0,0 @@ -// util/simple-io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#include "util/simple-io-funcs.h" -#include "util/text-utils.h" - -namespace kaldi { - -bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. 
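ReadConfigFile() accepts one --key=value option per line, with '#' comments and blank lines ignored, and the ReadConfigFromFile() template at the end of this header wires that into any options struct exposing Register(OptionsItf*). A sketch under those assumptions; the struct, its option names, and demo.conf are all illustrative:

```cpp
// Illustrative options struct populated from a --key=value config file.
//
// demo.conf might contain:
//   # frame options
//   --frame-shift=15
//   --apply-cmvn=false
#include <string>

#include "util/parse-options.h"

struct DemoOptions {
  kaldi::int32 frame_shift = 10;
  bool apply_cmvn = true;

  void Register(kaldi::OptionsItf *opts) {
    opts->Register("frame-shift", &frame_shift, "Frame shift in ms (illustrative)");
    opts->Register("apply-cmvn", &apply_cmvn, "Apply CMVN (illustrative)");
  }
};

int main() {
  DemoOptions opts;
  // Throws (via KALDI_ERR) if a line is not of the --x=y form.
  kaldi::ReadConfigFromFile("demo.conf", &opts);
  return opts.apply_cmvn ? 0 : 1;
}
```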
- if (!ko.Open(wxfilename, false, false)) return false; - for (size_t i = 0; i < list.size(); i++) ko.Stream() << list[i] << '\n'; - return ko.Close(); -} - -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - int32 i; - list->clear(); - while (!(is >> i).fail()) list->push_back(i); - is >> std::ws; - return is.eof(); // should be eof, or junk at end of file. -} - -bool WriteIntegerVectorVectorSimple( - const std::string &wxfilename, - const std::vector > &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. - if (!ko.Open(wxfilename, false, false)) return false; - std::ostream &os = ko.Stream(); - for (size_t i = 0; i < list.size(); i++) { - for (size_t j = 0; j < list[i].size(); j++) { - os << list[i][j]; - if (j + 1 < list[i].size()) os << ' '; - } - os << '\n'; - } - return ko.Close(); -} - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - list->clear(); - std::string line; - while (std::getline(is, line)) { - std::vector v; - if (!SplitStringToIntegers(line, " \t\r", true, &v)) { - list->clear(); - return false; - } - list->push_back(v); - } - return is.eof(); // if we're not at EOF, something weird happened. -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/simple-io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/simple-io-funcs.h deleted file mode 100644 index 1ead12790ba9bd6a44ccdff855918270191b8ebd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/simple-io-funcs.h +++ /dev/null @@ -1,61 +0,0 @@ -// util/simple-io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_SIMPLE_IO_FUNCS_H_ -#define KALDI_UTIL_SIMPLE_IO_FUNCS_H_ - -#include -#include -#include "util/kaldi-io.h" - -// This header contains some utilities for reading some common, simple text -// formats:integers in files, one per line, and integers in files, possibly -// multiple per line. these are not really fully native Kaldi formats; they are -// mostly for small files that might be generated by scripts, and can be read -// all at one time. for longer files of this type, we would probably use the -// Table code. - -namespace kaldi { - -/// WriteToList attempts to write this list of integers, one per line, -/// to the given file, in text format. -/// returns true if succeeded. 
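The four helpers in simple-io-funcs are thin wrappers over Output/Input for small text files of integers, one value (or one whitespace-separated list) per line. A round-trip sketch; the /tmp path and the sample values are illustrative:

```cpp
// Write a vector of int32 one-per-line, then read it back.
#include <iostream>
#include <vector>

#include "util/simple-io-funcs.h"

int main() {
  const std::vector<kaldi::int32> ids = {3, 1, 4, 1, 5};
  if (!kaldi::WriteIntegerVectorSimple("/tmp/ids.txt", ids)) return 1;

  std::vector<kaldi::int32> read_back;
  if (!kaldi::ReadIntegerVectorSimple("/tmp/ids.txt", &read_back)) return 1;

  std::cout << "read " << read_back.size() << " integers\n";
  return 0;
}
```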
-bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &v); - -/// ReadFromList attempts to read this list of integers, one per line, -/// from the given file, in text format. -/// returns true if succeeded. -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *v); - -// This is a file format like: -// 1 2 -// 3 -// -// 4 5 6 -// etc. -bool WriteIntegerVectorVectorSimple(const std::string &wxfilename, - const std::vector > &v); - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *v); - -} // end namespace kaldi. - -#endif // KALDI_UTIL_SIMPLE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/stl-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/stl-utils.h deleted file mode 100644 index 8a29cd582c77b3078277aa9713b8676032bbc5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/stl-utils.h +++ /dev/null @@ -1,310 +0,0 @@ -// util/stl-utils.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_STL_UTILS_H_ -#define KALDI_UTIL_STL_UTILS_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -using std::unordered_map; -using std::unordered_set; - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Sorts and uniq's (removes duplicates) from a vector. -template -inline void SortAndUniq(std::vector *vec) { - std::sort(vec->begin(), vec->end()); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Returns true if the vector is sorted. -template -inline bool IsSorted(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter < *iter) return false; - iter = next_iter; - } -} - -/// Returns true if the vector is sorted and contains each element -/// only once. -template -inline bool IsSortedAndUniq(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter <= *iter) return false; - iter = next_iter; - } -} - -/// Removes duplicate elements from a sorted list. -template -inline void Uniq(std::vector *vec) { // must be already sorted. 
- KALDI_PARANOID_ASSERT(IsSorted(*vec)); - KALDI_ASSERT(vec); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Copies the elements of a set to a vector. -template -void CopySetToVector(const std::set &s, std::vector *v) { - // copies members of s into v, in sorted order from lowest to highest - // (because the set was in sorted order). - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename std::set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -template -void CopySetToVector(const unordered_set &s, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename unordered_set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -/// Copies the (key, value) pairs in a map to a vector of pairs. -template -void CopyMapToVector(const std::map &m, - std::vector > *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector >::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = std::make_pair(miter->first, miter->second); - // do it like this because of const casting. - } -} - -/// Copies the keys in a map to a vector. -template -void CopyMapKeysToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->first; - } -} - -/// Copies the values in a map to a vector. -template -void CopyMapValuesToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->second; - } -} - -/// Copies the keys in a map to a set. -template -void CopyMapKeysToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) { - s->insert(s->end(), miter->first); - } -} - -/// Copies the values in a map to a set. -template -void CopyMapValuesToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) s->insert(s->end(), miter->second); -} - -/// Copies the contents of a vector to a set. -template -void CopyVectorToSet(const std::vector &v, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) s->insert(s->end(), *iter); - // s->end() is a hint in case v was sorted. will work regardless. -} - -/// Deletes any non-NULL pointers in the vector v, and sets -/// the corresponding entries of v to NULL -template -void DeletePointers(std::vector *v) { - KALDI_ASSERT(v != NULL); - typename std::vector::iterator iter = v->begin(), end = v->end(); - for (; iter != end; ++iter) { - if (*iter != NULL) { - delete *iter; - *iter = NULL; // set to NULL for extra safety. - } - } -} - -/// Returns true if the vector of pointers contains NULL pointers. 
-template -bool ContainsNullPointers(const std::vector &v) { - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) - if (*iter == static_cast(NULL)) return true; - return false; -} - -/// Copies the contents a vector of one type to a vector -/// of another type. -template -void CopyVectorToVector(const std::vector &vec_in, std::vector *vec_out) { - KALDI_ASSERT(vec_out != NULL); - vec_out->resize(vec_in.size()); - for (size_t i = 0; i < vec_in.size(); i++) - (*vec_out)[i] = static_cast(vec_in[i]); -} - -/// A hashing function-object for vectors. -template -struct VectorHasher { // hashing function for vector. - size_t operator()(const std::vector &x) const noexcept { - size_t ans = 0; - typename std::vector::const_iterator iter = x.begin(), end = x.end(); - for (; iter != end; ++iter) { - ans *= kPrime; - ans += *iter; - } - return ans; - } - VectorHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - } - - private: - static const int kPrime = 7853; -}; - -/// A hashing function-object for pairs of ints -template -struct PairHasher { // hashing function for pair - size_t operator()(const std::pair &x) const noexcept { - // 7853 was chosen at random from a list of primes. - return x.first + x.second * 7853; - } - PairHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int1); - KALDI_ASSERT_IS_INTEGER_TYPE(Int2); - } -}; - -/// A hashing function object for strings. -struct StringHasher { // hashing function for std::string - size_t operator()(const std::string &str) const noexcept { - size_t ans = 0, len = str.length(); - const char *c = str.c_str(), *end = c + len; - for (; c != end; c++) { - ans *= kPrime; - ans += *c; - } - return ans; - } - - private: - static const int kPrime = 7853; -}; - -/// Reverses the contents of a vector. -template -inline void ReverseVector(std::vector *vec) { - KALDI_ASSERT(vec != NULL); - size_t sz = vec->size(); - for (size_t i = 0; i < sz / 2; i++) std::swap((*vec)[i], (*vec)[sz - 1 - i]); -} - -/// Comparator object for pairs that compares only the first pair. -template -struct CompareFirstMemberOfPair { - inline bool operator()(const std::pair &p1, const std::pair &p2) { - return p1.first < p2.first; - } -}; - -/// For a vector of pair where I is an integer and F a floating-point or -/// integer type, this function sorts a vector of type vector > on -/// the I value and then merges elements with equal I values, summing these over -/// the F component and then removing any F component with zero value. This -/// is for where the vector of pairs represents a map from the integer to float -/// component, with an "adding" type of semantics for combining the elements. -template -inline void MergePairVectorSumming(std::vector > *vec) { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - CompareFirstMemberOfPair c; - std::sort(vec->begin(), vec->end(), c); // sort on 1st element. - typename std::vector >::iterator out = vec->begin(), - in = vec->begin(), - end = vec->end(); - // special case: while there is nothing to be changed, skip over - // initial input (avoids unnecessary copying). - while (in + 1 < end && in[0].first != in[1].first && in[0].second != 0.0) { - in++; - out++; - } - while (in < end) { - // We reach this point only at the first element of - // each stretch of identical .first elements. - *out = *in; - ++in; - while (in < end && in->first == out->first) { - out->second += in->second; // this is the merge operation. 
- ++in; - } - if (out->second != static_cast(0)) // Don't keep zero elements. - out++; - } - vec->erase(out, end); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_STL_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/text-utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/text-utils.cc deleted file mode 100644 index fd70889644f6b4e14793ddd4f5b0d71a66768699..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/text-utils.cc +++ /dev/null @@ -1,580 +0,0 @@ -// util/text-utils.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "util/text-utils.h" - -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out) { - KALDI_ASSERT(out != NULL); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - F f = 0; - if (!ConvertStringToReal(split[i], &f)) return false; - (*out)[i] = f; - } - return true; -} - -// Instantiate the template above for float and double. 
-template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); -template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out) { - std::string tmp_str; - for (size_t i = 0; i < vec_in.size(); i++) { - if (!omit_empty_strings || !vec_in[i].empty()) { - tmp_str.append(vec_in[i]); - if (i < vec_in.size() - 1) - if (!omit_empty_strings || !vec_in[i + 1].empty()) - tmp_str.append(delim); - } - } - str_out->swap(tmp_str); -} - -void Trim(std::string *str) { - const char *white_chars = " \t\n\r\f\v"; - - std::string::size_type pos = str->find_last_not_of(white_chars); - if (pos != std::string::npos) { - str->erase(pos + 1); - pos = str->find_first_not_of(white_chars); - if (pos != std::string::npos) str->erase(0, pos); - } else { - str->erase(str->begin(), str->end()); - } -} - -bool IsToken(const std::string &token) { - size_t l = token.length(); - if (l == 0) return false; - for (size_t i = 0; i < l; i++) { - unsigned char c = token[i]; - if ((!isprint(c) || isspace(c)) && (isascii(c) || c == (unsigned char)255)) - return false; - // The "&& (isascii(c) || c == 255)" was added so that we won't reject - // non-ASCII characters such as French characters with accents [except for - // 255 which is "nbsp", a form of space]. - } - return true; -} - -void SplitStringOnFirstSpace(const std::string &str, std::string *first, - std::string *rest) { - const char *white_chars = " \t\n\r\f\v"; - typedef std::string::size_type I; - const I npos = std::string::npos; - I first_nonwhite = str.find_first_not_of(white_chars); - if (first_nonwhite == npos) { - first->clear(); - rest->clear(); - return; - } - // next_white is first whitespace after first nonwhitespace. - I next_white = str.find_first_of(white_chars, first_nonwhite); - - if (next_white == npos) { // no more whitespace... - *first = std::string(str, first_nonwhite); - rest->clear(); - return; - } - I next_nonwhite = str.find_first_not_of(white_chars, next_white); - if (next_nonwhite == npos) { - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - rest->clear(); - return; - } - - I last_nonwhite = str.find_last_not_of(white_chars); - KALDI_ASSERT(last_nonwhite != npos); // or coding error. 
- - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - *rest = std::string(str, next_nonwhite, last_nonwhite + 1 - next_nonwhite); -} - -bool IsLine(const std::string &line) { - if (line.find('\n') != std::string::npos) return false; - if (line.empty()) return true; - if (isspace(*(line.begin()))) return false; - if (isspace(*(line.rbegin()))) return false; - std::string::const_iterator iter = line.begin(), end = line.end(); - for (; iter != end; iter++) - if (!isprint(*iter)) return false; - return true; -} - -template -class NumberIstream { - public: - explicit NumberIstream(std::istream &i) : in_(i) {} - - NumberIstream &operator>>(T &x) { - if (!in_.good()) return *this; - in_ >> x; - if (!in_.fail() && RemainderIsOnlySpaces()) return *this; - return ParseOnFail(&x); - } - - private: - std::istream &in_; - - bool RemainderIsOnlySpaces() { - if (in_.tellg() != std::istream::pos_type(-1)) { - std::string rem; - in_ >> rem; - - if (rem.find_first_not_of(' ') != std::string::npos) { - // there is not only spaces - return false; - } - } - - in_.clear(); - return true; - } - - NumberIstream &ParseOnFail(T *x) { - std::string str; - in_.clear(); - in_.seekg(0); - // If the stream is broken even before trying - // to read from it or if there are many tokens, - // it's pointless to try. - if (!(in_ >> str) || !RemainderIsOnlySpaces()) { - in_.setstate(std::ios_base::failbit); - return *this; - } - - std::map inf_nan_map; - // we'll keep just uppercase values. - inf_nan_map["INF"] = std::numeric_limits::infinity(); - inf_nan_map["+INF"] = std::numeric_limits::infinity(); - inf_nan_map["-INF"] = -std::numeric_limits::infinity(); - inf_nan_map["INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["+INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["-INFINITY"] = -std::numeric_limits::infinity(); - inf_nan_map["NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["+NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-NAN"] = -std::numeric_limits::quiet_NaN(); - // MSVC - inf_nan_map["1.#INF"] = std::numeric_limits::infinity(); - inf_nan_map["-1.#INF"] = -std::numeric_limits::infinity(); - inf_nan_map["1.#QNAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-1.#QNAN"] = -std::numeric_limits::quiet_NaN(); - - std::transform(str.begin(), str.end(), str.begin(), ::toupper); - - if (inf_nan_map.find(str) != inf_nan_map.end()) { - *x = inf_nan_map[str]; - } else { - in_.setstate(std::ios_base::failbit); - } - - return *this; - } -}; - -template -bool ConvertStringToReal(const std::string &str, T *out) { - std::istringstream iss(str); - - NumberIstream i(iss); - - i >> *out; - - if (iss.fail()) { - // Number conversion failed. - return false; - } - - return true; -} - -template bool ConvertStringToReal(const std::string &str, float *out); -template bool ConvertStringToReal(const std::string &str, double *out); - -/* - This function is a helper function of StringsApproxEqual. It should be - thought of as a recursive function-- it was designed that way-- but rather - than actually recursing (which would cause problems with stack overflow), we - just set the args and return to the start. - - The 'decimal_places_tolerance' argument is just passed in from outside, - see the documentation for StringsApproxEqual in text-utils.h to see an - explanation. The argument 'places_into_number' provides some information - about the strings 'a' and 'b' that precedes the current pointers. 
- For purposes of this comment, let's define the 'decimal' of a number - as the part that comes after the decimal point, e.g. in '99.123', - '123' would be the decimal. If 'places_into_number' is -1, it means - we're not currently inside some place like that (i.e. it's not the - case that we're pointing to the '1' or the '2' or the '3'). - If it's 0, then we'd be pointing to the first place after the decimal, - '1' in this case. Note if one of the numbers is shorter than the - other, like '99.123' versus '99.1234' and 'a' points to the first '3' - while 'b' points to the second '4', 'places_into_number' referes to the - shorter of the two, i.e. it would be 2 in this example. - - - */ -bool StringsApproxEqualInternal(const char *a, const char *b, - int32 decimal_places_tolerance, - int32 places_into_number) { -start: - char ca = *a, cb = *b; - if (ca == cb) { - if (ca == '\0') { - return true; - } else { - if (places_into_number >= 0) { - if (isdigit(ca)) { - places_into_number++; - } else { - places_into_number = -1; - } - } else { - if (ca == '.') { - places_into_number = 0; - } - } - a++; - b++; - goto start; - } - } else { - if (places_into_number >= decimal_places_tolerance && - (isdigit(ca) || isdigit(cb))) { - // we're potentially willing to accept this difference between the - // strings. - if (isdigit(ca)) a++; - if (isdigit(cb)) b++; - // we'll have advanced at least one of the two strings. - goto start; - } else if (places_into_number >= 0 && - ((ca == '0' && !isdigit(cb)) || (cb == '0' && !isdigit(ca)))) { - // this clause is designed to ensure that, for example, - // "0.1" would count the same as "0.100001". - if (ca == '0') - a++; - else - b++; - places_into_number++; - goto start; - } else { - return false; - } - } -} - -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_tolerance) { - return StringsApproxEqualInternal(a.c_str(), b.c_str(), - decimal_places_tolerance, -1); -} - -bool ConfigLine::ParseLine(const std::string &line) { - data_.clear(); - whole_line_ = line; - if (line.size() == 0) return false; // Empty line - size_t pos = 0, size = line.size(); - while (isspace(line[pos]) && pos < size) pos++; - if (pos == size) return false; // whitespace-only line - size_t first_token_start_pos = pos; - // first get first_token_. - while (!isspace(line[pos]) && pos < size) { - if (line[pos] == '=') { - // If the first block of non-whitespace looks like "foo-bar=...", - // then we ignore it: there is no initial token, and FirstToken() - // is empty. - pos = first_token_start_pos; - break; - } - pos++; - } - first_token_ = - std::string(line, first_token_start_pos, pos - first_token_start_pos); - // first_token_ is expected to be either empty or something like - // "component-node", which actually is a slightly more restrictive set of - // strings than IsValidName() checks for this is a convenient way to check it. - if (!first_token_.empty() && !IsValidName(first_token_)) return false; - - while (pos < size) { - if (isspace(line[pos])) { - pos++; - continue; - } - - // OK, at this point we know that we are pointing at nonspace. - size_t next_equals_sign = line.find_first_of("=", pos); - if (next_equals_sign == pos || next_equals_sign == std::string::npos) { - // we're looking for something like 'key=value'. If there is no equals - // sign, or it's not preceded by something, it's a parsing failure. - return false; - } - std::string key(line, pos, next_equals_sign - pos); - if (!IsValidName(key)) return false; - - // handle any quotes. 
we support key='blah blah' or key="foo bar". - // no escaping is supported. - if (line[next_equals_sign + 1] == '\'' || - line[next_equals_sign + 1] == '"') { - char my_quote = line[next_equals_sign + 1]; - size_t next_quote = line.find_first_of(my_quote, next_equals_sign + 2); - if (next_quote == std::string::npos) { // no matching quote was found. - KALDI_WARN << "No matching quote for " << my_quote - << " in config line '" << line << "'"; - return false; - } else { - std::string value(line, next_equals_sign + 2, - next_quote - next_equals_sign - 2); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = next_quote + 1; - continue; - } - } else { - // we want to be able to parse something like "... input=Offset(a, -1) - // foo=bar": in general, config values with spaces in them, even without - // quoting. - - size_t next_next_equals_sign = - line.find_first_of("=", next_equals_sign + 1), - terminating_space = size; - - if (next_next_equals_sign != - std::string::npos) { // found a later equals sign. - size_t preceding_space = - line.find_last_of(" \t", next_next_equals_sign); - if (preceding_space != std::string::npos && - preceding_space > next_equals_sign) - terminating_space = preceding_space; - } - while (isspace(line[terminating_space - 1]) && terminating_space > 0) - terminating_space--; - - std::string value(line, next_equals_sign + 1, - terminating_space - (next_equals_sign + 1)); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = terminating_space; - } - } - return true; -} - -bool ConfigLine::GetValue(const std::string &key, std::string *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - *value = (it->second).first; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, BaseFloat *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToReal((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, int32 *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToInteger((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, std::vector *value) { - KALDI_ASSERT(value != NULL); - value->clear(); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!SplitStringToIntegers((it->second).first, ":,", true, value)) { - // KALDI_WARN << "Bad option " << (it->second).first; - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, bool *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if ((it->second).first.size() == 0) return false; - switch (((it->second).first)[0]) { - case 'F': - case 'f': - *value = false; - break; - case 'T': - case 't': - *value = true; - break; - default: - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool 
ConfigLine::HasUnusedValues() const { - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) return true; - } - return false; -} - -std::string ConfigLine::UnusedValues() const { - std::string unused_str; - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) { - if (unused_str == "") - unused_str = it->first + "=" + (it->second).first; - else - unused_str += " " + it->first + "=" + (it->second).first; - } - } - return unused_str; -} - -// This is like ExpectToken but for two tokens, and it -// will either accept token1 and then token2, or just token2. -// This is useful in Read functions where the first token -// may already have been consumed. -// void ExpectOneOrTwoTokens(std::istream &is, bool binary, -// const std::string &token1, -// const std::string &token2) { -// KALDI_ASSERT(token1 != token2); -// std::string temp; -// ReadToken(is, binary, &temp); -// if (temp == token1) { -// ExpectToken(is, binary, token2); -// } else { -// if (temp != token2) { -// KALDI_ERR << "Expecting token " << token1 << " or " << token2 -// << " but got " << temp; -// } -// } -// } - -bool IsValidName(const std::string &name) { - if (name.size() == 0) return false; - for (size_t i = 0; i < name.size(); i++) { - if (i == 0 && !isalpha(name[i]) && name[i] != '_') return false; - if (!isalnum(name[i]) && name[i] != '_' && name[i] != '-' && name[i] != '.') - return false; - } - return true; -} - -void ReadConfigLines(std::istream &is, std::vector *lines) { - KALDI_ASSERT(lines != NULL); - std::string line; - while (std::getline(is, line)) { - if (line.size() == 0) continue; - size_t start = line.find_first_not_of(" \t"); - size_t end = line.find_first_of('#'); - if (start == std::string::npos || start == end) continue; - end = line.find_last_not_of(" \t", end - 1); - KALDI_ASSERT(end >= start); - lines->push_back(line.substr(start, end - start + 1)); - } -} - -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines) { - config_lines->resize(lines.size()); - for (size_t i = 0; i < lines.size(); i++) { - bool ret = (*config_lines)[i].ParseLine(lines[i]); - if (!ret) { - KALDI_ERR << "Error parsing config line: " << lines[i]; - } - } -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/text-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/text-utils.h deleted file mode 100644 index bc7763c4aff38214d97cbeda3b29c8717dd65318..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/kaldi/util/text-utils.h +++ /dev/null @@ -1,264 +0,0 @@ -// util/text-utils.h - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef KALDI_UTIL_TEXT_UTILS_H_ -#define KALDI_UTIL_TEXT_UTILS_H_ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Split a string using any of the single character delimiters. -/// If omit_empty_strings == true, the output will contain any -/// nonempty strings after splitting on any of the -/// characters in the delimiter. If omit_empty_strings == false, -/// the output will contain n+1 strings if there are n characters -/// in the set "delim" within the input string. In this case -/// the empty string is split to a single empty string. -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -/// Joins the elements of a vector of strings into a single string using -/// "delim" as the delimiter. If omit_empty_strings == true, any empty strings -/// in the vector are skipped. A vector of empty strings results in an empty -/// string on the output. -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out); - -/** - \brief Split a string (e.g. 1:2:3) into a vector of integers. - - \param [in] delim String containing a list of characters, any of which - is allowed as a delimiter. - \param [in] omit_empty_strings If true, empty strings between delimiters are - allowed and will not produce an output integer; if false, - instances of characters in 'delim' that are consecutive or - at the start or end of the string would be an error. - You'll normally want this to be true if 'delim' consists - of spaces, and false otherwise. - \param [out] out The output list of integers. -*/ -template -bool SplitStringToIntegers(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false [but - // should probably be true - // if "delim" is spaces]. - std::vector *out) { - KALDI_ASSERT(out != NULL); - KALDI_ASSERT_IS_INTEGER_TYPE(I); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - const char *this_str = split[i].c_str(); - char *end = NULL; - int64 j = 0; - j = KALDI_STRTOLL(this_str, &end); - if (end == this_str || *end != '\0') { - out->clear(); - return false; - } else { - I jI = static_cast(j); - if (static_cast(jI) != j) { - // output type cannot fit this integer. - out->clear(); - return false; - } - (*out)[i] = jI; - } - } - return true; -} - -// This is defined for F = float and double. -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out); - -/// Converts a string into an integer via strtoll and returns false if there was -/// any kind of problem (i.e. the string was not an integer or contained extra -/// non-whitespace junk, or the integer was too large to fit into the type it is -/// being converted into). Only sets *out if everything was OK and it returns -/// true. 
-template -bool ConvertStringToInteger(const std::string &str, Int *out) { - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - const char *this_str = str.c_str(); - char *end = NULL; - errno = 0; - int64 i = KALDI_STRTOLL(this_str, &end); - if (end != this_str) - while (isspace(*end)) end++; - if (end == this_str || *end != '\0' || errno != 0) return false; - Int iInt = static_cast(i); - if (static_cast(iInt) != i || - (i < 0 && !std::numeric_limits::is_signed)) { - return false; - } - *out = iInt; - return true; -} - -/// ConvertStringToReal converts a string into either float or double -/// and returns false if there was any kind of problem (i.e. the string -/// was not a floating point number or contained extra non-whitespace junk). -/// Be careful- this function will successfully read inf's or nan's. -template -bool ConvertStringToReal(const std::string &str, T *out); - -/// Removes the beginning and trailing whitespaces from a string -void Trim(std::string *str); - -/// Removes leading and trailing white space from the string, then splits on the -/// first section of whitespace found (if present), putting the part before the -/// whitespace in "first" and the rest in "rest". If there is no such space, -/// everything that remains after removing leading and trailing whitespace goes -/// in "first". -void SplitStringOnFirstSpace(const std::string &line, std::string *first, - std::string *rest); - -/// Returns true if "token" is nonempty, and all characters are -/// printable and whitespace-free. -bool IsToken(const std::string &token); - -/// Returns true if "line" is free of \n characters and unprintable -/// characters, and does not contain leading or trailing whitespace. -bool IsLine(const std::string &line); - -/** - This function returns true when two text strings are approximately equal, and - false when they are not. The definition of 'equal' is normal string - equality, except that two substrings like "0.31134" and "0.311341" would be - considered equal. 'decimal_places_tolerance' controls how many digits after - the '.' have to match up. - E.g. StringsApproxEqual("hello 0.23 there", "hello 0.24 there", 2) would - return false because there is a difference in the 2nd decimal, but with - an argument of 1 it would return true. - */ -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_check = 2); - -/** - This class is responsible for parsing input like - hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' - baz="a b c d='a b' e" and giving you access to the fields, in this case - - FirstToken() == "hi-there", and key->value pairs: - - xx->yyy, a->"b c", empty->"", f-oo->"Append(bar, sss)", ba_z->"123", - bing->"a b c", baz->"a b c d='a b' e" - - The first token is optional, if the line started with a key-value pair then - FirstValue() will be empty. - - Note: it can parse value fields with space inside them only if they are free - of the '=' character. If values are going to contain the '=' character, you - need to quote them with either single or double quotes. - - Key values may contain -_a-zA-Z0-9, but must begin with a-zA-Z_. - */ -class ConfigLine { - public: - // Tries to parse the line as a config-file line. Returns false - // if it could not for some reason, e.g. parsing failure. In most cases - // prints no warnings; the user should do this. Does not expect comments. - bool ParseLine(const std::string &line); - - // the GetValue functions are overloaded for various types. 
They return true - // if the key exists with value that can be converted to that type, and false - // otherwise. They also mark the key-value pair as having been read. It is - // not an error to read values twice. - bool GetValue(const std::string &key, std::string *value); - bool GetValue(const std::string &key, BaseFloat *value); - bool GetValue(const std::string &key, int32 *value); - // Values may be separated by ":" or by ",". - bool GetValue(const std::string &key, std::vector *value); - bool GetValue(const std::string &key, bool *value); - - bool HasUnusedValues() const; - /// returns e.g. foo=bar xxx=yyy if foo and xxx were not consumed by one - /// of the GetValue() functions. - std::string UnusedValues() const; - - const std::string &FirstToken() const { return first_token_; } - - const std::string WholeLine() { return whole_line_; } - // use default assignment operator and copy constructor. - private: - std::string whole_line_; - // the first token of the line, e.g. if line is - // foo-bar baz=bing - // then first_token_ would be "foo-bar". - std::string first_token_; - - // data_ maps from key to (value, is-this-value-consumed?). - std::map > data_; -}; - -/// This function is like ExpectToken but for two tokens, and it will either -/// accept token1 and then token2, or just token2. This is useful in Read -/// functions where the first token may already have been consumed. -void ExpectOneOrTwoTokens(std::istream &is, bool binary, - const std::string &token1, const std::string &token2); - -/** - This function reads in a config file and *appends* its contents to a vector - of lines; it is responsible for removing comments (anything after '#') and - stripping out any lines that contain only whitespace after comment removal. - */ -void ReadConfigLines(std::istream &is, std::vector *lines); - -/** - This function converts config-lines from a simple sequence of strings - as output by ReadConfigLines(), into a sequence of first-tokens and - name-value pairs. The general format is: - "command-type bar=baz xx=yyy" - etc., although there are subtleties as to what exactly is allowed, see - documentation for class ConfigLine for details. - This function will die if there was a parsing failure. - */ -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines); - -/// Returns true if 'name' would be a valid name for a component or node in a -/// nnet3Nnet. This is a nonempty string beginning with A-Za-z_, and containing -/// only -/// '-', '_', '.', A-Z, a-z, or 0-9. 
-bool IsValidName(const std::string &name); - -} // namespace kaldi - -#endif // KALDI_UTIL_TEXT_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/CPPLINT.cfg b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/CPPLINT.cfg deleted file mode 100644 index 51ff339c18435a6c3a3be03131080d7b8ab8de86..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/CPPLINT.cfg +++ /dev/null @@ -1 +0,0 @@ -exclude_files=.* diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/CMakeLists.txt deleted file mode 100644 index 04051ef5ae46c04a40c1ffccc98c37fa594ad13e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ - -#-DHAVE_CONFIG_H -I./../include -fno-exceptions -funsigned-char -std=c++11 -MT symbol-table.lo -MD -MP -MF .deps/symbol-table.Tpo -c symbol-table.cc -fno-common -DPIC -o .libs/symbol-table.o - -include_directories(./include/) -install(DIRECTORY include/ DESTINATION include/ - FILES_MATCHING PATTERN "*.h") - -add_subdirectory(lib) - -if(HAVE_SCRIPT) - add_subdirectory(script) -endif(HAVE_SCRIPT) - -if(HAVE_BIN) - add_subdirectory(bin) -endif(HAVE_BIN) - -add_subdirectory(extensions) - -if(BUILD_TESTING) - enable_testing() - add_subdirectory(test) -endif(BUILD_TESTING) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/extensions/special/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/extensions/special/CMakeLists.txt deleted file mode 100644 index 9c71b750a72ffe3c2dafde657273361c3dbae409..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/extensions/special/CMakeLists.txt +++ /dev/null @@ -1,67 +0,0 @@ -file(GLOB HEADER_FILES ../../include/fst/extensions/special/*.h) -message(STATUS "${HEADER_FILES}") - -if(HAVE_BIN) - add_executable(fstspecial-bin - ../../bin/fstconvert.cc - ../../bin/fstconvert-main.cc - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ) - - set_target_properties(fstspecial-bin PROPERTIES - FOLDER special/bin - OUTPUT_NAME fstspecial - ) - - target_link_libraries(fstspecial-bin - fstscript - fst - ${CMAKE_DL_LIBS} - ) -endif(HAVE_BIN) - - -add_library(fstspecial - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ${HEADER_FILES} -) - -set_target_properties(fstspecial PROPERTIES - SOVERSION "${SOVERSION}" - FOLDER special -) -target_link_libraries(fstspecial - fst -) - -set(FST_SPECIAL_INSTALL_TARGETS fstspecial) -if(HAVE_BIN) - list(APPEND FST_SPECIAL_INSTALL_TARGETS fstspecial-bin) -endif() - -install(TARGETS ${FST_SPECIAL_INSTALL_TARGETS} - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib -) - -function (add_module _name) - add_library(${ARGV}) - if (TARGET ${_name}) - target_link_libraries(${_name} fst) - set_target_properties(${_name} - PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true - FOLDER special/modules - ) - endif() - - install(TARGETS ${_name} LIBRARY DESTINATION lib/fst) -endfunction() - -add_module(phi-fst MODULE phi-fst.cc) -add_module(rho-fst MODULE rho-fst.cc) -add_module(sigma-fst MODULE sigma-fst.cc) diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/include/fst/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/include/fst/flags.h deleted file mode 100644 index b5ec8ff7416774a0612ae0fe7e008a630b289dd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/include/fst/flags.h +++ /dev/null @@ -1,228 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style flag handling declarations and inline definitions. - -#ifndef FST_LIB_FLAGS_H_ -#define FST_LIB_FLAGS_H_ - -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include "gflags/gflags.h" -#include "glog/logging.h" - -using std::string; - -// FLAGS USAGE: -// -// Definition example: -// -// DEFINE_int32(length, 0, "length"); -// -// This defines variable FLAGS_length, initialized to 0. -// -// Declaration example: -// -// DECLARE_int32(length); -// -// SET_FLAGS() can be used to set flags from the command line -// using, for example, '--length=2'. -// -// ShowUsage() can be used to print out command and flag usage. - -// #define DECLARE_bool(name) extern bool FLAGS_ ## name -// #define DECLARE_string(name) extern string FLAGS_ ## name -// #define DECLARE_int32(name) extern int32 FLAGS_ ## name -// #define DECLARE_int64(name) extern int64 FLAGS_ ## name -// #define DECLARE_double(name) extern double FLAGS_ ## name - -template -struct FlagDescription { - FlagDescription(T *addr, const char *doc, const char *type, - const char *file, const T val) - : address(addr), - doc_string(doc), - type_name(type), - file_name(file), - default_value(val) {} - - T *address; - const char *doc_string; - const char *type_name; - const char *file_name; - const T default_value; -}; - -template -class FlagRegister { - public: - static FlagRegister *GetRegister() { - static auto reg = new FlagRegister; - return reg; - } - - const FlagDescription &GetFlagDescription(const string &name) const { - fst::MutexLock l(&flag_lock_); - auto it = flag_table_.find(name); - return it != flag_table_.end() ? 
it->second : 0; - } - - void SetDescription(const string &name, - const FlagDescription &desc) { - fst::MutexLock l(&flag_lock_); - flag_table_.insert(make_pair(name, desc)); - } - - bool SetFlag(const string &val, bool *address) const { - if (val == "true" || val == "1" || val.empty()) { - *address = true; - return true; - } else if (val == "false" || val == "0") { - *address = false; - return true; - } - else { - return false; - } - } - - bool SetFlag(const string &val, string *address) const { - *address = val; - return true; - } - - bool SetFlag(const string &val, int32 *address) const { - char *p = 0; - *address = strtol(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, int64 *address) const { - char *p = 0; - *address = strtoll(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, double *address) const { - char *p = 0; - *address = strtod(val.c_str(), &p); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &arg, const string &val) const { - for (typename std::map< string, FlagDescription >::const_iterator it = - flag_table_.begin(); - it != flag_table_.end(); - ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - if (arg == name) - return SetFlag(val, desc.address); - } - return false; - } - - void GetUsage(std::set> *usage_set) const { - for (auto it = flag_table_.begin(); it != flag_table_.end(); ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - string usage = " --" + name; - usage += ": type = "; - usage += desc.type_name; - usage += ", default = "; - usage += GetDefault(desc.default_value) + "\n "; - usage += desc.doc_string; - usage_set->insert(make_pair(desc.file_name, usage)); - } - } - - private: - string GetDefault(bool default_value) const { - return default_value ? "true" : "false"; - } - - string GetDefault(const string &default_value) const { - return "\"" + default_value + "\""; - } - - template - string GetDefault(const V &default_value) const { - std::ostringstream strm; - strm << default_value; - return strm.str(); - } - - mutable fst::Mutex flag_lock_; // Multithreading lock. - std::map> flag_table_; -}; - -template -class FlagRegisterer { - public: - FlagRegisterer(const string &name, const FlagDescription &desc) { - auto registr = FlagRegister::GetRegister(); - registr->SetDescription(name, desc); - } - - private: - FlagRegisterer(const FlagRegisterer &) = delete; - FlagRegisterer &operator=(const FlagRegisterer &) = delete; -}; - - -#define DEFINE_VAR(type, name, value, doc) \ - type FLAGS_ ## name = value; \ - static FlagRegisterer \ - name ## _flags_registerer(#name, FlagDescription(&FLAGS_ ## name, \ - doc, \ - #type, \ - __FILE__, \ - value)) - -// #define DEFINE_bool(name, value, doc) DEFINE_VAR(bool, name, value, doc) -// #define DEFINE_string(name, value, doc) \ -// DEFINE_VAR(string, name, value, doc) -// #define DEFINE_int32(name, value, doc) DEFINE_VAR(int32, name, value, doc) -// #define DEFINE_int64(name, value, doc) DEFINE_VAR(int64, name, value, doc) -// #define DEFINE_double(name, value, doc) DEFINE_VAR(double, name, value, doc) - - -// Temporary directory. 
-DECLARE_string(tmpdir); - -void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags, - const char *src = ""); - -#define SET_FLAGS(usage, argc, argv, rmflags) \ -gflags::ParseCommandLineFlags(argc, argv, true) -// SetFlags(usage, argc, argv, rmflags, __FILE__) - -// Deprecated; for backward compatibility. -inline void InitFst(const char *usage, int *argc, char ***argv, bool rmflags) { - return SetFlags(usage, argc, argv, rmflags); -} - -void ShowUsage(bool long_usage = true); - -#endif // FST_LIB_FLAGS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/include/fst/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/include/fst/log.h deleted file mode 100644 index bf041c58ebfab73d03bb14adf28c7c7916a2217d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/patch/openfst/src/include/fst/log.h +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style logging declarations and inline definitions. - -#ifndef FST_LIB_LOG_H_ -#define FST_LIB_LOG_H_ - -#include -#include -#include - -#include -#include - -using std::string; - -DECLARE_int32(v); - -class LogMessage { - public: - LogMessage(const string &type) : fatal_(type == "FATAL") { - std::cerr << type << ": "; - } - ~LogMessage() { - std::cerr << std::endl; - if(fatal_) - exit(1); - } - std::ostream &stream() { return std::cerr; } - - private: - bool fatal_; -}; - -// #define LOG(type) LogMessage(#type).stream() -// #define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO) - -// Checks -inline void FstCheck(bool x, const char* expr, - const char *file, int line) { - if (!x) { - LOG(FATAL) << "Check failed: \"" << expr - << "\" file: " << file - << " line: " << line; - } -} - -// #define CHECK(x) FstCheck(static_cast(x), #x, __FILE__, __LINE__) -// #define CHECK_EQ(x, y) CHECK((x) == (y)) -// #define CHECK_LT(x, y) CHECK((x) < (y)) -// #define CHECK_GT(x, y) CHECK((x) > (y)) -// #define CHECK_LE(x, y) CHECK((x) <= (y)) -// #define CHECK_GE(x, y) CHECK((x) >= (y)) -// #define CHECK_NE(x, y) CHECK((x) != (y)) - -// Debug checks -// #define DCHECK(x) assert(x) -// #define DCHECK_EQ(x, y) DCHECK((x) == (y)) -// #define DCHECK_LT(x, y) DCHECK((x) < (y)) -// #define DCHECK_GT(x, y) DCHECK((x) > (y)) -// #define DCHECK_LE(x, y) DCHECK((x) <= (y)) -// #define DCHECK_GE(x, y) DCHECK((x) >= (y)) -// #define DCHECK_NE(x, y) DCHECK((x) != (y)) - - -// Ports -#define ATTRIBUTE_DEPRECATED __attribute__((deprecated)) - -#endif // FST_LIB_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/post_processor/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/post_processor/CMakeLists.txt deleted file mode 100644 index 
6113bbc26eb8fe35e4e17ffd1cab382f0fb0f1f8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/post_processor/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_library(post_processor STATIC - post_processor.cc -) -target_link_libraries(post_processor PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/post_processor/post_processor.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/post_processor/post_processor.cc deleted file mode 100644 index 315f62d34cbc441ecbaf7c07667eb35ee61c2c8d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/post_processor/post_processor.cc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#include "post_processor/post_processor.h" - -#include -#include - -#include "utils/string.h" - -namespace wenet { - -std::string PostProcessor::ProcessSpace(const std::string& str) { - std::string result = str; - // 1. remove ' ' if needed - // only spaces between mandarin words need to be removed, please note that - // if str contains '_', we assume that the decoding type must be - // `CtcPrefixBeamSearch` and this branch will do nothing since str must be - // obtained via "".join() (in function `AsrDecoder::UpdateResult()`) - if (opts_.language_type == kMandarinEnglish && !str.empty()) { - result.clear(); - // split str by ' ' - std::vector words; - std::stringstream ss(str); - std::string tmp; - while (ss >> tmp) { - words.push_back(tmp); - } - // check english word - bool is_englishword_prev = false; - bool is_englishword_now = false; - for (std::string& w : words) { - is_englishword_now = CheckEnglishWord(w); - if (is_englishword_prev && is_englishword_now) { - result += (' ' + w); - } else { - result += (w); - } - is_englishword_prev = is_englishword_now; - } - } - // 2. 
replace '_' with ' ' - // this should be done for all cases (both kMandarinEnglish and kIndoEuropean) - result = ProcessBlank(result, opts_.lowercase); - return result; -} - -std::string PostProcessor::Process(const std::string& str, bool finish) { - std::string result; - result = ProcessSpace(str); - // TODO(xcsong): do itn/punctuation if finish == true - return result; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/post_processor/post_processor.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/post_processor/post_processor.h deleted file mode 100644 index 54597845ebc88ad22e1244d2e693e2088cff6d21..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/post_processor/post_processor.h +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#ifndef POST_PROCESSOR_POST_PROCESSOR_H_ -#define POST_PROCESSOR_POST_PROCESSOR_H_ - -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -enum LanguageType { - // spaces between **mandarin words** should be removed. - // cases of processing spaces with mandarin-only, english-only - // and mandarin-english code-switch can be found in post_processor_test.cc - kMandarinEnglish = 0x00, - // spaces should be kept for most of the - // Indo-European languages (i.e., deutsch or english-deutsch code-switch). - // cases of those languages can be found in post_processor_test.cc - kIndoEuropean = 0x01 -}; - -struct PostProcessOptions { - // space options - // The decoded result may contain spaces (' ' or '_'), - // we will process those spaces according to language_type. 
More details can - // be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - LanguageType language_type = kMandarinEnglish; - // whether lowercase letters are required - bool lowercase = true; -}; - -// TODO(xcsong): add itn/punctuation related resource -struct PostProcessResource {}; - -// Post Processor -class PostProcessor { - public: - explicit PostProcessor(PostProcessOptions&& opts) : opts_(std::move(opts)) {} - explicit PostProcessor(const PostProcessOptions& opts) : opts_(opts) {} - // call other functions to do post processing - std::string Process(const std::string& str, bool finish); - // process spaces according to configurations - std::string ProcessSpace(const std::string& str); - // TODO(xcsong): add itn/punctuation - // void InverseTN(const std::string& str); - // void Punctuate(const std::string& str); - - private: - const PostProcessOptions opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(PostProcessor); -}; - -} // namespace wenet - -#endif // POST_PROCESSOR_POST_PROCESSOR_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/CMakeLists.txt deleted file mode 100644 index 145654105350e91a5f9121b47197f5fc60663f5c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -link_libraries(gtest_main gmock) - -add_executable(utils_test utils_test.cc) -target_link_libraries(utils_test PUBLIC utils) -add_test(UTILS_TEST utils_test) - -add_executable(ctc_prefix_beam_search_test ctc_prefix_beam_search_test.cc) -target_link_libraries(ctc_prefix_beam_search_test PUBLIC decoder) -add_test(CTC_PREFIX_BEAM_SEARCH_TEST ctc_prefix_beam_search_test) - -add_executable(post_processor_test post_processor_test.cc) -target_link_libraries(post_processor_test PUBLIC post_processor) -add_test(POST_PROCESSOR_TEST post_processor_test) - - -add_executable(feature_pipeline_test feature_pipeline_test.cc) -target_link_libraries(feature_pipeline_test PUBLIC frontend) -add_test(FEATURE_PIPELINE_TEST feature_pipeline_test) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/ctc_prefix_beam_search_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/ctc_prefix_beam_search_test.cc deleted file mode 100644 index d8f3b65693b934beb33f3a770795f0b6e7ce3456..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/ctc_prefix_beam_search_test.cc +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/ctc_prefix_beam_search.h" - -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include "utils/utils.h" - -TEST(CtcPrefixBeamSearchTest, CtcPrefixBeamSearchLogicTest) { - using ::testing::ElementsAre; - // See https://robin1001.github.io/2020/12/11/ctc-search for the - // graph demonstration of the data - std::vector> data = { - {0.25, 0.40, 0.35}, {0.40, 0.35, 0.25}, {0.10, 0.50, 0.40}}; - // Apply log - for (int i = 0; i < data.size(); i++) { - for (int j = 0; j < data[i].size(); j++) { - data[i][j] = std::log(data[i][j]); - } - } - wenet::CtcPrefixBeamSearchOptions option; - option.first_beam_size = 3; - option.second_beam_size = 3; - wenet::CtcPrefixBeamSearch prefix_beam_search(option); - prefix_beam_search.Search(data); - /* Test case info - | top k | result index | prefix score | viterbi score | timestamp | - |-------|--------------|--------------|---------------|-----------| - | top 1 | [2, 1] | 0.2185 | 0.07 | [0, 2] | - | top 2 | [1, 2] | 0.1550 | 0.064 | [0, 2] | - | top 3 | [1] | 0.1525 | 0.07 | [2] | - */ - const std::vector>& result = prefix_beam_search.Outputs(); - EXPECT_EQ(result.size(), 3); - ASSERT_THAT(result[0], ElementsAre(2, 1)); - ASSERT_THAT(result[1], ElementsAre(1, 2)); - ASSERT_THAT(result[2], ElementsAre(1)); - - const std::vector& likelihood = prefix_beam_search.Likelihood(); - EXPECT_EQ(likelihood.size(), 3); - EXPECT_FLOAT_EQ(std::exp(likelihood[0]), 0.2185); - EXPECT_FLOAT_EQ(std::exp(likelihood[1]), 0.1550); - EXPECT_FLOAT_EQ(std::exp(likelihood[2]), 0.1525); - - const std::vector& viterbi_likelihood = - prefix_beam_search.viterbi_likelihood(); - EXPECT_EQ(viterbi_likelihood.size(), 3); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[0]), 0.07); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[1]), 0.064); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[2]), 0.07); - - const std::vector>& times = prefix_beam_search.Times(); - EXPECT_EQ(times.size(), 3); - ASSERT_THAT(times[0], ElementsAre(0, 2)); - ASSERT_THAT(times[1], ElementsAre(0, 2)); - ASSERT_THAT(times[2], ElementsAre(2)); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/feature_pipeline_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/feature_pipeline_test.cc deleted file mode 100644 index 244ec0735b6086211b476e8d97569e1ee5959bc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/feature_pipeline_test.cc +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) 2022 Roney -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include - -#include "frontend/feature_pipeline.h" -#include "utils/blocking_queue.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -void pushQueue(const std::shared_ptr>& que, - std::vector vec) { - que->Push(vec); -} - -void popQueue(const std::shared_ptr>& que, int num, - int back_data) { - auto pop_data = que->Pop(num); - ASSERT_EQ(pop_data[num - 1], back_data); -} - -TEST(FeaturePipelineTest, BlockingQueueTest) { - auto capacity_queue = std::make_shared>(2); - std::vector test_data{1, 2, 3, 4, 5}; - std::thread push_thread(&pushQueue, capacity_queue, test_data); - ASSERT_EQ(capacity_queue->Pop(), 1); - ASSERT_LE(capacity_queue->Size(), 2); // capacity_queue: 2 or 2,3 - auto pop_data = capacity_queue->Pop(3); // 2,3,4 num > capacity - ASSERT_EQ(pop_data.size(), 3); - ASSERT_EQ(pop_data[2], 4); - push_thread.join(); - ASSERT_EQ(capacity_queue->Size(), 1); // capacity_queue:5 - - std::thread pop_thread(&popQueue, capacity_queue, 3, 0); // num > capacity - capacity_queue->Push(9); // capacity_queue:5,9 - capacity_queue->Push(0); // capacity_queue:5,9,0 - pop_thread.join(); // capacity_queue: - ASSERT_EQ(capacity_queue->Size(), 0); - - pop_data = capacity_queue->Pop(0); - ASSERT_TRUE(pop_data.empty()); -} - -TEST(FeaturePipelineTest, PipelineTest) { - wenet::FeaturePipelineConfig config(80, 8000); - wenet::FeaturePipeline feature_pipeline(config); - int audio_len = 8 * 55; // audio len 55ms,4 frames - std::vector pcm(audio_len, 0); - feature_pipeline.AcceptWaveform(pcm.data(), audio_len); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 4); - - std::vector> out_feats; - auto b = feature_pipeline.Read(2, &out_feats); - ASSERT_TRUE(b); - ASSERT_EQ(out_feats.size(), 2); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 2); - - std::vector out_feat; - b = feature_pipeline.ReadOne(&out_feat); - ASSERT_TRUE(b); - ASSERT_FALSE(out_feat.empty()); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 1); - - feature_pipeline.set_input_finished(); - b = feature_pipeline.Read(2, &out_feats); - ASSERT_FALSE(b); - ASSERT_EQ(out_feats.size(), 1); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 0); - - feature_pipeline.AcceptWaveform(pcm.data(), audio_len); - feature_pipeline.Read(2, &out_feats); - feature_pipeline.Reset(); - feature_pipeline.set_input_finished(); - b = feature_pipeline.Read(2, &out_feats); - ASSERT_FALSE(b); - ASSERT_EQ(out_feats.size(), 0); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 0); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/post_processor_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/post_processor_test.cc deleted file mode 100644 index fa11fa29231032d62389a93fd00b0ec782bf8a3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/post_processor_test.cc +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
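A quick sanity check on the "55 ms of 8 kHz audio gives 4 frames" expectation in the deleted feature_pipeline_test.cc above, assuming the conventional 25 ms analysis window and 10 ms frame shift (the usual fbank defaults; the test itself does not spell them out):

```cpp
#include <cstdio>

// 8 kHz * 55 ms = 440 samples; with a 200-sample window and 80-sample shift
// the pipeline can cut exactly four frames, matching NumQueuedFrames() == 4.
int main() {
  const int sample_rate = 8000;                              // Hz
  const int audio_ms = 55;                                   // matches `8 * 55` in the test
  const int num_samples = sample_rate / 1000 * audio_ms;     // 440
  const int frame_len = sample_rate / 1000 * 25;             // 200 samples (assumed 25 ms window)
  const int frame_shift = sample_rate / 1000 * 10;           // 80 samples (assumed 10 ms shift)
  const int num_frames = (num_samples - frame_len) / frame_shift + 1;
  printf("%d samples -> %d frames\n", num_samples, num_frames);  // 440 samples -> 4 frames
}
```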
-// See the License for the specific language governing permissions and -// limitations under the License - -#include "post_processor/post_processor.h" - -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include "utils/utils.h" - -TEST(PostProcessorTest, ProcessSpacekMandarinEnglishTest) { - wenet::PostProcessOptions opts_lowercase; - wenet::PostProcessor post_processor_lowercase(opts_lowercase); - - wenet::PostProcessOptions opts_uppercase; - opts_uppercase.lowercase = false; - wenet::PostProcessor post_processor_uppercase(opts_uppercase); - - std::vector input = { - // modeling unit: mandarin character - // decode type: CtcPrefixBeamSearch, "".join() - "震东好帅", - // modeling unit: mandarin word - // decode type: CtcWfstBeamSearch, " ".join() - " 吴迪 也 好帅", - // modeling unit: english wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "▁binbin▁is▁also▁handsome", - // modeling unit: english word - // decode type: CtcWfstBeamSearch, " ".join() - " life is short i use wenet", - // modeling unit: mandarin character + english wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "超哥▁is▁the▁most▁handsome", - // modeling unit: mandarin word + english word - // decode type: CtcWfstBeamSearch, " ".join() - " 人生 苦短 i use wenet", - }; - - std::vector result_lowercase = { - "震东好帅", - "吴迪也好帅", - "binbin is also handsome", - "life is short i use wenet", - "超哥 is the most handsome", - "人生苦短i use wenet", - }; - - std::vector result_uppercase = { - "震东好帅", - "吴迪也好帅", - "BINBIN IS ALSO HANDSOME", - "LIFE IS SHORT I USE WENET", - "超哥 IS THE MOST HANDSOME", - "人生苦短I USE WENET", - }; - - for (size_t i = 0; i < input.size(); ++i) { - EXPECT_EQ(post_processor_lowercase.ProcessSpace(input[i]), - result_lowercase[i]); - EXPECT_EQ(post_processor_uppercase.ProcessSpace(input[i]), - result_uppercase[i]); - } -} - -TEST(PostProcessorTest, ProcessSpacekIndoEuropeanTest) { - wenet::PostProcessOptions opts_lowercase; - opts_lowercase.language_type = wenet::kIndoEuropean; - wenet::PostProcessor post_processor_lowercase(opts_lowercase); - - wenet::PostProcessOptions opts_uppercase; - opts_uppercase.language_type = wenet::kIndoEuropean; - opts_uppercase.lowercase = false; - wenet::PostProcessor post_processor_uppercase(opts_uppercase); - - std::vector input = { - // modeling unit: wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "▁zhendong▁ist▁so▁schön", - // modeling unit: word - // decode type: CtcWfstBeamSearch, " ".join() - " zhendong ist so schön"}; - - std::vector result_lowercase = {"zhendong ist so schön", - "zhendong ist so schön"}; - - std::vector result_uppercase = {"ZHENDONG IST SO SCHÖN", - "ZHENDONG IST SO SCHÖN"}; - - for (size_t i = 0; i < input.size(); ++i) { - EXPECT_EQ(post_processor_lowercase.ProcessSpace(input[i]), - result_lowercase[i]); - EXPECT_EQ(post_processor_uppercase.ProcessSpace(input[i]), - result_uppercase[i]); - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/utils_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/utils_test.cc deleted file mode 100644 index 6b2bbac25e000ce854d5e55a50cb51109d62d758..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/test/utils_test.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
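The expectations in the deleted post_processor_test.cc above boil down to: wordpiece markers (▁) become spaces, leading/trailing/doubled spaces are squeezed, and the result is case-folded; the language-dependent removal of spaces around CJK characters is separate logic that is not shown in this hunk. A standalone sketch of just the marker/space/case step follows (ASCII-only case folding, illustrative name, not the wenet ProcessBlank):

```cpp
#include <cctype>
#include <cstdio>
#include <string>

// Replace every "▁" wordpiece marker with a space, trim and squeeze spaces,
// then case-fold ASCII letters.  Sketch only; the real code also converts to
// a wide string so case folding works beyond ASCII.
std::string ProcessBlankSketch(std::string s, bool lowercase) {
  const std::string kSpaceSymbol = "\xe2\x96\x81";  // UTF-8 bytes of ▁
  for (size_t pos = s.find(kSpaceSymbol); pos != std::string::npos;
       pos = s.find(kSpaceSymbol, pos)) {
    s.replace(pos, kSpaceSymbol.size(), " ");
  }
  std::string out;
  for (char c : s) {
    if (c == ' ') {
      if (!out.empty() && out.back() != ' ') out.push_back(' ');  // squeeze
    } else {
      out.push_back(lowercase ? std::tolower(static_cast<unsigned char>(c))
                              : std::toupper(static_cast<unsigned char>(c)));
    }
  }
  if (!out.empty() && out.back() == ' ') out.pop_back();  // drop trailing space
  return out;
}

int main() {
  // Same shapes as two of the cases in the deleted test:
  printf("[%s]\n", ProcessBlankSketch("▁binbin▁is▁also▁handsome", true).c_str());
  // -> [binbin is also handsome]
  printf("[%s]\n", ProcessBlankSketch(" life is short i use wenet", false).c_str());
  // -> [LIFE IS SHORT I USE WENET]
}
```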
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "utils/utils.h" - -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -TEST(UtilsTest, TopKTest) { - using ::testing::ElementsAre; - using ::testing::FloatNear; - using ::testing::Pointwise; - std::vector data = {1, 3, 5, 7, 9, 2, 4, 6, 8, 10}; - std::vector values; - std::vector indices; - wenet::TopK(data, 3, &values, &indices); - EXPECT_THAT(values, Pointwise(FloatNear(1e-8), {10, 9, 8})); - ASSERT_THAT(indices, ElementsAre(9, 4, 8)); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/CMakeLists.txt deleted file mode 100644 index 686362688c050d48224ca0a01e0d24b03d94758a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_library(utils STATIC - string.cc - utils.cc -) - -if(NOT ANDROID) - if(MSVC) - target_link_libraries(utils PUBLIC fst) - else() - target_link_libraries(utils PUBLIC fst dl) - endif() -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/blocking_queue.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/blocking_queue.h deleted file mode 100644 index 9bf0127d9298fbfae2eeebb9431c680fc5dd7647..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/blocking_queue.h +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
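The TopK expectation in the deleted utils_test.cc above (values 10, 9, 8 at indices 9, 4, 8) can be reproduced with a plain sort over indices. This is a simplified stand-in for the heap-based TopK defined later in the deleted utils.cc, keeping the same ordering: largest value first, ties broken toward the smaller index.

```cpp
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

// Return the k largest values of `data` together with their original indices.
void TopK(const std::vector<float>& data, int k,
          std::vector<float>* values, std::vector<int>* indices) {
  std::vector<int> order(data.size());
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(), [&](int a, int b) {
    return data[a] > data[b] || (data[a] == data[b] && a < b);
  });
  const int n = std::min<int>(k, static_cast<int>(data.size()));
  values->assign(n, 0.0f);
  indices->assign(n, 0);
  for (int i = 0; i < n; ++i) {
    (*values)[i] = data[order[i]];
    (*indices)[i] = order[i];
  }
}

int main() {
  std::vector<float> data = {1, 3, 5, 7, 9, 2, 4, 6, 8, 10};
  std::vector<float> values;
  std::vector<int> indices;
  TopK(data, 3, &values, &indices);
  for (size_t i = 0; i < values.size(); ++i)
    printf("value %.0f at index %d\n", values[i], indices[i]);
  // As in the deleted test: 10 at 9, 9 at 4, 8 at 8
}
```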
- -#ifndef UTILS_BLOCKING_QUEUE_H_ -#define UTILS_BLOCKING_QUEUE_H_ - -#include -#include -#include -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -template -class BlockingQueue { - public: - explicit BlockingQueue(size_t capacity = std::numeric_limits::max()) - : capacity_(capacity) {} - - void Push(const T& value) { - { - std::unique_lock lock(mutex_); - while (queue_.size() >= capacity_) { - not_full_condition_.wait(lock); - } - queue_.push(value); - } - not_empty_condition_.notify_one(); - } - - void Push(T&& value) { - { - std::unique_lock lock(mutex_); - while (queue_.size() >= capacity_) { - not_full_condition_.wait(lock); - } - queue_.push(std::move(value)); - } - not_empty_condition_.notify_one(); - } - - void Push(const std::vector& values) { - { - std::unique_lock lock(mutex_); - for (auto& value : values) { - while (queue_.size() >= capacity_) { - not_empty_condition_.notify_one(); - not_full_condition_.wait(lock); - } - queue_.push(value); - } - } - not_empty_condition_.notify_one(); - } - - void Push(std::vector&& values) { - std::unique_lock lock(mutex_); - for (auto& value : values) { - while (queue_.size() >= capacity_) { - not_empty_condition_.notify_one(); - not_full_condition_.wait(lock); - } - queue_.push(std::move(value)); - } - not_empty_condition_.notify_one(); - } - - T Pop() { - std::unique_lock lock(mutex_); - while (queue_.empty()) { - not_empty_condition_.wait(lock); - } - T t(std::move(queue_.front())); - queue_.pop(); - not_full_condition_.notify_one(); - return t; - } - - // num can be greater than capacity,but it needs to be used with care - std::vector Pop(size_t num) { - std::unique_lock lock(mutex_); - std::vector block_data; - while (block_data.size() < num) { - while (queue_.empty()) { - not_full_condition_.notify_one(); - not_empty_condition_.wait(lock); - } - block_data.push_back(std::move(queue_.front())); - queue_.pop(); - } - not_full_condition_.notify_one(); - return block_data; - } - - bool Empty() const { - std::lock_guard lock(mutex_); - return queue_.empty(); - } - - size_t Size() const { - std::lock_guard lock(mutex_); - return queue_.size(); - } - - void Clear() { - while (!Empty()) { - Pop(); - } - } - - private: - size_t capacity_; - mutable std::mutex mutex_; - std::condition_variable not_full_condition_; - std::condition_variable not_empty_condition_; - std::queue queue_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(BlockingQueue); -}; - -} // namespace wenet - -#endif // UTILS_BLOCKING_QUEUE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/file.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/file.h deleted file mode 100644 index 83ad9c8c52fecd334b3549285bf39cd4f59b9f2b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/file.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
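The deleted blocking_queue.h pairs a capacity bound with two condition variables (not_full_ for producers, not_empty_ for consumers); its bulk Push deliberately wakes waiting consumers while it is itself blocked on space, which is what makes Pop(num) with num greater than the capacity workable. Below is a pared-down sketch of the same two-condition-variable pattern (single-element Push/Pop only, illustrative class name, not the wenet template):

```cpp
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>

// Minimal bounded blocking queue: producers wait while full, consumers wait
// while empty, each side notifies the other after changing the queue.
class BoundedQueue {
 public:
  explicit BoundedQueue(size_t capacity) : capacity_(capacity) {}

  void Push(int v) {
    std::unique_lock<std::mutex> lock(mutex_);
    not_full_.wait(lock, [&] { return queue_.size() < capacity_; });
    queue_.push(v);
    not_empty_.notify_one();
  }

  int Pop() {
    std::unique_lock<std::mutex> lock(mutex_);
    not_empty_.wait(lock, [&] { return !queue_.empty(); });
    int v = queue_.front();
    queue_.pop();
    not_full_.notify_one();
    return v;
  }

 private:
  size_t capacity_;
  std::mutex mutex_;
  std::condition_variable not_full_, not_empty_;
  std::queue<int> queue_;
};

int main() {
  BoundedQueue q(2);  // same capacity as the deleted BlockingQueueTest uses
  std::thread producer([&] { for (int i = 1; i <= 5; ++i) q.Push(i); });
  for (int i = 1; i <= 5; ++i) printf("popped %d\n", q.Pop());
  producer.join();
}
```

The vendored class additionally offers bulk Push/Pop, Size and Clear on top of this handshake.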
- -#ifndef UTILS_FILE_H_ -#define UTILS_FILE_H_ - -#include -#include - -namespace wenet { - -inline bool FileExists(const std::string& path) { - std::ifstream f(path.c_str()); - return f.good(); -} - -} // namespace wenet - -#endif // UTILS_FILE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/flags.h deleted file mode 100644 index 3432aa78847322edec8d6d2aec59ed7ca5352fcd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/flags.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_FLAGS_H_ -#define UTILS_FLAGS_H_ - -// Because openfst is a dynamic library compiled with gflags/glog, we must use -// the gflags/glog from openfst to avoid them linked both statically and -// dynamically into the executable. -#include "fst/flags.h" - -#endif // UTILS_FLAGS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/json.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/json.h deleted file mode 100644 index bf8d94a3e42504139b10daa39b8f8e7a8b2d93cc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/json.h +++ /dev/null @@ -1,754 +0,0 @@ -// Copyright (c) From https://github.com/nbsdx/SimpleJSON -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef UTILS_JSON_H_ -#define UTILS_JSON_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace json { - -using std::deque; -using std::enable_if; -using std::initializer_list; -using std::is_convertible; -using std::is_floating_point; -using std::is_integral; -using std::is_same; -using std::map; -using std::string; - -namespace { // NOLINT -string json_escape(const string& str) { - string output; - for (unsigned i = 0; i < str.length(); ++i) switch (str[i]) { - case '\"': - output += "\\\""; - break; - case '\\': - output += "\\\\"; - break; - case '\b': - output += "\\b"; - break; - case '\f': - output += "\\f"; - break; - case '\n': - output += "\\n"; - break; - case '\r': - output += "\\r"; - break; - case '\t': - output += "\\t"; - break; - default: - output += str[i]; - break; - } - return std::move(output); -} -} // namespace - -class JSON { - union BackingData { - BackingData(double d) : Float(d) {} - BackingData(int l) : Int(l) {} - BackingData(bool b) : Bool(b) {} - BackingData(string s) : String(new string(s)) {} - BackingData() : Int(0) {} - - deque* List; - map* Map; - string* String; - double Float; - int Int; - bool Bool; - } Internal; - - public: - enum class Class { Null, Object, Array, String, Floating, Integral, Boolean }; - - template - class JSONWrapper { - Container* object; - - public: - explicit JSONWrapper(Container* val) : object(val) {} - explicit JSONWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::iterator begin() { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::iterator end() { - return object ? object->end() : typename Container::iterator(); - } - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::const_iterator end() const { - return object ? object->end() : typename Container::iterator(); - } - }; - - template - class JSONConstWrapper { - const Container* object; - - public: - explicit JSONConstWrapper(const Container* val) : object(val) {} - explicit JSONConstWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::const_iterator(); - } - typename Container::const_iterator end() const { - return object ? 
object->end() : typename Container::const_iterator(); - } - }; - - JSON() : Internal(), Type(Class::Null) {} - - explicit JSON(initializer_list list) : JSON() { - SetType(Class::Object); - for (auto i = list.begin(), e = list.end(); i != e; ++i, ++i) - operator[](i->ToString()) = *std::next(i); - } - - JSON(JSON&& other) : Internal(other.Internal), Type(other.Type) { - other.Type = Class::Null; - other.Internal.Map = nullptr; - } - - JSON& operator=(JSON&& other) { - ClearInternal(); - Internal = other.Internal; - Type = other.Type; - other.Internal.Map = nullptr; - other.Type = Class::Null; - return *this; - } - - JSON(const JSON& other) { - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - } - - JSON& operator=(const JSON& other) { - ClearInternal(); - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - return *this; - } - - ~JSON() { - switch (Type) { - case Class::Array: - delete Internal.List; - break; - case Class::Object: - delete Internal.Map; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - template - explicit JSON(T b, typename enable_if::value>::type* = 0) - : Internal(b), Type(Class::Boolean) {} - - template - explicit JSON(T i, typename enable_if::value && - !is_same::value>::type* = 0) - : Internal(static_cast(i)), Type(Class::Integral) {} - - template - explicit JSON(T f, typename enable_if::value>::type* = 0) - : Internal(static_cast(f)), Type(Class::Floating) {} - - template - explicit JSON(T s, - typename enable_if::value>::type* = 0) - : Internal(string(s)), Type(Class::String) {} - - explicit JSON(std::nullptr_t) : Internal(), Type(Class::Null) {} - - static JSON Make(Class type) { - JSON ret; - ret.SetType(type); - return ret; - } - - static JSON Load(const string&); - - template - void append(T arg) { - SetType(Class::Array); - Internal.List->emplace_back(arg); - } - - template - void append(T arg, U... 
args) { - append(arg); - append(args...); - } - - template - typename enable_if::value, JSON&>::type operator=(T b) { - SetType(Class::Boolean); - Internal.Bool = b; - return *this; - } - - template - typename enable_if::value && !is_same::value, - JSON&>::type - operator=(T i) { - SetType(Class::Integral); - Internal.Int = i; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=(T f) { - SetType(Class::Floating); - Internal.Float = f; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=( - T s) { - SetType(Class::String); - *Internal.String = string(s); - return *this; - } - - JSON& operator[](const string& key) { - SetType(Class::Object); - return Internal.Map->operator[](key); - } - - JSON& operator[](unsigned index) { - SetType(Class::Array); - if (index >= Internal.List->size()) Internal.List->resize(index + 1); - return Internal.List->operator[](index); - } - - JSON& at(const string& key) { return operator[](key); } - - const JSON& at(const string& key) const { return Internal.Map->at(key); } - - JSON& at(unsigned index) { return operator[](index); } - - const JSON& at(unsigned index) const { return Internal.List->at(index); } - - int length() const { - if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - bool hasKey(const string& key) const { - if (Type == Class::Object) - return Internal.Map->find(key) != Internal.Map->end(); - return false; - } - - int size() const { - if (Type == Class::Object) - return Internal.Map->size(); - else if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - Class JSONType() const { return Type; } - - /// Functions for getting primitives from the JSON object. - bool IsNull() const { return Type == Class::Null; } - - string ToString() const { - bool b; - return std::move(ToString(&b)); - } - string ToString(bool* ok) const { - *ok = (Type == Class::String); - return *ok ? std::move(json_escape(*Internal.String)) : string(""); - } - - double ToFloat() const { - bool b; - return ToFloat(&b); - } - double ToFloat(bool* ok) const { - *ok = (Type == Class::Floating); - return *ok ? Internal.Float : 0.0; - } - - int ToInt() const { - bool b; - return ToInt(&b); - } - int ToInt(bool* ok) const { - *ok = (Type == Class::Integral); - return *ok ? Internal.Int : 0; - } - - bool ToBool() const { - bool b; - return ToBool(&b); - } - bool ToBool(bool* ok) const { - *ok = (Type == Class::Boolean); - return *ok ? 
Internal.Bool : false; - } - - JSONWrapper> ObjectRange() { - if (Type == Class::Object) - return JSONWrapper>(Internal.Map); - return JSONWrapper>(nullptr); - } - - JSONWrapper> ArrayRange() { - if (Type == Class::Array) return JSONWrapper>(Internal.List); - return JSONWrapper>(nullptr); - } - - JSONConstWrapper> ObjectRange() const { - if (Type == Class::Object) - return JSONConstWrapper>(Internal.Map); - return JSONConstWrapper>(nullptr); - } - - JSONConstWrapper> ArrayRange() const { - if (Type == Class::Array) - return JSONConstWrapper>(Internal.List); - return JSONConstWrapper>(nullptr); - } - - string dump(int depth = 1, string tab = " ") const { - string pad = ""; - for (int i = 0; i < depth; ++i, pad += tab) { - } - - switch (Type) { - case Class::Null: - return "null"; - case Class::Object: { - string s = "{\n"; - bool skip = true; - for (auto& p : *Internal.Map) { - if (!skip) s += ",\n"; - s += (pad + "\"" + p.first + "\" : " + p.second.dump(depth + 1, tab)); - skip = false; - } - s += ("\n" + pad.erase(0, 2) + "}"); - return s; - } - case Class::Array: { - string s = "["; - bool skip = true; - for (auto& p : *Internal.List) { - if (!skip) s += ", "; - s += p.dump(depth + 1, tab); - skip = false; - } - s += "]"; - return s; - } - case Class::String: - return "\"" + json_escape(*Internal.String) + "\""; - case Class::Floating: - return std::to_string(Internal.Float); - case Class::Integral: - return std::to_string(Internal.Int); - case Class::Boolean: - return Internal.Bool ? "true" : "false"; - default: - return ""; - } - return ""; - } - - friend std::ostream& operator<<(std::ostream&, const JSON&); - - private: - void SetType(Class type) { - if (type == Type) return; - - ClearInternal(); - - switch (type) { - case Class::Null: - Internal.Map = nullptr; - break; - case Class::Object: - Internal.Map = new map(); - break; - case Class::Array: - Internal.List = new deque(); - break; - case Class::String: - Internal.String = new string(); - break; - case Class::Floating: - Internal.Float = 0.0; - break; - case Class::Integral: - Internal.Int = 0; - break; - case Class::Boolean: - Internal.Bool = false; - break; - } - - Type = type; - } - - private: - /* beware: only call if YOU know that Internal is allocated. No checks - performed here. This function should be called in a constructed JSON just - before you are going to overwrite Internal... -*/ - void ClearInternal() { - switch (Type) { - case Class::Object: - delete Internal.Map; - break; - case Class::Array: - delete Internal.List; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - private: - Class Type = Class::Null; -}; - -JSON Array() { return std::move(JSON::Make(JSON::Class::Array)); } - -template -JSON Array(T... 
args) { - JSON arr = JSON::Make(JSON::Class::Array); - arr.append(args...); - return std::move(arr); -} - -JSON Object() { return std::move(JSON::Make(JSON::Class::Object)); } - -std::ostream& operator<<(std::ostream& os, const JSON& json) { - os << json.dump(); - return os; -} - -namespace { // NOLINT -JSON parse_next(const string&, size_t&); - -void consume_ws(const string& str, size_t& offset) { // NOLINT - while (isspace(str[offset])) ++offset; -} - -JSON parse_object(const string& str, size_t& offset) { // NOLINT - JSON Object = JSON::Make(JSON::Class::Object); - - ++offset; - consume_ws(str, offset); - if (str[offset] == '}') { - ++offset; - return std::move(Object); - } - - while (true) { - JSON Key = parse_next(str, offset); - consume_ws(str, offset); - if (str[offset] != ':') { - std::cerr << "Error: Object: Expected colon, found '" << str[offset] - << "'\n"; - break; - } - consume_ws(str, ++offset); - JSON Value = parse_next(str, offset); - Object[Key.ToString()] = Value; - - consume_ws(str, offset); - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == '}') { - ++offset; - break; - } else { - std::cerr << "ERROR: Object: Expected comma, found '" << str[offset] - << "'\n"; - break; - } - } - - return std::move(Object); -} - -JSON parse_array(const string& str, size_t& offset) { // NOLINT - JSON Array = JSON::Make(JSON::Class::Array); - unsigned index = 0; - - ++offset; - consume_ws(str, offset); - if (str[offset] == ']') { - ++offset; - return std::move(Array); - } - - while (true) { - Array[index++] = parse_next(str, offset); - consume_ws(str, offset); - - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == ']') { - ++offset; - break; - } else { - std::cerr << "ERROR: Array: Expected ',' or ']', found '" << str[offset] - << "'\n"; - return std::move(JSON::Make(JSON::Class::Array)); - } - } - - return std::move(Array); -} - -JSON parse_string(const string& str, size_t& offset) { // NOLINT - JSON String; - string val; - for (char c = str[++offset]; c != '\"'; c = str[++offset]) { - if (c == '\\') { - switch (str[++offset]) { - case '\"': - val += '\"'; - break; - case '\\': - val += '\\'; - break; - case '/': - val += '/'; - break; - case 'b': - val += '\b'; - break; - case 'f': - val += '\f'; - break; - case 'n': - val += '\n'; - break; - case 'r': - val += '\r'; - break; - case 't': - val += '\t'; - break; - case 'u': { - val += "\\u"; - for (unsigned i = 1; i <= 4; ++i) { - c = str[offset + i]; - if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || - (c >= 'A' && c <= 'F')) { - val += c; - } else { - std::cerr << "ERROR: String: Expected hex character in unicode " - "escape, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::String)); - } - } - offset += 4; - } break; - default: - val += '\\'; - break; - } - } else { - val += c; - } - } - ++offset; - String = val; - return std::move(String); -} - -JSON parse_number(const string& str, size_t& offset) { // NOLINT - JSON Number; - string val, exp_str; - char c; - bool isDouble = false; - int exp = 0; - while (true) { - c = str[offset++]; - if ((c == '-') || (c >= '0' && c <= '9')) { - val += c; - } else if (c == '.') { - val += c; - isDouble = true; - } else { - break; - } - } - if (c == 'E' || c == 'e') { - c = str[offset++]; - if (c == '-') { - ++offset; - exp_str += '-'; - } - while (true) { - c = str[offset++]; - if (c >= '0' && c <= '9') { - exp_str += c; - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: 
Expected a number for exponent, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } else { - break; - } - } - exp = std::stol(exp_str); - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: unexpected character '" << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - --offset; - - if (isDouble) { - Number = std::stod(val) * std::pow(10, exp); - } else { - if (!exp_str.empty()) - Number = std::stol(val) * std::pow(10, exp); - else - Number = std::stol(val); - } - return std::move(Number); -} - -JSON parse_bool(const string& str, size_t& offset) { // NOLINT - JSON Bool; - if (str.substr(offset, 4) == "true") { - Bool = true; - } else if (str.substr(offset, 5) == "false") { - Bool = false; - } else { - std::cerr << "ERROR: Bool: Expected 'true' or 'false', found '" - << str.substr(offset, 5) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += (Bool.ToBool() ? 4 : 5); - return std::move(Bool); -} - -JSON parse_null(const string& str, size_t& offset) { // NOLINT - JSON Null; - if (str.substr(offset, 4) != "null") { - std::cerr << "ERROR: Null: Expected 'null', found '" - << str.substr(offset, 4) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += 4; - return std::move(Null); -} - -JSON parse_next(const string& str, size_t& offset) { // NOLINT - char value; - consume_ws(str, offset); - value = str[offset]; - switch (value) { - case '[': - return std::move(parse_array(str, offset)); - case '{': - return std::move(parse_object(str, offset)); - case '\"': - return std::move(parse_string(str, offset)); - case 't': - case 'f': - return std::move(parse_bool(str, offset)); - case 'n': - return std::move(parse_null(str, offset)); - default: - if ((value <= '9' && value >= '0') || value == '-') - return std::move(parse_number(str, offset)); - } - std::cerr << "ERROR: Parse: Unknown starting character '" << value << "'\n"; - return JSON(); -} -} // namespace - -JSON JSON::Load(const string& str) { - size_t offset = 0; - return std::move(parse_next(str, offset)); -} - -} // namespace json - -#endif // UTILS_JSON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/log.h deleted file mode 100644 index c2bf03f261a8711f74da819d80d68e8eb9fb124a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/log.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_LOG_H_ -#define UTILS_LOG_H_ - -// Because openfst is a dynamic library compiled with gflags/glog, we must use -// the gflags/glog from openfst to avoid them linked both statically and -// dynamically into the executable. 
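The vendored json.h above stores every value in a hand-rolled union (BackingData) discriminated by a Class enum, doing its own new/delete in SetType/ClearInternal whenever the active member changes. Purely as a point of comparison (not a drop-in replacement), the same tagged-union idea for the scalar alternatives can be written with std::variant, which carries the tag and manages the payload lifetime itself:

```cpp
#include <cstdio>
#include <string>
#include <variant>

// One value, several possible payloads, plus a tag saying which one is live.
using Scalar = std::variant<std::nullptr_t, bool, long, double, std::string>;

const char* TypeName(const Scalar& v) {
  switch (v.index()) {
    case 0: return "Null";
    case 1: return "Boolean";
    case 2: return "Integral";
    case 3: return "Floating";
    default: return "String";
  }
}

int main() {
  Scalar v;                           // starts as Null, like a default JSON()
  printf("%s\n", TypeName(v));
  v = std::string("server_ready");    // SetType(Class::String) + assign in json.h
  printf("%s: %s\n", TypeName(v), std::get<std::string>(v).c_str());
  v = 3.14;                           // switching the alternative destroys the string
  printf("%s\n", TypeName(v));
}
```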
-#include "fst/log.h" - -#endif // UTILS_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/string.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/string.cc deleted file mode 100644 index 1ab93adf3cac1bc5a42c0b8c6cadbde399678fef..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/string.cc +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "utils/string.h" - -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -void SplitString(const std::string& str, std::vector* strs) { - SplitStringToVector(Trim(str), " \t", true, strs); -} - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars) { - chars->clear(); - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - assert((str[i] & 0xF8) <= 0xF0); - if ((str[i] & 0x80) == 0x00) { - // The first 128 characters (US-ASCII) in UTF-8 format only need one byte. - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - // The next 1,920 characters need two bytes to encode, - // which covers the remainder of almost all Latin-script alphabets. - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - // Three bytes are needed for characters in the rest of - // the Basic Multilingual Plane, which contains virtually all characters - // in common use, including most Chinese, Japanese and Korean characters. - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - // Four bytes are needed for characters in the other planes of Unicode, - // which include less common CJK characters, various historic scripts, - // mathematical symbols, and emoji (pictographic symbols). 
- bytes = 4; - } - chars->push_back(str.substr(i, bytes)); - } -} - -int UTF8StringLength(const std::string& str) { - int len = 0; - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - if ((str[i] & 0x80) == 0x00) { - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - bytes = 4; - } - ++len; - } - return len; -} - -bool CheckEnglishChar(const std::string& ch) { - // all english characters should be encoded in one byte - if (ch.size() != 1) return false; - // english words may contain apostrophe, i.e., "He's" - return isalpha(ch[0]) || ch[0] == '\''; -} - -bool CheckEnglishWord(const std::string& word) { - std::vector chars; - SplitUTF8StringToChars(word, &chars); - for (size_t k = 0; k < chars.size(); k++) { - if (!CheckEnglishChar(chars[k])) { - return false; - } - } - return true; -} - -std::string JoinString(const std::string& c, - const std::vector& strs) { - std::string result; - if (strs.size() > 0) { - for (int i = 0; i < strs.size() - 1; i++) { - result += (strs[i] + c); - } - result += strs.back(); - } - return result; -} - -bool IsAlpha(const std::string& str) { - for (size_t i = 0; i < str.size(); i++) { - if (!isalpha(str[i])) { - return false; - } - } - return true; -} - -std::string ProcessBlank(const std::string& str, bool lowercase) { - std::string result; - if (!str.empty()) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - for (std::string& ch : chars) { - if (ch != kSpaceSymbol) { - result.append(ch); - } else { - // Ignore consecutive space or located in head - if (!result.empty() && result.back() != ' ') { - result.push_back(' '); - } - } - } - // Ignore tailing space - if (!result.empty() && result.back() == ' ') { - result.pop_back(); - } - // NOTE: convert string to wstring - // see issue 745: https://github.com/wenet-e2e/wenet/issues/745 - std::locale loc(""); - std::wstring_convert, wchar_t> converter; - std::wstring wsresult = converter.from_bytes(result); - for (auto& c : wsresult) { - c = lowercase ? tolower(c, loc) : toupper(c, loc); - } - result = converter.to_bytes(wsresult); - } - return result; -} - -std::string Ltrim(const std::string& str) { - size_t start = str.find_first_not_of(WHITESPACE); - return (start == std::string::npos) ? "" : str.substr(start); -} - -std::string Rtrim(const std::string& str) { - size_t end = str.find_last_not_of(WHITESPACE); - return (end == std::string::npos) ? 
"" : str.substr(0, end + 1); -} - -std::string Trim(const std::string& str) { return Rtrim(Ltrim(str)); } - -std::string JoinPath(const std::string& left, const std::string& right) { - std::string path(left); - if (path.size() && path.back() != '/') { - path.push_back('/'); - } - path.append(right); - return path; -} - -#ifdef _MSC_VER -std::wstring ToWString(const std::string& str) { - unsigned len = str.size() * 2; - setlocale(LC_CTYPE, ""); - wchar_t* p = new wchar_t[len]; - mbstowcs(p, str.c_str(), len); - std::wstring wstr(p); - delete[] p; - return wstr; -} -#endif - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/string.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/string.h deleted file mode 100644 index bf7a52ae09bce45ab7e34a5277652d7ae91bae1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/string.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_STRING_H_ -#define UTILS_STRING_H_ - -#include -#include -#include -#include -#include - -#include "fst/symbol-table.h" - -namespace wenet { - -const char WHITESPACE[] = " \n\r\t\f\v"; - -// Split the string with space or tab. -void SplitString(const std::string& str, std::vector* strs); - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out); - -// NOTE(Xingchen Song): we add this function to make it possible to -// support multilingual recipe in the future, in which characters of -// different languages are all encoded in UTF-8 format. -// UTF-8 REF: https://en.wikipedia.org/wiki/UTF-8#Encoding -// Split the UTF-8 string into chars. -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars); - -int UTF8StringLength(const std::string& str); - -// Check whether the UTF-8 char is alphabet or '. -bool CheckEnglishChar(const std::string& ch); - -// Check whether the UTF-8 word is only contains alphabet or '. -bool CheckEnglishWord(const std::string& word); - -std::string JoinString(const std::string& c, - const std::vector& strs); - -bool IsAlpha(const std::string& str); - -// Split the UTF-8 string into words by symbol table. -// Return whether not contains oov. -bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - -// Replace ▁ with space, then remove head, tail and consecutive space. 
-std::string ProcessBlank(const std::string& str, bool lowercase); - -std::string Ltrim(const std::string& str); - -std::string Rtrim(const std::string& str); - -std::string Trim(const std::string& str); - -std::string JoinPath(const std::string& left, const std::string& right); - -#ifdef _MSC_VER -std::wstring ToWString(const std::string& str); -#endif - -} // namespace wenet - -#endif // UTILS_STRING_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/thread_pool.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/thread_pool.h deleted file mode 100644 index a78162995d90bf079ad091cf14cb9f2cd4476d05..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/thread_pool.h +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2012 Jakob Progsch, Václav Zeman - -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. - -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: - -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. - -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. - -// 3. This notice may not be removed or altered from any source -// distribution. - -#ifndef UTILS_THREAD_POOL_H_ -#define UTILS_THREAD_POOL_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -class ThreadPool { - public: - explicit ThreadPool(size_t); - template - auto enqueue(F&& f, Args&&... args) - -> std::future::type>; - ~ThreadPool(); - - private: - // need to keep track of threads so we can join them - std::vector workers; - // the task queue - std::queue > tasks; - - // synchronization - std::mutex queue_mutex; - std::condition_variable condition; - bool stop; -}; - -// the constructor just launches some amount of workers -inline ThreadPool::ThreadPool(size_t threads) : stop(false) { - for (size_t i = 0; i < threads; ++i) - workers.emplace_back([this] { - for (;;) { - std::function task; - - { - std::unique_lock lock(this->queue_mutex); - this->condition.wait( - lock, [this] { return this->stop || !this->tasks.empty(); }); - if (this->stop && this->tasks.empty()) return; - task = std::move(this->tasks.front()); - this->tasks.pop(); - } - - task(); - } - }); -} - -// add new work item to the pool -template -auto ThreadPool::enqueue(F&& f, Args&&... 
args) - -> std::future::type> { - using return_type = typename std::result_of::type; - - auto task = std::make_shared >( - std::bind(std::forward(f), std::forward(args)...)); - - std::future res = task->get_future(); - { - std::unique_lock lock(queue_mutex); - - // don't allow enqueueing after stopping the pool - if (stop) { - throw std::runtime_error("enqueue on stopped ThreadPool"); - } - - tasks.emplace([task]() { (*task)(); }); - } - condition.notify_one(); - return res; -} - -// the destructor joins all threads -inline ThreadPool::~ThreadPool() { - { - std::unique_lock lock(queue_mutex); - stop = true; - } - condition.notify_all(); - for (std::thread& worker : workers) { - worker.join(); - } -} - -#endif // UTILS_THREAD_POOL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/timer.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/timer.h deleted file mode 100644 index 068519f98d140ba0eef68babe2ad2fdcb798c074..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/timer.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_TIMER_H_ -#define UTILS_TIMER_H_ - -#include - -namespace wenet { - -class Timer { - public: - Timer() : time_start_(std::chrono::steady_clock::now()) {} - void Reset() { time_start_ = std::chrono::steady_clock::now(); } - // return int in milliseconds - int Elapsed() const { - auto time_now = std::chrono::steady_clock::now(); - return std::chrono::duration_cast(time_now - - time_start_) - .count(); - } - - private: - std::chrono::time_point time_start_; -}; -} // namespace wenet - -#endif // UTILS_TIMER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/utils.cc deleted file mode 100644 index c37e36c6e9f629e0a4b11cf21a791aefd58b659f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/utils.cc +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
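The vendored thread_pool.h (the Progsch/Zeman pool) hands work to its workers by wrapping each submitted callable in a std::packaged_task, storing a type-erased std::function<void()> in the queue, and returning the task's std::future to the caller. A reduced single-worker sketch of just that enqueue mechanism follows (illustrative names, no argument binding, not the vendored header):

```cpp
#include <condition_variable>
#include <cstdio>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>

class MiniPool {
 public:
  MiniPool() : worker_([this] { Run(); }) {}
  ~MiniPool() {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      stop_ = true;
    }
    cv_.notify_one();
    worker_.join();
  }

  // Wrap the callable in a packaged_task, queue a type-erased invoker,
  // and give the caller a future for the result.
  template <class F>
  auto Enqueue(F&& f) -> std::future<decltype(f())> {
    auto task = std::make_shared<std::packaged_task<decltype(f())()>>(std::forward<F>(f));
    std::future<decltype(f())> result = task->get_future();
    {
      std::lock_guard<std::mutex> lock(mutex_);
      tasks_.emplace([task] { (*task)(); });
    }
    cv_.notify_one();
    return result;
  }

 private:
  void Run() {
    for (;;) {
      std::function<void()> task;
      {
        std::unique_lock<std::mutex> lock(mutex_);
        cv_.wait(lock, [this] { return stop_ || !tasks_.empty(); });
        if (stop_ && tasks_.empty()) return;
        task = std::move(tasks_.front());
        tasks_.pop();
      }
      task();
    }
  }

  std::mutex mutex_;
  std::condition_variable cv_;
  std::queue<std::function<void()>> tasks_;
  bool stop_ = false;
  std::thread worker_;  // constructed last so the other members are ready
};

int main() {
  MiniPool pool;
  auto fut = pool.Enqueue([] { return 6 * 7; });
  printf("result: %d\n", fut.get());  // 42
}
```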
- -#include "utils/utils.h" - -#include -#include -#include -#include -#include -#include - -#include "utils/log.h" - -namespace wenet { - -float LogAdd(float x, float y) { - static float num_min = -std::numeric_limits::max(); - if (x <= num_min) return y; - if (y <= num_min) return x; - float xmax = std::max(x, y); - return std::log(std::exp(x - xmax) + std::exp(y - xmax)) + xmax; -} - -template -struct ValueComp { - bool operator()(const std::pair& lhs, - const std::pair& rhs) const { - return lhs.first > rhs.first || - (lhs.first == rhs.first && lhs.second < rhs.second); - } -}; - -// We refer the pytorch topk implementation -// https://github.com/pytorch/pytorch/blob/master/caffe2/operators/top_k.cc -template -void TopK(const std::vector& data, int32_t k, std::vector* values, - std::vector* indices) { - std::vector> heap_data; - int n = data.size(); - for (int32_t i = 0; i < k && i < n; ++i) { - heap_data.emplace_back(data[i], i); - } - std::priority_queue, std::vector>, - ValueComp> - pq(ValueComp(), std::move(heap_data)); - for (int32_t i = k; i < n; ++i) { - if (pq.top().first < data[i]) { - pq.pop(); - pq.emplace(data[i], i); - } - } - - values->resize(std::min(k, n)); - indices->resize(std::min(k, n)); - int32_t cur = values->size() - 1; - while (!pq.empty()) { - const auto& item = pq.top(); - (*values)[cur] = item.first; - (*indices)[cur] = item.second; - pq.pop(); - cur -= 1; - } -} - -template void TopK(const std::vector& data, int32_t k, - std::vector* values, - std::vector* indices); - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/utils.h deleted file mode 100644 index f9957c0b6e8ae27d9260e75cf55e786055827801..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/utils/utils.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
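LogAdd above is the standard log-sum-exp trick: log(exp(x) + exp(y)) is computed as m + log(exp(x - m) + exp(y - m)) with m = max(x, y), so the exponentials stay in [0, 1] and cannot overflow. A compilable restatement with a quick numeric check:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <limits>

// Sum of two probabilities given in log scale, stabilized by factoring out the max.
float LogAdd(float x, float y) {
  static const float kLogZero = -std::numeric_limits<float>::max();
  if (x <= kLogZero) return y;
  if (y <= kLogZero) return x;
  float m = std::max(x, y);
  return std::log(std::exp(x - m) + std::exp(y - m)) + m;
}

int main() {
  // Adding two equal log-probabilities doubles the probability: result is log(0.5).
  printf("%f\n", LogAdd(std::log(0.25f), std::log(0.25f)));  // ~ -0.693147
  // A naive exp(1000) would overflow float; the stabilized form stays finite.
  printf("%f\n", LogAdd(1000.0f, 1000.0f));                  // 1000 + log(2)
}
```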
- -#ifndef UTILS_UTILS_H_ -#define UTILS_UTILS_H_ - -#include -#include -#include - -namespace wenet { - -#define WENET_DISALLOW_COPY_AND_ASSIGN(Type) \ - Type(const Type&) = delete; \ - Type& operator=(const Type&) = delete; - -const float kFloatMax = std::numeric_limits::max(); -// kSpaceSymbol in UTF-8 is: ▁ -const char kSpaceSymbol[] = "\xe2\x96\x81"; - -// Return the sum of two probabilities in log scale -float LogAdd(float x, float y); - -template -void TopK(const std::vector& data, int32_t k, std::vector* values, - std::vector* indices); - -} // namespace wenet - -#endif // UTILS_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/CMakeLists.txt deleted file mode 100644 index 67447c42d977f120fc39cdab0d052b011edd3efe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library(websocket STATIC - websocket_client.cc - websocket_server.cc -) -target_link_libraries(websocket PUBLIC decoder) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_client.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_client.cc deleted file mode 100644 index c0394e6250153e2d59636c9eab62badc4a737d16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_client.cc +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "websocket/websocket_client.h" - -#include "boost/json/src.hpp" - -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from -namespace json = boost::json; - -WebSocketClient::WebSocketClient(const std::string& hostname, int port) - : hostname_(hostname), port_(port) { - Connect(); - t_.reset(new std::thread(&WebSocketClient::ReadLoopFunc, this)); -} - -void WebSocketClient::Connect() { - tcp::resolver resolver{ioc_}; - // Look up the domain name - auto const results = resolver.resolve(hostname_, std::to_string(port_)); - // Make the connection on the IP address we get from a lookup - auto ep = asio::connect(ws_.next_layer(), results); - // Provide the value of the Host HTTP header during the WebSocket handshake. 
- // See https://tools.ietf.org/html/rfc7230#section-5.4 - std::string host = hostname_ + ":" + std::to_string(ep.port()); - // Perform the websocket handshake - ws_.handshake(host, "/"); -} - -void WebSocketClient::SendTextData(const std::string& data) { - ws_.text(true); - ws_.write(asio::buffer(data)); -} - -void WebSocketClient::SendBinaryData(const void* data, size_t size) { - ws_.binary(true); - ws_.write(asio::buffer(data, size)); -} - -void WebSocketClient::Close() { ws_.close(websocket::close_code::normal); } - -void WebSocketClient::ReadLoopFunc() { - try { - while (true) { - beast::flat_buffer buffer; - ws_.read(buffer); - std::string message = beast::buffers_to_string(buffer.data()); - LOG(INFO) << message; - CHECK(ws_.got_text()); - json::object obj = json::parse(message).as_object(); - if (obj["status"] != "ok") { - break; - } - if (obj["type"] == "speech_end") { - done_ = true; - break; - } - } - } catch (beast::system_error const& se) { - // This indicates that the session was closed - if (se.code() != websocket::error::closed) { - LOG(ERROR) << se.code().message(); - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void WebSocketClient::Join() { t_->join(); } - -void WebSocketClient::SendStartSignal() { - // TODO(Binbin Zhang): Add sample rate and other setting support - json::value start_tag = {{"signal", "start"}, - {"nbest", nbest_}, - {"continuous_decoding", continuous_decoding_}}; - std::string start_message = json::serialize(start_tag); - this->SendTextData(start_message); -} - -void WebSocketClient::SendEndSignal() { - json::value end_tag = {{"signal", "end"}}; - std::string end_message = json::serialize(end_tag); - this->SendTextData(end_message); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_client.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_client.h deleted file mode 100644 index 76ec3aa451d31c7ee6b158ce21c8acdc10575eb3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_client.h +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef WEBSOCKET_WEBSOCKET_CLIENT_H_ -#define WEBSOCKET_WEBSOCKET_CLIENT_H_ - -#include -#include -#include -#include - -#include "boost/asio/connect.hpp" -#include "boost/asio/ip/tcp.hpp" -#include "boost/beast/core.hpp" -#include "boost/beast/websocket.hpp" - -#include "utils/utils.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from - -class WebSocketClient { - public: - WebSocketClient(const std::string& host, int port); - - void SendTextData(const std::string& data); - void SendBinaryData(const void* data, size_t size); - void ReadLoopFunc(); - void Close(); - void Join(); - void SendStartSignal(); - void SendEndSignal(); - void set_nbest(int nbest) { nbest_ = nbest; } - void set_continuous_decoding(bool continuous_decoding) { - continuous_decoding_ = continuous_decoding; - } - bool done() const { return done_; } - - private: - void Connect(); - std::string hostname_; - int port_; - int nbest_ = 1; - bool continuous_decoding_ = false; - bool done_ = false; - asio::io_context ioc_; - websocket::stream ws_{ioc_}; - std::unique_ptr t_{nullptr}; - - WENET_DISALLOW_COPY_AND_ASSIGN(WebSocketClient); -}; - -} // namespace wenet - -#endif // WEBSOCKET_WEBSOCKET_CLIENT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_server.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_server.cc deleted file mode 100644 index 52ab088f46d59b9f3f1add1e34d3aceae290f5da..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_server.cc +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
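The signalling between the deleted websocket client and server is carried in small JSON text messages: the client opens with a "start" signal (carrying nbest and continuous_decoding), streams binary PCM frames, then sends an "end" signal; the server acknowledges with server_ready, pushes partial_result/final_result messages from its decoding thread, and closes the exchange with speech_end. The sketch below is only a paper model of that message flow, with the JSON assembled by hand instead of boost::json and the decoding thread's asynchronous replies collapsed into direct responses for brevity:

```cpp
#include <cstdio>
#include <string>

// Client-side text messages, mirroring SendStartSignal / SendEndSignal.
std::string StartSignal(int nbest, bool continuous_decoding) {
  return std::string("{\"signal\":\"start\",\"nbest\":") + std::to_string(nbest) +
         ",\"continuous_decoding\":" + (continuous_decoding ? "true" : "false") + "}";
}

std::string EndSignal() { return "{\"signal\":\"end\"}"; }

// Simplified server-side reaction to each client signal.  In the real handler
// the speech_end message is emitted later, by the decode thread, after the
// final_result has been sent.
std::string HandleSignal(const std::string& signal) {
  if (signal == "start") return "{\"status\":\"ok\",\"type\":\"server_ready\"}";
  if (signal == "end") return "{\"status\":\"ok\",\"type\":\"speech_end\"}";
  return "{\"status\":\"failed\",\"message\":\"Unexpected signal type\"}";
}

int main() {
  printf("client: %s\n", StartSignal(1, false).c_str());
  printf("server: %s\n", HandleSignal("start").c_str());
  printf("client: <binary pcm frames>\n");
  printf("client: %s\n", EndSignal().c_str());
  printf("server: %s\n", HandleSignal("end").c_str());
}
```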
- -#include "websocket/websocket_server.h" - -#include -#include -#include - -#include "boost/json/src.hpp" -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from -namespace json = boost::json; - -ConnectionHandler::ConnectionHandler( - tcp::socket&& socket, std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : ws_(std::move(socket)), - feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - -void ConnectionHandler::OnSpeechStart() { - LOG(INFO) << "Received speech start signal, start reading speech"; - got_start_tag_ = true; - json::value rv = {{"status", "ok"}, {"type", "server_ready"}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); - feature_pipeline_ = std::make_shared(*feature_config_); - decoder_ = std::make_shared(feature_pipeline_, decode_resource_, - *decode_config_); - // Start decoder thread - decode_thread_ = - std::make_shared(&ConnectionHandler::DecodeThreadFunc, this); -} - -void ConnectionHandler::OnSpeechEnd() { - LOG(INFO) << "Received speech end signal"; - if (feature_pipeline_ != nullptr) { - feature_pipeline_->set_input_finished(); - } - got_end_tag_ = true; -} - -void ConnectionHandler::OnPartialResult(const std::string& result) { - LOG(INFO) << "Partial result: " << result; - json::value rv = { - {"status", "ok"}, {"type", "partial_result"}, {"nbest", result}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); -} - -void ConnectionHandler::OnFinalResult(const std::string& result) { - LOG(INFO) << "Final result: " << result; - json::value rv = { - {"status", "ok"}, {"type", "final_result"}, {"nbest", result}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); -} - -void ConnectionHandler::OnFinish() { - // Send finish tag - json::value rv = {{"status", "ok"}, {"type", "speech_end"}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); -} - -void ConnectionHandler::OnSpeechData(const beast::flat_buffer& buffer) { - // Read binary PCM data - int num_samples = buffer.size() / sizeof(int16_t); - VLOG(2) << "Received " << num_samples << " samples"; - CHECK(feature_pipeline_ != nullptr); - CHECK(decoder_ != nullptr); - const auto* pcm_data = static_cast(buffer.data().data()); - feature_pipeline_->AcceptWaveform(pcm_data, num_samples); -} - -std::string ConnectionHandler::SerializeResult(bool finish) { - json::array nbest; - for (const DecodeResult& path : decoder_->result()) { - json::object jpath({{"sentence", path.sentence}}); - if (finish) { - json::array word_pieces; - for (const WordPiece& word_piece : path.word_pieces) { - json::object jword_piece({{"word", word_piece.word}, - {"start", word_piece.start}, - {"end", word_piece.end}}); - word_pieces.emplace_back(jword_piece); - } - jpath.emplace("word_pieces", word_pieces); - } - nbest.emplace_back(jpath); - - if (nbest.size() == nbest_) { - break; - } - } - return json::serialize(nbest); -} - -void ConnectionHandler::DecodeThreadFunc() { - try { - while (true) { - DecodeState state = decoder_->Decode(); - if (state == DecodeState::kEndFeats) { - decoder_->Rescoring(); - std::string result = SerializeResult(true); - OnFinalResult(result); - OnFinish(); - stop_recognition_ = true; - break; - } else if (state == 
DecodeState::kEndpoint) { - decoder_->Rescoring(); - std::string result = SerializeResult(true); - OnFinalResult(result); - // If it's not continuous decoding, continue to do next recognition - // otherwise stop the recognition - if (continuous_decoding_) { - decoder_->ResetContinuousDecoding(); - } else { - OnFinish(); - stop_recognition_ = true; - break; - } - } else { - if (decoder_->DecodedSomething()) { - std::string result = SerializeResult(false); - OnPartialResult(result); - } - } - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void ConnectionHandler::OnError(const std::string& message) { - json::value rv = {{"status", "failed"}, {"message", message}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); - // Close websocket - ws_.close(websocket::close_code::normal); -} - -void ConnectionHandler::OnText(const std::string& message) { - json::value v = json::parse(message); - if (v.is_object()) { - json::object obj = v.get_object(); - if (obj.find("signal") != obj.end()) { - json::string signal = obj["signal"].as_string(); - if (signal == "start") { - if (obj.find("nbest") != obj.end()) { - if (obj["nbest"].is_int64()) { - nbest_ = obj["nbest"].as_int64(); - } else { - OnError("integer is expected for nbest option"); - } - } - if (obj.find("continuous_decoding") != obj.end()) { - if (obj["continuous_decoding"].is_bool()) { - continuous_decoding_ = obj["continuous_decoding"].as_bool(); - } else { - OnError( - "boolean true or false is expected for " - "continuous_decoding option"); - } - } - OnSpeechStart(); - } else if (signal == "end") { - OnSpeechEnd(); - } else { - OnError("Unexpected signal type"); - } - } else { - OnError("Wrong message header"); - } - } else { - OnError("Wrong protocol"); - } -} - -void ConnectionHandler::operator()() { - try { - // Accept the websocket handshake - ws_.accept(); - for (;;) { - // This buffer will hold the incoming message - beast::flat_buffer buffer; - // Read a message - ws_.read(buffer); - if (ws_.got_text()) { - std::string message = beast::buffers_to_string(buffer.data()); - LOG(INFO) << message; - OnText(message); - if (got_end_tag_) { - break; - } - } else { - if (!got_start_tag_) { - OnError("Start signal is expected before binary data"); - } else { - if (stop_recognition_) { - break; - } - OnSpeechData(buffer); - } - } - } - - LOG(INFO) << "Read all pcm data, wait for decoding thread"; - if (decode_thread_ != nullptr) { - decode_thread_->join(); - } - } catch (beast::system_error const& se) { - LOG(INFO) << se.code().message(); - // This indicates that the session was closed - if (se.code() == websocket::error::closed) { - OnSpeechEnd(); - } - if (decode_thread_ != nullptr) { - decode_thread_->join(); - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void WebSocketServer::Start() { - try { - auto const address = asio::ip::make_address("0.0.0.0"); - tcp::acceptor acceptor{ioc_, {address, static_cast(port_)}}; - for (;;) { - // This will receive the new connection - tcp::socket socket{ioc_}; - // Block until we get a connection - acceptor.accept(socket); - // Launch the session, transferring ownership of the socket - ConnectionHandler handler(std::move(socket), feature_config_, - decode_config_, decode_resource_); - std::thread t(std::move(handler)); - t.detach(); - } - } catch (const std::exception& e) { - LOG(FATAL) << e.what(); - } -} - -} // namespace wenet diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_server.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_server.h deleted file mode 100644 index a1241834221dcf93c34d6414bd9b5ae40ef1cf38..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/websocket/websocket_server.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef WEBSOCKET_WEBSOCKET_SERVER_H_ -#define WEBSOCKET_WEBSOCKET_SERVER_H_ - -#include -#include -#include -#include -#include - -#include "boost/asio/connect.hpp" -#include "boost/asio/ip/tcp.hpp" -#include "boost/beast/core.hpp" -#include "boost/beast/websocket.hpp" - -#include "decoder/asr_decoder.h" -#include "frontend/feature_pipeline.h" -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from - -class ConnectionHandler { - public: - ConnectionHandler(tcp::socket&& socket, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource_); - void operator()(); - - private: - void OnSpeechStart(); - void OnSpeechEnd(); - void OnText(const std::string& message); - void OnFinish(); - void OnSpeechData(const beast::flat_buffer& buffer); - void OnError(const std::string& message); - void OnPartialResult(const std::string& result); - void OnFinalResult(const std::string& result); - void DecodeThreadFunc(); - std::string SerializeResult(bool finish); - - bool continuous_decoding_ = false; - int nbest_ = 1; - websocket::stream ws_; - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - - bool got_start_tag_ = false; - bool got_end_tag_ = false; - // When endpoint is detected, stop recognition, and stop receiving data. 
- bool stop_recognition_ = false; - std::shared_ptr feature_pipeline_ = nullptr; - std::shared_ptr decoder_ = nullptr; - std::shared_ptr decode_thread_ = nullptr; -}; - -class WebSocketServer { - public: - WebSocketServer(int port, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : port_(port), - feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - - void Start(); - - private: - int port_; - // The io_context is required for all I/O - asio::io_context ioc_{1}; - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - WENET_DISALLOW_COPY_AND_ASSIGN(WebSocketServer); -}; - -} // namespace wenet - -#endif // WEBSOCKET_WEBSOCKET_SERVER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/CMakeLists.txt deleted file mode 100644 index 380e23204b32dad2ee66999430a66450066ea6a7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -message("cmake build type is ${CMAKE_BUILD_TYPE} .") - -if(XPU) - list(APPEND xpu_conformer_srcs ./xpu_asr_model.cc) - list(APPEND xpu_conformer_srcs ./xpu_conformer.cpp) - list(APPEND xpu_conformer_srcs ./xpu_util.cpp) - message(STATUS "Use src_files: [ ${xpu_conformer_srcs} ] to compile xpu_conformer.a .") - - # compile xpu_conformer.a - add_library(xpu_conformer STATIC ${xpu_conformer_srcs}) - target_link_libraries(xpu_conformer PUBLIC xpuapi xpurt) -endif() - -set(CMAKE_VERBOSE_MAKEFILE OFF) - -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive") -set(CMAKE_EXE_LINKER_FLAGS "-lpthread -lrt -lm -ldl") - -set(SRC_FILES ./conformer_test.cpp ./xpu_conformer.cpp ./xpu_util.cpp) -message(STATUS "Use src_files: [ ${SRC_FILES} ] to compile xpu_conformer_test.") - -add_executable(xpu_conformer_test ${SRC_FILES}) -target_link_libraries(xpu_conformer_test -lxpuapi -lxpurt) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/conformer_test.cpp b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/conformer_test.cpp deleted file mode 100644 index 1d9fd672a31eb5d7a787368f274df516d19dc7a8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/conformer_test.cpp +++ /dev/null @@ -1,276 +0,0 @@ -// Copyright (c) 2022 KUNLUNXIN Inc. -// 2022 Han Qi (qihan@baidu.com) -// Hehe Pan (panhehe@baidu.com) -// Zikui Yan (yanzikui@baidu.com) -// Chaolin Li (lichaolin@baidu.com) -// All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include -#include -#include "xpu_conformer.h" // NOLINT -#include "xpu_util.h" // NOLINT -namespace api = baidu::xpu::api; -namespace wenet = xpu::wenet; - -template -static void conformer_test(const std::string& data_dir, - const std::string& params_dir, int threads_number, - int dev_id) { - typedef std::vector< - std::tuple>, - std::tuple, std::vector>>> - Dtype; - ConformerEncoderParam encoder_param; - init_encoder_params(params_dir, encoder_param); - ConformerDecoderParam decoder_param; - init_decoder_params(params_dir, decoder_param); - int real_threads_number = threads_number <= 0 ? 1 : threads_number; - std::cout << "Encoder + Decoder MultiStreamTest threads:" - << real_threads_number << std::endl; - // init test data - std::vector ids = get_all_ids(data_dir); - Dtype data_list; - for (auto index_id : ids) { - std::string input_lenghts_prefix = - data_dir + std::to_string(index_id) + "_len"; - std::string input_prefix = data_dir + std::to_string(index_id); - auto input_lenghts_cpu_info = - read_cpu_data_from_file(input_lenghts_prefix, 1); - auto input_xpu_info = read_xpu_data_from_file(input_prefix, 3); - data_list.push_back( - std::make_tuple(input_xpu_info, input_lenghts_cpu_info)); - } - bool write_res = true; - // init mem - int ret = 0; - std::vector ctx_xpu_ptrs(real_threads_number); - std::vector streams(real_threads_number); - - int nsdnn = real_threads_number > 1 ? 2 : 6; - int ncluster = real_threads_number > 1 ? 2 : 8; - for (int i = 0; i < real_threads_number; i++) { - ret = xpu_stream_create(&streams[i]); - ctx_xpu_ptrs[i] = new api::Context(api::kXPU2); - ctx_xpu_ptrs[i]->xpu_stream = streams[i]; - ctx_xpu_ptrs[i]->set_nsdnn(nsdnn); - ctx_xpu_ptrs[i]->set_ncluster(ncluster); - } - // threads - std::vector thread_times(real_threads_number); - std::vector threads; - int data_counter = 0; - std::mutex data_mutex; - std::vector time_info(real_threads_number, 0.0f); - auto f = [&](int thread_id) { - xpu_set_device(dev_id); - api::Context* ctx_xpu = ctx_xpu_ptrs[thread_id]; - api::ctx_guard RAII_GUARD(ctx_xpu); - while (true) { - int data_index = -1; - data_mutex.lock(); - if (data_counter >= data_list.size()) { - data_mutex.unlock(); - break; - } - data_index = data_counter++; - data_mutex.unlock(); - if (data_index < 0) { - continue; - } - auto start_time = std::chrono::system_clock::now(); - // get input data - auto& input_xpu_info = std::get<0>(data_list[data_index]); - auto& input_lenghts_info = std::get<1>(data_list[data_index]); - auto& input_xpu_data = std::get<0>(input_xpu_info); - auto& speech_shape = std::get<1>(input_xpu_info); - int batch = speech_shape[0]; - int max_seqlen = speech_shape[1]; - auto xpu_mask_info_float = create_mask_according_speech_length( - std::get<0>(input_lenghts_info), max_seqlen, ctx_xpu->xpu_stream); - ret = xpu_wait(ctx_xpu->xpu_stream); - CHECK_RET(ret); - int q_seqlen = ((max_seqlen - 1) / 2 - 1) / 2; - // encoder run - int att_dim = encoder_param.head_num * encoder_param.head_dim; - int ctc_dim = encoder_param.ctc_dim; - T* encoder_out = RAII_GUARD.alloc(batch * q_seqlen * att_dim); - T* ctc_probs = RAII_GUARD.alloc(batch * q_seqlen * ctc_dim); - // get encoder_out & ctc_probs - ret = wenet::conformer_encoder_wenet( - ctx_xpu, input_xpu_data, speech_shape, encoder_out, ctc_probs, - encoder_param, xpu_mask_info_float); - CHECK_RET(ret); - ret = xpu_wait(ctx_xpu->xpu_stream); - CHECK_RET(ret); - // ctc_prefix_beamsearch implement in cpu - int beam_size = encoder_param.beam_size; - int new_bs = batch * beam_size; 
- std::vector hyps_len(new_bs); - std::vector ctc_scores(new_bs); - std::vector hyps_cpu; - int* hyps = RAII_GUARD.alloc(new_bs * q_seqlen); - ret = wenet::ctc_prefix_beamsearch(ctx_xpu, ctc_probs, hyps_cpu, - hyps_len, ctc_scores, batch, - beam_size, q_seqlen, ctc_dim); - CHECK_RET(ret); - ret = xpu_wait(ctx_xpu->xpu_stream); - CHECK_RET(ret); - int max_target_len = - padding_target(hyps_cpu, hyps_len, beam_size, decoder_param.eos_id); - ret = xpu_memcpy(hyps, reinterpret_cast(&hyps_cpu[0]), - max_target_len * new_bs * sizeof(int), - XPUMemcpyKind::XPU_HOST_TO_DEVICE); - ret = xpu_wait(ctx_xpu->xpu_stream); - CHECK_RET(ret); - // decoder - int pad_target_len = decoder_param.add_sos_num + max_target_len; - float* character_scores = - RAII_GUARD.alloc(new_bs * pad_target_len * ctc_dim); - ret = wenet::conformer_decoder_wenet( - ctx_xpu, encoder_out, {batch, q_seqlen, att_dim}, - std::get<0>(xpu_mask_info_float), hyps, {new_bs, max_target_len}, - character_scores, decoder_param); - CHECK_RET(ret); - ret = xpu_wait(ctx_xpu->xpu_stream); - CHECK_RET(ret); - // Only use decoder score for rescoring - std::vector best_score(batch, -std::numeric_limits::max()); - std::vector best_index(batch, 0); - float ctc_weight = 0.5; - std::vector decoder_out(new_bs * pad_target_len * ctc_dim); - ret = xpu_memcpy(&decoder_out[0], character_scores, - new_bs * max_target_len * ctc_dim * sizeof(float), - XPUMemcpyKind::XPU_DEVICE_TO_HOST); - xpu_wait(ctx_xpu->xpu_stream); - CHECK_RET(ret); - // cal score && output - std::string wav_prefix = - data_dir + std::to_string(data_index) + "_wav.txt"; - std::string res_prefix = "./token_id.txt"; - std::ofstream res; - std::string wav_name; - std::vector wav_info; - if (write_res) { - std::ifstream wav(wav_prefix.c_str()); - if (!wav.is_open()) { - std::cout << "wav file open fail" << std::endl; - exit(0); - } - while (getline(wav, wav_name)) { - wav_info.push_back(wav_name); - } - wav.close(); - } - for (int i = 0; i < batch; i++) { - for (int j = 0; j < beam_size; j++) { - T score = 0.0; - for (int k = 0; k < hyps_len[i * beam_size + j]; k++) { - int index = i * beam_size * max_target_len * ctc_dim + - j * max_target_len * ctc_dim + k * ctc_dim + - hyps_cpu[k]; - score += decoder_out[index]; - } - score += decoder_out[i * beam_size * max_target_len * ctc_dim + - j * max_target_len * ctc_dim + - hyps_len[i * batch + j] * ctc_dim + ctc_dim - 1]; - // add ctc score - score += ctc_weight * ctc_scores[i * beam_size + j]; - if (score > best_score[i]) { - best_score[i] = score; - best_index[i] = j; - } - } - int token_index = best_index[i] + i * beam_size; - if (write_res) { - data_mutex.lock(); - res.open(res_prefix, std::ios::app); - if (!res.is_open()) { - std::cout << "res file open fail" << std::endl; - exit(0); - } - res << wav_info[i] << ":"; - for (int k = 0; k < hyps_len[token_index]; k++) - res << hyps_cpu[k] << " "; - res << std::endl; - res.close(); - data_mutex.unlock(); - } - } - auto end_time = std::chrono::system_clock::now(); - auto duration = std::chrono::duration_cast( - end_time - start_time); - time_info[thread_id] += static_cast(duration.count()) / 1000; - ret = xpu_free(std::get<0>(input_xpu_info)); - CHECK_RET(ret); - ret = xpu_free(std::get<0>(xpu_mask_info_float)); - CHECK_RET(ret); - } - }; - auto all_start = std::chrono::system_clock::now(); - for (auto i = 0; i < real_threads_number; i++) { - std::thread t(f, i); - threads.push_back(std::move(t)); - } - for (auto& t : threads) { - t.join(); - } - auto all_end = std::chrono::system_clock::now(); - 
auto duration = std::chrono::duration_cast( - all_end - all_start); - float total_time = static_cast(duration.count()) / 1000; - std::cout << "Total time cost:" << total_time << std::endl; - for (int i = 0; i < real_threads_number; i++) { - if (ctx_xpu_ptrs[i]) delete ctx_xpu_ptrs[i]; - } -} - -int main(int argc, char* argv[]) { - if (argc != 6) { - std::cout << "Only support the following three params:" << std::endl; - std::cout - << "\t1. " << argv[0] - << " encoder_test [params_dir] [data_dir] [dev_id] [threads_number]" - << std::endl; - std::cout - << "\t2. " << argv[0] - << " decoder_test [params_dir] [data_dir] [dev_id] [threads_number]" - << std::endl; - std::cout << "\t3. " << argv[0] - << " all [params_dir] [data_dir] [dev_id] [threads_number]" - << std::endl; - return 0; - } - std::string mode = argv[1]; - std::string params_dir = argv[2]; - std::string data_dir = argv[3]; - int dev_id = std::stoi(argv[4]); - int threads_number = std::stoi(argv[5]); - add_separator_when_necessary(params_dir); - add_separator_when_necessary(data_dir); - xpu_set_device(dev_id); - - typedef float16 T; - typedef int16_t TW; - typedef int16_t TGEMM; - - if (mode == "all") { - conformer_test(data_dir, params_dir, threads_number, dev_id); - } else { - std::cout << "Unkown test mode: " << mode << std::endl; - std::exit(1); - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_asr_model.cc deleted file mode 100644 index 71b60bd156e5b1812dec903d1ba4a3d3f54625ea..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_asr_model.cc +++ /dev/null @@ -1,318 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Han Qi (qihan@baidu.com, Kunlunxin Inc) -// Hehe Pan (panhehe@baidu.com, Kunlunxin Inc) -// Zikui Yan (yanzikui@baidu.com, Kunlunxin Inc) -// All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "xpu_asr_model.h" // NOLINT - -#include -#include -#include -#include - -#include "utils/string.h" - -namespace wenet { - -void XPUAsrModel::SetEngineThreads(int num_threads) { - real_threads_number = num_threads; -} - -void XPUAsrModel::SetDeviceId(int dev_id) { device_id_ = dev_id; } - -void XPUAsrModel::Read(const std::string& model_dir) { - // init xpu runtime params - ctx_xpu_ptr = std::make_shared(api::kXPU2); - RAII_GUARD.reset(new api::ctx_guard(ctx_xpu_ptr.get())); - - // For XPU, model_dir is params_dir, which is used to store weights for every - // layer. - std::string weight_dir = model_dir + "/model_weights/"; - std::string weight_info_txt_path = weight_dir + "/weights_info.txt"; - - LOG(INFO) << "\e[1;34mXPU weight_dir is: " << weight_dir << "\e[0m\n"; - if (!std::ifstream(weight_info_txt_path.c_str()).good()) { - LOG(FATAL) << "weight_info_txt: " << weight_info_txt_path - << " NOT exist !!!\n"; - } - - // 1. 
Load weight for every layer - init_encoder_params(weight_dir, encoder_param); - init_decoder_params(weight_dir, decoder_param); - - // 2. Read metadata - // TODO(panhehe): Load following parameters from config file or - // encoder/decoder params. - subsampling_rate_ = 4; - right_context_ = 6; - sos_ = 5538; - eos_ = 5538; - is_bidirectional_decoder_ = 1; - - LOG(INFO) << "======= XPU Kunlun Model Info: ======="; - LOG(INFO) << "\tsubsampling_rate " << subsampling_rate_; - LOG(INFO) << "\tright_context " << right_context_; - LOG(INFO) << "\tsos " << sos_; - LOG(INFO) << "\teos " << eos_; - LOG(INFO) << "\tis bidirectional decoder " << is_bidirectional_decoder_; -} - -XPUAsrModel::XPUAsrModel(const XPUAsrModel& other) { - // 1. Init the model info - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - - l3ptr = other.l3ptr; - real_threads_number = other.real_threads_number; - device_id_ = other.device_id_; - ctx_xpu_ptr = other.ctx_xpu_ptr; - RAII_GUARD = other.RAII_GUARD; - encoder_param = other.encoder_param; - decoder_param = other.decoder_param; - stream = other.stream; - // other member variables may not need to copy here -} - -std::shared_ptr XPUAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void XPUAsrModel::Reset() { - offset_ = 0; - encoder_out = nullptr; - ctc_probs = nullptr; - cached_feature_.clear(); - // Reset att_cache - att_cache_.resize(0, 0.0); - cnn_cache_.resize(0, 0.0); -} - -void XPUAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - // Set Device Id - LOG(INFO) << "Now Use XPU:" << device_id_ << "!\n"; - xpu_set_device(device_id_); - - // 1. Prepare XPU required data, splice cached_feature_ and chunk_feats - // The first dimension is for batchsize, which is 1. - // chunk - - int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - - std::vector feats_length_shape = {1}; - std::vector feats_length_data = {num_frames}; - input_lenghts_cpu_info = - std::make_tuple(feats_length_data, feats_length_shape); - - std::vector feats_data_shape = {1, num_frames, feature_dim}; - std::vector feats_data_cpu; - feats_data_cpu.reserve(1 * num_frames * feature_dim); - // convert 2d-vector to 1d-vector - for (auto& row : chunk_feats) { - auto end_iter = feats_data_cpu.end(); - feats_data_cpu.insert(end_iter, row.cbegin(), row.cend()); - } - - float* input_xpu_data = get_xpu_data("wav_test", feats_data_cpu); - input_xpu_info = std::make_tuple(input_xpu_data, feats_data_shape); - - // init L3 Memory - int ret = 0; - real_threads_number = 1; - int nsdnn = real_threads_number > 1 ? 2 : 6; - int ncluster = real_threads_number > 1 ? 
2 : 8; - for (int i = 0; i < real_threads_number; i++) { - ret = xpu_stream_create(&stream); - ctx_xpu_ptr->xpu_stream = stream; - ctx_xpu_ptr->set_nsdnn(nsdnn); - ctx_xpu_ptr->set_ncluster(ncluster); - } - - std::shared_ptr ctx_xpu = ctx_xpu_ptr; - - // get input speech info and data - batch = feats_data_shape.at(0); // batch = 1 - max_seqlen = feats_data_shape.at(1); - - xpu_mask_info_float = create_mask_according_speech_length( - feats_length_data, max_seqlen, ctx_xpu->xpu_stream); - - ret = xpu_wait(ctx_xpu->xpu_stream); - CHECK_RET(ret); - - q_seqlen = ((max_seqlen - 1) / 2 - 1) / 2; - - // Encoder run - int att_dim = encoder_param.head_num * encoder_param.head_dim; - int ctc_dim = encoder_param.ctc_dim; - - LOG(INFO) << "\t max_seqlen is " << max_seqlen << "\n"; - LOG(INFO) << "\t q_seqlen is " << q_seqlen << "\n"; - LOG(INFO) << "\t att_dim is " << att_dim << "\n"; - LOG(INFO) << "\t ctc_dim is " << ctc_dim << "\n"; - - // T is float16 - encoder_out = RAII_GUARD->alloc(batch * q_seqlen * att_dim); - ctc_probs = RAII_GUARD->alloc(batch * q_seqlen * ctc_dim); - - // 2. Encoder chunk forward, including ctc_activation - // get encoder_out & ctc_probs - ret = xpu::wenet::conformer_encoder_wenet( - ctx_xpu.get(), input_xpu_data, feats_data_shape, encoder_out, ctc_probs, - encoder_param, xpu_mask_info_float); - CHECK_RET(ret); - - // Copy to output(cpu) - int num_outputs = q_seqlen; - int output_dim = ctc_dim; - out_prob->resize(num_outputs); - - float* logp = RAII_GUARD->alloc(batch * q_seqlen * ctc_dim); - // cast T to float32 - ret = api::cast_v2(ctx_xpu.get(), ctc_probs, logp, - batch * q_seqlen * ctc_dim); - CHECK_RET(ret); - ret = xpu_wait(ctx_xpu->xpu_stream); - CHECK_RET(ret); - - // xpu_memcpy logp from device to host - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - ret = xpu_memcpy(reinterpret_cast((*out_prob)[i].data()), - logp + output_dim * i, output_dim * sizeof(float), - XPUMemcpyKind::XPU_DEVICE_TO_HOST); - CHECK_RET(ret); - } -} - -float XPUAsrModel::ComputeAttentionScore(const float* prob, - const std::vector& hyp, int eos, - int decode_out_len) { - float score = 0.0f; - for (size_t j = 0; j < hyp.size(); ++j) { - score += *(prob + j * decode_out_len + hyp[j]); - } - score += *(prob + hyp.size() * decode_out_len + eos); - return score; -} - -void XPUAsrModel::AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) { - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - - if (encoder_out == nullptr) { - return; - } - - int beam_size = encoder_param.beam_size; - int new_bs = batch * beam_size; - - std::vector hyps_lens; - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_lens.emplace_back(static_cast(length)); - } - LOG(INFO) << "\t num_hyps is " << num_hyps << "\n"; - LOG(INFO) << "\t beam_size is " << beam_size << "\n"; - LOG(INFO) << "\t new_bs is " << new_bs << "\n"; - LOG(INFO) << "\t max_hyps_len is " << max_hyps_len << "\n"; - - // pad hyps - std::vector hyps_pad_cpu(max_hyps_len * beam_size); - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_pad_cpu.emplace_back(sos_); - size_t j = 0; - for (; j < hyp.size(); ++j) { - hyps_pad_cpu.emplace_back(hyp[j]); - } - if (j == max_hyps_len - 1) { - continue; - } - for (; j < max_hyps_len - 1; ++j) { - 
hyps_pad_cpu.emplace_back(0); - } - } - int* hyps_xpu = RAII_GUARD->alloc(new_bs * q_seqlen); - int max_target_len = max_hyps_len; - // xpu_memcpy hyps_pad_cup to device - int ret = xpu_memcpy(hyps_xpu, reinterpret_cast(hyps_pad_cpu.data()), - max_target_len * new_bs * sizeof(int), - XPUMemcpyKind::XPU_HOST_TO_DEVICE); - CHECK_RET(ret); - - // Decoder - int att_dim = encoder_param.head_num * encoder_param.head_dim; - int ctc_dim = encoder_param.ctc_dim; - int pad_target_len = decoder_param.add_sos_num + max_target_len; - float* character_scores = - RAII_GUARD->alloc(new_bs * pad_target_len * ctc_dim); - ret = xpu::wenet::conformer_decoder_wenet( - ctx_xpu_ptr.get(), encoder_out, {batch, q_seqlen, att_dim}, - std::get<0>(xpu_mask_info_float), hyps_xpu, {new_bs, max_target_len}, - character_scores, decoder_param); - CHECK_RET(ret); - ret = xpu_wait(ctx_xpu_ptr->xpu_stream); - CHECK_RET(ret); - - // xpu_memcpy from xpu device to host - std::vector decoder_out(new_bs * pad_target_len * ctc_dim); - ret = xpu_memcpy(&decoder_out[0], character_scores, - new_bs * max_target_len * ctc_dim * sizeof(float), - XPUMemcpyKind::XPU_DEVICE_TO_HOST); - CHECK_RET(ret); - ret = xpu_wait(ctx_xpu_ptr->xpu_stream); - CHECK_RET(ret); - - // cal score - float* decoder_outs_data = decoder_out.data(); - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left to right decoder score - // ctc_dim maybe equal to decode_out_len - score = ComputeAttentionScore( - decoder_outs_data + max_target_len * ctc_dim * i, hyp, eos_, ctc_dim); - // Optional: Used for right to left score - float r_score = 0.0f; - // reverse_weight is 0 ; so the codes in if-condition is be ignored. - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_asr_model.h deleted file mode 100644 index 500081ad9d6b3cb54c996e127117627863b9372c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_asr_model.h +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Han Qi (qihan@baidu.com, Kunlunxin Inc) -// Hehe Pan (panhehe@baidu.com, Kunlunxin Inc) -// Zikui Yan (yanzikui@baidu.com, Kunlunxin Inc) -// All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef RUNTIME_KUNLUN_XPU_XPU_ASR_MODEL_H_ -#define RUNTIME_KUNLUN_XPU_XPU_ASR_MODEL_H_ - -#include -#include -#include -#include - -#include "decoder/asr_model.h" -#include "utils/log.h" -#include "utils/utils.h" - -#include "xpu_conformer.h" // NOLINT - -namespace wenet { - -class XPUAsrModel : public AsrModel { - typedef float16 T; - typedef int16_t TW; - - public: - // Note: Do not call the InitEngineThreads function more than once. - void SetEngineThreads(int num_threads = 1); - - public: - XPUAsrModel() = default; - XPUAsrModel(const XPUAsrModel& other); - void SetDeviceId(int dev_id); - void Read(const std::string& model_dir); - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const float* prob, const std::vector& hyp, - int eos, int decode_out_len); - - private: - int encoder_output_size_ = 0; - int num_blocks_ = 0; - int cnn_module_kernel_ = 0; - int head_ = 0; - - // XPU device id - int device_id_ = 0; - int real_threads_number = 1; - - // XPU Conformer EncoderParam and DecoderParam - ConformerEncoderParam encoder_param; - ConformerDecoderParam decoder_param; - - // XPU input and weights params - using INPUT_LENGTH_CPU_TUPLE = std::tuple, std::vector>; - using INPUT_XPU_INFO_TUPLE = std::tuple>; - INPUT_LENGTH_CPU_TUPLE input_lenghts_cpu_info; - INPUT_XPU_INFO_TUPLE input_xpu_info; - INPUT_XPU_INFO_TUPLE xpu_mask_info_float; - - // XPU encoder and decoder outputs - T* encoder_out = nullptr; - T* ctc_probs = nullptr; - - // XPU runtime params - void* l3ptr = nullptr; - XPUStream stream; - std::shared_ptr ctx_xpu_ptr; - std::shared_ptr RAII_GUARD; - - int batch, max_seqlen, q_seqlen; - - // caches - std::vector att_cache_; - std::vector cnn_cache_; -}; - -} // namespace wenet - -#endif // RUNTIME_KUNLUN_XPU_XPU_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_conformer.cpp b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_conformer.cpp deleted file mode 100644 index f5fd562a624f04cd196a7d2084cc35f52d5a7bbb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_conformer.cpp +++ /dev/null @@ -1,971 +0,0 @@ -// Copyright (c) 2022 KUNLUNXIN Inc. -// 2022 Han Qi (qihan@baidu.com) -// Hehe Pan (panhehe@baidu.com) -// Zikui Yan (yanzikui@baidu.com) -// Chaolin Li (lichaolin@baidu.com) -// All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "xpu_conformer.h" // NOLINT -#include -#include -#include -#include - -namespace xpu { -namespace wenet { -const int X4_BEGIN = 8; -template -static int encoder_embed(api::Context* ctx_xpu, const float* x, T* y, int batch, - int max_seqlen, int seq_dim, int att_dim, - const ConformerEncoderParam& param) { - api::ctx_guard RAII_GUARD(ctx_xpu); - int ret = 0; - int h_seqlen = (max_seqlen - 1) / 2; - int q_seqlen = (h_seqlen - 1) / 2; - int out_channels = att_dim; - int h_dim = (seq_dim - 1) / 2; - int q_dim = (h_dim - 1) / 2; - - float xscale = std::sqrt(att_dim); - std::vector sizes = {std::max(batch * max_seqlen * seq_dim, - batch * out_channels * q_seqlen * q_dim), - batch * out_channels * h_seqlen * h_dim}; - std::vector ptrs; - for (auto size_ind : sizes) { - ptrs.push_back(RAII_GUARD.alloc(size_ind)); - } - - auto& emb_conv_w_list = param.emb_conv_w_list; - auto& emb_conv_maxw_list = param.emb_conv_maxw_list; - auto& emb_conv_bias_list = param.emb_conv_bias_list; - auto& emb_fc_w = param.emb_fc_w_list; - auto& emb_fc_maxw = param.emb_fc_maxw_list; - auto& emb_fc_bias = param.emb_fc_bias_list; - - ret = - api::cast_v2(ctx_xpu, x, ptrs[0], batch * max_seqlen * seq_dim); - WRAPPER_ASSERT_SUCCESS(ctx_xpu, ret); - ret = api::conv2d_fusion( - ctx_xpu, ptrs[0], emb_conv_w_list[0], ptrs[1], batch, 1, max_seqlen, - seq_dim, out_channels, {3, 3}, {2, 2}, {0, 0}, {1, 1}, 1, nullptr, - emb_conv_maxw_list[0], nullptr, true, emb_conv_bias_list[0], nullptr, - api::Activation_t::RELU, nullptr); - WRAPPER_ASSERT_SUCCESS(ctx_xpu, ret); - ret = api::conv2d_fusion( - ctx_xpu, ptrs[1], emb_conv_w_list[1], ptrs[0], batch, out_channels, - h_seqlen, h_dim, out_channels, {3, 3}, {2, 2}, {0, 0}, {1, 1}, 1, nullptr, - emb_conv_maxw_list[1], nullptr, true, emb_conv_bias_list[1], nullptr, - api::Activation_t::RELU, nullptr); - WRAPPER_ASSERT_SUCCESS(ctx_xpu, ret); - ret = api::transpose(ctx_xpu, ptrs[0], ptrs[1], - {batch, out_channels, q_seqlen, q_dim}, {0, 2, 1, 3}); - WRAPPER_ASSERT_SUCCESS(ctx_xpu, ret); - ret = api::fc_fusion( - ctx_xpu, ptrs[1], emb_fc_w[0], ptrs[0], batch * q_seqlen, att_dim, - out_channels * q_dim, false, true, nullptr, emb_fc_maxw[0], nullptr, - out_channels * q_dim, out_channels * q_dim, att_dim, 1.0f, 0.0f, - emb_fc_bias[0], api::Activation_t::LINEAR); - WRAPPER_ASSERT_SUCCESS(ctx_xpu, ret); - ret = api::scale(ctx_xpu, ptrs[0], y, batch * q_seqlen * out_channels, - false, xscale, 0); - WRAPPER_ASSERT_SUCCESS(ctx_xpu, ret); - ret = xpu_wait(ctx_xpu->xpu_stream); - WRAPPER_ASSERT_SUCCESS(ctx_xpu, ret); - return api::SUCCESS; -} - -template -static int ffn(api::Context* ctx, int batch, int q_seqlen, int hidden_dim, - bool with_endln, const T* x, T* y, int ln_begin, int fc_begin, - std::vector ln_scale_list, - std::vector ln_bias_list, - std::vector fc_w_list, - std::vector fc_maxw_list, - std::vector fc_bias_list, - std::vector mem_single, int ffn_factor) { - api::ctx_guard RAII_GUARD(ctx); - int ret = api::SUCCESS; - std::unordered_map buf_mapping = { - {"ffn_ln", mem_single[1]}, {"ffn_fc0", mem_single[X4_BEGIN]}, - {"tmp0", mem_single[X4_BEGIN + 1]}, {"tmp1", mem_single[X4_BEGIN]}, - {"ffn_fc1", mem_single[1]}, - }; - int ffn1_out_dim = hidden_dim * ffn_factor; - int ffn2_input_dim = ffn1_out_dim; - ret = api::layer_norm(ctx, x, buf_mapping["ffn_ln"], batch * q_seqlen, - hidden_dim, 1e-5, ln_scale_list[ln_begin], - ln_bias_list[ln_begin], nullptr, nullptr); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::fc_fusion( - ctx, buf_mapping["ffn_ln"], fc_w_list[fc_begin], 
buf_mapping["ffn_fc0"], - batch * q_seqlen, ffn1_out_dim, hidden_dim, false, true, nullptr, - fc_maxw_list[fc_begin], nullptr, hidden_dim, hidden_dim, ffn1_out_dim, - 1.0f, 0.0f, fc_bias_list[fc_begin], api::Activation_t::LINEAR); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::sigmoid(ctx, buf_mapping["ffn_fc0"], buf_mapping["tmp0"], - batch * q_seqlen * hidden_dim * ffn_factor); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::mul(ctx, buf_mapping["ffn_fc0"], buf_mapping["tmp0"], - buf_mapping["tmp1"], - batch * q_seqlen * hidden_dim * ffn_factor); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::fc_fusion( - ctx, buf_mapping["tmp1"], fc_w_list[fc_begin + 1], buf_mapping["ffn_fc1"], - batch * q_seqlen, hidden_dim, ffn2_input_dim, false, true, nullptr, - fc_maxw_list[fc_begin + 1], nullptr, ffn2_input_dim, ffn2_input_dim, - hidden_dim, 0.5f, 0.0f, fc_bias_list[fc_begin + 1], - api::Activation_t::LINEAR); - if (with_endln) { - ret = api::add_layer_norm_fusion( - ctx, x, buf_mapping["ffn_fc1"], y, batch * q_seqlen, hidden_dim, 1e-5, - ln_scale_list[ln_begin + 1], ln_bias_list[ln_begin + 1]); - } else { - ret = api::add(ctx, x, buf_mapping["ffn_fc1"], y, - batch * q_seqlen * hidden_dim); - } - WRAPPER_ASSERT_SUCCESS(ctx, ret); - return api::SUCCESS; -} - -template -int wenet_encoder_layer(api::Context* ctx, - api::ctx_guard& RAII_GUARD, // NOLINT - int batch, int q_seqlen, int hidden_dim, int ln_begin, - int fc_begin, int attn_pos_begin, int conv_begin, - const T* x, T* y, - ConformerEncoderParam& param, // NOLINT - std::vector& mem_single, // NOLINT - std::vector& mem_double, // NOLINT - float* mem_float, float* mask_score) { - WRAPPER_CHECK_CTX(ctx); - int max_size = ctx->max_ptr_size(); - int ret = api::SUCCESS; - std::unordered_map buf_mapping = { - {"ffn0_out", mem_single[1]}, - {"swp0", mem_single[2]}, - {"swp1", mem_single[3]}, - {"matrix_bd_pre", mem_double[0]}, - {"soft_scores", mem_double[0]}, - {"qkv", mem_single[2]}, - {"qkv_add", mem_single[1]}, - {"conv_p1", mem_single[X4_BEGIN + 2]}, - {"conv_glu0", mem_single[X4_BEGIN + 3]}, - {"conv_glu1", mem_single[X4_BEGIN + 4]}, - {"conv_d1", mem_single[X4_BEGIN + 3]}, - {"conv_p2", mem_single[X4_BEGIN + 2]}, - {"conv_after", mem_single[0]}, - }; - - auto ln_scale_list = param.ln_scale_list; - auto ln_bias_list = param.ln_bias_list; - - auto fc_w_list = param.fc_w_list; - auto fc_maxw_list = param.fc_maxw_list; - auto fc_bias_list = param.fc_bias_list; - - auto attn_pos_w_list = param.attn_pos_w_list; - auto attn_pos_maxw_list = param.attn_pos_maxw_list; - auto attn_pos_uv_bias_list = param.attn_pos_uv_bias_list; - - auto conv_w_list = param.conv_w_list; - auto conv_maxw_list = param.conv_maxw_list; - auto conv_bias_list = param.conv_bias_list; - - auto kernel_size = param.conv_param.kernel_size; - auto lorder = param.conv_param.lorder; - auto padding = param.conv_param.padding; - auto head_num = param.head_num; - auto head_dim = param.head_dim; - /* - ** feed forward macaron-style module - ** x = residual + 0.5*ff(x) - */ - ret = ffn(ctx, batch, q_seqlen, hidden_dim, false, x, - buf_mapping["ffn0_out"], ln_begin, fc_begin, - ln_scale_list, ln_bias_list, fc_w_list, fc_maxw_list, - fc_bias_list, mem_single, param.ffn_factor); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - /* - ** multi-headed self-attention module - ** qkv_list[0-4]: q,k,v,qu,qv mapping single[2-6] - ** attn_pos_uv_bias_list : float -> float16 - ** q_pos_attention : get pos_emb before cal - ** q_pos_attention : cal matrix_bd to qk_attention's mask ,when cal - *qk_attention, mask 
will be added - **/ - T* qkv_list[5] = {mem_single[6], mem_single[3], mem_single[4], mem_single[5], - mem_single[2]}; - ret = api::layer_norm(ctx, buf_mapping["ffn0_out"], buf_mapping["swp0"], - batch * q_seqlen, hidden_dim, 1e-5, - ln_scale_list[ln_begin + 1], - ln_bias_list[ln_begin + 1], nullptr, nullptr); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::fc_fusion_3c( - ctx, buf_mapping["swp0"], fc_w_list[fc_begin + 2], qkv_list[0], - qkv_list[1], qkv_list[2], batch * q_seqlen, hidden_dim * 3, hidden_dim, - false, true, nullptr, fc_maxw_list[fc_begin + 2], nullptr, hidden_dim, - hidden_dim, hidden_dim * 3, 1.0f, 0.0f, fc_bias_list[fc_begin + 2], - api::Activation_t::LINEAR); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - for (int i = 0; i < 2; i++) { - ret = api::broadcast_add( - ctx, qkv_list[0], attn_pos_uv_bias_list[attn_pos_begin * 2 + i], - qkv_list[i + 3], {batch, q_seqlen, hidden_dim}, {1, 1, hidden_dim}); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - } - int pos_emb_dim = 2 * q_seqlen - 1; - T* pos_emb_sliced = RAII_GUARD.alloc(pos_emb_dim * hidden_dim); - ret = api::slice(ctx, param.pos_emb[attn_pos_begin], pos_emb_sliced, - {5000, head_num, head_dim}, {0, 0, 0}, - {pos_emb_dim, head_num, head_dim}); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - int tmp_sliced_len = batch * head_num * q_seqlen * q_seqlen; - float* tmp_mask = RAII_GUARD.alloc(tmp_sliced_len); - ret = api::q_pos_attention( - ctx, qkv_list[4], pos_emb_sliced, buf_mapping["matrix_bd_pre"], batch, - q_seqlen, head_num, head_dim, 1.0f / std::sqrt(head_dim), nullptr, - nullptr, nullptr); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::slice(ctx, buf_mapping["matrix_bd_pre"], - reinterpret_cast(mem_float), - {batch, head_num, q_seqlen, pos_emb_dim}, {0, 0, 0, 0}, - {batch, head_num, q_seqlen, q_seqlen}); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::cast_v2(ctx, reinterpret_cast(mem_float), tmp_mask, - batch * head_num * q_seqlen * q_seqlen); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::broadcast_add(ctx, tmp_mask, mask_score, mem_float, - {batch, head_num, q_seqlen, q_seqlen}, - {batch, q_seqlen}); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - api::QKVAttnParam loop_p(batch, q_seqlen, head_num, head_dim, - {batch, head_num, q_seqlen, q_seqlen}, - api::Activation_t::LINEAR, -1, false, hidden_dim); - float* qk_maxptr = RAII_GUARD.alloc(max_size); - ret = api::qk_attention( - ctx, qkv_list[3], qkv_list[1], buf_mapping["soft_scores"], nullptr, - nullptr, qk_maxptr, loop_p, mem_float); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - float* qkv_maxptr = RAII_GUARD.alloc(max_size); - ret = api::qk_v_attention( - ctx, buf_mapping["soft_scores"], qkv_list[2], buf_mapping["qkv"], - qk_maxptr, nullptr, qkv_maxptr, loop_p); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::fc_fusion( - ctx, buf_mapping["qkv"], fc_w_list[fc_begin + 3], buf_mapping["swp1"], - batch * q_seqlen, hidden_dim, hidden_dim, false, true, qkv_maxptr, - fc_maxw_list[fc_begin + 3], nullptr, hidden_dim, hidden_dim, hidden_dim, - 1.0f, 0.0f, fc_bias_list[fc_begin + 3], api::Activation_t::LINEAR); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::add(ctx, buf_mapping["ffn0_out"], buf_mapping["swp1"], - buf_mapping["qkv_add"], batch * q_seqlen * hidden_dim); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - /* - ** Conv conv_p1-conv_d1-conv_p2 - */ - ret = api::layer_norm(ctx, buf_mapping["qkv_add"], buf_mapping["swp1"], - batch * q_seqlen, hidden_dim, 1e-5, - ln_scale_list[ln_begin + 2], - ln_bias_list[ln_begin + 2], nullptr, nullptr); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::transpose(ctx, 
buf_mapping["swp1"], buf_mapping["swp0"], - {batch, q_seqlen, hidden_dim}, {0, 2, 1}); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - int pad_seqlen = q_seqlen; - if (lorder > 0) { - ret = api::pad(ctx, buf_mapping["swp0"], buf_mapping["swp1"], - {batch, hidden_dim, q_seqlen}, {0, 0, lorder}, {0, 0, 0}, - padding); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - pad_seqlen += lorder; - } - ret = api::conv2d_fusion( - ctx, buf_mapping["swp1"], conv_w_list[conv_begin], buf_mapping["swp0"], - batch, hidden_dim, 1, pad_seqlen, hidden_dim * 2, {1, 1}, {1, 1}, - {0, 0, 0, 0}, {1, 1}, 1, nullptr, conv_maxw_list[conv_begin], nullptr, - true, conv_bias_list[conv_begin], nullptr, api::Activation_t::LINEAR, - nullptr); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::split(ctx, buf_mapping["swp0"], - {buf_mapping["conv_glu0"], buf_mapping["conv_glu1"]}, - {batch, hidden_dim * 2, pad_seqlen}, - {hidden_dim, hidden_dim}, 1); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::sigmoid(ctx, buf_mapping["conv_glu1"], buf_mapping["conv_glu1"], - batch * pad_seqlen * hidden_dim); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::mul(ctx, buf_mapping["conv_glu0"], buf_mapping["conv_glu1"], - buf_mapping["conv_p1"], batch * pad_seqlen * hidden_dim); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::conv1d_fusion( - ctx, buf_mapping["conv_p1"], conv_w_list[conv_begin + 1], - buf_mapping["conv_d1"], batch, hidden_dim, pad_seqlen, hidden_dim, - kernel_size, 1, {0}, 1, hidden_dim, nullptr, - conv_maxw_list[conv_begin + 1], nullptr, true, - conv_bias_list[conv_begin + 1], nullptr, api::Activation_t::LINEAR, - nullptr); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - - ret = api::transpose(ctx, buf_mapping["conv_d1"], buf_mapping["swp0"], - {batch, hidden_dim, q_seqlen}, {0, 2, 1}); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::layer_norm(ctx, buf_mapping["swp0"], buf_mapping["swp1"], - batch * q_seqlen, hidden_dim, 1e-5, - ln_scale_list[ln_begin + 3], - ln_bias_list[ln_begin + 3], nullptr, nullptr); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::sigmoid(ctx, buf_mapping["swp1"], buf_mapping["swp0"], - batch * q_seqlen * hidden_dim); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::mul(ctx, buf_mapping["swp0"], buf_mapping["swp1"], - buf_mapping["conv_p1"], batch * q_seqlen * hidden_dim); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::transpose(ctx, buf_mapping["conv_p1"], buf_mapping["conv_d1"], - {batch, q_seqlen, hidden_dim}, {0, 2, 1}); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::conv2d_fusion( - ctx, buf_mapping["conv_d1"], conv_w_list[conv_begin + 2], - buf_mapping["conv_p2"], batch, hidden_dim, 1, q_seqlen, hidden_dim, - {1, 1}, {1, 1}, {0, 0, 0, 0}, {1, 1}, 1, nullptr, - conv_maxw_list[conv_begin + 2], nullptr, true, - conv_bias_list[conv_begin + 2], nullptr, api::Activation_t::LINEAR, - nullptr); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::transpose(ctx, buf_mapping["conv_p2"], buf_mapping["swp0"], - {batch, hidden_dim, q_seqlen}, {0, 2, 1}); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::add(ctx, buf_mapping["swp0"], buf_mapping["qkv_add"], - buf_mapping["conv_after"], batch * q_seqlen * hidden_dim); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - /* - ** feed forward module - ** x = residual + 0.5*ff(x) - */ - ret = ffn( - ctx, batch, q_seqlen, hidden_dim, true, buf_mapping["conv_after"], y, - ln_begin + 4, fc_begin + 4, ln_scale_list, ln_bias_list, fc_w_list, - fc_maxw_list, fc_bias_list, mem_single, param.ffn_factor); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - return api::SUCCESS; -} - -template -int conformer_encoder_wenet( - 
api::Context* ctx, float* x, const std::vector& data_shape, - T* encoder_out, T* ctc_probs, - ConformerEncoderParam& param, // NOLINT - const std::tuple>& xpu_mask_info) { - // Embedding -> Encoder_layer * N -> Layernorm -> Ctc_loss - int ret = 0; - int fc_num_per_layer = param.fc_num_per_layer; - int conv_num_per_layer = param.conv_num_per_layer; - int ln_num_per_layer = param.ln_num_per_layer; - int ffn_factor = param.ffn_factor; - int head_num = param.head_num; - int head_dim = param.head_dim; - int att_dim = head_num * head_dim; - int ctc_dim = param.ctc_dim; - int batch = data_shape[0]; - int max_seqlen = data_shape[1]; - int seq_dim = data_shape[2]; - int h_seqlen = (max_seqlen - 1) / 2; - int q_seqlen = (h_seqlen - 1) / 2; - - WRAPPER_ASSERT_GT(ctx, param.layer_num, 0); - WRAPPER_ASSERT_GT(ctx, batch, 0); - WRAPPER_ASSERT_GT(ctx, head_num, 0); - WRAPPER_ASSERT_GT(ctx, ctc_dim, 0); - WRAPPER_ASSERT_GT(ctx, head_dim, 0); - // Inital GM - api::ctx_guard RAII_GUARD(ctx); - std::vector mem_double; - std::vector mem_single; - int base_len = batch * (q_seqlen + 14) * (att_dim + 14); - for (int i = 0; i < 8; i++) { - mem_single.push_back(RAII_GUARD.alloc(base_len)); - } - mem_single.push_back(RAII_GUARD.alloc(base_len * ffn_factor)); - mem_single.push_back(RAII_GUARD.alloc(base_len * ffn_factor)); - mem_single.push_back(RAII_GUARD.alloc(base_len * 4)); - mem_single.push_back(RAII_GUARD.alloc(base_len * 4)); - mem_single.push_back(RAII_GUARD.alloc(base_len * 2)); - mem_double.push_back( - RAII_GUARD.alloc(batch * head_num * q_seqlen * q_seqlen * 3)); - mem_double.push_back( - RAII_GUARD.alloc(batch * head_num * q_seqlen * q_seqlen)); - int ind_len = base_len * 6 + batch * param.head_num * q_seqlen * q_seqlen * 2; - int lens = - batch * param.head_num * q_seqlen * q_seqlen * sizeof(float) / sizeof(T); - float* mem_float = RAII_GUARD.alloc(lens); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - T* calx = mem_single[0]; - T* caly = mem_single[0]; - - // embedding + mask - float* emb = RAII_GUARD.alloc(batch * max_seqlen * seq_dim); - float* emb_nm = RAII_GUARD.alloc(batch * max_seqlen * seq_dim); - T* emb_fc = RAII_GUARD.alloc(batch * q_seqlen * att_dim); - ret = api::broadcast_sub(ctx, x, param.cmvn_mean, emb, data_shape, - {1, 1, 80}); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::broadcast_mul(ctx, emb, param.cmvn_istd, emb_nm, data_shape, - {1, 1, 80}); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = encoder_embed(ctx, emb_nm, calx, batch, max_seqlen, seq_dim, - att_dim, param); - float* mask_scores = RAII_GUARD.alloc(batch * q_seqlen); - ret = api::scale(ctx, std::get<0>(xpu_mask_info), mask_scores, - batch * q_seqlen, false, 1e4, -1); - CHECK_RET(ret); - // encoder * N - for (int i = 0; i < param.layer_num; i++) { - int ln_begin = i * ln_num_per_layer; - int fc_begin = i * fc_num_per_layer; - int attn_pos_begin = i; - int conv_begin = i * conv_num_per_layer; - ret = wenet_encoder_layer( - ctx, RAII_GUARD, batch, q_seqlen, att_dim, ln_begin, fc_begin, - attn_pos_begin, conv_begin, calx, caly, param, mem_single, mem_double, - mem_float, mask_scores); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - calx = caly; - } - // Final Layer_Norm - int ln_begin = param.layer_num * param.ln_num_per_layer; - int fc_begin = param.layer_num * param.fc_num_per_layer; - auto final_ln_scale = param.ln_scale_list[ln_begin]; - auto final_ln_bias = param.ln_bias_list[ln_begin]; - ret = api::layer_norm(ctx, caly, encoder_out, batch * q_seqlen, att_dim, 1e-5, - final_ln_scale, final_ln_bias, nullptr, nullptr); - 
WRAPPER_ASSERT_SUCCESS(ctx, ret); - // Ctc_Loss + log_sofmax - auto ctc_fc_w = param.fc_w_list[fc_begin]; - auto ctc_fc_maxw = param.fc_maxw_list[fc_begin]; - auto ctc_fc_bias = param.fc_bias_list[fc_begin]; - float* ctc_buffer = RAII_GUARD.alloc(batch * q_seqlen * ctc_dim); - ret = api::fc_fusion( - ctx, encoder_out, ctc_fc_w, ctc_buffer, batch * q_seqlen, ctc_dim, - att_dim, false, true, nullptr, ctc_fc_maxw, nullptr, att_dim, att_dim, - ctc_dim, 1.0f, 0.0f, ctc_fc_bias, api::Activation_t::LINEAR); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - float* softmax_out = RAII_GUARD.alloc(batch * q_seqlen * ctc_dim); - ret = api::softmax(ctx, ctc_buffer, softmax_out, - {batch, q_seqlen, ctc_dim}, 2); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - float* log_out = RAII_GUARD.alloc(batch * q_seqlen * ctc_dim); - ret = api::log(ctx, softmax_out, log_out, batch * q_seqlen * ctc_dim); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::cast_v2(ctx, log_out, ctc_probs, - batch * q_seqlen * ctc_dim); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - return api::SUCCESS; -} - -#define INSTANTIATION_CONSFORMER_WENET(T, TW, TGEMM) \ - template int conformer_encoder_wenet( \ - api::Context*, float*, const std::vector&, T*, T*, \ - ConformerEncoderParam&, \ - const std::tuple>&); -INSTANTIATION_CONSFORMER_WENET(float16, int16_t, int16_t); - -const float kFloatMax = std::numeric_limits::max(); -float logadd(std::vector const& x) { - float xmax = *max_element(x.begin(), x.end()); - if (xmax <= -kFloatMax) { - return -kFloatMax; - } - float sum = 0.0; - for (auto& it : x) { - sum += std::exp(it - xmax); - } - return std::log(sum) + xmax; -} - -struct PrefixScore { - float s = -kFloatMax; - float ns = -kFloatMax; - float score() const { return logadd({s, ns}); } - void check() const { - std::cout << "score " << s << std::endl; - std::cout << "nscore " << ns << std::endl; - } -}; - -struct PrefixHash { - size_t operator()(const std::vector& prefix) const { - size_t hash_code = 0; - // here we use KB&DR hash code - for (int id : prefix) { - hash_code = id + 31 * hash_code; - } - return hash_code; - } -}; - -static bool PrefixScoreCompare( - const std::pair, PrefixScore>& a, - const std::pair, PrefixScore>& b) { - return a.second.score() > b.second.score(); -} - -template -int ctc_prefix_beamsearch(api::Context* ctx, T* ctc_probs, - std::vector& hyps, // NOLINT - std::vector& hyps_len, // NOLINT - std::vector& ctc_scores, int batch, // NOLINT - int beam_size, int max_len, int ctc_dim) { - // 0. 
get topk - api::ctx_guard RAII_GUARD(ctx); - int data_len = batch * max_len * beam_size; - int* topk_index_buf = RAII_GUARD.alloc(data_len); - float* topk_score_buf = RAII_GUARD.alloc(data_len); - float* logp = RAII_GUARD.alloc(batch * max_len * ctc_dim); - int ret = - api::cast_v2(ctx, ctc_probs, logp, batch * max_len * ctc_dim); - ret = api::sorted_topk(ctx, logp, topk_score_buf, topk_index_buf, - max_len, ctc_dim, beam_size, true); - xpu_wait(ctx->xpu_stream); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - std::vector topk_index(data_len); - std::vector topk_score(data_len); - ret = xpu_memcpy(reinterpret_cast(&topk_index[0]), topk_index_buf, - data_len * sizeof(int), XPUMemcpyKind::XPU_DEVICE_TO_HOST); - CHECK_RET(ret); - ret = xpu_memcpy(reinterpret_cast(&topk_score[0]), topk_score_buf, - data_len * sizeof(float), XPUMemcpyKind::XPU_DEVICE_TO_HOST); - CHECK_RET(ret); - std::unordered_map, PrefixScore, PrefixHash> cur_hyps; - PrefixScore prefix_score; - prefix_score.s = 0.0; - prefix_score.ns = -kFloatMax; - std::vector empty; - cur_hyps[empty] = prefix_score; - for (int t = 0; t < max_len; ++t) { - int offset = beam_size * t; - std::unordered_map, PrefixScore, PrefixHash> next_hyps; - // 1. Token passing - for (int i = 0; i < beam_size; ++i) { - int id = topk_index[i + offset]; - float prob = topk_score[i + offset]; - for (const auto& it : cur_hyps) { - const std::vector& prefix = it.first; - const PrefixScore& prefix_score = it.second; - if (id == 0) { - // Case 0: *a + ε => *a - PrefixScore& next_score = next_hyps[prefix]; - next_score.s = logadd( - {next_score.s, prefix_score.s + prob, prefix_score.ns + prob}); - // Prefix not changed, copy the context from prefix. - next_hyps[prefix] = next_score; - } else if (!prefix.empty() && id == prefix.back()) { - // Case 1: *a + a => *a - PrefixScore& next_score = next_hyps[prefix]; - next_score.ns = logadd({next_score.ns, prefix_score.ns + prob}); - next_hyps[prefix] = next_score; - // Case 2: *aε + a => *aa - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score1 = next_hyps[new_prefix]; - next_score1.ns = logadd({next_score1.ns, prefix_score.s + prob}); - next_hyps[new_prefix] = next_score1; - } else { - // Case 3: *a + b => *ab, *aε + b => *ab - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score = next_hyps[new_prefix]; - next_score.ns = logadd( - {next_score.ns, prefix_score.s + prob, prefix_score.ns + prob}); - next_hyps[new_prefix] = next_score; - } - } - } - // 2. Second beam prune, only keep top n best paths - std::vector, PrefixScore>> arr(next_hyps.begin(), - next_hyps.end()); - std::nth_element(arr.begin(), arr.begin() + beam_size, arr.end(), - PrefixScoreCompare); - arr.resize(beam_size); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - // 3. 
Update cur_hyps and get new result - cur_hyps.clear(); - for (int k = 0; k < beam_size; k++) { - cur_hyps[arr[k].first] = arr[k].second; - } - } - std::vector, PrefixScore>> arr(cur_hyps.begin(), - cur_hyps.end()); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - int beam = 0; - for (auto it : arr) { - auto vec = it.first; - hyps_len[beam] = vec.size(); - ctc_scores[beam] = it.second.score(); - hyps.insert(hyps.end(), vec.begin(), vec.end()); - beam++; - } - return api::SUCCESS; -} - -template int ctc_prefix_beamsearch( - api::Context* ctx, float16* logp, - std::vector& hyps, // NOLINT - std::vector& hyps_len, // NOLINT - std::vector& ctc_scores, // NOLINT - int batch, int beam_size, int max_len, int ctc_dim); - -static int clip_cpu(int x, int min, int max) { - if (x <= min) return min; - if (x >= max) return max; - return x; -} - -static int add_sos_and_pad_ignored_id( - api::Context* ctx, const int* target, - std::vector& pad_target, // NOLINT - std::vector& pad_target_lod, // NOLINT - int batch_size, int target_seq_len, int max_target_seq_len, int eos_id, - int ignored_id, int add_sos_num, int vocab_size) { - int ret = -1; - int target_data_len = batch_size * target_seq_len; - std::vector target_cpu(target_data_len); - ret = xpu_wait(ctx->xpu_stream); - ret = xpu_memcpy(reinterpret_cast(target_cpu.data()), target, - target_data_len * sizeof(int), - XPUMemcpyKind::XPU_DEVICE_TO_HOST); - for (int i = 0; i < batch_size; i++) { - int valid_target_len = add_sos_num; - for (int j = 0; j < target_seq_len; j++) { - if (target_cpu[i * target_seq_len + j] == eos_id) { - pad_target[i * max_target_seq_len + j + add_sos_num] = ignored_id; - } else { - pad_target[i * max_target_seq_len + j + add_sos_num] = - clip_cpu(target_cpu[i * target_seq_len + j], 0, vocab_size); - valid_target_len++; - } - } - pad_target_lod[i + 1] = pad_target_lod[i] + valid_target_len; - } - return api::SUCCESS; -} - -template -int conformer_decoder_wenet(api::Context* ctx, const T* x, - const std::vector& x_shape, - const float* x_mask, const int* padded_target, - const std::vector& target_shape, - float* character_scores, - const ConformerDecoderParam& param) { - int layer_num = param.layer_num; - int batch_size = x_shape[0]; - int beam_size = param.beam_size; - int head_num = param.head_num; - int head_dim = param.head_dim; - int vocab_size = param.vocab_size; - int dim = head_num * head_dim; - int add_sos_num = param.add_sos_num; - int new_bs = batch_size * beam_size; - int sos_id = param.sos_id; - int eos_id = param.eos_id; - int ignored_id = param.ignored_id; - WRAPPER_CHECK_CTX(ctx); - WRAPPER_ASSERT_GT(ctx, layer_num, 0); - WRAPPER_ASSERT_GT(ctx, batch_size, 0); - WRAPPER_ASSERT_GT(ctx, head_num, 0); - WRAPPER_ASSERT_GT(ctx, vocab_size, 0); - WRAPPER_ASSERT_GT(ctx, dim, 0); - - api::ctx_guard RAII_GUARD(ctx); - const int max_seq_len = x_shape[1]; - WRAPPER_ASSERT_GT(ctx, max_seq_len, 0); - const int ffn1_out_dim = param.ffn_dim; - // if ffn_act is glu - const int ffn2_input_dim = ffn1_out_dim; - const int d_k = dim / head_num; - WRAPPER_ASSERT_GT(ctx, d_k, 0); - int target_seq_len = target_shape[1]; - WRAPPER_ASSERT_GT(ctx, target_seq_len, 1); - int max_target_seq_len = target_seq_len + add_sos_num; // add sos - WRAPPER_ASSERT_GT(ctx, max_seq_len, max_target_seq_len); - - int seqlen_sum = new_bs * max_seq_len; - T* new_x = const_cast(x); - int ret = -1; - // get src_attn vsl input - std::vector cpu_mask_data(new_bs * max_seq_len, 0); - std::vector src_lod_vec(new_bs + 1, 0); - ret = 
xpu_wait(ctx->xpu_stream); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = xpu_memcpy(reinterpret_cast(&cpu_mask_data.front()), x_mask, - new_bs * max_seq_len * sizeof(float), - XPUMemcpyKind::XPU_DEVICE_TO_HOST); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - for (int b = 1; b < src_lod_vec.size(); b++) { - int curr_seqlen = 0; - for (int idx = 0; idx < max_seq_len; idx++) { - if (static_cast(cpu_mask_data[idx]) == 1) { - curr_seqlen++; - } - } - src_lod_vec[b] = src_lod_vec[b - 1] + curr_seqlen; - } - api::VectorParam src_qk_lods = { - src_lod_vec.data(), static_cast(src_lod_vec.size()), nullptr}; - src_qk_lods = src_qk_lods.to_xpu(RAII_GUARD); - seqlen_sum = src_qk_lods.cpu[new_bs]; - - T* broadcast_x = RAII_GUARD.alloc(new_bs * max_seq_len * dim); - ret = api::broadcast(ctx, x, broadcast_x, {batch_size, max_seq_len, dim}, - {new_bs, max_seq_len, dim}); - - WRAPPER_ASSERT_SUCCESS(ctx, ret); - // add sos_id and pad ignored_id - std::vector real_target_cpu(max_target_seq_len * new_bs, sos_id); - std::vector real_target_lod(new_bs + 1, 0); - - ret = add_sos_and_pad_ignored_id(ctx, padded_target, real_target_cpu, - real_target_lod, batch_size * beam_size, - target_seq_len, max_target_seq_len, eos_id, - ignored_id, add_sos_num, vocab_size); - - // get self/src QKVParam - int target_seq_sum = real_target_lod[new_bs]; - api::VectorParam self_qk_lods = { - real_target_lod.data(), static_cast(real_target_lod.size()), - nullptr}; - self_qk_lods = self_qk_lods.to_xpu(RAII_GUARD); - api::QKVAttnParam self_qkv_param(self_qk_lods, head_num, d_k, - api::Activation_t::LINEAR); - api::ConformerQKVParam src_qkv_param(self_qk_lods, src_qk_lods, head_num, d_k, - false, -1); - - seqlen_sum = seqlen_sum > target_seq_sum ? seqlen_sum : target_seq_sum; - std::vector buf_sizes = { - new_bs * max_target_seq_len * - static_cast(sizeof(int) / sizeof(T)), // padded_target - new_bs * max_target_seq_len * dim, // embedding_out - new_bs * max_target_seq_len * dim, // mid_a - new_bs * max_target_seq_len * dim, // mid_b - new_bs * max_target_seq_len * - dim, // attention_out, src_attention qk_v的结果 - new_bs * max_target_seq_len * dim, // residual - // ffn buffer - new_bs * max_target_seq_len * ffn1_out_dim, // ffn1_out - new_bs * max_target_seq_len * ffn2_input_dim, // ffn_glu_out - new_bs * max_target_seq_len * ffn2_input_dim, // ffn_glu_a - new_bs * max_target_seq_len * ffn2_input_dim, // ffn_glu_b - new_bs * max_target_seq_len * ffn2_input_dim, // ffn_glu_sigmoid - // feature buffer - new_bs * max_target_seq_len * dim * 3, // feature_in buffer - new_bs * max_target_seq_len * dim * 2, // feature_out buffer - new_bs * max_target_seq_len * 2, // final_out - seqlen_sum * dim, // q - seqlen_sum * dim, // k - seqlen_sum * dim, // v - new_bs * max_seq_len * dim, // src_x - // attention buffer - new_bs * max_seq_len * max_seq_len * dim, // src_qk - }; - std::vector buffer_ptrs(buf_sizes.size()); - for (int i = 0; i < buf_sizes.size(); i++) { - buffer_ptrs[i] = RAII_GUARD.alloc(buf_sizes[i]); - } - int b_id = 0; - std::unordered_map buffer_map = { - {"padded_target", buffer_ptrs[b_id++]}, - {"embedding_out", buffer_ptrs[b_id++]}, - {"mid_a", buffer_ptrs[b_id++]}, - {"mid_b", buffer_ptrs[b_id++]}, - {"attention_out", buffer_ptrs[b_id++]}, - {"residual", buffer_ptrs[b_id++]}, - {"ffn1_out", buffer_ptrs[b_id++]}, - {"ffn_glu_out", buffer_ptrs[b_id++]}, - {"ffn_glu_a", buffer_ptrs[b_id++]}, - {"ffn_glu_b", buffer_ptrs[b_id++]}, - {"ffn_glu_sigmoid", buffer_ptrs[b_id++]}, - {"feature_in", buffer_ptrs[b_id++]}, - {"feature_out", 
buffer_ptrs[b_id++]}, - {"final_out", buffer_ptrs[b_id++]}, - {"q", buffer_ptrs[b_id++]}, - {"k", buffer_ptrs[b_id++]}, - {"v", buffer_ptrs[b_id++]}, - {"src_x", buffer_ptrs[b_id++]}, - {"src_qk", buffer_ptrs[b_id++]}, - }; - // maxptr buffer - int max_size = ctx->max_ptr_size(); - float* max_buffer = RAII_GUARD.alloc(6 * max_size); - float* max_x = max_buffer; - float* max_q = max_buffer + max_size; - float* max_k = max_buffer + 2 * max_size; - float* max_v = max_buffer + 3 * max_size; - float* max_qk = max_buffer + 4 * max_size; - float* max_qkv = max_buffer + 5 * max_size; - // copy pad_sos target to xpu - int* new_paded_target = reinterpret_cast(buffer_map["padded_target"]); - ret = api::do_host2device(ctx, real_target_cpu.data(), new_paded_target, - max_target_seq_len * new_bs * sizeof(int)); - T* embedding_out = buffer_map["embedding_out"]; - T* attention_out = buffer_map["attention_out"]; - T* mid_a = buffer_map["mid_a"]; - T* mid_b = buffer_map["mid_b"]; - T* q = buffer_map["q"]; - T* k = buffer_map["k"]; - T* v = buffer_map["v"]; - T* src_qk = buffer_map["src_qk"]; - T* residual = buffer_map["residual"]; - T* ffn1_out = buffer_map["ffn1_out"]; - T* ffn_glu_a = buffer_map["ffn_glu_a"]; - T* ffn_glu_b = buffer_map["ffn_glu_b"]; - T* ffn_glu_sigmoid = buffer_map["ffn_glu_sigmoid"]; - T* ffn_glu_out = buffer_map["ffn_glu_out"]; - // 1.1 embedding input: target{3,14} out:{3,14,512} - ret = - api::embedding(ctx, param.embed_table, new_paded_target, residual, - vocab_size, dim, new_bs * max_target_seq_len, -1); - float logit_scale = 1.0f; - ret = - api::scale(ctx, residual, embedding_out, - new_bs * max_target_seq_len * dim, true, logit_scale, 0.0f); - // 1.2 pos_embed, pos=[1, 5000, dim] - ret = api::broadcast_add(ctx, embedding_out, param.pe, residual, - {new_bs, max_target_seq_len, dim}, - {1, max_target_seq_len, dim}); - // 2. 
decoder - auto fc_weight_itr = param.fc_w_list.begin(); - auto fc_bias_itr = param.fc_bias_list.begin(); - auto fc_w_maxptr_itr = param.fc_maxw_list.begin(); - auto ln_scale_itr = param.ln_scale_list.begin(); - auto ln_bias_itr = param.ln_bias_list.begin(); - const float eps = 1e-5f; - - std::vector mask_cpu(max_target_seq_len * max_target_seq_len, 0.0); - const float kFloatMax = std::numeric_limits::max(); - for (int j = 0; j < max_target_seq_len; j++) { - for (int k = j + 1; k < max_target_seq_len; k++) - mask_cpu[j * max_target_seq_len + k] = -kFloatMax; - } - float* mask_xpu; - mask_xpu = reinterpret_cast( - RAII_GUARD.alloc(max_target_seq_len * max_target_seq_len)); - float* tg_mask; - tg_mask = reinterpret_cast(RAII_GUARD.alloc( - new_bs * head_num * max_target_seq_len * max_target_seq_len)); - ret = xpu_memcpy(mask_xpu, reinterpret_cast(&mask_cpu[0]), - max_target_seq_len * max_target_seq_len * sizeof(float), - XPUMemcpyKind::XPU_HOST_TO_DEVICE); - ret = api::broadcast( - ctx, mask_xpu, tg_mask, {1, 1, max_target_seq_len, max_target_seq_len}, - {new_bs, head_num, max_target_seq_len, max_target_seq_len}); - for (int j = 0; j < layer_num; j++) { - // 2.1 self attention - ret = api::layer_norm(ctx, residual, mid_b, new_bs * max_target_seq_len, - dim, eps, *ln_scale_itr++, *ln_bias_itr++, nullptr, - nullptr); - ret = api::fc_fusion_3c( - ctx, mid_b, *fc_weight_itr++, q, k, v, target_seq_sum, dim * 3, dim, - false, true, nullptr, *fc_w_maxptr_itr++, max_q, dim, dim, dim * 3, - 1.0f, 0.0f, *fc_bias_itr++, api::Activation_t::LINEAR); - - api::QKVAttnParam loop_p( - new_bs, max_target_seq_len, head_num, d_k, - {new_bs, head_num, max_target_seq_len, max_target_seq_len}, - api::Activation_t::LINEAR, -1, false, dim); - - ret = api::qk_attention(ctx, q, k, src_qk, nullptr, nullptr, - max_qk, loop_p, tg_mask); - ret = api::qk_v_attention(ctx, src_qk, v, mid_a, max_qk, - nullptr, max_qkv, loop_p); - // x + residual fused with fc - ret = api::fc_fusion( - ctx, mid_a, *fc_weight_itr++, residual, new_bs * max_target_seq_len, - dim, dim, false, true, nullptr, *fc_w_maxptr_itr++, nullptr, dim, dim, - dim, 1.0f, 1.0f, *fc_bias_itr++, api::Activation_t::LINEAR); - // 2.2 src attention - ret = api::layer_norm(ctx, residual, mid_a, new_bs * max_target_seq_len, - dim, eps, *ln_scale_itr++, *ln_bias_itr++, nullptr, - nullptr); - ret = api::fc_fusion( - ctx, mid_a, *fc_weight_itr++, mid_b, new_bs * max_target_seq_len, dim, - dim, false, true, nullptr, *fc_w_maxptr_itr++, max_q, dim, dim, dim, - 1.0f, 0.0f, *fc_bias_itr++, api::Activation_t::LINEAR); - // get k,v use encoder_out - ret = api::fc_fusion( - ctx, broadcast_x, *fc_weight_itr++, k, new_bs * max_seq_len, dim, dim, - false, true, nullptr, *fc_w_maxptr_itr++, nullptr, dim, dim, dim, 1.0f, - 0.0f, *fc_bias_itr++, api::Activation_t::LINEAR); - ret = api::fc_fusion( - ctx, broadcast_x, *fc_weight_itr++, v, new_bs * max_seq_len, dim, dim, - false, true, nullptr, *fc_w_maxptr_itr++, nullptr, dim, dim, dim, 1.0f, - 0.0f, *fc_bias_itr++, api::Activation_t::LINEAR); - ret = api::qk_attention(ctx, mid_b, k, src_qk, nullptr, - nullptr, max_qk, src_qkv_param); - - ret = api::qk_v_attention(ctx, src_qk, v, mid_a, max_qk, - nullptr, max_qkv, src_qkv_param); - // x = x + residual fused with fc - ret = api::fc_fusion( - ctx, mid_a, *fc_weight_itr++, residual, new_bs * max_target_seq_len, - dim, dim, false, true, max_qkv, *fc_w_maxptr_itr++, nullptr, dim, dim, - dim, 1.0f, 1.0f, *fc_bias_itr++, api::Activation_t::LINEAR); - // normalize before - ret = 
api::layer_norm(ctx, residual, mid_a, new_bs * max_target_seq_len, - dim, eps, *ln_scale_itr++, *ln_bias_itr++, nullptr, - nullptr); - // ffn1 - ret = api::fc_fusion( - ctx, mid_a, *fc_weight_itr++, ffn1_out, new_bs * max_target_seq_len, - ffn1_out_dim, dim, false, true, nullptr, *fc_w_maxptr_itr++, nullptr, - dim, dim, ffn1_out_dim, 1.0, 0.0, *fc_bias_itr++, - api::Activation_t::RELU); - // ffn2 - ret = api::fc_fusion( - ctx, ffn1_out, *fc_weight_itr++, residual, new_bs * max_target_seq_len, - dim, ffn2_input_dim, false, true, nullptr, *fc_w_maxptr_itr++, nullptr, - ffn2_input_dim, ffn2_input_dim, dim, 1.0, 1.0, *fc_bias_itr++, - api::Activation_t::LINEAR); - } - - ret = - api::layer_norm(ctx, residual, mid_a, new_bs * max_target_seq_len, dim, - 1e-5, *ln_scale_itr++, *ln_bias_itr++, nullptr, nullptr); - int ctc_dim = param.vocab_size; - ret = api::fc_fusion( - ctx, mid_a, *fc_weight_itr++, mid_b, new_bs * max_target_seq_len, ctc_dim, - dim, false, true, nullptr, *fc_w_maxptr_itr++, nullptr, dim, dim, ctc_dim, - 1.0f, 0.0f, *fc_bias_itr++, api::Activation_t::LINEAR); - // log_softmax - int data_len = new_bs * max_target_seq_len * ctc_dim; - float* softmax_in = RAII_GUARD.alloc(data_len); - float* softmax_out = RAII_GUARD.alloc(data_len); - float* log_out = RAII_GUARD.alloc(data_len); - ret = api::cast_v2(ctx, mid_b, softmax_in, data_len); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::softmax(ctx, softmax_in, softmax_out, - {new_bs, max_target_seq_len, ctc_dim}, 2); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - ret = api::log(ctx, softmax_out, character_scores, data_len); - WRAPPER_ASSERT_SUCCESS(ctx, ret); - - return api::SUCCESS; -} - -template int conformer_decoder_wenet( - api::Context* ctx, const float16* x, const std::vector& x_shape, - const float* x_mask, const int* padded_target, - const std::vector& target_shape, float* character_scores, - const ConformerDecoderParam& param); - -} // namespace wenet -} // namespace xpu diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_conformer.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_conformer.h deleted file mode 100644 index c20af03e11a4e1807ebd3b7d453292d9373d2f80..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_conformer.h +++ /dev/null @@ -1,781 +0,0 @@ -// Copyright (c) 2022 KUNLUNXIN Inc. -// 2022 Han Qi (qihan@baidu.com) -// Hehe Pan (panhehe@baidu.com) -// Zikui Yan (yanzikui@baidu.com) -// Chaolin Li (lichaolin@baidu.com) -// All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
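// [Editorial note — not part of the original KUNLUNXIN/WeNet sources removed above.]
// The ctc_prefix_beamsearch() routine deleted in xpu_conformer.cpp keeps two
// log-probabilities per prefix, PrefixScore::s (prefix currently ending in the
// CTC blank) and PrefixScore::ns (ending in a non-blank), and merges competing
// paths with logadd(), i.e. a max-shifted log-sum-exp. Below is a minimal,
// self-contained sketch of that merge step, given purely for illustration: the
// helper name log_sum_exp and the isfinite() guard are assumptions of this
// sketch (the deleted code uses -FLT_MAX as its "impossible" score instead).

#include <algorithm>
#include <cmath>
#include <vector>

static float log_sum_exp(const std::vector<float>& xs) {
  // Shift by the maximum element so std::exp never overflows.
  float xmax = *std::max_element(xs.begin(), xs.end());
  if (!std::isfinite(xmax)) {
    return xmax;  // every candidate path is impossible; keep it that way
  }
  float sum = 0.0f;
  for (float x : xs) {
    sum += std::exp(x - xmax);
  }
  return std::log(sum) + xmax;
}

// Example: "Case 0" in the token-passing loop (*a + blank -> *a) combines the
// blank-ending and non-blank-ending scores of the old prefix with the blank
// emission log-probability:
//   next.s = log_sum_exp({next.s, prev.s + p_blank, prev.ns + p_blank});
// which matches the pattern of the logadd() calls in the deleted implementation.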
- -#include -#include -#include -#include -#include -#include -#include - -#include "xpu/runtime.h" -#include "xpu/xdnn.h" -#include "xpu_util.h" // NOLINT -#pragma once - -namespace api = baidu::xpu::api; -template -class ConformerEncoderParam { - public: - int layer_num; - int fc_num_per_layer; - int conv_num_per_layer; - int ln_num_per_layer; - int head_num; - int head_dim; - int ctc_dim; - int ffn_factor; - int beam_size; - struct Embedding { - int conv_num; - int fc_num; - int embed_dim; - } emb_param; - struct ConvBlock { - bool is_casual; - int kernel_size; - int lorder; - T padding; - } conv_param; - - std::vector pos_emb; - std::vector emb_conv_w_list; - std::vector emb_conv_maxw_list; - std::vector emb_conv_bias_list; - std::vector emb_fc_w_list; - std::vector emb_fc_maxw_list; - std::vector emb_fc_bias_list; - - std::vector conv_w_list; - std::vector conv_maxw_list; - std::vector conv_bias_list; - - std::vector ln_scale_list; - std::vector ln_bias_list; - - std::vector fc_w_list; - std::vector fc_maxw_list; - std::vector fc_bias_list; - - std::vector attn_pos_w_list_; - std::vector attn_pos_w_list; - std::vector attn_pos_maxw_list; - std::vector attn_pos_uv_bias_list; - - const float* cmvn_istd{nullptr}; - const float* cmvn_mean{nullptr}; - const float* pe{nullptr}; - float* mask{nullptr}; -}; - -template -class ConformerDecoderParam { - public: - int layer_num; - int fc_num_per_layer; - int ln_num_per_layer; - - int head_num; - int head_dim; - int vocab_size; - int sos_id; - int eos_id; - int ignored_id; - int beam_size; - int max_token_num; - int add_sos_num; - int ffn_dim; - - const T* embed_table{nullptr}; - const T* pe{nullptr}; - std::vector fc_w_list; - std::vector fc_maxw_list; - std::vector fc_bias_list; - std::vector ln_scale_list; - std::vector ln_bias_list; -}; - -template -static int64_t vec_prod(const std::vector& data) { - int len = data.size(); - if (len < 1) { - return 0; - } - int64_t prod = data[0]; - for (int i = 1; i < len; ++i) { - prod *= data[i]; - } - return prod; -} - -template -static std::vector get_w_list_from( - const std::vector>& quant_data_list) { - int len = quant_data_list.size(); - std::vector w_list(len, nullptr); - for (int i = 0; i < len; ++i) { - w_list[i] = quant_data_list[i].data_; - } - return w_list; -} - -template -static std::vector get_w_maxptr_list_from( - const std::vector>& quant_data_list) { - int len = quant_data_list.size(); - std::vector w_maxptr_list(len, nullptr); - for (int i = 0; i < len; ++i) { - w_maxptr_list[i] = quant_data_list[i].max_ptr_; - } - return w_maxptr_list; -} - -template -void get_fc_param(const std::unordered_map& weights_len_info, - const std::string& params_dir, - const std::string& fc_name_prefix, - XPUQunatData& fc_w, // NOLINT - const float*& fc_bias, bool has_bias = true) { // NOLINT - const std::string fc_file_prefix = params_dir + fc_name_prefix; - int wlen = weights_len_info.at(fc_name_prefix + "weight"); - fc_w = get_xpu_quant_data(fc_file_prefix + "weight", wlen); - if (has_bias) { - int blen = weights_len_info.at(fc_name_prefix + "bias"); - fc_bias = get_xpu_data(fc_file_prefix + "bias", blen); - } else { - fc_bias = nullptr; - } -} - -template -void get_conv_param( - const std::unordered_map& weights_len_info, - const std::string& params_dir, const std::string& conv_name_prefix, - XPUQunatData& conv_w, const float*& conv_b, // NOLINT - bool has_bias = true) { // NOLINT - std::string conv_file_prefix = params_dir + conv_name_prefix; - int wlen = weights_len_info.at(conv_name_prefix + 
"weight"); - conv_w = get_xpu_quant_data(conv_file_prefix + "weight", wlen); - if (has_bias) { - int blen = weights_len_info.at(conv_name_prefix + "bias"); - conv_b = get_xpu_data(conv_file_prefix + "bias", blen); - } else { - conv_b = nullptr; - } -} - -template -void get_fc_fused_param( - const std::unordered_map& weights_len_info, - const std::string& params_dir, - const std::vector fc_name_prefixs, - XPUQunatData& _fc_w, // NOLINT - const float*& _fc_b, bool has_bias = true) { // NOLINT - // get cpu fc params - std::vector fc_ws; - std::vector fc_bs; - for (int ids = 0; ids < fc_name_prefixs.size(); ids++) { - std::string fc_file_prefix = params_dir + fc_name_prefixs[ids]; - int wlen = weights_len_info.at(fc_name_prefixs[ids] + "weight"); - std::vector fc_w = - get_cpu_data(fc_file_prefix + "weight", wlen); - std::vector fc_b; - if (has_bias) { - int blen = weights_len_info.at(fc_name_prefixs[ids] + "bias"); - fc_b = get_cpu_data(fc_file_prefix + "bias", blen); - } - fc_ws.insert(fc_ws.end(), fc_w.begin(), fc_w.end()); - fc_bs.insert(fc_bs.end(), fc_b.begin(), fc_b.end()); - } - _fc_w = get_xpu_quant_data("fused_fc_weight", fc_ws); - _fc_b = get_xpu_data("fused_fc_bias", fc_bs); -} - -template -void get_fc_ln_fused_param( - const std::unordered_map& weights_len_info, - const std::string& params_dir, - const std::vector fc_name_prefixs, - std::vector ln_name_prefixs, - XPUQunatData& _fc_w, // NOLINT - const float*& _fc_b, bool has_bias = true) { // NOLINT - // get cpu fc params - std::vector fc_ws; - std::vector fc_bs; - for (int ids = 0; ids < fc_name_prefixs.size(); ids++) { - std::string fc_file_prefix = params_dir + fc_name_prefixs[ids]; - int wlen = weights_len_info.at(fc_name_prefixs[ids] + "weight"); - std::vector fc_w = - get_cpu_data(fc_file_prefix + "weight", wlen); - std::vector fc_b; - if (has_bias) { - int blen = weights_len_info.at(fc_name_prefixs[ids] + "bias"); - fc_b = get_cpu_data(fc_file_prefix + "bias", blen); - } - // get cpu ln params - std::string ln_file_prefix = params_dir + ln_name_prefixs[ids]; - wlen = weights_len_info.at(ln_name_prefixs[ids] + "weight"); - int blen = weights_len_info.at(ln_name_prefixs[ids] + "bias"); - std::vector ln_scale = - get_cpu_data(ln_file_prefix + "weight", wlen); - std::vector ln_bias = - get_cpu_data(ln_file_prefix + "bias", blen); - int col = ln_scale.size(); - int row = static_cast(fc_w.size()) / col; - if (!has_bias) { - fc_b.resize(row); - } - // get new fc_bias - for (int i = 0; i < row; i++) { - float b = has_bias ? 
fc_b[i] : 0.f; - for (int j = 0; j < col; j++) { - b += fc_w[i * col + j] * ln_bias[j]; - } - fc_b[i] = b; - } - // get new fc_weight - for (int i = 0; i < row; i++) { - for (int j = 0; j < col; j++) { - fc_w[i * col + j] = fc_w[i * col + j] * ln_scale[j]; - } - } - fc_ws.insert(fc_ws.end(), fc_w.begin(), fc_w.end()); - fc_bs.insert(fc_bs.end(), fc_b.begin(), fc_b.end()); - } - _fc_w = get_xpu_quant_data("fused_fc_weight", fc_ws); - _fc_b = get_xpu_data("fused_fc_bias", fc_bs); -} - -template -void get_conv_bn_fused_param( - const std::unordered_map& weights_len_info, - const std::string& params_dir, const std::string& conv_name_prefix, - const std::string& bn_name_prefix, XPUQunatData& _conv_w, // NOLINT - const float*& _conv_b, bool has_bias = true) { // NOLINT - // get cpu conv params - std::string conv_file_prefix = params_dir + conv_name_prefix; - int wlen = weights_len_info.at(conv_name_prefix + "weight"); - std::vector conv_w = - get_cpu_data(conv_file_prefix + "weight", wlen); - std::vector conv_b; - if (has_bias) { - int blen = weights_len_info.at(conv_name_prefix + "bias"); - conv_b = get_cpu_data(conv_file_prefix + "bias", blen); - } - // get cpu bn params - std::string bn_file_prefix = params_dir + bn_name_prefix; - wlen = weights_len_info.at(bn_name_prefix + "weight"); - int blen = weights_len_info.at(bn_name_prefix + "bias"); - int mlen = weights_len_info.at(bn_name_prefix + "running_mean"); - int vlen = weights_len_info.at(bn_name_prefix + "running_var"); - std::vector bn_scale = - get_cpu_data(bn_file_prefix + "weight", wlen); - std::vector bn_bias = - get_cpu_data(bn_file_prefix + "bias", blen); - std::vector bn_mean = - get_cpu_data(bn_file_prefix + "running_mean", mlen); - std::vector bn_var = - get_cpu_data(bn_file_prefix + "running_var", vlen); - // fuse conv, bn, new weight is conv_w, new bias is bn_bias - int h = bn_scale.size(); - int w = static_cast(conv_w.size()) / h; - float eps = 1e-5f; // assume eps is 1e-5; - for (int i = 0; i < h; ++i) { - bn_scale[i] = bn_scale[i] / std::sqrt(bn_var[i] + eps); - } - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; ++j) { - conv_w[i * w + j] *= bn_scale[i]; - } - } - for (int i = 0; i < h; ++i) { - float b = has_bias ? conv_b[i] : 0.f; - bn_bias[i] += ((b - bn_mean[i]) * bn_scale[i]); - } - _conv_w = get_xpu_quant_data("fused_conv_weight", conv_w); - _conv_b = get_xpu_data("fused_conv_bias", bn_bias); -} - -template -static std::tuple, std::vector> read_cpu_data_from_file( - const std::string& data_file_prefix, int shape_ndim) { - std::vector res_data; - std::string data_file = data_file_prefix + ".dat"; - std::string shape_file = data_file_prefix + "_shape.txt"; - std::ifstream inF(shape_file); - if (!inF) { - std::cout << "ERR: open file failed! 
" << shape_file << std::endl; - std::exit(1); - } - char useless; // (16, 523, 80) or (160, 1) - std::vector inshape(shape_ndim, 0); - if (shape_ndim == 3) { - inF >> useless >> inshape[0] >> useless >> inshape[1] >> useless >> - inshape[2] >> useless; - } else if (shape_ndim == 2) { - inF >> useless >> inshape[0] >> useless >> inshape[1] >> useless; - } else if (shape_ndim == 1) { - inF >> useless >> inshape[0] >> useless >> useless; - } else { - std::cout << "ERR: only support shape ndim == 1, 2 or 3, but got " - << shape_ndim << std::endl; - std::exit(1); - } - - int data_len = vec_prod(inshape); - res_data = get_cpu_data(data_file, data_len); - return std::make_tuple(res_data, inshape); -} - -template -static std::tuple> read_xpu_data_from_file( - const std::string& data_file_prefix, int shape_ndim) { - auto cpu_data_info = read_cpu_data_from_file(data_file_prefix, shape_ndim); - T* xpu_data = get_xpu_data(data_file_prefix, std::get<0>(cpu_data_info)); - return std::make_tuple(xpu_data, std::get<1>(cpu_data_info)); -} - -template -static std::tuple> create_mask_according_speech_length( - const std::vector& speech_length, int max_seqlen, - void* xpu_stream = nullptr) { - int batch = speech_length.size(); - int mask_len = batch * max_seqlen; - int subsample_mask_len = batch * (((max_seqlen - 1) / 2 - 1) / 2); - std::vector mask_cpu(mask_len, 0); - std::vector subsample_mask_cpu(subsample_mask_len, 0); - // create mask, equal to 'masks = ~make_pad_mask(xs_lens, T).unsqueeze(1)' - for (int b = 0; b < batch; ++b) { - int curr_seqlen = speech_length[b]; - for (int idx = 0; idx < curr_seqlen; ++idx) { - mask_cpu.at(b * max_seqlen + idx) = 1; - } - } - // create subsample_mask, equal to 'x_mask[:, :, :-2:2][:, :, :-2:2]' - int sub_seqlen = subsample_mask_len / batch; - for (int b = 0; b < batch; ++b) { - for (int idx = 0; idx < sub_seqlen; ++idx) { - subsample_mask_cpu.at(b * sub_seqlen + idx) = - mask_cpu.at(b * max_seqlen + idx * 4); - } - } - // copy to xpu - T* subsample_mask_xpu = nullptr; - int r = xpu_malloc(reinterpret_cast(&subsample_mask_xpu), - subsample_mask_len * sizeof(T)); - if (r != 0) { - std::cout << "ERR: xpu_malloc failed!" << std::endl; - std::exit(1); - } - r = xpu_wait(xpu_stream); - if (r != 0) { - std::cout << "ERR: xpu_wait failed!" << std::endl; - std::exit(1); - } - r = xpu_memcpy(subsample_mask_xpu, subsample_mask_cpu.data(), - subsample_mask_len * sizeof(T), - XPUMemcpyKind::XPU_HOST_TO_DEVICE); - if (r != 0) { - std::cout << "ERR: xpu_memcpy failed!" 
<< std::endl; - std::exit(1); - } - - std::vector subsample_mask_shape{batch, 1, sub_seqlen}; - return std::make_tuple(subsample_mask_xpu, subsample_mask_shape); -} - -template -int init_encoder_params( - const std::string& params_dir, - ConformerEncoderParam& encoder_param) { // NOLINT - std::unordered_map weights_len_info = - get_weights_lens(params_dir + "weights_info.txt"); - std::unordered_map> weights_shape_info = - get_weights_shape(params_dir + "weights_info.txt"); - - // model struct param - auto& head_num = encoder_param.head_num; - auto& head_dim = encoder_param.head_dim; - auto& ffn_factor = encoder_param.ffn_factor; - auto& conv_param = encoder_param.conv_param; - auto& emb_param = encoder_param.emb_param; - auto& ctc_dim = encoder_param.ctc_dim; - auto& encoder_layer_num = encoder_param.layer_num; - auto& fc_num_per_layer = encoder_param.fc_num_per_layer; - auto& conv_num_per_layer = encoder_param.conv_num_per_layer; - auto& ln_num_per_layer = encoder_param.ln_num_per_layer; - encoder_layer_num = 12; - fc_num_per_layer = 6; - conv_num_per_layer = 3; - ln_num_per_layer = 6; - emb_param.conv_num = 2; - emb_param.fc_num = 1; - emb_param.embed_dim = 512; - ffn_factor = - weights_shape_info.at("encoder.encoders.0.feed_forward.w_1.weight")[0] / - weights_shape_info.at("encoder.encoders.0.feed_forward.w_1.weight")[1]; - head_dim = - weights_shape_info.at("encoder.encoders.0.self_attn.pos_bias_u")[1]; - head_num = - weights_shape_info.at("encoder.encoders.0.self_attn.pos_bias_u")[0]; - conv_param.kernel_size = weights_shape_info.at( - "encoder.encoders.0.conv_module.depthwise_conv.weight")[2]; - conv_param.lorder = conv_param.kernel_size - 1; - conv_param.padding = 0.0; - conv_param.is_casual = true; - ctc_dim = weights_len_info.at("ctc.ctc_lo.bias"); - encoder_param.beam_size = 3; - - // init encoder cmvn - auto& pe = encoder_param.pe; - auto& cmvn_istd = encoder_param.cmvn_istd; - auto& cmvn_mean = encoder_param.cmvn_mean; - int pe_len = weights_len_info.at("encoder.pe"); - int mlen = weights_len_info.at("encoder.global_cmvn.mean"); - int ilen = weights_len_info.at("encoder.global_cmvn.istd"); - pe = get_xpu_data(params_dir + "encoder.pe", pe_len); - cmvn_mean = - get_xpu_data(params_dir + "encoder.global_cmvn.mean", mlen); - cmvn_istd = - get_xpu_data(params_dir + "encoder.global_cmvn.istd", ilen); - - // init encoder embedding param - std::vector> emb_conv_w_list; - auto& emb_conv_bias_list = encoder_param.emb_conv_bias_list; - std::vector> emb_fc_w_list; - auto& emb_fc_bias_list = encoder_param.emb_fc_bias_list; - emb_conv_w_list.resize(emb_param.conv_num); - emb_conv_bias_list.resize(emb_param.conv_num); - emb_fc_w_list.resize(emb_param.fc_num); - emb_fc_bias_list.resize(emb_param.fc_num); - for (int i = 0; i < emb_param.conv_num; ++i) { - std::string conv_name_prefix = - "encoder.embed.conv." 
+ std::to_string(i * 2) + "."; - get_conv_param(weights_len_info, params_dir, conv_name_prefix, - emb_conv_w_list[i], emb_conv_bias_list[i]); - } - get_fc_param(weights_len_info, params_dir, "encoder.embed.out.0.", - emb_fc_w_list[0], emb_fc_bias_list[0]); - - // encoder_param_layer - int enc_fc_num = encoder_layer_num * fc_num_per_layer + 1; - int enc_conv_num = encoder_layer_num * conv_num_per_layer; - int enc_ln_num = encoder_layer_num * ln_num_per_layer + 1; - - std::vector> fc_w_list; - auto& fc_bias_list = encoder_param.fc_bias_list; - - std::vector> conv_w_list; - auto& conv_bias_list = encoder_param.conv_bias_list; - - auto& ln_scale_list = encoder_param.ln_scale_list; - auto& ln_bias_list = encoder_param.ln_bias_list; - - std::vector> attn_pos_w_list; - std::vector attn_pos_uv_bias_list; - // w_param need to be quanted & get maxw - fc_w_list.resize(enc_fc_num); - fc_bias_list.resize(enc_fc_num); - conv_w_list.resize(enc_conv_num); - conv_bias_list.resize(enc_conv_num); - ln_scale_list.resize(enc_ln_num); - ln_bias_list.resize(enc_ln_num); - attn_pos_w_list.resize(encoder_layer_num); - attn_pos_uv_bias_list.resize(encoder_layer_num * - 2); // pos_bias_u, pos_bias_v - for (int i = 0; i < encoder_layer_num; ++i) { - std::string enc_prefix = "encoder.encoders." + std::to_string(i) + "."; - int fc_offset = i * fc_num_per_layer; - int conv_offset = i * conv_num_per_layer; - int ln_offset = i * ln_num_per_layer; - // init FeedForwardParam macaron - get_fc_param(weights_len_info, params_dir, - enc_prefix + "feed_forward_macaron.w_1.", - fc_w_list[fc_offset], fc_bias_list[fc_offset]); - get_fc_param(weights_len_info, params_dir, - enc_prefix + "feed_forward_macaron.w_2.", - fc_w_list[fc_offset + 1], fc_bias_list[fc_offset + 1]); - get_fc_fused_param( - weights_len_info, params_dir, - {enc_prefix + "self_attn.linear_q.", enc_prefix + "self_attn.linear_k.", - enc_prefix + "self_attn.linear_v."}, - fc_w_list[fc_offset + 2], fc_bias_list[fc_offset + 2]); - get_fc_param( - weights_len_info, params_dir, enc_prefix + "self_attn.linear_out.", - fc_w_list[fc_offset + 3], fc_bias_list[fc_offset + 3], true); - // get pos w, pos u bias, pos v bias - std::string pos_w_name = enc_prefix + "self_attn.linear_pos.weight"; - std::string pos_ubias_name = enc_prefix + "self_attn.pos_bias_u"; - std::string pos_vbias_name = enc_prefix + "self_attn.pos_bias_v"; - int pos_wlen = weights_len_info.at(pos_w_name); - int pos_ublen = weights_len_info.at(pos_ubias_name); - int pos_vblen = weights_len_info.at(pos_vbias_name); - attn_pos_w_list[i] = - get_xpu_quant_data(params_dir + pos_w_name, pos_wlen); - attn_pos_uv_bias_list[i * 2] = - get_xpu_data(params_dir + pos_ubias_name, pos_ublen); - attn_pos_uv_bias_list[i * 2 + 1] = - get_xpu_data(params_dir + pos_vbias_name, pos_vblen); - // init ConvModuleParam - get_conv_param(weights_len_info, params_dir, - enc_prefix + "conv_module.pointwise_conv1.", - conv_w_list[conv_offset], conv_bias_list[conv_offset], - true); - get_conv_param(weights_len_info, params_dir, - enc_prefix + "conv_module.depthwise_conv.", - conv_w_list[conv_offset + 1], - conv_bias_list[conv_offset + 1], true); - get_conv_param(weights_len_info, params_dir, - enc_prefix + "conv_module.pointwise_conv2.", - conv_w_list[conv_offset + 2], - conv_bias_list[conv_offset + 2], true); - // init FeedForwardParam - get_fc_param(weights_len_info, params_dir, - enc_prefix + "feed_forward.w_1.", fc_w_list[fc_offset + 4], - fc_bias_list[fc_offset + 4]); - get_fc_param(weights_len_info, params_dir, - enc_prefix + 
"feed_forward.w_2.", fc_w_list[fc_offset + 5], - fc_bias_list[fc_offset + 5]); - // init LayerNormParam - get_ln_param(weights_len_info, params_dir, enc_prefix + "norm_ff_macaron.", - ln_scale_list[ln_offset], ln_bias_list[ln_offset]); - get_ln_param(weights_len_info, params_dir, enc_prefix + "norm_mha.", - ln_scale_list[ln_offset + 1], ln_bias_list[ln_offset + 1]); - get_ln_param(weights_len_info, params_dir, enc_prefix + "norm_conv.", - ln_scale_list[ln_offset + 2], ln_bias_list[ln_offset + 2]); - get_ln_param(weights_len_info, params_dir, enc_prefix + "conv_module.norm.", - ln_scale_list[ln_offset + 3], ln_bias_list[ln_offset + 3]); - get_ln_param(weights_len_info, params_dir, enc_prefix + "norm_ff.", - ln_scale_list[ln_offset + 4], ln_bias_list[ln_offset + 4]); - get_ln_param(weights_len_info, params_dir, enc_prefix + "norm_final.", - ln_scale_list[ln_offset + 5], ln_bias_list[ln_offset + 5]); - } - get_ln_param(weights_len_info, params_dir, "encoder.after_norm.", - ln_scale_list[enc_ln_num - 1], ln_bias_list[enc_ln_num - 1]); - get_fc_param(weights_len_info, params_dir, "ctc.ctc_lo.", - fc_w_list[enc_fc_num - 1], fc_bias_list[enc_fc_num - 1]); - /* get maxw && w */ - encoder_param.emb_conv_w_list = get_w_list_from(emb_conv_w_list); - encoder_param.emb_conv_maxw_list = - get_w_maxptr_list_from(emb_conv_w_list); - encoder_param.emb_fc_w_list = get_w_list_from(emb_fc_w_list); - encoder_param.emb_fc_maxw_list = get_w_maxptr_list_from(emb_fc_w_list); - - encoder_param.conv_w_list = get_w_list_from(conv_w_list); - encoder_param.conv_maxw_list = get_w_maxptr_list_from(conv_w_list); - - encoder_param.fc_w_list = get_w_list_from(fc_w_list); - encoder_param.fc_maxw_list = get_w_maxptr_list_from(fc_w_list); - - encoder_param.attn_pos_w_list_ = get_w_list_from(attn_pos_w_list); - encoder_param.attn_pos_maxw_list = - get_w_maxptr_list_from(attn_pos_w_list); - /* prepare params */ - api::Context ctx_xpu(api::kXPU2); - api::ctx_guard RAII_GUARD(&ctx_xpu); - int ret = 0; - int hidden_dim = head_num * head_dim; - encoder_param.pos_emb.resize(encoder_layer_num); - for (int i = 0; i < encoder_layer_num; i++) { - ret = xpu_malloc((void**)&(encoder_param.pos_emb[i]), // NOLINT - 5000 * hidden_dim * sizeof(T)); - ret = api::fc_fusion( - &ctx_xpu, encoder_param.pe, encoder_param.attn_pos_w_list_[i], - const_cast(encoder_param.pos_emb[i]), 5000, hidden_dim, hidden_dim, - false, true, nullptr, encoder_param.attn_pos_maxw_list[i], nullptr, - hidden_dim, hidden_dim, hidden_dim, 1.0f, 0.0f, nullptr, - api::Activation_t::LINEAR); - } - for (int i = 0; i < encoder_layer_num; i++) { - ret = api::scale( - &ctx_xpu, encoder_param.fc_bias_list[i * fc_num_per_layer + 1], - const_cast( - encoder_param.fc_bias_list[i * fc_num_per_layer + 1]), - hidden_dim, true, 0.5f, 0.0f); - ret = api::scale( - &ctx_xpu, encoder_param.fc_bias_list[i * fc_num_per_layer + 5], - const_cast( - encoder_param.fc_bias_list[i * fc_num_per_layer + 5]), - hidden_dim, true, 0.5f, 0.0f); - } - for (int i = 0; i < attn_pos_uv_bias_list.size(); i++) { - T* tmppos = nullptr; - ret = xpu_malloc(reinterpret_cast(&tmppos), hidden_dim * sizeof(T)); - ret = api::cast_v2(&ctx_xpu, attn_pos_uv_bias_list[i], tmppos, - hidden_dim); - encoder_param.attn_pos_uv_bias_list.push_back(tmppos); - } - return 0; -} - -template -int init_decoder_params( - const std::string& params_dir, - ConformerDecoderParam& decoder_param) { // NOLINT - std::unordered_map weights_len_info = - get_weights_lens(params_dir + "weights_info.txt"); - - // init DecoderLayerParam - auto& 
decoder_layer_num = decoder_param.layer_num; - auto& fc_num_per_layer = decoder_param.fc_num_per_layer; - auto& ln_num_per_layer = decoder_param.ln_num_per_layer; - std::vector> fc_w_list; - auto& fc_bias_list = decoder_param.fc_bias_list; - auto& ln_scale_list = decoder_param.ln_scale_list; - auto& ln_bias_list = decoder_param.ln_bias_list; - decoder_layer_num = 3; - fc_num_per_layer = 8; - ln_num_per_layer = 3; - int dec_fc_num = decoder_layer_num * fc_num_per_layer + 1; - int dec_ln_num = decoder_layer_num * ln_num_per_layer + 1; - fc_w_list.resize(dec_fc_num); - fc_bias_list.resize(dec_fc_num); - ln_scale_list.resize(dec_ln_num); - ln_bias_list.resize(dec_ln_num); - decoder_param.head_num = 8; - decoder_param.head_dim = 64; - decoder_param.vocab_size = 5538; - decoder_param.sos_id = 5537; - decoder_param.eos_id = 5537; - decoder_param.ignored_id = 2; - decoder_param.beam_size = 3; - decoder_param.max_token_num = 200; - decoder_param.add_sos_num = 1; - decoder_param.ffn_dim = 2048; - auto att_dim = decoder_param.head_num * decoder_param.head_dim; - - // init EmbeddingParam - std::string embed_table_name = "decoder.left_decoder.embed.0.weight"; - std::vector embed_table_cpu = get_cpu_data( - params_dir + embed_table_name, weights_len_info.at(embed_table_name)); - std::vector embed_table_cpu_t(embed_table_cpu.size(), 0); - for (int i = 0; i < static_cast(embed_table_cpu.size()); ++i) { - embed_table_cpu_t[i] = - static_cast(embed_table_cpu[i] * std::sqrt(att_dim)); - } - decoder_param.embed_table = - get_xpu_data(embed_table_name, embed_table_cpu_t); - - // init pe - std::string pe_name = "encoder.pe"; - std::vector pe_cpu = - get_cpu_data(params_dir + pe_name, weights_len_info.at(pe_name)); - std::vector pe_cpu_t(pe_cpu.size(), 0); - for (int i = 0; i < static_cast(pe_cpu.size()); ++i) { - pe_cpu_t[i] = static_cast(pe_cpu[i]); - } - decoder_param.pe = get_xpu_data(pe_name, pe_cpu_t); - for (int i = 0; i < decoder_layer_num; ++i) { - std::string dec_prefix = - "decoder.left_decoder.decoders." 
+ std::to_string(i) + "."; - int offset = i * fc_num_per_layer; - // init fc param - // self attention qkv fc - get_fc_fused_param(weights_len_info, params_dir, - { - dec_prefix + "self_attn.linear_q.", - dec_prefix + "self_attn.linear_k.", - dec_prefix + "self_attn.linear_v.", - }, - fc_w_list[offset], fc_bias_list[offset], true); - get_fc_param(weights_len_info, params_dir, - dec_prefix + "self_attn.linear_out.", - fc_w_list[offset + 1], fc_bias_list[offset + 1], true); - get_fc_param(weights_len_info, params_dir, - dec_prefix + "src_attn.linear_q.", fc_w_list[offset + 2], - fc_bias_list[offset + 2], true); - get_fc_param(weights_len_info, params_dir, - dec_prefix + "src_attn.linear_k.", fc_w_list[offset + 3], - fc_bias_list[offset + 3], true); - get_fc_param(weights_len_info, params_dir, - dec_prefix + "src_attn.linear_v.", fc_w_list[offset + 4], - fc_bias_list[offset + 4], true); - get_fc_param(weights_len_info, params_dir, - dec_prefix + "src_attn.linear_out.", fc_w_list[offset + 5], - fc_bias_list[offset + 5], true); - get_fc_param(weights_len_info, params_dir, - dec_prefix + "feed_forward.w_1.", fc_w_list[offset + 6], - fc_bias_list[offset + 6]); - get_fc_param(weights_len_info, params_dir, - dec_prefix + "feed_forward.w_2.", fc_w_list[offset + 7], - fc_bias_list[offset + 7]); - // init ln param - offset = i * ln_num_per_layer; - get_ln_param(weights_len_info, params_dir, dec_prefix + "norm1.", - ln_scale_list[offset], ln_bias_list[offset]); - get_ln_param(weights_len_info, params_dir, dec_prefix + "norm2.", - ln_scale_list[offset + 1], ln_bias_list[offset + 1]); - get_ln_param(weights_len_info, params_dir, dec_prefix + "norm3.", - ln_scale_list[offset + 2], ln_bias_list[offset + 2]); - } - // init after ln - get_ln_param(weights_len_info, params_dir, "decoder.left_decoder.after_norm.", - ln_scale_list[dec_ln_num - 1], ln_bias_list[dec_ln_num - 1]); - // init output layer fc - get_fc_param( - weights_len_info, params_dir, "decoder.left_decoder.output_layer.", - fc_w_list[dec_fc_num - 1], fc_bias_list[dec_fc_num - 1], true); - decoder_param.fc_w_list = get_w_list_from(fc_w_list); - decoder_param.fc_maxw_list = get_w_maxptr_list_from(fc_w_list); - return 0; -} - -static int padding_target(std::vector& hyps, // NOLINT - std::vector& hyps_len, // NOLINT - int beam_size, int eos_id) { - int max_target_len = *max_element(hyps_len.begin(), hyps_len.end()); - std::vector pad(max_target_len * beam_size); - int offset = 0; - for (int i = 0; i < beam_size; i++) { - for (int j = 0; j < max_target_len; j++) { - pad[i * max_target_len + j] = j < hyps_len[i] ? 
hyps[j + offset] : eos_id; - } - offset += hyps_len[i]; - } - hyps.swap(pad); - return max_target_len; -} - -namespace xpu { -namespace wenet { - -template -int conformer_encoder_wenet( - api::Context* ctx, float* x, const std::vector& data_shape, - T* encoder_out, T* ctc_probs, - ConformerEncoderParam& param, // NOLINT - const std::tuple>& xpu_mask_info); -template -int ctc_prefix_beamsearch(api::Context* ctx, T* ctc_probs, - std::vector& hyps, // NOLINT - std::vector& hyps_len, // NOLINT - std::vector& ctc_scores, // NOLINT - int batch_size, int beam_size, int max_len, - int ctc_dim); - -template -int conformer_decoder_wenet(api::Context* ctx, const T* x, - const std::vector& x_shape, - const float* x_mask, const int* padded_target, - const std::vector& target_shape, - float* character_scores, - const ConformerDecoderParam& param); -} // namespace wenet -} // namespace xpu diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_util.cpp b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_util.cpp deleted file mode 100644 index b18cd12b7e2d46131076ed90b4df0aebf5f8039b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_util.cpp +++ /dev/null @@ -1,491 +0,0 @@ -// Copyright (c) 2022 KUNLUNXIN Inc. -// 2022 Han Qi (qihan@baidu.com) -// Hehe Pan (panhehe@baidu.com) -// Zikui Yan (yanzikui@baidu.com) -// Chaolin Li (lichaolin@baidu.com) -// All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "xpu_util.h" // NOLINT - -template -static double vec_sum(const std::vector& data) { - double res = 0; - for (int i = 0; i < static_cast(data.size()); ++i) { - res += static_cast(data[i]); - } - return res; -} - -int vector_prod(std::vector shape) { - int accumlate = 1; - for (auto a : shape) { - accumlate *= a; - } - return accumlate; -} -void add_separator_when_necessary(std::string& str) { // NOLINT - int len = str.size(); - char ch = '/'; - if (str[len - 1] != ch) { - str.append(1, ch); - } -} - -template -static std::string print_vec(const std::vector& data) { - std::stringstream ss; - const int dump_len = data.size() > 8 ? 8 : data.size(); - std::vector dump_data(dump_len, 0); - int half_dump_len = dump_len / 2; - std::copy(data.cbegin(), data.cbegin() + half_dump_len, dump_data.begin()); - std::copy(data.cend() - (dump_len - half_dump_len), data.cend(), - dump_data.begin() + half_dump_len); - for (int i = 0; i < dump_len - 1; ++i) { - ss << dump_data[i] << ", "; - if ((i + 1) == dump_len / 2) { - ss << " ... 
"; - } - } - ss << dump_data[dump_len - 1]; - return ss.str(); -} - -template -static T parse_string(const std::string& str) { - return str; -} - -template <> -float parse_string(const std::string& str) { - return std::stof(str); -} -template <> -double parse_string(const std::string& str) { - return std::stod(str); -} -template <> -int parse_string(const std::string& str) { - return std::stoi(str); -} -template <> -int64_t parse_string(const std::string& str) { - return std::stoll(str); -} - -template -std::vector Split(const std::string& str, const std::string& separator) { - std::vector res; - std::string::size_type pos1, pos2; - pos1 = str.find_first_not_of(separator); - pos2 = str.find(separator, pos1); - while (std::string::npos != pos1 && std::string::npos != pos2) { - res.emplace_back(parse_string(str.substr(pos1, pos2 - pos1))); - pos1 = str.find_first_not_of(separator, pos2); - pos2 = str.find(separator, pos1); - } - if (std::string::npos != pos1 && pos1 < str.length()) { - res.emplace_back(parse_string(str.substr(pos1))); - } - return res; -} - -std::unordered_map get_weights_lens( - const std::string& file_path) { - std::unordered_map res; - std::ifstream inF(file_path, std::ifstream::in); - if (inF) { - // std::cout << "read success from " << file_path << std::endl; - std::string buffer; - while (std::getline(inF, buffer)) { - std::vector weight_info = Split(buffer, ":"); - std::string w_name = weight_info[0]; - int w_len = std::stoi(weight_info[3]); - res.insert(std::make_pair(w_name, w_len)); - } - } else { - std::cout << "ERR: read failed, " << file_path << std::endl; - std::exit(1); - } - - return res; -} - -std::unordered_map> get_weights_shape( - const std::string& file_path) { - std::unordered_map> res; - std::ifstream inF(file_path, std::ifstream::in); - if (inF) { - // std::cout << "read success from " << file_path << std::endl; - std::string buffer; - while (std::getline(inF, buffer)) { - std::vector weight_info = Split(buffer, ":"); - std::string w_name = weight_info[0]; - std::string w_shape_str = weight_info[2]; // example: (512, 1, 3, 3) - std::string w_shape_str_without_bracket( - w_shape_str.begin() + 1, - w_shape_str.end() - 1); // example: 512, 1, 3, 3 - std::vector w_shape = Split(w_shape_str_without_bracket, ","); - res.insert(std::make_pair(w_name, w_shape)); - } - } else { - std::cout << "ERR: read failed, " << file_path << std::endl; - std::exit(1); - } - - return res; -} - -template -std::vector get_cpu_data(const std::string& file_path, int len) { - std::vector result(len, 0); - std::ifstream inF(file_path, std::ifstream::binary); - if (!inF) { - std::cout << "ERR: std::ifstream init failed! 
" << file_path << std::endl; - std::exit(1); - } - if (inF.read(reinterpret_cast(result.data()), len * sizeof(T))) { - // std::cout << "read success from " << file_path << std::endl; - } else { - std::cout << "ERR: something wrong: " << file_path << ", len=" << len - << std::endl; - std::exit(1); - } - return result; -} - -template std::vector get_cpu_data(const std::string&, int len); -template std::vector get_cpu_data(const std::string&, - int len); -template std::vector get_cpu_data(const std::string&, - int len); -template std::vector get_cpu_data(const std::string&, int len); - -template -T* get_xpu_data(const std::string& data_name, const std::vector& cpu_data) { - int len = cpu_data.size(); -#ifdef TEST_DEBUG - std::cout << "DEBUG: file_path=" << data_name << ", len=" << len - << ", vec_sum=" << vec_sum(cpu_data) - << ", details: " << print_vec(cpu_data) << std::endl; -#endif - - T* xpu_data = nullptr; - int r = xpu_malloc(reinterpret_cast(&xpu_data), len * sizeof(T)); - if (r != 0) { - std::cout << "ERR: xpu_malloc failed! " << data_name << std::endl; - std::exit(1); - } - - r = xpu_wait(); - if (r != 0) { - std::cout << "ERR: xpu_wait failed!" << std::endl; - std::exit(1); - } - r = xpu_memcpy(xpu_data, cpu_data.data(), len * sizeof(T), - XPUMemcpyKind::XPU_HOST_TO_DEVICE); - if (r != 0) { - std::cout << "ERR: xpu_memcpy failed! " << data_name << std::endl; - std::exit(1); - } - -#ifdef TEST_DEBUG - std::cout << "DEBUG: xpu_data=" << xpu_data << std::endl; -#endif - - return xpu_data; -} - -template float* get_xpu_data(const std::string&, const std::vector&); -template float16* get_xpu_data(const std::string&, const std::vector&); -template int64_t* get_xpu_data(const std::string&, const std::vector&); -template int* get_xpu_data(const std::string&, const std::vector&); - -template -T* get_xpu_data(const std::string& file_path, int len) { - std::vector cpu_data = get_cpu_data(file_path, len); - return get_xpu_data(file_path, cpu_data); -} - -template float* get_xpu_data(const std::string&, int); -template float16* get_xpu_data(const std::string&, int); -template int64_t* get_xpu_data(const std::string&, int); -template int* get_xpu_data(const std::string&, int); - -template -std::vector quant_cpu(const std::vector& cpu_data) { - int len = cpu_data.size(); - std::vector cpu_quant_data(len, 0); - api::Context ctx(api::kCPU); - int r = api::quantization(&ctx, cpu_data.data(), - cpu_quant_data.data(), len, nullptr); - if (r != 0) { - std::cout << "ERR: quantization failed!" << std::endl; - std::exit(1); - } - return cpu_quant_data; -} - -template <> -std::vector quant_cpu(const std::vector& cpu_data) { - return cpu_data; -} - -template -XPUQunatData get_xpu_quant_data(const std::string& data_name, - const std::vector& cpu_data) { - XPUQunatData xpu_quant_data; - - int len = cpu_data.size(); - // quant - std::vector cpu_quant_data = quant_cpu(cpu_data); - // findmax - float abs_max = 1e-30f; - if (std::is_same::value || std::is_same::value) { - for (int i = 0; i < len; ++i) { - float abs_val = std::fabs(static_cast(cpu_data[i])); - abs_max = std::max(abs_max, abs_val); - } - } - - constexpr int max_ptr_len = 6; // for xpu2 - std::vector cpu_max(max_ptr_len, abs_max); - // xpu malloc - TY* xpu_data = nullptr; - float* xpu_max_ptr = nullptr; - int r = xpu_malloc(reinterpret_cast(&xpu_data), len * sizeof(TY)); - if (r != 0) { - std::cout << "ERR: xpu_malloc failed! 
" << data_name << std::endl; - std::exit(1); - } - r = xpu_malloc(reinterpret_cast(&xpu_max_ptr), - max_ptr_len * sizeof(float)); - if (r != 0) { - std::cout << "ERR: xpu_malloc failed! " << data_name << std::endl; - std::exit(1); - } - -#ifdef TEST_DEBUG - std::cout << "DEBUG: file_path=" << data_name << ", len=" << len - << ", data vec_sum=" << vec_sum(cpu_data) - << ", quant_data vec_sum=" << vec_sum(cpu_quant_data) - << ", details: " << print_vec(cpu_quant_data) << std::endl; -#endif - r = xpu_wait(); - if (r != 0) { - std::cout << "ERR: xpu_wait failed!" << std::endl; - std::exit(1); - } - // xpu memcpy - r = xpu_memcpy(xpu_data, cpu_quant_data.data(), len * sizeof(TY), - XPUMemcpyKind::XPU_HOST_TO_DEVICE); - if (r != 0) { - std::cout << "ERR: xpu_memcpy failed!" << std::endl; - std::exit(1); - } -#ifdef TEST_DEBUG - std::cout << "DEBUG: max is " << print_vec(cpu_max) << std::endl; -#endif - r = xpu_memcpy(xpu_max_ptr, cpu_max.data(), max_ptr_len * sizeof(float), - XPUMemcpyKind::XPU_HOST_TO_DEVICE); - if (r != 0) { - std::cout << "ERR: xpu_malloc failed!" << std::endl; - std::exit(1); - } - -#ifdef TEST_DEBUG - std::cout << "DEBUG: xpu_data=" << xpu_data << ", xpu_max_ptr=" << xpu_max_ptr - << std::endl; -#endif - xpu_quant_data.data_ = xpu_data; - xpu_quant_data.max_ptr_ = xpu_max_ptr; - return xpu_quant_data; -} - -template XPUQunatData get_xpu_quant_data( - const std::string&, const std::vector&); -template XPUQunatData get_xpu_quant_data( - const std::string&, const std::vector&); - -template -XPUQunatData get_xpu_quant_data(const std::string& file_path, int len) { - std::vector cpu_data = get_cpu_data(file_path, len); - return get_xpu_quant_data(file_path, cpu_data); -} - -template XPUQunatData get_xpu_quant_data( - const std::string&, int); -template XPUQunatData get_xpu_quant_data( - const std::string&, int); - -std::vector get_all_ids(const std::string& dir_in) { - std::vector ids; - std::set ids_set; - struct stat s; - stat(dir_in.c_str(), &s); - if (!S_ISDIR(s.st_mode)) { - return ids; - } - DIR* open_dir = opendir(dir_in.c_str()); - if (nullptr == open_dir) { - return ids; - } - dirent* p = nullptr; - while ((p = readdir(open_dir)) != nullptr) { - if (p->d_name[0] != '.') { - std::string filename = std::string(p->d_name); - int end_pos = filename.find('_'); - - int qid = std::stoi(filename.substr(0, end_pos)); - ids_set.insert(qid); - } - } - closedir(open_dir); - ids.resize(ids_set.size()); - ids.assign(ids_set.begin(), ids_set.end()); - return ids; -} - -void get_ln_param(const std::unordered_map& weights_len_info, - const std::string& params_dir, - const std::string& ln_name_prefix, - const float*& ln_scale, // NOLINT - const float*& ln_bias) { // NOLINT - std::string ln_file_prefix = params_dir + ln_name_prefix; - int wlen = weights_len_info.at(ln_name_prefix + "weight"); - int blen = weights_len_info.at(ln_name_prefix + "bias"); - ln_scale = get_xpu_data(ln_file_prefix + "weight", wlen); - ln_bias = get_xpu_data(ln_file_prefix + "bias", blen); -} - -template -void print_xpu_data_all(api::Context* ctx, const T* data, - std::vector shape, std::string name) { - int data_len = vector_prod(shape); - std::vector cpu_data(data_len); - xpu_wait(ctx->xpu_stream); - xpu_memcpy(reinterpret_cast(&cpu_data.front()), data, - data_len * sizeof(T), XPUMemcpyKind::XPU_DEVICE_TO_HOST); - std::cout << name; - std::cout << " shape:"; - for (auto i : shape) { - std::cout << i << " "; - } - std::cout << std::endl; - int row = 1; - int col = shape.back(); - if (shape.size() >= 2) { - row = 
data_len / col; - } - T* cpu_data_ptr = &cpu_data.front(); - for (int i = 0; i < row; i++) { - for (int j = 0; j < col; j++) { - std::cout << *(cpu_data_ptr + i * col + j) << " "; - } - std::cout << std::endl; - } -} -template -void print_xpu_data(api::Context* ctx, const T* data, std::vector shape, - std::string name) { - int data_len = vector_prod(shape); - - std::vector cpu_data(data_len); - xpu_memcpy(reinterpret_cast(&cpu_data.front()), data, - data_len * sizeof(T), XPUMemcpyKind::XPU_DEVICE_TO_HOST); - std::cout << name; - std::cout << " shape:"; - for (auto i : shape) { - std::cout << i << " "; - } - std::cout << std::endl; - if (data_len > 1000) { - double mean = 0; - for (auto val : cpu_data) { - mean += static_cast(val); - } - mean /= data_len; - std::cout << "mean=" << mean << std::endl; - std::cout << "details: "; - for (int i = 0; i < 8; ++i) { - std::cout << cpu_data[i] << " "; - } - std::cout << "..."; - for (int i = data_len - 8; i < data_len; ++i) { - std::cout << cpu_data[i] << " "; - } - std::cout << std::endl; - return; - } - int row = 1; - int col = shape.back(); - if (shape.size() >= 2) { - row = data_len / col; - } - T* cpu_data_ptr = &cpu_data.front(); - for (int i = 0; i < row; i++) { - for (int j = 0; j < col; j++) { - std::cout << *(cpu_data_ptr + i * col + j) << " "; - } - std::cout << std::endl; - } -} -template -void print_cpu_data(const T* data, std::vector shape, std::string name) { - int data_len = vector_prod(shape); - std::cout << name; - std::cout << " shape:"; - for (auto i : shape) { - std::cout << i << " "; - } - std::cout << std::endl; - int row = 1; - int col = shape.back(); - if (shape.size() >= 2) { - row = data_len / col; - } - for (int i = 0; i < row; i++) { - for (int j = 0; j < col; j++) { - std::cout << *(data + i * col + j) << " "; - } - std::cout << std::endl; - } -} - -template -void print_vec(const std::vector& data, const std::string& data_name) { - int len = static_cast(data.size()); - T sum = std::accumulate(data.begin(), data.end(), 0); - std::cout << "DEBUG: data_name is " << data_name << ", len=" << len - << ", sum=" << sum << ", "; - for (int i = 0; i < len - 1; ++i) { - std::cout << data[i] << ", "; - } - std::cout << data[len - 1] << std::endl; -} - -#define INSTANTIATION_PRINT(T) \ - template void print_vec(const std::vector&, const std::string&); \ - template void print_cpu_data(const T*, std::vector, \ - std::string name); \ - template void print_xpu_data(api::Context * ctx, const T*, \ - std::vector, std::string); \ - template void print_xpu_data_all(api::Context * ctx, const T*, \ - std::vector shape, std::string); - -INSTANTIATION_PRINT(int); -INSTANTIATION_PRINT(int16_t); -INSTANTIATION_PRINT(int8_t); -INSTANTIATION_PRINT(float); -INSTANTIATION_PRINT(float16); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_util.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_util.h deleted file mode 100644 index e0b02dc6004f9a17a789fccc101a0ac16fd3cedb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/kunlun/xpu/xpu_util.h +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright (c) 2022 KUNLUNXIN Inc. -// 2022 Han Qi (qihan@baidu.com) -// Hehe Pan (panhehe@baidu.com) -// Zikui Yan (yanzikui@baidu.com) -// Chaolin Li (lichaolin@baidu.com) -// All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xpu/runtime.h" -#include "xpu/xdnn.h" - -#pragma once -namespace api = baidu::xpu::api; -template -class XPUQunatData { - public: - XPUQunatData() : data_(nullptr), max_ptr_(nullptr) {} - XPUQunatData(T* data, float* max_ptr) : data_(data), max_ptr_(max_ptr) {} - T* data_{nullptr}; - float* max_ptr_{nullptr}; -}; - -int vector_prod(std::vector shape); -void add_separator_when_necessary(std::string& str); // NOLINT - -template -void conformer_test(const std::string& data_dir, const std::string& params_dir, - int threads_number, int dev_id); - -template -std::vector Split(const std::string& str, const std::string& separator); - -std::unordered_map get_weights_lens( - const std::string& file_path); -std::unordered_map> get_weights_shape( - const std::string& file_path); - -template -std::vector get_cpu_data(const std::string& file_path, int len); - -template -T* get_xpu_data(const std::string& file_path, int len); - -template -T* get_xpu_data(const std::string& data_name, const std::vector& cpu_data); - -template -XPUQunatData get_xpu_quant_data(const std::string& file_path, int len); - -template -XPUQunatData get_xpu_quant_data(const std::string& data_name, - const std::vector& cpu_data); - -std::vector get_all_ids(const std::string& dir_in); - -void get_ln_param(const std::unordered_map& weights_len_info, - const std::string& params_dir, - const std::string& ln_name_prefix, - const float*& ln_scale, // NOLINT - const float*& ln_bias); // NOLINT - -template -void print_vec(const std::vector& data, const std::string& data_name); -template -void print_cpu_data(const T* data, std::vector shape, std::string name); -template -void print_xpu_data(api::Context* ctx, const T* data, std::vector shape, - std::string name); -template -void print_xpu_data_all(api::Context* ctx, const T* data, - std::vector shape, std::string name); - -#define CHECK_RET(ret) \ - if ((ret) != 0) { \ - std::cout << "ERR" << __FILE__ << ":" << __LINE__ \ - << ", check failed, ret != 0" << std::endl; \ - std::exit(1); \ - } -#define WRAPPER_CHECK_CTX(ctx) \ - if (ctx == nullptr) { \ - return api::INVALID_PARAM; \ - } -#define WRAPPER_ASSERT_GT(ctx, expra, exprb) \ - if (!((expra) > (exprb))) { \ - return api::INVALID_PARAM; \ - } -#define WRAPPER_ASSERT_SUCCESS(ctx, ret) \ - if (!((ret) == api::SUCCESS)) { \ - return ret; \ - } diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/CMakeLists.txt deleted file mode 100644 index 6223e1481e7e98846d9de3535ec510b41c237d48..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/CMakeLists.txt +++ /dev/null @@ -1,81 +0,0 @@ -cmake_minimum_required(VERSION 3.14 FATAL_ERROR) - -project(wenet VERSION 0.1) - -option(CXX11_ABI "whether to use CXX11_ABI libtorch" OFF) -option(GRAPH_TOOLS "whether to build TLG graph tools" OFF) 
-option(BUILD_TESTING "whether to build unit test" ON) - -option(GRPC "whether to build with gRPC" OFF) -# TODO(Binbin Zhang): Change websocket to OFF since it depends on boost -# which is a very big library -option(WEBSOCKET "whether to build with websocket" ON) -option(HTTP "whether to build with http" OFF) -option(TORCH "whether to build with Torch" ON) -option(ONNX "whether to build with ONNX" OFF) -option(GPU "whether to build with GPU" OFF) - -set(CMAKE_VERBOSE_MAKEFILE OFF) - -include(FetchContent) -set(FETCHCONTENT_QUIET OFF) -get_filename_component(fc_base "fc_base" REALPATH BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -set(FETCHCONTENT_BASE_DIR ${fc_base}) - -list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) - -if(NOT MSVC) - # Keep the same with openfst, -fPIC or -fpic - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -pthread -fPIC") -else() - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) - add_compile_options("$<$:/utf-8>") -endif() - -# Include all dependency -if(TORCH) - include(libtorch) -endif() -if(ONNX) - include(onnx) -endif() -include(openfst) -include_directories( - ${CMAKE_CURRENT_SOURCE_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}/kaldi -) - -# Build all libraries -add_subdirectory(utils) -add_subdirectory(frontend) -add_subdirectory(post_processor) -add_subdirectory(kaldi) # kaldi: wfst based decoder -add_subdirectory(decoder) -add_subdirectory(api) - -# Optionally, you can build with websocket -if(WEBSOCKET) - include(boost) - add_subdirectory(websocket) -endif() - -# Optionally, you can build with gRPC -if(GRPC) - include(grpc) - add_subdirectory(grpc) -endif() - -# Optionally, you can build with http -if(HTTP) - include(boost) - add_subdirectory(http) -endif() - -# Build all bins -add_subdirectory(bin) - -# Unit Test -if(BUILD_TESTING) - include(gtest) - add_subdirectory(test) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/README.md deleted file mode 100644 index 92f1727f6e4f336ded62de398ae1907e07c27067..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/README.md +++ /dev/null @@ -1,179 +0,0 @@ -# WeNet Server (x86) ASR Demo - -**[中文版:x86 平台上使用 WeNet 进行语音识别](./README_CN.md)** - -## Run with Prebuilt Docker - -* Step 1. Download pretrained model(see the following link) or prepare your trained model. - -[中文(WenetSpeech)](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/wenetspeech/wenetspeech_u2pp_conformer_libtorch.tar.gz) -| [English(GigaSpeech)](https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/gigaspeech/gigaspeech_u2pp_conformer_libtorch.tar.gz) - - -* Step 2. Start docker websocket server. Here is a demo. - -``` sh -model_dir=$PWD/20210602_u2++_conformer_libtorch # absolute path -docker run --rm -it -p 10086:10086 -v $model_dir:/home/wenet/model wenetorg/wenet-mini:latest bash /home/run.sh -``` - -* Step 3. Test with web browser. Open runtime/libtorch/web/templates/index.html in the browser directly, input your `WebSocket URL`, it will request some permissions, and start to record to test, as the following graph shows. - -![Runtime web](../../docs/images/runtime_web.png) - -## Run in Docker Build - -We recommend using the docker environment to build the c++ binary to avoid -system and environment problems. - -* Step 1. Build your docker image. - -``` sh -cd docker -docker build --no-cache -t wenet:latest . -``` - -* Step 2. 
Put all the resources, like model, test wavs into a docker resource dir. - -``` sh -mkdir -p docker_resource -cp -r docker_resource/model -cp docker_resource/test.wav -``` - -* Step 3. Start docker container. -``` sh -docker run --rm -v $PWD/docker_resource:/home/wenet/runtime/libtorch/docker_resource -it wenet bash -``` - -* Step 4. Testing in docker container -``` -cd /home/wenet/runtime/libtorch -export GLOG_logtostderr=1 -export GLOG_v=2 -wav_path=docker_resource/test.wav -model_dir=docker_resource/model -./build/bin/decoder_main \ - --chunk_size -1 \ - --wav_path $wav_path \ - --model_path $model_dir/final.zip \ - --unit_path $model_dir/units.txt 2>&1 | tee log.txt -``` - -Or you can do the WebSocket server/client testing as described in the `WebSocket` section. - -## Run with Local Build - -* Step 1. Download or prepare your pretrained model. - -* Step 2. Build. The build requires cmake 3.14 or above. For building, please first change to `wenet/runtime/libtorch` as your build directory, then type: - -``` sh -mkdir build && cd build && cmake .. && cmake --build . -``` - -For building with GPU, you should turn on `GPU`: - -``` sh -mkdir build && cd build && cmake -DGPU=ON .. && cmake --build . -``` - -* Step 3. Testing, the RTF(real time factor) is shown in the console. - -``` sh -export GLOG_logtostderr=1 -export GLOG_v=2 -wav_path=your_test_wav_path -model_dir=your_model_dir -./build/bin/decoder_main \ - --chunk_size -1 \ - --wav_path $wav_path \ - --model_path $model_dir/final.zip \ - --unit_path $model_dir/units.txt 2>&1 | tee log.txt -``` - - -## Advanced Usage - -### WebSocket - -* Step 1. Download or prepare your pretrained model. -* Step 2. Build as in `Run with Local Build` -* Step 3. Start WebSocket server. - -``` sh -export GLOG_logtostderr=1 -export GLOG_v=2 -model_dir=your_model_dir -./build/bin/websocket_server_main \ - --port 10086 \ - --chunk_size 16 \ - --model_path $model_dir/final.zip \ - --unit_path $model_dir/units.txt 2>&1 | tee server.log -``` -* Step 4. Start WebSocket client. - -```sh -export GLOG_logtostderr=1 -export GLOG_v=2 -wav_path=your_test_wav_path -./build/websocket_client_main \ - --hostname 127.0.0.1 --port 10086 \ - --wav_path $wav_path 2>&1 | tee client.log -``` - -You can also start WebSocket client by web browser as described before. - -Here is a demo for command line based websocket server/client interaction. - -![Runtime server demo](../../../docs/images/runtime_server.gif) - -### gRPC - -Why grpc? You may find your answer in https://grpc.io/. -Please follow the following steps to try gRPC. - -* Step 1. Download or prepare your pretrained model. -* Step 2. Build -``` sh -mkdir build && cd build && cmake -DGRPC=ON .. && cmake --build . -``` -* Step 3. Start gRPC server - -``` sh -export GLOG_logtostderr=1 -export GLOG_v=2 -model_dir=your_model_dir -./build/bin/grpc_server_main \ - --port 10086 \ - --workers 4 \ - --chunk_size 16 \ - --model_path $model_dir/final.zip \ - --unit_path $model_dir/units.txt 2>&1 | tee server.log -``` - -* Step 4. Start gRPC client. - -```sh -export GLOG_logtostderr=1 -export GLOG_v=2 -wav_path=your_test_wav_path -./build/bin/grpc_client_main \ - --hostname 127.0.0.1 --port 10086 \ - --wav_path $wav_path 2>&1 | tee client.log -``` - -### http - -* Step 1. Download or prepare your pretrained model. -* Step 2. Build -``` sh -mkdir build && cd build && cmake -DHTTP=ON .. && cmake --build . -``` -* Step 3. Start http server - -simply replace grpc_server_main with http_server_main of Step 3 in gRPC - -* Step 4. 
Start http client. - -simply replace grpc_client_main with http_client_main of Step 4 in gRPC diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/README_CN.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/README_CN.md deleted file mode 100644 index ee74968bd3693357fe3d29a8ebda495b6ccca11c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/README_CN.md +++ /dev/null @@ -1,172 +0,0 @@ -# x86 平台上使用 WeNet 进行语音识别 - -Wenet 基于 pytorch 框架进行语音识别模型训练,而在使用训练好的 Wenet 模型进行真实场景的语音识别任务时,需要更高效的执行效率和一些外围组件。因此我们提供了一套基于 C++ 实现的 Wenet 的语音识别工具和在线服务。 - - -## 使用docker启动语音识别服务 - -最简单的使用 Wenet 的方式是通过官方提供的 docker 镜像 `wenetorg/wenet:mini` 来启动服务。 - -下面的命令先下载官方提供的预训练模型,并启动 docker 服务,加载模型,提供 websocket 协议的语音识别服务。 -``` sh -cd wenet/runtime/libtorch -wget https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell/20210601_u2%2B%2B_conformer_libtorch.tar.gz -tar -xf 20210601_u2++_conformer_libtorch.tar.gz -model_dir=$PWD/20210601_u2++_conformer_libtorch -docker run --rm -it -p 10086:10086 -v $model_dir:/home/wenet/model wenetorg/wenet-mini:latest bash /home/run.sh -``` - -`$model_dir` 是模型在本机的目录,将被映射到容器的 `/home/wenet/model` 目录,然后启动 web 服务。 - -**实时识别** - -使用浏览器打开文件`web/templates/index.html`,在 `WebSocket URL:`填入 `ws://127.0.0.1:10086`, 允许浏览器弹出的请求使用麦克风,即可通过麦克风进行实时语音识别。 - -![Runtime web](../../../docs/images/runtime_web.png) - -## 自行编译运行时程序 - -如果想使用非 docker 方式,需要自行编译。Wenet 支持 linux/macos/windows 三种平台上的编译。需要安装 cmake 3.14 或者更高版本。 - -运行如下命令,完成编译。 - -``` sh -# 当前目录为 wenet/runtime/libtorch -mkdir build && cd build && cmake .. && cmake --build . -``` -或者使用命令编译以支持 gRPC。 - -``` sh -mkdir build && cd build && cmake -DGRPC=ON .. && cmake --build . -``` -或者使用命令编译以支持 GPU。 -``` sh -mkdir build && cd build && cmake -DGPU=ON .. && cmake --build . -``` - -编译好的可执行程序在 `wenet/runtime/libtorch/build` 下: - -* decoder_main 本地文件识别工具 -* websocket_server_main 基于websocket协议的识别服务端 -* websocket_client_main 基于websocket协议的识别客户端 - - -下载预训练模型 - -``` sh -# 当前目录为 wenet/runtime/libtorch -wget https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell/20210601_u2%2B%2B_conformer_libtorch.tar.gz -tar -xf 20210601_u2++_conformer_libtorch.tar.gz -``` - -## 本地wav文件识别 - -本地文件识别,即程序每次运行时,给定一个语音文件或者一组语音文件列表,输出识别结果,然后结束程序。 - -下载好模型后,执行如下的命令进行本地wav文件识别,将 `wav_path` 设为你想测试的 wav 文件地址,将 `model_dir` 设为你的模型目录地址。 - -``` sh -# 当前目录为 wenet/runtime/libtorch -# 已经下载并解压20210602_unified_transformer_server.tar.gz到当前目录 -# 准备好一个16k采样率,单通道,16bits的音频文件test.wav - -export GLOG_logtostderr=1 -export GLOG_v=2 -wav_path=test.wav -model_dir=./20210602_unified_transformer_server -./build/bin/decoder_main \ - --chunk_size -1 \ - --wav_path $wav_path \ - --model_path $model_dir/final.zip \ - --unit_path $model_dir/units.txt 2>&1 | tee log.txt -``` - -`decoder_main`工具支持两种wav文件模式: - * 使用`--wav_path`指定单个文件,一次识别单个wav文件。 - * 使用`--wav_scp`指定一个.scp格式的wav列表,一次识别多个wav文件。 - -执行 `./build/bin/decoder_main --help` 可以了解更多的参数意义。 - -## 基于websocket的在线识别服务 - -在线识别服务,即程序运行后会常驻在内存中,等待客户端的请求,对于客户端发来的语音数据进行识别,将识别文本返回给客户端。 - -在这个示例中,需要先启动服务端程序,然后再启动客户端发送请求。 - -### 启动websocket识别服务端 - -执行如下指令,将 `model_dir` 设置为你的模型目录地址。 - -``` sh -export GLOG_logtostderr=1 -export GLOG_v=2 -model_dir=./20210602_unified_transformer_server -./build/bin/websocket_server_main \ - --port 10086 \ - --chunk_size 16 \ - --model_path $model_dir/final.zip \ - --unit_path $model_dir/units.txt 2>&1 | tee server.log -``` - -上述服务启动后,会监听 10086 端口。若想使用其他端口,请修改 `--port` 对应的参数. 
- -### websocket 识别客户端 - -客户端按 websocket 协议去请求服务,可以用不同语言来实现客户端。我们提供了两种客户端,一种是基于 C++ 的命令行工具。一种是基于网页形式的可视化客户端。 - -**命令行 websocket 客户端** - -打开一个新的命令行窗口,运行如下指令,启动客户端。可将 `wav_path` 设为你想测试的 wav 文件地址。 - -```sh -export GLOG_logtostderr=1 -export GLOG_v=2 -wav_path=test.wav -./build/bin/websocket_client_main \ - --hostname 127.0.0.1 --port 10086 \ - --wav_path $wav_path 2>&1 | tee client.log -``` - -该程序会模拟语音数据的真实时间进行流式请求,即 10 秒的语音会按 10 秒时间发送完。可以在客户端和服务器端看到流式识别过程输出的信息。 - -![Runtime server demo](../../../docs/images/runtime_server.gif) - -注意 `--port` 需要设置为服务端使用的端口号。 - -如果有两台机器,也可以在一台机器上运行服务端,在另一台机器运行客户端,此时 `--hostname` 要指定为服务端所在机器的可访问 ip。 - -**网页版 websocket 客户端** - -网页版客户端支持麦克风的语音输入。 - -使用浏览器打开文件 `web/templates/index.html`, 在 `Websoket URL` 里设置 websoket 识别服务的地址,比如 `ws://localhost:10086`, 点击开始识别。 - -**时延信息计算** - -`server.log` 文件中记录了每次请求的时延,可以通过如下命令得到所有请求的平均时延。 - -``` sh -grep "Rescoring cost latency" server.log | awk '{sum += $NF}; END {print sum/NR}' -``` - -## 在 Docker 环境中使用 - -如果遇到问题比如无法编译,我们提供了 docker 镜像用于直接执行示例。需要先安装好 docker,运行如下命令,进入 docker 容器环境。 - -``` sh -docker run --rm -it mobvoiwenet/wenet:latest bash -``` - -该镜像包含了编译过程中所依赖的所有第三方库、编译好的文件和预训练模型。 - -预训练模型在 `/home/model` 目录, 可执行程序在 `/home/wenet/runtime/libtorch/build` 目录。 - -### 构建 Docker 镜像 - -我们也提供了 Dockerfile,可以自己构建 docker 镜像,参考 `docker/Dockerfile` 文件。 - -``` sh -cd docker -docker build --no-cache -t wenet:latest . -docker run --rm -it wenet bash -``` diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/CMakeLists.txt deleted file mode 100644 index 8d61ca8477f0f0b6128f1effe0a2738494b2620f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -if(TORCH) - add_library(wenet_api SHARED wenet_api.cc) - target_link_libraries(wenet_api PUBLIC decoder) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/README.md deleted file mode 100644 index 5eaa13b977eb4836eb930452f4434dc9f2ea4139..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# WeNet API - -We refer [vosk](https://github.com/alphacep/vosk-api/blob/master/src/vosk_api.h) -for the interface design. - - -We are going to implement the following interfaces: - -- [x] non-streaming recognition -- [] streaming recognition -- [] nbest -- [] contextual biasing word -- [] alignment -- [] language support(post processor) -- [] label check diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/wenet_api.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/wenet_api.cc deleted file mode 100644 index cb1e0c8552e0126e2db274a29075578fe351a25f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/wenet_api.cc +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "api/wenet_api.h" - -#include -#include -#include - -#include "decoder/asr_decoder.h" -#include "decoder/torch_asr_model.h" -#include "post_processor/post_processor.h" -#include "utils/file.h" -#include "utils/json.h" -#include "utils/string.h" - -class Recognizer { - public: - explicit Recognizer(const std::string& model_dir) { - // FeaturePipeline init - feature_config_ = std::make_shared(80, 16000); - feature_pipeline_ = - std::make_shared(*feature_config_); - // Resource init - resource_ = std::make_shared(); - wenet::TorchAsrModel::InitEngineThreads(); - std::string model_path = wenet::JoinPath(model_dir, "final.zip"); - CHECK(wenet::FileExists(model_path)); - - auto model = std::make_shared(); - model->Read(model_path); - resource_->model = model; - - // units.txt: E2E model unit - std::string unit_path = wenet::JoinPath(model_dir, "units.txt"); - CHECK(wenet::FileExists(unit_path)); - resource_->unit_table = std::shared_ptr( - fst::SymbolTable::ReadText(unit_path)); - - std::string fst_path = wenet::JoinPath(model_dir, "TLG.fst"); - if (wenet::FileExists(fst_path)) { // With LM - resource_->fst = std::shared_ptr>( - fst::Fst::Read(fst_path)); - - std::string symbol_path = wenet::JoinPath(model_dir, "words.txt"); - CHECK(wenet::FileExists(symbol_path)); - resource_->symbol_table = std::shared_ptr( - fst::SymbolTable::ReadText(symbol_path)); - } else { // Without LM, symbol_table is the same as unit_table - resource_->symbol_table = resource_->unit_table; - } - - // Context config init - context_config_ = std::make_shared(); - decode_options_ = std::make_shared(); - post_process_opts_ = std::make_shared(); - } - - void Reset() { - if (feature_pipeline_ != nullptr) { - feature_pipeline_->Reset(); - } - if (decoder_ != nullptr) { - decoder_->Reset(); - } - result_.clear(); - } - - void InitDecoder() { - CHECK(decoder_ == nullptr); - // Optional init context graph - if (context_.size() > 0) { - context_config_->context_score = context_score_; - auto context_graph = - std::make_shared(*context_config_); - context_graph->BuildContextGraph(context_, resource_->symbol_table); - resource_->context_graph = context_graph; - } - // PostProcessor - if (language_ == "chs") { // TODO(Binbin Zhang): CJK(chs, jp, kr) - post_process_opts_->language_type = wenet::kMandarinEnglish; - } else { - post_process_opts_->language_type = wenet::kIndoEuropean; - } - resource_->post_processor = - std::make_shared(*post_process_opts_); - // Init decoder - decoder_ = std::make_shared(feature_pipeline_, resource_, - *decode_options_); - } - - void Decode(const char* data, int len, int last) { - using wenet::DecodeState; - // Init decoder when it is called first time - if (decoder_ == nullptr) { - InitDecoder(); - } - // Convert to 16 bits PCM data to float - CHECK_EQ(len % 2, 0); - feature_pipeline_->AcceptWaveform(reinterpret_cast(data), - len / 2); - if (last > 0) { - feature_pipeline_->set_input_finished(); - } - - while (true) { - DecodeState state = decoder_->Decode(false); - if (state == DecodeState::kWaitFeats) { - break; - } else if (state == DecodeState::kEndFeats) { - 
decoder_->Rescoring(); - UpdateResult(true); - break; - } else if (state == DecodeState::kEndpoint && continuous_decoding_) { - decoder_->Rescoring(); - UpdateResult(true); - decoder_->ResetContinuousDecoding(); - } else { // kEndBatch - UpdateResult(false); - } - } - } - - void UpdateResult(bool final_result) { - json::JSON obj; - obj["type"] = final_result ? "final_result" : "partial_result"; - int nbest = final_result ? nbest_ : 1; - obj["nbest"] = json::Array(); - for (int i = 0; i < nbest && i < decoder_->result().size(); i++) { - json::JSON one; - one["sentence"] = decoder_->result()[i].sentence; - if (final_result && enable_timestamp_) { - one["word_pieces"] = json::Array(); - for (const auto& word_piece : decoder_->result()[i].word_pieces) { - json::JSON piece; - piece["word"] = word_piece.word; - piece["start"] = word_piece.start; - piece["end"] = word_piece.end; - one["word_pieces"].append(piece); - } - } - one["sentence"] = decoder_->result()[i].sentence; - obj["nbest"].append(one); - } - result_ = obj.dump(); - } - - const char* GetResult() { return result_.c_str(); } - - void set_nbest(int n) { nbest_ = n; } - void set_enable_timestamp(bool flag) { enable_timestamp_ = flag; } - void AddContext(const char* word) { context_.emplace_back(word); } - void set_context_score(float score) { context_score_ = score; } - void set_language(const char* lang) { language_ = lang; } - void set_continuous_decoding(bool flag) { continuous_decoding_ = flag; } - - private: - // NOTE(Binbin Zhang): All use shared_ptr for clone in the future - std::shared_ptr feature_config_ = nullptr; - std::shared_ptr feature_pipeline_ = nullptr; - std::shared_ptr resource_ = nullptr; - std::shared_ptr decode_options_ = nullptr; - std::shared_ptr decoder_ = nullptr; - std::shared_ptr context_config_ = nullptr; - std::shared_ptr post_process_opts_ = nullptr; - - int nbest_ = 1; - std::string result_; - bool enable_timestamp_ = false; - std::vector context_; - float context_score_; - std::string language_ = "chs"; - bool continuous_decoding_ = false; -}; - -void* wenet_init(const char* model_dir) { - Recognizer* decoder = new Recognizer(model_dir); - return reinterpret_cast(decoder); -} - -void wenet_free(void* decoder) { - delete reinterpret_cast(decoder); -} - -void wenet_reset(void* decoder) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->Reset(); -} - -void wenet_decode(void* decoder, const char* data, int len, int last) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->Decode(data, len, last); -} - -const char* wenet_get_result(void* decoder) { - Recognizer* recognizer = reinterpret_cast(decoder); - return recognizer->GetResult(); -} - -void wenet_set_log_level(int level) { - FLAGS_logtostderr = true; - FLAGS_v = level; -} - -void wenet_set_nbest(void* decoder, int n) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_nbest(n); -} - -void wenet_set_timestamp(void* decoder, int flag) { - Recognizer* recognizer = reinterpret_cast(decoder); - bool enable = flag > 0 ? 
true : false; - recognizer->set_enable_timestamp(enable); -} - -void wenet_add_context(void* decoder, const char* word) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->AddContext(word); -} - -void wenet_set_context_score(void* decoder, float score) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_context_score(score); -} - -void wenet_set_language(void* decoder, const char* lang) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_language(lang); -} - -void wenet_set_continuous_decoding(void* decoder, int flag) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_continuous_decoding(flag > 0); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/wenet_api.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/wenet_api.h deleted file mode 100644 index e839aaa40166a6e50d9aa2ac0e697356bd25b941..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/api/wenet_api.h +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef API_WENET_API_H_ -#define API_WENET_API_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Init decoder from the file and returns the object - * - * @param model_dir: the model dir - * @returns model object or NULL if problem occured - */ -void* wenet_init(const char* model_dir); - -/** Free wenet decoder and corresponding resource - */ -void wenet_free(void* decoder); - -/** Reset decoder for next decoding - */ -void wenet_reset(void* decoder); - -/** Decode the input wav data - * @param data: pcm data, encoded as int16_t(16 bits) - * @param len: data length - * @param last: if it is the last package - */ -void wenet_decode(void* decoder, const char* data, int len, int last); - -/** Get decode result in json format - * It returns partial result when last is 0 - * It returns final result when last is 1 - - { - "nbest" : [{ - "sentence" : "are you okay" - "word_pieces" : [{ - "end" : 960, - "start" : 0, - "word" : "are" - }, { - "end" : 1200, - "start" : 960, - "word" : "you" - }, { - ...}] - }, { - "sentence" : "are you ok" - }], - "type" : "final_result" - } - - "type": final_result/partial_result - "nbest": nbest is enabled when n > 1 in final_result - "sentence": the ASR result - "word_pieces": optional, output timestamp when enabled - */ -const char* wenet_get_result(void* decoder); - -/** Set n-best, range 1~10 - * wenet_get_result will return top-n best results - */ -void wenet_set_nbest(void* decoder, int n); - -/** Whether to enable word level timestamp in results - disable it when flag = 0, otherwise enable - */ -void wenet_set_timestamp(void* decoder, int flag); - -/** Add one contextual biasing - */ -void wenet_add_context(void* decoder, const char* word); - -/** Set contextual biasing bonus score - */ -void wenet_set_context_score(void* decoder, float score); 
- -/** Set language, has effect on the postpocessing - * @param: lang, could be chs/en now - */ -void wenet_set_language(void* decoder, const char* lang); - -/** Set log level - * We use glog in wenet, so the level is the glog level - */ -void wenet_set_log_level(int level); - -/** Enable continous decoding or not - * flag > 0: enable, otherwise disable - */ -void wenet_set_continuous_decoding(void* decoder, int flag); - -#ifdef __cplusplus -} -#endif - -#endif // API_WENET_API_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/CMakeLists.txt deleted file mode 100644 index a117b8bcb580c8738a7ce72f88bc10ff0a450e98..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -add_executable(decoder_main decoder_main.cc) -target_link_libraries(decoder_main PUBLIC decoder) - -add_executable(label_checker_main label_checker_main.cc) -target_link_libraries(label_checker_main PUBLIC decoder) - -# if(TORCH) -# add_executable(api_main api_main.cc) -# target_link_libraries(api_main PUBLIC wenet_api) -# endif() - -if(WEBSOCKET) - add_executable(websocket_client_main websocket_client_main.cc) - target_link_libraries(websocket_client_main PUBLIC websocket) - add_executable(websocket_server_main websocket_server_main.cc) - target_link_libraries(websocket_server_main PUBLIC websocket) -endif() - -if(GRPC) - add_executable(grpc_server_main grpc_server_main.cc) - target_link_libraries(grpc_server_main PUBLIC wenet_grpc) - add_executable(grpc_client_main grpc_client_main.cc) - target_link_libraries(grpc_client_main PUBLIC wenet_grpc) -endif() - -if(HTTP) - add_executable(http_client_main http_client_main.cc) - target_link_libraries(http_client_main PUBLIC http) - add_executable(http_server_main http_server_main.cc) - target_link_libraries(http_server_main PUBLIC http) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/api_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/api_main.cc deleted file mode 100644 index 94b20d52a7b8eee5c39a12af4e1e25324d7d880f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/api_main.cc +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
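The `api_main.cc` tool whose body follows feeds a whole wav file to `wenet_decode` in one call with `last = 1`. For streaming input the same C API is meant to be called chunk by chunk, with contextual biasing configured before the first decode call (the internal decoder is created lazily on first use, as `Recognizer::InitDecoder` above shows). The sketch below is illustrative only; the model path, chunk size, and audio source are assumptions, not part of these sources.

```cpp
// Hypothetical streaming client for the C API declared in wenet_api.h.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

#include "api/wenet_api.h"

int main() {
  void* decoder = wenet_init("/path/to/model_dir");   // assumed model dir
  wenet_set_nbest(decoder, 3);
  // Biasing words must be registered before the first wenet_decode() call.
  wenet_add_context(decoder, "wenet");
  wenet_set_context_score(decoder, 3.0f);

  // Pretend we receive 16 kHz, 16-bit PCM in chunks from some audio source.
  std::vector<std::vector<int16_t>> chunks = /* audio source */ {};
  for (std::size_t i = 0; i < chunks.size(); ++i) {
    int last = (i + 1 == chunks.size()) ? 1 : 0;
    wenet_decode(decoder,
                 reinterpret_cast<const char*>(chunks[i].data()),
                 static_cast<int>(chunks[i].size() * sizeof(int16_t)),  // bytes
                 last);
    // Partial JSON while last == 0, final JSON once last == 1.
    std::cout << wenet_get_result(decoder) << std::endl;
  }
  wenet_reset(decoder);  // ready for the next utterance
  wenet_free(decoder);
  return 0;
}
```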
- -#include "api/wenet_api.h" -#include "frontend/wav.h" -#include "utils/flags.h" - -DEFINE_string(model_dir, "", "model dir path"); -DEFINE_string(wav_path, "", "single wave path"); -DEFINE_bool(enable_timestamp, false, "enable timestamps"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - wenet_set_log_level(2); - - void* decoder = wenet_init(FLAGS_model_dir.c_str()); - wenet_set_timestamp(decoder, FLAGS_enable_timestamp == true ? 1 : 0); - wenet::WavReader wav_reader(FLAGS_wav_path); - std::vector data(wav_reader.num_samples()); - for (int i = 0; i < wav_reader.num_samples(); i++) { - data[i] = static_cast(*(wav_reader.data() + i)); - } - - for (int i = 0; i < 10; i++) { - // Return the final result when last is 1 - wenet_decode(decoder, reinterpret_cast(data.data()), - data.size() * 2, 1); - const char* result = wenet_get_result(decoder); - LOG(INFO) << i << " " << result; - wenet_reset(decoder); - } - wenet_free(decoder); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/decoder_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/decoder_main.cc deleted file mode 100644 index b8f1dbae6b88390504cc9ce63f33dc9bd54a2d6a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/decoder_main.cc +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include - -#include "decoder/params.h" -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/string.h" -#include "utils/thread_pool.h" -#include "utils/timer.h" -#include "utils/utils.h" - -DEFINE_bool(simulate_streaming, false, "simulate streaming input"); -DEFINE_bool(output_nbest, false, "output n-best of decode result"); -DEFINE_string(wav_path, "", "single wave path"); -DEFINE_string(wav_scp, "", "input wav scp"); -DEFINE_string(result, "", "result output file"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); -DEFINE_int32(thread_num, 1, "num of decode thread"); -DEFINE_int32(warmup, 0, "num of warmup decode, 0 means no warmup"); - -std::shared_ptr g_decode_config; -std::shared_ptr g_feature_config; -std::shared_ptr g_decode_resource; - -std::ofstream g_result; -std::mutex g_mutex; -int g_total_waves_dur = 0; -int g_total_decode_time = 0; - -void decode(std::pair wav, bool warmup = false) { - wenet::WavReader wav_reader(wav.second); - int num_samples = wav_reader.num_samples(); - CHECK_EQ(wav_reader.sample_rate(), FLAGS_sample_rate); - - auto feature_pipeline = - std::make_shared(*g_feature_config); - feature_pipeline->AcceptWaveform(wav_reader.data(), num_samples); - feature_pipeline->set_input_finished(); - LOG(INFO) << "num frames " << feature_pipeline->num_frames(); - - wenet::AsrDecoder decoder(feature_pipeline, g_decode_resource, - *g_decode_config); - - int wave_dur = static_cast(static_cast(num_samples) / - wav_reader.sample_rate() * 1000); - int decode_time = 0; - std::string final_result; - while (true) { - wenet::Timer timer; - wenet::DecodeState state = decoder.Decode(); - if (state == wenet::DecodeState::kEndFeats) { - decoder.Rescoring(); - } - int chunk_decode_time = timer.Elapsed(); - decode_time += chunk_decode_time; - if (decoder.DecodedSomething()) { - LOG(INFO) << "Partial result: " << decoder.result()[0].sentence; - } - - if (FLAGS_continuous_decoding && state == wenet::DecodeState::kEndpoint) { - if (decoder.DecodedSomething()) { - decoder.Rescoring(); - LOG(INFO) << "Final result (continuous decoding): " - << decoder.result()[0].sentence; - final_result.append(decoder.result()[0].sentence); - } - decoder.ResetContinuousDecoding(); - } - - if (state == wenet::DecodeState::kEndFeats) { - break; - } else if (FLAGS_chunk_size > 0 && FLAGS_simulate_streaming) { - float frame_shift_in_ms = - static_cast(g_feature_config->frame_shift) / - wav_reader.sample_rate() * 1000; - auto wait_time = - decoder.num_frames_in_current_chunk() * frame_shift_in_ms - - chunk_decode_time; - if (wait_time > 0) { - LOG(INFO) << "Simulate streaming, waiting for " << wait_time << "ms"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(wait_time))); - } - } - } - if (decoder.DecodedSomething()) { - final_result.append(decoder.result()[0].sentence); - } - LOG(INFO) << wav.first << " Final result: " << final_result << std::endl; - LOG(INFO) << "Decoded " << wave_dur << "ms audio taken " << decode_time - << "ms."; - - if (!warmup) { - g_mutex.lock(); - std::ostream& buffer = FLAGS_result.empty() ? 
std::cout : g_result; - if (!FLAGS_output_nbest) { - buffer << wav.first << " " << final_result << std::endl; - } else { - buffer << "wav " << wav.first << std::endl; - auto& results = decoder.result(); - for (auto& r : results) { - if (r.sentence.empty()) continue; - buffer << "candidate " << r.score << " " << r.sentence << std::endl; - } - } - g_total_waves_dur += wave_dur; - g_total_decode_time += decode_time; - g_mutex.unlock(); - } -} - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - g_decode_config = wenet::InitDecodeOptionsFromFlags(); - g_feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - g_decode_resource = wenet::InitDecodeResourceFromFlags(); - - if (FLAGS_wav_path.empty() && FLAGS_wav_scp.empty()) { - LOG(FATAL) << "Please provide the wave path or the wav scp."; - } - std::vector> waves; - if (!FLAGS_wav_path.empty()) { - waves.emplace_back(make_pair("test", FLAGS_wav_path)); - } else { - std::ifstream wav_scp(FLAGS_wav_scp); - std::string line; - while (getline(wav_scp, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - CHECK_GE(strs.size(), 2); - waves.emplace_back(make_pair(strs[0], strs[1])); - } - - if (waves.empty()) { - LOG(FATAL) << "Please provide non-empty wav scp."; - } - } - - if (!FLAGS_result.empty()) { - g_result.open(FLAGS_result, std::ios::out); - } - - // Warmup - if (FLAGS_warmup > 0) { - LOG(INFO) << "Warming up..."; - { - ThreadPool pool(FLAGS_thread_num); - auto wav = waves[0]; - for (int i = 0; i < FLAGS_warmup; i++) { - pool.enqueue(decode, wav, true); - } - } - LOG(INFO) << "Warmup done."; - } - - { - ThreadPool pool(FLAGS_thread_num); - for (auto& wav : waves) { - pool.enqueue(decode, wav, false); - } - } - - LOG(INFO) << "Total: decoded " << g_total_waves_dur << "ms audio taken " - << g_total_decode_time << "ms."; - LOG(INFO) << "RTF: " << std::setprecision(4) - << static_cast(g_total_decode_time) / g_total_waves_dur; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/grpc_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/grpc_client_main.cc deleted file mode 100644 index f2d226d48d3757c5f095335eff3288f5d227282b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/grpc_client_main.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
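`decoder_main.cc` above reports a real-time factor (RTF) at the end of the run: accumulated decoding time divided by accumulated audio duration, both in milliseconds. A minimal restatement of that bookkeeping, with made-up figures:

```cpp
#include <iostream>

// Mirrors the accounting in decoder_main.cc: each utterance adds its audio
// duration and its decode time to two global counters; RTF is their ratio.
int main() {
  // Hypothetical figures: 60 s of audio decoded in 4.8 s of wall-clock time.
  int total_waves_dur_ms = 60000;
  int total_decode_time_ms = 4800;
  double rtf = static_cast<double>(total_decode_time_ms) / total_waves_dur_ms;
  std::cout << "RTF: " << rtf << std::endl;  // 0.08, i.e. 12.5x faster than real time
  return 0;
}
```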
- -#include "frontend/wav.h" -#include "grpc/grpc_client.h" -#include "utils/flags.h" -#include "utils/timer.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of websocket server"); -DEFINE_int32(port, 10086, "port of websocket server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - wenet::GrpcClient client(FLAGS_hostname, FLAGS_port, FLAGS_nbest, - FLAGS_continuous_decoding); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - std::vector pcm_data(wav_reader.data(), - wav_reader.data() + num_samples); - // Send data every 0.5 second - const float interval = 0.5; - const int sample_interval = interval * sample_rate; - for (int start = 0; start < num_samples; start += sample_interval) { - if (client.done()) { - break; - } - int end = std::min(start + sample_interval, num_samples); - // Convert to short - std::vector data; - data.reserve(end - start); - for (int j = start; j < end; j++) { - data.push_back(static_cast(pcm_data[j])); - } - // Send PCM data - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Send " << data.size() << " samples"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(interval * 1000))); - } - wenet::Timer timer; - - client.Join(); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/grpc_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/grpc_server_main.cc deleted file mode 100644 index b00f3cbade1ee70dadfb49829e9ca73fd50c2be2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/grpc_server_main.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include - -#include "decoder/params.h" -#include "grpc/grpc_server.h" -#include "utils/log.h" - -DEFINE_int32(port, 10086, "grpc listening port"); -DEFINE_int32(workers, 4, "grpc num workers"); - -using grpc::Server; -using grpc::ServerBuilder; - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::GrpcServer service(feature_config, decode_config, decode_resource); - grpc::EnableDefaultHealthCheckService(true); - grpc::reflection::InitProtoReflectionServerBuilderPlugin(); - ServerBuilder builder; - std::string address("0.0.0.0:" + std::to_string(FLAGS_port)); - builder.AddListeningPort(address, grpc::InsecureServerCredentials()); - builder.RegisterService(&service); - builder.SetSyncServerOption(ServerBuilder::SyncServerOption::NUM_CQS, - FLAGS_workers); - std::unique_ptr server(builder.BuildAndStart()); - LOG(INFO) << "Listening at port " << FLAGS_port; - server->Wait(); - google::ShutdownGoogleLogging(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/http_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/http_client_main.cc deleted file mode 100644 index b59ee3f5f32bf08552416b183802029ac5d5afa5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/http_client_main.cc +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/timer.h" -#include "http/http_client.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of http server"); -DEFINE_int32(port, 10086, "port of http server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - // Convert to short - std::vector data; - data.reserve(num_samples); - for (int j = 0; j < num_samples; j++) { - data.push_back(static_cast(wav_reader.data()[j])); - } - // Send data - wenet::HttpClient client(FLAGS_hostname, FLAGS_port); - client.set_nbest(FLAGS_nbest); - wenet::Timer timer; - VLOG(2) << "Send " << data.size() << " samples"; - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/http_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/http_server_main.cc deleted file mode 100644 index e30cf2bcdf746c2072f023e90f470ccba5467c2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/http_server_main.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/params.h" -#include "utils/log.h" -#include "http/http_server.h" - -DEFINE_int32(port, 10086, "http listening port"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::HttpServer server(FLAGS_port, feature_config, decode_config, - decode_resource); - LOG(INFO) << "Listening at port " << FLAGS_port; - server.Start(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/label_checker_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/label_checker_main.cc deleted file mode 100644 index e36e3d5c29a38a7ebee80606ebd8e69ae8b1eb96..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/label_checker_main.cc +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include -#include -#include -#include - -#include "decoder/params.h" -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/string.h" - -DEFINE_string(text, "", "kaldi style text input file"); -DEFINE_string(wav_scp, "", "kaldi style wav scp"); -DEFINE_double(is_penalty, 1.0, - "insertion/substitution penalty for align insertion"); -DEFINE_double(del_penalty, 1.0, "deletion penalty for align insertion"); -DEFINE_string(result, "", "result output file"); -DEFINE_string(timestamp, "", "timestamp output file"); - -namespace wenet { - -const char* kDeletion = ""; -// Is: Insertion and substitution -const char* kIsStart = ""; -const char* kIsEnd = ""; - -bool MapToLabel(const std::string& text, - std::shared_ptr symbol_table, - std::vector* labels) { - labels->clear(); - // Split label to char sequence - std::vector chars; - SplitUTF8StringToChars(text, &chars); - for (size_t i = 0; i < chars.size(); i++) { - // ▁ is special symbol for white space - std::string label = chars[i] != " " ? chars[i] : "▁"; - int id = symbol_table->Find(label); - if (id != -1) { // fst::kNoSymbol - // LOG(INFO) << label << " " << id; - labels->push_back(id); - } - } - return true; -} - -std::shared_ptr MakeSymbolTableForFst( - std::shared_ptr isymbol_table) { - LOG(INFO) << isymbol_table; - CHECK(isymbol_table != nullptr); - auto osymbol_table = std::make_shared(); - osymbol_table->AddSymbol("", 0); - CHECK_EQ(isymbol_table->Find(""), 0); - osymbol_table->AddSymbol("", 1); - for (int i = 1; i < isymbol_table->NumSymbols(); i++) { - std::string symbol = isymbol_table->Find(i); - osymbol_table->AddSymbol(symbol, i + 1); - } - osymbol_table->AddSymbol(kDeletion, isymbol_table->NumSymbols() + 1); - osymbol_table->AddSymbol(kIsStart, isymbol_table->NumSymbols() + 2); - osymbol_table->AddSymbol(kIsEnd, isymbol_table->NumSymbols() + 3); - return osymbol_table; -} - -void CompileCtcFst(std::shared_ptr symbol_table, - fst::StdVectorFst* ofst) { - ofst->DeleteStates(); - int start = ofst->AddState(); - ofst->SetStart(start); - CHECK_EQ(symbol_table->Find(""), 0); - CHECK_EQ(symbol_table->Find(""), 1); - ofst->AddArc(start, fst::StdArc(1, 0, 0.0, start)); - // Exclude kDeletion and kInsertion - for (int i = 2; i < symbol_table->NumSymbols() - 3; i++) { - int s = ofst->AddState(); - ofst->AddArc(start, fst::StdArc(i, i, 0.0, s)); - ofst->AddArc(s, fst::StdArc(i, 0, 0.0, s)); - ofst->AddArc(s, fst::StdArc(0, 0, 0.0, start)); - } - ofst->SetFinal(start, fst::StdArc::Weight::One()); - fst::ArcSort(ofst, fst::StdOLabelCompare()); -} - -void CompileAlignFst(std::vector labels, - std::shared_ptr symbol_table, - fst::StdVectorFst* ofst) { - ofst->DeleteStates(); - int deletion = symbol_table->Find(kDeletion); - int insertion_start = symbol_table->Find(kIsStart); - int insertion_end = symbol_table->Find(kIsEnd); - - int start = ofst->AddState(); - ofst->SetStart(start); - // Filler State - int filler_start = ofst->AddState(); - int filler_end = ofst->AddState(); - for (int i = 2; i < symbol_table->NumSymbols() - 3; i++) { - ofst->AddArc(filler_start, fst::StdArc(i, i, FLAGS_is_penalty, 
filler_end)); - } - ofst->AddArc(filler_end, fst::StdArc(0, 0, 0.0, filler_start)); - - int prev = start; - // Alignment path and optional filler - for (size_t i = 0; i < labels.size(); i++) { - int cur = ofst->AddState(); - // 1. Insertion or Substitution - ofst->AddArc(prev, fst::StdArc(0, insertion_start, 0.0, filler_start)); - ofst->AddArc(filler_end, fst::StdArc(0, insertion_end, 0.0, prev)); - // 2. Correct - ofst->AddArc(prev, fst::StdArc(labels[i], labels[i], 0.0, cur)); - // 3. Deletion - ofst->AddArc(prev, fst::StdArc(0, deletion, FLAGS_del_penalty, cur)); - - prev = cur; - } - // Optional add endding filler - ofst->AddArc(prev, fst::StdArc(0, insertion_start, 0.0, filler_start)); - ofst->AddArc(filler_end, fst::StdArc(0, insertion_end, 0.0, prev)); - ofst->SetFinal(prev, fst::StdArc::Weight::One()); - fst::ArcSort(ofst, fst::StdILabelCompare()); -} - -} // namespace wenet - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - CHECK(decode_resource->unit_table != nullptr); - - auto wfst_symbol_table = - wenet::MakeSymbolTableForFst(decode_resource->unit_table); - // wfst_symbol_table->WriteText("fst.txt"); - // Reset symbol_table to on-the-fly generated wfst_symbol_table - decode_resource->symbol_table = wfst_symbol_table; - - // Compile ctc FST - fst::StdVectorFst ctc_fst; - wenet::CompileCtcFst(wfst_symbol_table, &ctc_fst); - // ctc_fst.Write("ctc.fst"); - - std::unordered_map wav_table; - std::ifstream wav_is(FLAGS_wav_scp); - std::string line; - while (std::getline(wav_is, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - CHECK_EQ(strs.size(), 2); - wav_table[strs[0]] = strs[1]; - } - - std::ifstream text_is(FLAGS_text); - std::ofstream result_os(FLAGS_result, std::ios::out); - std::ofstream timestamp_out; - if (!FLAGS_timestamp.empty()) { - timestamp_out.open(FLAGS_timestamp, std::ios::out); - } - std::ostream& timestamp_os = - FLAGS_timestamp.empty() ? 
std::cout : timestamp_out; - - while (std::getline(text_is, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - if (strs.size() < 2) continue; - std::string key = strs[0]; - LOG(INFO) << "Processing " << key; - if (wav_table.find(key) != wav_table.end()) { - strs.erase(strs.begin()); - std::string text = wenet::JoinString(" ", strs); - std::vector labels; - wenet::MapToLabel(text, wfst_symbol_table, &labels); - // Prepare FST for alignment decoding - fst::StdVectorFst align_fst; - wenet::CompileAlignFst(labels, wfst_symbol_table, &align_fst); - // align_fst.Write("align.fst"); - auto decoding_fst = std::make_shared(); - fst::Compose(ctc_fst, align_fst, decoding_fst.get()); - // decoding_fst->Write("decoding.fst"); - // Preapre feature pipeline - wenet::WavReader wav_reader; - if (!wav_reader.Open(wav_table[key])) { - LOG(WARNING) << "Error in reading " << wav_table[key]; - continue; - } - int num_samples = wav_reader.num_samples(); - CHECK_EQ(wav_reader.sample_rate(), FLAGS_sample_rate); - auto feature_pipeline = - std::make_shared(*feature_config); - feature_pipeline->AcceptWaveform(wav_reader.data(), num_samples); - feature_pipeline->set_input_finished(); - decode_resource->fst = decoding_fst; - LOG(INFO) << "num frames " << feature_pipeline->num_frames(); - wenet::AsrDecoder decoder(feature_pipeline, decode_resource, - *decode_config); - while (true) { - wenet::DecodeState state = decoder.Decode(); - if (state == wenet::DecodeState::kEndFeats) { - decoder.Rescoring(); - break; - } - } - std::string final_result; - std::string timestamp_str; - if (decoder.DecodedSomething()) { - const wenet::DecodeResult& result = decoder.result()[0]; - final_result = result.sentence; - std::stringstream ss; - for (const auto& w : result.word_pieces) { - ss << " " << w.word << " " << w.start << " " << w.end; - } - timestamp_str = ss.str(); - } - result_os << key << " " << final_result << std::endl; - timestamp_os << key << " " << timestamp_str << std::endl; - LOG(INFO) << key << " " << final_result; - } else { - LOG(WARNING) << "No wav file for " << key; - } - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/websocket_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/websocket_client_main.cc deleted file mode 100644 index 3eaa96069dc5f57673fbb2819bf7d4883e0d5ffa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/websocket_client_main.cc +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
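`label_checker_main.cc` above decodes each utterance against the composition of a CTC topology FST and a per-utterance alignment FST, so insertions, deletions, and substitutions in the reference text show up as penalized detour arcs. As a reading aid, here is a stripped-down variant of `CompileAlignFst` that keeps only the correct-label and deletion arcs; the label ids, the `<del>` symbol id, and the penalty value are hypothetical, and the insertion/filler machinery is omitted.

```cpp
#include <vector>
#include <fst/fstlib.h>

// For each reference label, allow either the correct symbol (weight 0) or a
// skip emitting a <del> symbol (weight = deletion penalty).
void CompileLinearAlignFst(const std::vector<int>& labels, int deletion_label,
                           float del_penalty, fst::StdVectorFst* ofst) {
  ofst->DeleteStates();
  int prev = ofst->AddState();
  ofst->SetStart(prev);
  for (int label : labels) {
    int cur = ofst->AddState();
    ofst->AddArc(prev, fst::StdArc(label, label, 0.0, cur));                 // correct
    ofst->AddArc(prev, fst::StdArc(0, deletion_label, del_penalty, cur));    // deletion
    prev = cur;
  }
  ofst->SetFinal(prev, fst::StdArc::Weight::One());
  fst::ArcSort(ofst, fst::StdILabelCompare());
}
```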
- -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/timer.h" -#include "websocket/websocket_client.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of websocket server"); -DEFINE_int32(port, 10086, "port of websocket server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - wenet::WebSocketClient client(FLAGS_hostname, FLAGS_port); - client.set_nbest(FLAGS_nbest); - client.set_continuous_decoding(FLAGS_continuous_decoding); - client.SendStartSignal(); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - // Send data every 0.5 second - const float interval = 0.5; - const int sample_interval = interval * sample_rate; - for (int start = 0; start < num_samples; start += sample_interval) { - if (client.done()) { - break; - } - int end = std::min(start + sample_interval, num_samples); - // Convert to short - std::vector data; - data.reserve(end - start); - for (int j = start; j < end; j++) { - data.push_back(static_cast(wav_reader.data()[j])); - } - // TODO(Binbin Zhang): Network order? - // Send PCM data - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Send " << data.size() << " samples"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(interval * 1000))); - } - wenet::Timer timer; - client.SendEndSignal(); - client.Join(); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/websocket_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/websocket_server_main.cc deleted file mode 100644 index 796d9d2e6d151f7c08b43d66b7245c58ee086cc2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/bin/websocket_server_main.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "decoder/params.h" -#include "utils/log.h" -#include "websocket/websocket_server.h" - -DEFINE_int32(port, 10086, "websocket listening port"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::WebSocketServer server(FLAGS_port, feature_config, decode_config, - decode_resource); - LOG(INFO) << "Listening at port " << FLAGS_port; - server.Start(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/boost.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/boost.cmake deleted file mode 100644 index 8684c0ec43960da213da923dc57416f04301ea2b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/boost.cmake +++ /dev/null @@ -1,10 +0,0 @@ -FetchContent_Declare(boost - URL https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.gz - URL_HASH SHA256=aeb26f80e80945e82ee93e5939baebdca47b9dee80a07d3144be1e1a6a66dd6a -) -FetchContent_MakeAvailable(boost) -include_directories(${boost_SOURCE_DIR}) - -if(MSVC) - add_definitions(-DBOOST_ALL_DYN_LINK -DBOOST_ALL_NO_LIB) -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/bpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/bpu.cmake deleted file mode 100644 index 350d76c19d6f656fb130de09877d649cf49972a4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/bpu.cmake +++ /dev/null @@ -1,30 +0,0 @@ -if(BPU) - if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(EASY_DNN_URL "https://github.com/xingchensong/toolchain_pkg/releases/download/easy_dnn/easy_dnn.0.4.11.tar.gz") - set(URL_HASH "SHA256=a1a6f77d1baae7181d75ec5d37a2ee529ac4e1c4400babd6ceb1c007392a4904") - else() - message(FATAL_ERROR "Unsupported CMake System Processor '${CMAKE_SYSTEM_PROCESSOR}' (expected 'aarch64')") - endif() - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Linux')") - endif() - - FetchContent_Declare(easy_dnn - URL ${EASY_DNN_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(easy_dnn) - include_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/include) - link_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/lib) - link_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/lib) - link_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/lib) - - add_definitions(-DUSE_BPU) - # NOTE(xcsong): Reasons for adding flag `-fuse-ld=gold`: - # https://stackoverflow.com/questions/59915966/unknown-gcc-linker-error-but-builds-sucessfully/59916438#59916438 - # https://github.com/tensorflow/tensorflow/issues/47849 - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold") -endif() diff 
--git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/gflags.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/gflags.cmake deleted file mode 100644 index 53ae5763b5a8c860b7e64d35b380eee5429f539d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/gflags.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(gflags - URL https://github.com/gflags/gflags/archive/v2.2.2.zip - URL_HASH SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5 -) -FetchContent_MakeAvailable(gflags) -include_directories(${gflags_BINARY_DIR}/include) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/glog.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/glog.cmake deleted file mode 100644 index 447ab4132f669ee2c3a52c37959dd684a39ff21b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/glog.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(glog - URL https://github.com/google/glog/archive/v0.4.0.zip - URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc -) -FetchContent_MakeAvailable(glog) -include_directories(${glog_SOURCE_DIR}/src ${glog_BINARY_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/grpc.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/grpc.cmake deleted file mode 100644 index 644093a4bf8191f3a45b0df0a72c000981c48f58..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/grpc.cmake +++ /dev/null @@ -1,9 +0,0 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/grpc) -# third_party: grpc -# On how to build grpc, you may refer to https://github.com/grpc/grpc -# We recommend manually recursive clone the repo to avoid internet connection problem -FetchContent_Declare(gRPC - GIT_REPOSITORY https://github.com/grpc/grpc - GIT_TAG v1.37.1 -) -FetchContent_MakeAvailable(gRPC) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/gtest.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/gtest.cmake deleted file mode 100644 index 30dc7c1a31d8b83991841a4dc33f61ed078b532a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/gtest.cmake +++ /dev/null @@ -1,8 +0,0 @@ -FetchContent_Declare(googletest - URL https://github.com/google/googletest/archive/release-1.11.0.zip - URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a -) -if(MSVC) - set(gtest_force_shared_crt ON CACHE BOOL "Always use msvcrt.dll" FORCE) -endif() -FetchContent_MakeAvailable(googletest) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/libtorch.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/libtorch.cmake deleted file mode 100644 index 3cd9245b2da52f8be206d27164de5f411bff171b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/libtorch.cmake +++ /dev/null @@ -1,79 +0,0 @@ -if(TORCH) - add_definitions(-DUSE_TORCH) - if(NOT ANDROID) - if(GPU) - if (NOT 
${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - message(FATAL_ERROR "GPU is supported only Linux, you can use CPU version") - else() - add_definitions(-DUSE_GPU) - endif() - endif() - - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - if(${CMAKE_BUILD_TYPE} MATCHES "Release") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bece54d36377990257e9d028c687c5b6759c5cfec0a0153da83cf6f0f71f648f") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-debug-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=3cc7ba3c3865d86f03d78c2f0878fdbed8b764359476397a5c95cf3bba0d665a") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CXX11_ABI) - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=d52f63577a07adb0bfd6d77c90f7da21896e94f71eb7dcd55ed7835ccb3b2b59") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.12.0%2Bcu113.zip") - set(URL_HASH "SHA256=80f089939de20e68e3fcad4dfa72a26c8bf91b5e77b11042f671f39ebac35865") - endif() - else() - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bee1b7be308792aa60fc95a4f5274d9658cb7248002d0e333d49eb81ec88430c") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.11.0%2Bcu113.zip") - set(URL_HASH "SHA256=90159ecce3ff451f3ef3f657493b6c7c96759c3b74bbd70c1695f2ea2f81e1ad") - endif() - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-macos-1.13.0.zip") - set(URL_HASH "SHA256=a8f80050b95489b4e002547910410c2c230e9f590ffab2482e19e809afe4f7aa") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") - add_definitions(-DIOS) - else() - message(FATAL_ERROR "Unsupported System '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux', 'Darwin' or 'iOS')") - endif() - - # iOS use LibTorch from pod install - if(NOT IOS) - FetchContent_Declare(libtorch - URL ${LIBTORCH_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(libtorch) - find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS} -DC10_USE_GLOG") - endif() - - if(MSVC) - file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") - file(COPY ${TORCH_DLLS} DESTINATION ${CMAKE_BINARY_DIR}) - endif() - else() - # Change version in runtime/android/app/build.gradle. 
- file(GLOB PYTORCH_INCLUDE_DIRS "${build_DIR}/pytorch_android*.aar/headers") - file(GLOB PYTORCH_LINK_DIRS "${build_DIR}/pytorch_android*.aar/jni/${ANDROID_ABI}") - find_library(PYTORCH_LIBRARY pytorch_jni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - find_library(FBJNI_LIBRARY fbjni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - include_directories( - ${PYTORCH_INCLUDE_DIRS} - ${PYTORCH_INCLUDE_DIRS}/torch/csrc/api/include - ) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/onnx.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/onnx.cmake deleted file mode 100644 index bd55402cb2a6024620fa6ff8b5c413207041adfa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/onnx.cmake +++ /dev/null @@ -1,35 +0,0 @@ -if(ONNX) - set(ONNX_VERSION "1.12.0") - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-win-x64-${ONNX_VERSION}.zip") - set(URL_HASH "SHA256=8b5d61204989350b7904ac277f5fbccd3e6736ddbb6ec001e412723d71c9c176") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-aarch64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5820d9f343df73c63b6b2b174a1ff62575032e171c9564bcf92060f46827d0ac") - else() - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-x64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5d503ce8540358b59be26c675e42081be14a3e833a5301926f555451046929c5") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-x86_64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=09b17f712f8c6f19bb63da35d508815b443cbb473e16c6192abfaa297c02f600") - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux' or 'Darwin')") - endif() - - FetchContent_Declare(onnxruntime - URL ${ONNX_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(onnxruntime) - include_directories(${onnxruntime_SOURCE_DIR}/include) - link_directories(${onnxruntime_SOURCE_DIR}/lib) - - if(MSVC) - file(GLOB ONNX_DLLS "${onnxruntime_SOURCE_DIR}/lib/*.dll") - file(COPY ${ONNX_DLLS} DESTINATION ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}) - endif() - - add_definitions(-DUSE_ONNX) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/openfst.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/openfst.cmake deleted file mode 100644 index 490a3da6b571ec228114167fb9c0d9e9b4043bd2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/openfst.cmake +++ /dev/null @@ -1,45 +0,0 @@ -if(NOT ANDROID) - include(gflags) - # We can't build glog with gflags, unless gflags is pre-installed. - # If build glog with pre-installed gflags, there will be conflict. 
- set(WITH_GFLAGS OFF CACHE BOOL "whether build glog with gflags" FORCE) - include(glog) - - if(NOT GRAPH_TOOLS) - set(HAVE_BIN OFF CACHE BOOL "Build the fst binaries" FORCE) - set(HAVE_SCRIPT OFF CACHE BOOL "Build the fstscript" FORCE) - endif() - set(HAVE_COMPACT OFF CACHE BOOL "Build compact" FORCE) - set(HAVE_CONST OFF CACHE BOOL "Build const" FORCE) - set(HAVE_GRM OFF CACHE BOOL "Build grm" FORCE) - set(HAVE_FAR OFF CACHE BOOL "Build far" FORCE) - set(HAVE_PDT OFF CACHE BOOL "Build pdt" FORCE) - set(HAVE_MPDT OFF CACHE BOOL "Build mpdt" FORCE) - set(HAVE_LINEAR OFF CACHE BOOL "Build linear" FORCE) - set(HAVE_LOOKAHEAD OFF CACHE BOOL "Build lookahead" FORCE) - set(HAVE_NGRAM OFF CACHE BOOL "Build ngram" FORCE) - set(HAVE_SPECIAL OFF CACHE BOOL "Build special" FORCE) - - if(MSVC) - add_compile_options(/W0 /wd4244 /wd4267) - endif() - - # "OpenFST port for Windows" builds openfst with cmake for multiple platforms. - # Openfst is compiled with glog/gflags to avoid log and flag conflicts with log and flags in wenet/libtorch. - # To build openfst with gflags and glog, we comment out some vars of {flags, log}.h and flags.cc. - set(openfst_SOURCE_DIR ${fc_base}/openfst-src CACHE PATH "OpenFST source directory") - FetchContent_Declare(openfst - URL https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz - URL_HASH SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e - PATCH_COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/patch/openfst ${openfst_SOURCE_DIR} - ) - FetchContent_MakeAvailable(openfst) - add_dependencies(fst gflags glog) - target_link_libraries(fst PUBLIC gflags_nothreads_static glog) - include_directories(${openfst_SOURCE_DIR}/src/include) -else() - set(openfst_BINARY_DIR ${build_DIR}/wenet-openfst-android-1.0.2.aar/jni) - include_directories(${openfst_BINARY_DIR}/include) - link_directories(${openfst_BINARY_DIR}/${ANDROID_ABI}) - link_libraries(log gflags_nothreads glog fst) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/pybind11.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/pybind11.cmake deleted file mode 100644 index 6bdae202c1c4d94228e5f92dab051c118dba7d3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/pybind11.cmake +++ /dev/null @@ -1,7 +0,0 @@ -FetchContent_Declare(pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.zip - URL_HASH SHA256=d1646e6f70d8a3acb2ddd85ce1ed543b5dd579c68b8fb8e9638282af20edead8 -) -FetchContent_MakeAvailable(pybind11) - -add_subdirectory(${pybind11_SOURCE_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/xpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/xpu.cmake deleted file mode 100644 index 38418671b0237550cd01d4d95e8743067e113e56..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/cmake/xpu.cmake +++ /dev/null @@ -1,37 +0,0 @@ -if(NOT WIN32) - string(ASCII 27 Esc) - set(ColourReset "${Esc}[m") - set(ColourBold "${Esc}[1m") - set(Red "${Esc}[31m") - set(Green "${Esc}[32m") - set(Yellow "${Esc}[33m") - set(Blue "${Esc}[34m") - set(Magenta "${Esc}[35m") - set(Cyan "${Esc}[36m") - set(White "${Esc}[37m") - set(BoldRed "${Esc}[1;31m") - set(BoldGreen "${Esc}[1;32m") - set(BoldYellow "${Esc}[1;33m") - set(BoldBlue "${Esc}[1;34m") - 
set(BoldMagenta "${Esc}[1;35m") - set(BoldCyan "${Esc}[1;36m") - set(BoldWhite "${Esc}[1;37m") -endif() - -if(XPU) - set(RUNTIME_KUNLUN_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - message(STATUS "RUNTIME_KUNLUN_PATH is ${RUNTIME_KUNLUN_PATH} .\n") - set(KUNLUN_XPU_PATH ${RUNTIME_KUNLUN_PATH}/xpu) - if(NOT DEFINED ENV{XPU_API_PATH}) - message(FATAL_ERROR "${BoldRed}NO ENV{XPU_API_PATH} in your env. Please set XPU_API_PATH.${ColourReset}\n") - else() - set(XPU_API_PATH $ENV{XPU_API_PATH}) - message("set XPU_API_PATH from env_var. Val is $ENV{XPU_API_PATH}.") - endif() - - include_directories(${RUNTIME_KUNLUN_PATH} ${KUNLUN_XPU_PATH}/ - ${XPU_API_PATH}/output/include ${XPU_API_PATH}/../runtime/include) - link_directories(${XPU_API_PATH}/output/so/ ${XPU_API_PATH}/../runtime/output/so/) - - add_definitions(-DUSE_XPU) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/CMakeLists.txt deleted file mode 100644 index fe03efb288eb1c7ae3d05e896e95855e5865472f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -set(decoder_srcs - asr_decoder.cc - asr_model.cc - context_graph.cc - ctc_prefix_beam_search.cc - ctc_wfst_beam_search.cc - ctc_endpoint.cc -) - -if(NOT TORCH AND NOT ONNX AND NOT XPU AND NOT IOS AND NOT BPU) - message(FATAL_ERROR "Please build with TORCH or ONNX or XPU or IOS or BPU!!!") -endif() -if(TORCH OR IOS) - list(APPEND decoder_srcs torch_asr_model.cc) -endif() -if(ONNX) - list(APPEND decoder_srcs onnx_asr_model.cc) -endif() - -add_library(decoder STATIC ${decoder_srcs}) -target_link_libraries(decoder PUBLIC kaldi-decoder frontend - post_processor utils) - -if(ANDROID) - target_link_libraries(decoder PUBLIC ${PYTORCH_LIBRARY} ${FBJNI_LIBRARY}) -else() - if(TORCH) - target_link_libraries(decoder PUBLIC ${TORCH_LIBRARIES}) - endif() - if(ONNX) - target_link_libraries(decoder PUBLIC onnxruntime) - endif() - if(BPU) - target_link_libraries(decoder PUBLIC bpu_asr_model) - endif() - if(XPU) - target_link_libraries(decoder PUBLIC xpu_conformer) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_decoder.cc deleted file mode 100644 index 34de7550ea287b37d2cb707e148f5d6853b3d804..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_decoder.cc +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
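The deleted `asr_decoder.cc` below implements the chunk-by-chunk decoding loop that the runtime binaries in this diff share. As a reading aid, here is a minimal, hedged sketch of how the pieces fit together; it mirrors the pattern in the decoder mains above and assumes the `decoder/params.h` flag helpers (`InitDecodeOptionsFromFlags`, `InitFeaturePipelineConfigFromFlags`, `InitDecodeResourceFromFlags`) shown elsewhere in this diff. It is not code from the repository.

```cpp
#include <memory>
#include <string>

#include "decoder/asr_decoder.h"
#include "decoder/params.h"
#include "frontend/wav.h"
#include "utils/log.h"

// Sketch only: wire FeaturePipeline -> AsrDecoder for one finished wav file,
// following the API declared in asr_decoder.h below. Flags and error handling
// are omitted.
int DecodeOneWav(const std::string& wav_path) {
  auto decode_config = wenet::InitDecodeOptionsFromFlags();
  auto feature_config = wenet::InitFeaturePipelineConfigFromFlags();
  auto decode_resource = wenet::InitDecodeResourceFromFlags();

  wenet::WavReader wav_reader(wav_path);
  auto feature_pipeline =
      std::make_shared<wenet::FeaturePipeline>(*feature_config);
  feature_pipeline->AcceptWaveform(wav_reader.data(), wav_reader.num_samples());
  feature_pipeline->set_input_finished();

  wenet::AsrDecoder decoder(feature_pipeline, decode_resource, *decode_config);
  while (decoder.Decode() != wenet::DecodeState::kEndFeats) {
    // Keep consuming chunks until all features are decoded.
  }
  decoder.Rescoring();  // attention rescoring of the n-best CTC hypotheses
  if (decoder.DecodedSomething()) {
    LOG(INFO) << decoder.result()[0].sentence;
  }
  return 0;
}
```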
- - -#include "decoder/asr_decoder.h" - -#include - -#include -#include -#include - -#include "utils/timer.h" - -namespace wenet { - -AsrDecoder::AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts) - : feature_pipeline_(std::move(feature_pipeline)), - // Make a copy of the model ASR model since we will change the inner - // status of the model - model_(resource->model->Copy()), - post_processor_(resource->post_processor), - symbol_table_(resource->symbol_table), - fst_(resource->fst), - unit_table_(resource->unit_table), - opts_(opts), - ctc_endpointer_(new CtcEndpoint(opts.ctc_endpoint_config)) { - if (opts_.reverse_weight > 0) { - // Check if model has a right to left decoder - CHECK(model_->is_bidirectional_decoder()); - } - if (nullptr == fst_) { - searcher_.reset(new CtcPrefixBeamSearch(opts.ctc_prefix_search_opts, - resource->context_graph)); - } else { - searcher_.reset(new CtcWfstBeamSearch(*fst_, opts.ctc_wfst_search_opts, - resource->context_graph)); - } - ctc_endpointer_->frame_shift_in_ms(frame_shift_in_ms()); -} - -void AsrDecoder::Reset() { - start_ = false; - result_.clear(); - num_frames_ = 0; - global_frame_offset_ = 0; - model_->Reset(); - searcher_->Reset(); - feature_pipeline_->Reset(); - ctc_endpointer_->Reset(); -} - -void AsrDecoder::ResetContinuousDecoding() { - global_frame_offset_ = num_frames_; - start_ = false; - result_.clear(); - model_->Reset(); - searcher_->Reset(); - ctc_endpointer_->Reset(); -} - -DecodeState AsrDecoder::Decode(bool block) { - return this->AdvanceDecoding(block); -} - -void AsrDecoder::Rescoring() { - // Do attention rescoring - Timer timer; - AttentionRescoring(); - VLOG(2) << "Rescoring cost latency: " << timer.Elapsed() << "ms."; -} - -DecodeState AsrDecoder::AdvanceDecoding(bool block) { - DecodeState state = DecodeState::kEndBatch; - model_->set_chunk_size(opts_.chunk_size); - model_->set_num_left_chunks(opts_.num_left_chunks); - int num_required_frames = model_->num_frames_for_chunk(start_); - std::vector> chunk_feats; - // Return immediately if we do not want to block - if (!block && !feature_pipeline_->input_finished() && - feature_pipeline_->NumQueuedFrames() < num_required_frames) { - return DecodeState::kWaitFeats; - } - // If not okay, that means we reach the end of the input - if (!feature_pipeline_->Read(num_required_frames, &chunk_feats)) { - state = DecodeState::kEndFeats; - } - - num_frames_ += chunk_feats.size(); - VLOG(2) << "Required " << num_required_frames << " get " - << chunk_feats.size(); - Timer timer; - std::vector> ctc_log_probs; - model_->ForwardEncoder(chunk_feats, &ctc_log_probs); - int forward_time = timer.Elapsed(); - if (opts_.ctc_wfst_search_opts.blank_scale != 1.0) { - for (int i = 0; i < ctc_log_probs.size(); i++) { - ctc_log_probs[i][0] = ctc_log_probs[i][0] - + std::log(opts_.ctc_wfst_search_opts.blank_scale); - } - } - timer.Reset(); - searcher_->Search(ctc_log_probs); - int search_time = timer.Elapsed(); - VLOG(3) << "forward takes " << forward_time << " ms, search takes " - << search_time << " ms"; - UpdateResult(); - - if (state != DecodeState::kEndFeats) { - if (ctc_endpointer_->IsEndpoint(ctc_log_probs, DecodedSomething())) { - VLOG(1) << "Endpoint is detected at " << num_frames_; - state = DecodeState::kEndpoint; - } - } - - start_ = true; - return state; -} - -void AsrDecoder::UpdateResult(bool finish) { - const auto& hypotheses = searcher_->Outputs(); - const auto& inputs = searcher_->Inputs(); - const auto& likelihood = 
searcher_->Likelihood(); - const auto& times = searcher_->Times(); - result_.clear(); - - CHECK_EQ(hypotheses.size(), likelihood.size()); - for (size_t i = 0; i < hypotheses.size(); i++) { - const std::vector& hypothesis = hypotheses[i]; - - DecodeResult path; - path.score = likelihood[i]; - int offset = global_frame_offset_ * feature_frame_shift_in_ms(); - for (size_t j = 0; j < hypothesis.size(); j++) { - std::string word = symbol_table_->Find(hypothesis[j]); - // A detailed explanation of this if-else branch can be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - if (searcher_->Type() == kWfstBeamSearch) { - path.sentence += (' ' + word); - } else { - path.sentence += (word); - } - } - - // TimeStamp is only supported in final result - // TimeStamp of the output of CtcWfstBeamSearch may be inaccurate due to - // various FST operations when building the decoding graph. So here we use - // time stamp of the input(e2e model unit), which is more accurate, and it - // requires the symbol table of the e2e model used in training. - if (unit_table_ != nullptr && finish) { - const std::vector& input = inputs[i]; - const std::vector& time_stamp = times[i]; - CHECK_EQ(input.size(), time_stamp.size()); - for (size_t j = 0; j < input.size(); j++) { - std::string word = unit_table_->Find(input[j]); - int start = time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ > 0 - ? time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ - : 0; - if (j > 0) { - start = (time_stamp[j] - time_stamp[j - 1]) * frame_shift_in_ms() < - time_stamp_gap_ - ? (time_stamp[j - 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : start; - } - int end = time_stamp[j] * frame_shift_in_ms(); - if (j < input.size() - 1) { - end = (time_stamp[j + 1] - time_stamp[j]) * frame_shift_in_ms() < - time_stamp_gap_ - ? 
(time_stamp[j + 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : end; - } - WordPiece word_piece(word, offset + start, offset + end); - path.word_pieces.emplace_back(word_piece); - } - } - - if (post_processor_ != nullptr) { - path.sentence = post_processor_->Process(path.sentence, finish); - } - result_.emplace_back(path); - } - - if (DecodedSomething()) { - VLOG(1) << "Partial CTC result " << result_[0].sentence; - } -} - -void AsrDecoder::AttentionRescoring() { - searcher_->FinalizeSearch(); - UpdateResult(true); - // No need to do rescoring - if (0.0 == opts_.rescoring_weight) { - return; - } - // Inputs() returns N-best input ids, which is the basic unit for rescoring - // In CtcPrefixBeamSearch, inputs are the same to outputs - const auto& hypotheses = searcher_->Inputs(); - int num_hyps = hypotheses.size(); - if (num_hyps <= 0) { - return; - } - - std::vector rescoring_score; - model_->AttentionRescoring(hypotheses, opts_.reverse_weight, - &rescoring_score); - - // Combine ctc score and rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - result_[i].score = opts_.rescoring_weight * rescoring_score[i] + - opts_.ctc_weight * result_[i].score; - } - std::sort(result_.begin(), result_.end(), DecodeResult::CompareFunc); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_decoder.h deleted file mode 100644 index df71f5b7bad7b2ffdc69bbd7ab11f576bed464d2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_decoder.h +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_ASR_DECODER_H_ -#define DECODER_ASR_DECODER_H_ - -#include -#include -#include -#include - -#include "fst/fstlib.h" -#include "fst/symbol-table.h" - -#include "decoder/asr_model.h" -#include "decoder/context_graph.h" -#include "decoder/ctc_endpoint.h" -#include "decoder/ctc_prefix_beam_search.h" -#include "decoder/ctc_wfst_beam_search.h" -#include "decoder/search_interface.h" -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/utils.h" - -namespace wenet { - -struct DecodeOptions { - // chunk_size is the frame number of one chunk after subsampling. - // e.g. if subsample rate is 4 and chunk_size = 16, the frames in - // one chunk are 64 = 16*4 - int chunk_size = 16; - int num_left_chunks = -1; - - // final_score = rescoring_weight * rescoring_score + ctc_weight * ctc_score; - // rescoring_score = left_to_right_score * (1 - reverse_weight) + - // right_to_left_score * reverse_weight - // Please note the concept of ctc_scores in the following two search - // methods are different. 
- // For CtcPrefixBeamSearch, it's a sum(prefix) score + context score - // For CtcWfstBeamSearch, it's a max(viterbi) path score + context score - // So we should carefully set ctc_weight according to the search methods. - float ctc_weight = 0.5; - float rescoring_weight = 1.0; - float reverse_weight = 0.0; - CtcEndpointConfig ctc_endpoint_config; - CtcPrefixBeamSearchOptions ctc_prefix_search_opts; - CtcWfstBeamSearchOptions ctc_wfst_search_opts; -}; - -struct WordPiece { - std::string word; - int start = -1; - int end = -1; - - WordPiece(std::string word, int start, int end) - : word(std::move(word)), start(start), end(end) {} -}; - -struct DecodeResult { - float score = -kFloatMax; - std::string sentence; - std::vector word_pieces; - - static bool CompareFunc(const DecodeResult& a, const DecodeResult& b) { - return a.score > b.score; - } -}; - -enum DecodeState { - kEndBatch = 0x00, // End of current decoding batch, normal case - kEndpoint = 0x01, // Endpoint is detected - kEndFeats = 0x02, // All feature is decoded - kWaitFeats = 0x03 // Feat is not enough for one chunk inference, wait -}; - -// DecodeResource is thread safe, which can be shared for multiple -// decoding threads -struct DecodeResource { - std::shared_ptr model = nullptr; - std::shared_ptr symbol_table = nullptr; - std::shared_ptr> fst = nullptr; - std::shared_ptr unit_table = nullptr; - std::shared_ptr context_graph = nullptr; - std::shared_ptr post_processor = nullptr; -}; - -// Torch ASR decoder -class AsrDecoder { - public: - AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts); - // @param block: if true, block when feature is not enough for one chunk - // inference. Otherwise, return kWaitFeats. - DecodeState Decode(bool block = true); - void Rescoring(); - void Reset(); - void ResetContinuousDecoding(); - bool DecodedSomething() const { - return !result_.empty() && !result_[0].sentence.empty(); - } - - // This method is used for time benchmark - int num_frames_in_current_chunk() const { - return num_frames_in_current_chunk_; - } - int frame_shift_in_ms() const { - return model_->subsampling_rate() * - feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - int feature_frame_shift_in_ms() const { - return feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - const std::vector& result() const { return result_; } - - private: - DecodeState AdvanceDecoding(bool block = true); - void AttentionRescoring(); - - void UpdateResult(bool finish = false); - - std::shared_ptr feature_pipeline_; - std::shared_ptr model_; - std::shared_ptr post_processor_; - - std::shared_ptr> fst_ = nullptr; - // output symbol table - std::shared_ptr symbol_table_; - // e2e unit symbol table - std::shared_ptr unit_table_ = nullptr; - const DecodeOptions& opts_; - // cache feature - bool start_ = false; - // For continuous decoding - int num_frames_ = 0; - int global_frame_offset_ = 0; - const int time_stamp_gap_ = 100; // timestamp gap between words in a sentence - - std::unique_ptr searcher_; - std::unique_ptr ctc_endpointer_; - - int num_frames_in_current_chunk_ = 0; - std::vector result_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(AsrDecoder); -}; - -} // namespace wenet - -#endif // DECODER_ASR_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_model.cc 
deleted file mode 100644 index 8c7b0fb1195cf07bac6c3ff1bb8cb0e187e977da..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_model.cc +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#include "decoder/asr_model.h" - -#include -#include - -namespace wenet { - -int AsrModel::num_frames_for_chunk(bool start) const { - int num_required_frames = 0; - if (chunk_size_ > 0) { - if (!start) { // First batch - int context = right_context_ + 1; // Add current frame - num_required_frames = (chunk_size_ - 1) * subsampling_rate_ + context; - } else { - num_required_frames = chunk_size_ * subsampling_rate_; - } - } else { - num_required_frames = std::numeric_limits::max(); - } - return num_required_frames; -} - -void AsrModel::CacheFeature( - const std::vector>& chunk_feats) { - // Cache feature for next chunk - const int cached_feature_size = 1 + right_context_ - subsampling_rate_; - if (chunk_feats.size() >= cached_feature_size) { - // TODO(Binbin Zhang): Only deal the case when - // chunk_feats.size() > cached_feature_size here, and it's consistent - // with our current model, refine it later if we have new model or - // new requirements - cached_feature_.resize(cached_feature_size); - for (int i = 0; i < cached_feature_size; ++i) { - cached_feature_[i] = - chunk_feats[chunk_feats.size() - cached_feature_size + i]; - } - } -} - -void AsrModel::ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) { - ctc_prob->clear(); - int num_frames = cached_feature_.size() + chunk_feats.size(); - if (num_frames >= right_context_ + 1) { - this->ForwardEncoderFunc(chunk_feats, ctc_prob); - this->CacheFeature(chunk_feats); - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_model.h deleted file mode 100644 index d100dd818551014fa4769c1766bc3b1b626e8453..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/asr_model.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#ifndef DECODER_ASR_MODEL_H_ -#define DECODER_ASR_MODEL_H_ - -#include -#include -#include -#include - -#include "utils/timer.h" -#include "utils/utils.h" - -namespace wenet { - -class AsrModel { - public: - virtual int right_context() const { return right_context_; } - virtual int subsampling_rate() const { return subsampling_rate_; } - virtual int sos() const { return sos_; } - virtual int eos() const { return eos_; } - virtual bool is_bidirectional_decoder() const { - return is_bidirectional_decoder_; - } - virtual int offset() const { return offset_; } - - // If chunk_size > 0, streaming case. 
Otherwise, none streaming case - virtual void set_chunk_size(int chunk_size) { chunk_size_ = chunk_size; } - virtual void set_num_left_chunks(int num_left_chunks) { - num_left_chunks_ = num_left_chunks; - } - // start: if it is the start chunk of one sentence - virtual int num_frames_for_chunk(bool start) const; - - virtual void Reset() = 0; - - virtual void ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob); - - virtual void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) = 0; - - virtual std::shared_ptr Copy() const = 0; - - protected: - virtual void ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) = 0; - virtual void CacheFeature(const std::vector>& chunk_feats); - - int right_context_ = 1; - int subsampling_rate_ = 1; - int sos_ = 0; - int eos_ = 0; - bool is_bidirectional_decoder_ = false; - int chunk_size_ = 16; - int num_left_chunks_ = -1; // -1 means all left chunks - int offset_ = 0; - - std::vector> cached_feature_; -}; - -} // namespace wenet - -#endif // DECODER_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/context_graph.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/context_graph.cc deleted file mode 100644 index adc59c506de2afa7087815887295e4d8735d2a35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/context_graph.cc +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/context_graph.h" - -#include - -#include "fst/determinize.h" - -#include "utils/string.h" -#include "utils/utils.h" - -namespace wenet { - -ContextGraph::ContextGraph(ContextConfig config) : config_(config) {} - -void ContextGraph::BuildContextGraph( - const std::vector& query_contexts, - const std::shared_ptr& symbol_table) { - CHECK(symbol_table != nullptr) << "Symbols table should not be nullptr!"; - start_tag_id_ = symbol_table->AddSymbol(""); - end_tag_id_ = symbol_table->AddSymbol(""); - symbol_table_ = symbol_table; - if (query_contexts.empty()) { - if (graph_ != nullptr) graph_.reset(); - return; - } - - std::unique_ptr ofst(new fst::StdVectorFst()); - // State 0 is the start state and the final state. - int start_state = ofst->AddState(); - ofst->SetStart(start_state); - ofst->SetFinal(start_state, fst::StdArc::Weight::One()); - - LOG(INFO) << "Contexts count size: " << query_contexts.size(); - int count = 0; - for (const auto& context : query_contexts) { - if (context.size() > config_.max_context_length) { - LOG(INFO) << "Skip long context: " << context; - continue; - } - if (++count > config_.max_contexts) break; - - std::vector words; - // Split context to words by symbol table, and build the context graph. 
- bool no_oov = SplitUTF8StringToWords(Trim(context), symbol_table, &words); - if (!no_oov) { - LOG(WARNING) << "Ignore unknown word found during compilation."; - continue; - } - - int prev_state = start_state; - int next_state = start_state; - float escape_score = 0; - for (size_t i = 0; i < words.size(); ++i) { - int word_id = symbol_table_->Find(words[i]); - float score = (i * config_.incremental_context_score - + config_.context_score) * UTF8StringLength(words[i]); - next_state = (i < words.size() - 1) ? ofst->AddState() : start_state; - ofst->AddArc(prev_state, - fst::StdArc(word_id, word_id, score, next_state)); - // Add escape arc to clean the previous context score. - if (i > 0) { - // ilabel and olabel of the escape arc is 0 (). - ofst->AddArc(prev_state, fst::StdArc(0, 0, -escape_score, start_state)); - } - prev_state = next_state; - escape_score += score; - } - } - std::unique_ptr det_fst(new fst::StdVectorFst()); - fst::Determinize(*ofst, det_fst.get()); - graph_ = std::move(det_fst); -} - -int ContextGraph::GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary) { - int next_state = 0; - for (fst::ArcIterator aiter(*graph_, cur_state); !aiter.Done(); - aiter.Next()) { - const fst::StdArc& arc = aiter.Value(); - if (arc.ilabel == 0) { - // escape score, will be overwritten when ilabel equals to word id. - *score = arc.weight.Value(); - } else if (arc.ilabel == word_id) { - next_state = arc.nextstate; - *score = arc.weight.Value(); - if (cur_state == 0) { - *is_start_boundary = true; - } - if (graph_->Final(arc.nextstate) == fst::StdArc::Weight::One()) { - *is_end_boundary = true; - } - break; - } - } - return next_state; -} - -bool ContextGraph::SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - bool no_oov = true; - for (size_t start = 0; start < chars.size();) { - for (size_t end = chars.size(); end > start; --end) { - std::string word; - for (size_t i = start; i < end; i++) { - word += chars[i]; - } - // Skip space. - if (word == " ") { - start = end; - continue; - } - // Add '▁' at the beginning of English word. - if (IsAlpha(word)) { - word = kSpaceSymbol + word; - } - - if (symbol_table->Find(word) != -1) { - words->emplace_back(word); - start = end; - continue; - } - if (end == start + 1) { - ++start; - no_oov = false; - LOG(WARNING) << word << " is oov."; - } - } - } - return no_oov; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/context_graph.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/context_graph.h deleted file mode 100644 index 41b59206987cfe22d421f40506057830b6311f8e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/context_graph.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CONTEXT_GRAPH_H_ -#define DECODER_CONTEXT_GRAPH_H_ - -#include -#include -#include - -#include "fst/compose.h" -#include "fst/fst.h" -#include "fst/vector-fst.h" - -namespace wenet { - -using StateId = fst::StdArc::StateId; - -struct ContextConfig { - int max_contexts = 5000; - int max_context_length = 100; - float context_score = 3.0; - float incremental_context_score = 0.0; -}; - -class ContextGraph { - public: - explicit ContextGraph(ContextConfig config); - void BuildContextGraph(const std::vector& query_context, - const std::shared_ptr& symbol_table); - int GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary); - - int start_tag_id() { return start_tag_id_; } - int end_tag_id() { return end_tag_id_; } - - private: - bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - - int start_tag_id_ = -1; - int end_tag_id_ = -1; - ContextConfig config_; - std::shared_ptr symbol_table_ = nullptr; - std::unique_ptr graph_ = nullptr; - DISALLOW_COPY_AND_ASSIGN(ContextGraph); -}; - -} // namespace wenet - -#endif // DECODER_CONTEXT_GRAPH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_endpoint.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_endpoint.cc deleted file mode 100644 index 4a64dd048f32401ab0dca468836cfac8be943d26..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_endpoint.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_endpoint.h" - -#include - -#include -#include - -#include "utils/log.h" - -namespace wenet { - -CtcEndpoint::CtcEndpoint(const CtcEndpointConfig& config) : config_(config) { - Reset(); -} - -void CtcEndpoint::Reset() { - num_frames_decoded_ = 0; - num_frames_trailing_blank_ = 0; -} - -static bool RuleActivated(const CtcEndpointRule& rule, - const std::string& rule_name, bool decoded_sth, - int trailing_silence, int utterance_length) { - bool ans = (decoded_sth || !rule.must_decoded_sth) && - trailing_silence >= rule.min_trailing_silence && - utterance_length >= rule.min_utterance_length; - if (ans) { - VLOG(2) << "Endpointing rule " << rule_name - << " activated: " << (decoded_sth ? 
"true" : "false") << ',' - << trailing_silence << ',' << utterance_length; - } - return ans; -} - -bool CtcEndpoint::IsEndpoint( - const std::vector>& ctc_log_probs, - bool decoded_something) { - for (int t = 0; t < ctc_log_probs.size(); ++t) { - const auto& logp_t = ctc_log_probs[t]; - float blank_prob = expf(logp_t[config_.blank]); - - num_frames_decoded_++; - if (blank_prob > config_.blank_threshold) { - num_frames_trailing_blank_++; - } else { - num_frames_trailing_blank_ = 0; - } - } - CHECK_GE(num_frames_decoded_, num_frames_trailing_blank_); - CHECK_GT(frame_shift_in_ms_, 0); - int utterance_length = num_frames_decoded_ * frame_shift_in_ms_; - int trailing_silence = num_frames_trailing_blank_ * frame_shift_in_ms_; - if (RuleActivated(config_.rule1, "rule1", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule2, "rule2", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule3, "rule3", decoded_something, trailing_silence, - utterance_length)) - return true; - return false; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_endpoint.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_endpoint.h deleted file mode 100644 index 56d9e08e7d3fab5562028e956f7b1d6ebac7b9e4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_endpoint.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_ENDPOINT_H_ -#define DECODER_CTC_ENDPOINT_H_ - -#include - -namespace wenet { - -struct CtcEndpointRule { - bool must_decoded_sth; - int min_trailing_silence; - int min_utterance_length; - - CtcEndpointRule(bool must_decoded_sth = true, int min_trailing_silence = 1000, - int min_utterance_length = 0) - : must_decoded_sth(must_decoded_sth), - min_trailing_silence(min_trailing_silence), - min_utterance_length(min_utterance_length) {} -}; - -struct CtcEndpointConfig { - /// We consider blank as silence for purposes of endpointing. - int blank = 0; // blank id - float blank_threshold = 0.8; // blank threshold to be silence - /// We support three rules. We terminate decoding if ANY of these rules - /// evaluates to "true". If you want to add more rules, do it by changing this - /// code. If you want to disable a rule, you can set the silence-timeout for - /// that rule to a very large number. - - /// rule1 times out after 5000 ms of silence, even if we decoded nothing. - CtcEndpointRule rule1; - /// rule2 times out after 1000 ms of silence after decoding something. - CtcEndpointRule rule2; - /// rule3 times out after the utterance is 20000 ms long, regardless of - /// anything else. 
-  CtcEndpointRule rule3;
-
-  CtcEndpointConfig()
-      : rule1(false, 5000, 0), rule2(true, 1000, 0), rule3(false, 0, 20000) {}
-};
-
-class CtcEndpoint {
- public:
-  explicit CtcEndpoint(const CtcEndpointConfig& config);
-
-  void Reset();
-  /// This function returns true if this set of endpointing rules thinks we
-  /// should terminate decoding.
-  bool IsEndpoint(const std::vector<std::vector<float>>& ctc_log_probs,
-                  bool decoded_something);
-
-  void frame_shift_in_ms(int frame_shift_in_ms) {
-    frame_shift_in_ms_ = frame_shift_in_ms;
-  }
-
- private:
-  CtcEndpointConfig config_;
-  int frame_shift_in_ms_ = -1;
-  int num_frames_decoded_ = 0;
-  int num_frames_trailing_blank_ = 0;
-};
-
-}  // namespace wenet
-
-#endif  // DECODER_CTC_ENDPOINT_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_prefix_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_prefix_beam_search.cc
deleted file mode 100644
index 154c8864ba98255528a33a80a35b18eee8fa5dc9..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_prefix_beam_search.cc
+++ /dev/null
@@ -1,235 +0,0 @@
-// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
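The three endpoint rules documented in `ctc_endpoint.h` above map directly onto the `CtcEndpointConfig` constructor defaults: rule1 fires after 5000 ms of silence even if nothing was decoded, rule2 after 1000 ms of trailing silence once something was decoded, and rule3 once the utterance reaches 20000 ms. The sketch below shows how a caller might tighten rule2; the 600 ms and 0.9 values are illustrative assumptions, not values from this repository.

```cpp
#include "decoder/ctc_endpoint.h"

// Sketch only: customizing the endpoint rules declared in ctc_endpoint.h above.
wenet::CtcEndpointConfig MakeAggressiveEndpointConfig() {
  wenet::CtcEndpointConfig config;          // rule1/rule2/rule3 keep the defaults
  config.blank_threshold = 0.9;             // require more confident blank frames
  config.rule2.min_trailing_silence = 600;  // endpoint 600 ms after speech ends
  return config;
}

// Usage: the decoder feeds each chunk's CTC log-probs to IsEndpoint(), e.g.
//   wenet::CtcEndpoint endpointer(MakeAggressiveEndpointConfig());
//   endpointer.frame_shift_in_ms(40);  // subsampled frame shift, e.g. 4 * 10 ms
//   bool stop = endpointer.IsEndpoint(ctc_log_probs, /*decoded_something=*/true);
```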
- - -#include "decoder/ctc_prefix_beam_search.h" - -#include -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -CtcPrefixBeamSearch::CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : opts_(opts), context_graph_(context_graph) { - Reset(); -} - -void CtcPrefixBeamSearch::Reset() { - hypotheses_.clear(); - likelihood_.clear(); - cur_hyps_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - outputs_.clear(); - abs_time_step_ = 0; - PrefixScore prefix_score; - prefix_score.s = 0.0; - prefix_score.ns = -kFloatMax; - prefix_score.v_s = 0.0; - prefix_score.v_ns = 0.0; - std::vector empty; - cur_hyps_[empty] = prefix_score; - outputs_.emplace_back(empty); - hypotheses_.emplace_back(empty); - likelihood_.emplace_back(prefix_score.total_score()); - times_.emplace_back(empty); -} - -static bool PrefixScoreCompare( - const std::pair, PrefixScore>& a, - const std::pair, PrefixScore>& b) { - return a.second.total_score() > b.second.total_score(); -} - -void CtcPrefixBeamSearch::UpdateOutputs( - const std::pair, PrefixScore>& prefix) { - const std::vector& input = prefix.first; - const std::vector& start_boundaries = prefix.second.start_boundaries; - const std::vector& end_boundaries = prefix.second.end_boundaries; - - std::vector output; - int s = 0; - int e = 0; - for (int i = 0; i < input.size(); ++i) { - if (s < start_boundaries.size() && i == start_boundaries[s]) { - output.emplace_back(context_graph_->start_tag_id()); - ++s; - } - output.emplace_back(input[i]); - if (e < end_boundaries.size() && i == end_boundaries[e]) { - output.emplace_back(context_graph_->end_tag_id()); - ++e; - } - } - outputs_.emplace_back(output); -} - -void CtcPrefixBeamSearch::UpdateHypotheses( - const std::vector, PrefixScore>>& hpys) { - cur_hyps_.clear(); - outputs_.clear(); - hypotheses_.clear(); - likelihood_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - for (auto& item : hpys) { - cur_hyps_[item.first] = item.second; - UpdateOutputs(item); - hypotheses_.emplace_back(std::move(item.first)); - likelihood_.emplace_back(item.second.total_score()); - viterbi_likelihood_.emplace_back(item.second.viterbi_score()); - times_.emplace_back(item.second.times()); - } -} - -// Please refer https://robin1001.github.io/2020/12/11/ctc-search -// for how CTC prefix beam search works, and there is a simple graph demo in -// it. -void CtcPrefixBeamSearch::Search(const std::vector>& logp) { - if (logp.size() == 0) return; - int first_beam_size = - std::min(static_cast(logp[0].size()), opts_.first_beam_size); - for (int t = 0; t < logp.size(); ++t, ++abs_time_step_) { - const std::vector& logp_t = logp[t]; - std::unordered_map, PrefixScore, PrefixHash> next_hyps; - // 1. First beam prune, only select topk candidates - std::vector topk_score; - std::vector topk_index; - TopK(logp_t, first_beam_size, &topk_score, &topk_index); - - // 2. Token passing - for (int i = 0; i < topk_index.size(); ++i) { - int id = topk_index[i]; - auto prob = topk_score[i]; - for (const auto& it : cur_hyps_) { - const std::vector& prefix = it.first; - const PrefixScore& prefix_score = it.second; - // If prefix doesn't exist in next_hyps, next_hyps[prefix] will insert - // PrefixScore(-inf, -inf) by default, since the default constructor - // of PrefixScore will set fields s(blank ending score) and - // ns(none blank ending score) to -inf, respectively. 
- if (id == opts_.blank) { - // Case 0: *a + ε => *a - PrefixScore& next_score = next_hyps[prefix]; - next_score.s = LogAdd(next_score.s, prefix_score.score() + prob); - next_score.v_s = prefix_score.viterbi_score() + prob; - next_score.times_s = prefix_score.times(); - // Prefix not changed, copy the context from prefix. - if (context_graph_ && !next_score.has_context) { - next_score.CopyContext(prefix_score); - next_score.has_context = true; - } - } else if (!prefix.empty() && id == prefix.back()) { - // Case 1: *a + a => *a - PrefixScore& next_score1 = next_hyps[prefix]; - next_score1.ns = LogAdd(next_score1.ns, prefix_score.ns + prob); - if (next_score1.v_ns < prefix_score.v_ns + prob) { - next_score1.v_ns = prefix_score.v_ns + prob; - if (next_score1.cur_token_prob < prob) { - next_score1.cur_token_prob = prob; - next_score1.times_ns = prefix_score.times_ns; - CHECK_GT(next_score1.times_ns.size(), 0); - next_score1.times_ns.back() = abs_time_step_; - } - } - if (context_graph_ && !next_score1.has_context) { - next_score1.CopyContext(prefix_score); - next_score1.has_context = true; - } - - // Case 2: *aε + a => *aa - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score2 = next_hyps[new_prefix]; - next_score2.ns = LogAdd(next_score2.ns, prefix_score.s + prob); - if (next_score2.v_ns < prefix_score.v_s + prob) { - next_score2.v_ns = prefix_score.v_s + prob; - next_score2.cur_token_prob = prob; - next_score2.times_ns = prefix_score.times_s; - next_score2.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score2.has_context) { - // Prefix changed, calculate the context score. - next_score2.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score2.has_context = true; - } - } else { - // Case 3: *a + b => *ab, *aε + b => *ab - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score = next_hyps[new_prefix]; - next_score.ns = LogAdd(next_score.ns, prefix_score.score() + prob); - if (next_score.v_ns < prefix_score.viterbi_score() + prob) { - next_score.v_ns = prefix_score.viterbi_score() + prob; - next_score.cur_token_prob = prob; - next_score.times_ns = prefix_score.times(); - next_score.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score.has_context) { - // Calculate the context score. - next_score.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score.has_context = true; - } - } - } - } - - // 3. Second beam prune, only keep top n best paths - std::vector, PrefixScore>> arr(next_hyps.begin(), - next_hyps.end()); - int second_beam_size = - std::min(static_cast(arr.size()), opts_.second_beam_size); - std::nth_element(arr.begin(), arr.begin() + second_beam_size, arr.end(), - PrefixScoreCompare); - arr.resize(second_beam_size); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // 4. Update cur_hyps_ and get new result - UpdateHypotheses(arr); - } -} - -void CtcPrefixBeamSearch::FinalizeSearch() { UpdateFinalContext(); } - -void CtcPrefixBeamSearch::UpdateFinalContext() { - if (context_graph_ == nullptr) return; - CHECK_EQ(hypotheses_.size(), cur_hyps_.size()); - CHECK_EQ(hypotheses_.size(), likelihood_.size()); - // We should backoff the context score/state when the context is - // not fully matched at the last time. 
- for (const auto& prefix : hypotheses_) { - PrefixScore& prefix_score = cur_hyps_[prefix]; - if (prefix_score.context_state != 0) { - prefix_score.UpdateContext(context_graph_, prefix_score, 0, - prefix.size()); - } - } - std::vector, PrefixScore>> arr(cur_hyps_.begin(), - cur_hyps_.end()); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // Update cur_hyps_ and get new result - UpdateHypotheses(arr); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_prefix_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_prefix_beam_search.h deleted file mode 100644 index f44ec23c37af517c9e45140f89ef7346768f5d35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_prefix_beam_search.h +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_PREFIX_BEAM_SEARCH_H_ -#define DECODER_CTC_PREFIX_BEAM_SEARCH_H_ - -#include -#include -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "utils/utils.h" - -namespace wenet { - -struct CtcPrefixBeamSearchOptions { - int blank = 0; // blank id - int first_beam_size = 10; - int second_beam_size = 10; -}; - -struct PrefixScore { - float s = -kFloatMax; // blank ending score - float ns = -kFloatMax; // none blank ending score - float v_s = -kFloatMax; // viterbi blank ending score - float v_ns = -kFloatMax; // viterbi none blank ending score - float cur_token_prob = -kFloatMax; // prob of current token - std::vector times_s; // times of viterbi blank path - std::vector times_ns; // times of viterbi none blank path - - float score() const { return LogAdd(s, ns); } - float viterbi_score() const { return v_s > v_ns ? v_s : v_ns; } - const std::vector& times() const { - return v_s > v_ns ? 
times_s : times_ns; - } - - bool has_context = false; - int context_state = 0; - float context_score = 0; - std::vector start_boundaries; - std::vector end_boundaries; - - void CopyContext(const PrefixScore& prefix_score) { - context_state = prefix_score.context_state; - context_score = prefix_score.context_score; - start_boundaries = prefix_score.start_boundaries; - end_boundaries = prefix_score.end_boundaries; - } - - void UpdateContext(const std::shared_ptr& context_graph, - const PrefixScore& prefix_score, int word_id, - int prefix_len) { - this->CopyContext(prefix_score); - - float score = 0; - bool is_start_boundary = false; - bool is_end_boundary = false; - - context_state = - context_graph->GetNextState(prefix_score.context_state, word_id, &score, - &is_start_boundary, &is_end_boundary); - context_score += score; - if (is_start_boundary) start_boundaries.emplace_back(prefix_len); - if (is_end_boundary) end_boundaries.emplace_back(prefix_len); - } - - float total_score() const { return score() + context_score; } -}; - -struct PrefixHash { - size_t operator()(const std::vector& prefix) const { - size_t hash_code = 0; - // here we use KB&DR hash code - for (int id : prefix) { - hash_code = id + 31 * hash_code; - } - return hash_code; - } -}; - -class CtcPrefixBeamSearch : public SearchInterface { - public: - explicit CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph = nullptr); - - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kPrefixBeamSearch; } - void UpdateOutputs(const std::pair, PrefixScore>& prefix); - void UpdateHypotheses( - const std::vector, PrefixScore>>& hpys); - void UpdateFinalContext(); - - const std::vector& viterbi_likelihood() const { - return viterbi_likelihood_; - } - const std::vector>& Inputs() const override { - return hypotheses_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - int abs_time_step_ = 0; - - // N-best list and corresponding likelihood_, in sorted order - std::vector> hypotheses_; - std::vector likelihood_; - std::vector viterbi_likelihood_; - std::vector> times_; - - std::unordered_map, PrefixScore, PrefixHash> cur_hyps_; - std::shared_ptr context_graph_ = nullptr; - // Outputs contain the hypotheses_ and tags like: and - std::vector> outputs_; - const CtcPrefixBeamSearchOptions& opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(CtcPrefixBeamSearch); -}; - -} // namespace wenet - -#endif // DECODER_CTC_PREFIX_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_wfst_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_wfst_beam_search.cc deleted file mode 100644 index 10e93f387e87b5f16fb7784d7060c50f227bf58e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_wfst_beam_search.cc +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_wfst_beam_search.h" - -#include - -namespace wenet { - -void DecodableTensorScaled::Reset() { - num_frames_ready_ = 0; - done_ = false; - // Give an empty initialization, will throw error when - // AcceptLoglikes is not called - logp_.clear(); -} - -void DecodableTensorScaled::AcceptLoglikes(const std::vector& logp) { - ++num_frames_ready_; - // TODO(Binbin Zhang): Avoid copy here - logp_ = logp; -} - -float DecodableTensorScaled::LogLikelihood(int32 frame, int32 index) { - CHECK_GT(index, 0); - CHECK_LT(frame, num_frames_ready_); - return scale_ * logp_[index - 1]; -} - -bool DecodableTensorScaled::IsLastFrame(int32 frame) const { - CHECK_LT(frame, num_frames_ready_); - return done_ && (frame == num_frames_ready_ - 1); -} - -int32 DecodableTensorScaled::NumIndices() const { - LOG(FATAL) << "Not implement"; - return 0; -} - -CtcWfstBeamSearch::CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : decodable_(opts.acoustic_scale), - decoder_(fst, opts, context_graph), - context_graph_(context_graph), - opts_(opts) { - Reset(); -} - -void CtcWfstBeamSearch::Reset() { - num_frames_ = 0; - decoded_frames_mapping_.clear(); - is_last_frame_blank_ = false; - last_best_ = 0; - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - decodable_.Reset(); - decoder_.InitDecoding(); -} - -void CtcWfstBeamSearch::Search(const std::vector>& logp) { - if (0 == logp.size()) { - return; - } - // Every time we get the log posterior, we decode it all before return - for (int i = 0; i < logp.size(); i++) { - float blank_score = std::exp(logp[i][0]); - if (blank_score > opts_.blank_skip_thresh * opts_.blank_scale) { - VLOG(3) << "skipping frame " << num_frames_ << " score " << blank_score; - is_last_frame_blank_ = true; - last_frame_prob_ = logp[i]; - } else { - // Get the best symbol - int cur_best = - std::max_element(logp[i].begin(), logp[i].end()) - logp[i].begin(); - // Optional, adding one blank frame if we has skipped it in two same - // symbols - if (cur_best != 0 && is_last_frame_blank_ && cur_best == last_best_) { - decodable_.AcceptLoglikes(last_frame_prob_); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_ - 1); - VLOG(2) << "Adding blank frame at symbol " << cur_best; - } - last_best_ = cur_best; - - decodable_.AcceptLoglikes(logp[i]); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_); - is_last_frame_blank_ = false; - } - num_frames_++; - } - // Get the best path - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - if (decoded_frames_mapping_.size() > 0) { - inputs_.resize(1); - outputs_.resize(1); - likelihood_.resize(1); - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, false); - std::vector alignment; - kaldi::LatticeWeight weight; - fst::GetLinearSymbolSequence(lat, &alignment, &outputs_[0], &weight); - ConvertToInputs(alignment, &inputs_[0]); - RemoveContinuousTags(&outputs_[0]); - VLOG(3) << weight.Value1() << " " << weight.Value2(); - likelihood_[0] = 
-(weight.Value1() + weight.Value2()); - } -} - -void CtcWfstBeamSearch::FinalizeSearch() { - decodable_.SetFinish(); - decoder_.FinalizeDecoding(); - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - if (decoded_frames_mapping_.size() > 0) { - std::vector nbest_lats; - if (opts_.nbest == 1) { - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, true); - nbest_lats.push_back(std::move(lat)); - } else { - // Get N-best path by lattice(CompactLattice) - kaldi::CompactLattice clat; - decoder_.GetLattice(&clat, true); - kaldi::Lattice lat, nbest_lat; - fst::ConvertLattice(clat, &lat); - // TODO(Binbin Zhang): it's n-best word lists here, not character n-best - fst::ShortestPath(lat, &nbest_lat, opts_.nbest); - fst::ConvertNbestToVector(nbest_lat, &nbest_lats); - } - int nbest = nbest_lats.size(); - inputs_.resize(nbest); - outputs_.resize(nbest); - likelihood_.resize(nbest); - times_.resize(nbest); - for (int i = 0; i < nbest; i++) { - kaldi::LatticeWeight weight; - std::vector alignment; - fst::GetLinearSymbolSequence(nbest_lats[i], &alignment, &outputs_[i], - &weight); - ConvertToInputs(alignment, &inputs_[i], ×_[i]); - RemoveContinuousTags(&outputs_[i]); - likelihood_[i] = -(weight.Value1() + weight.Value2()); - } - } -} - -void CtcWfstBeamSearch::ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time) { - input->clear(); - if (time != nullptr) time->clear(); - for (int cur = 0; cur < alignment.size(); ++cur) { - // ignore blank - if (alignment[cur] - 1 == 0) continue; - // merge continuous same label - if (cur > 0 && alignment[cur] == alignment[cur - 1]) continue; - - input->push_back(alignment[cur] - 1); - if (time != nullptr) { - time->push_back(decoded_frames_mapping_[cur]); - } - } -} - -void CtcWfstBeamSearch::RemoveContinuousTags(std::vector* output) { - if (context_graph_) { - for (auto it = output->begin(); it != output->end();) { - if (*it == context_graph_->start_tag_id() || - *it == context_graph_->end_tag_id()) { - if (it + 1 != output->end() && *it == *(it + 1)) { - it = output->erase(it); - continue; - } - } - ++it; - } - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_wfst_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_wfst_beam_search.h deleted file mode 100644 index 204a0c8db1254035b7e3bd4a6e02b65d66b756f3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/ctc_wfst_beam_search.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
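// Search() above feeds the Kaldi decoder only those frames whose blank posterior
// does not dominate, and reports each path's likelihood as the negated lattice
// cost, i.e. -(graph cost Value1 + acoustic cost Value2). A minimal sketch of the
// frame-skipping decision, assuming frame_logp holds one frame of log-posteriors
// with blank at index 0; the helper name is illustrative, not from the diff.
#include <cmath>
#include <vector>

static bool ShouldSkipBlankFrame(const std::vector<float>& frame_logp,
                                 float blank_skip_thresh, float blank_scale) {
  float blank_posterior = std::exp(frame_logp[0]);  // blank is id 0
  return blank_posterior > blank_skip_thresh * blank_scale;
}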
- - -#ifndef DECODER_CTC_WFST_BEAM_SEARCH_H_ -#define DECODER_CTC_WFST_BEAM_SEARCH_H_ - -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "kaldi/decoder/lattice-faster-online-decoder.h" -#include "utils/utils.h" - -namespace wenet { - -class DecodableTensorScaled : public kaldi::DecodableInterface { - public: - explicit DecodableTensorScaled(float scale = 1.0) : scale_(scale) { Reset(); } - - void Reset(); - int32 NumFramesReady() const override { return num_frames_ready_; } - bool IsLastFrame(int32 frame) const override; - float LogLikelihood(int32 frame, int32 index) override; - int32 NumIndices() const override; - void AcceptLoglikes(const std::vector& logp); - void SetFinish() { done_ = true; } - - private: - int num_frames_ready_ = 0; - float scale_ = 1.0; - bool done_ = false; - std::vector logp_; -}; - -// LatticeFasterDecoderConfig has the following key members -// beam: decoding beam -// max_active: Decoder max active states -// lattice_beam: Lattice generation beam -struct CtcWfstBeamSearchOptions : public kaldi::LatticeFasterDecoderConfig { - float acoustic_scale = 1.0; - float nbest = 10; - // When blank score is greater than this thresh, skip the frame in viterbi - // search - float blank_skip_thresh = 0.98; - float blank_scale = 1.0; -}; - -class CtcWfstBeamSearch : public SearchInterface { - public: - explicit CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph); - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kWfstBeamSearch; } - // For CTC prefix beam search, both inputs and outputs are hypotheses_ - const std::vector>& Inputs() const override { - return inputs_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - // Sub one and remove - void ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time = nullptr); - void RemoveContinuousTags(std::vector* output); - - int num_frames_ = 0; - std::vector decoded_frames_mapping_; - - int last_best_ = 0; // last none blank best id - std::vector last_frame_prob_; - bool is_last_frame_blank_ = false; - std::vector> inputs_, outputs_; - std::vector likelihood_; - std::vector> times_; - DecodableTensorScaled decodable_; - kaldi::LatticeFasterOnlineDecoder decoder_; - std::shared_ptr context_graph_; - const CtcWfstBeamSearchOptions& opts_; -}; - -} // namespace wenet - -#endif // DECODER_CTC_WFST_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/onnx_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/onnx_asr_model.cc deleted file mode 100644 index fc7afc704febbde3b7e350e392dc46763c453e74..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/onnx_asr_model.cc +++ /dev/null @@ -1,430 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/onnx_asr_model.h" - -#include -#include -#include - -#include "utils/string.h" - -namespace wenet { - -Ort::Env OnnxAsrModel::env_ = Ort::Env(ORT_LOGGING_LEVEL_WARNING, ""); -Ort::SessionOptions OnnxAsrModel::session_options_ = Ort::SessionOptions(); - -void OnnxAsrModel::InitEngineThreads(int num_threads) { - session_options_.SetIntraOpNumThreads(num_threads); -} - -void OnnxAsrModel::GetInputOutputInfo( - const std::shared_ptr& session, - std::vector* in_names, std::vector* out_names) { - Ort::AllocatorWithDefaultOptions allocator; - // Input info - int num_nodes = session->GetInputCount(); - in_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetInputName(i, allocator); - Ort::TypeInfo type_info = session->GetInputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tInput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*in_names)[i] = name; - } - // Output info - num_nodes = session->GetOutputCount(); - out_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetOutputName(i, allocator); - Ort::TypeInfo type_info = session->GetOutputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tOutput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*out_names)[i] = name; - } -} - -void OnnxAsrModel::Read(const std::string& model_dir) { - std::string encoder_onnx_path = model_dir + "/encoder.onnx"; - std::string rescore_onnx_path = model_dir + "/decoder.onnx"; - std::string ctc_onnx_path = model_dir + "/ctc.onnx"; - - // 1. Load sessions - try { -#ifdef _MSC_VER - encoder_session_ = std::make_shared( - env_, ToWString(encoder_onnx_path).c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, ToWString(rescore_onnx_path).c_str(), session_options_); - ctc_session_ = std::make_shared( - env_, ToWString(ctc_onnx_path).c_str(), session_options_); -#else - encoder_session_ = std::make_shared( - env_, encoder_onnx_path.c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, rescore_onnx_path.c_str(), session_options_); - ctc_session_ = std::make_shared(env_, ctc_onnx_path.c_str(), - session_options_); -#endif - } catch (std::exception const& e) { - LOG(ERROR) << "error when load onnx model: " << e.what(); - exit(0); - } - - // 2. 
Read metadata - auto model_metadata = encoder_session_->GetModelMetadata(); - - Ort::AllocatorWithDefaultOptions allocator; - encoder_output_size_ = - atoi(model_metadata.LookupCustomMetadataMap("output_size", allocator)); - num_blocks_ = - atoi(model_metadata.LookupCustomMetadataMap("num_blocks", allocator)); - head_ = atoi(model_metadata.LookupCustomMetadataMap("head", allocator)); - cnn_module_kernel_ = atoi( - model_metadata.LookupCustomMetadataMap("cnn_module_kernel", allocator)); - subsampling_rate_ = atoi( - model_metadata.LookupCustomMetadataMap("subsampling_rate", allocator)); - right_context_ = - atoi(model_metadata.LookupCustomMetadataMap("right_context", allocator)); - sos_ = atoi(model_metadata.LookupCustomMetadataMap("sos_symbol", allocator)); - eos_ = atoi(model_metadata.LookupCustomMetadataMap("eos_symbol", allocator)); - is_bidirectional_decoder_ = atoi(model_metadata.LookupCustomMetadataMap( - "is_bidirectional_decoder", allocator)); - chunk_size_ = - atoi(model_metadata.LookupCustomMetadataMap("chunk_size", allocator)); - num_left_chunks_ = - atoi(model_metadata.LookupCustomMetadataMap("left_chunks", allocator)); - - LOG(INFO) << "Onnx Model Info:"; - LOG(INFO) << "\tencoder_output_size " << encoder_output_size_; - LOG(INFO) << "\tnum_blocks " << num_blocks_; - LOG(INFO) << "\thead " << head_; - LOG(INFO) << "\tcnn_module_kernel " << cnn_module_kernel_; - LOG(INFO) << "\tsubsampling_rate " << subsampling_rate_; - LOG(INFO) << "\tright_context " << right_context_; - LOG(INFO) << "\tsos " << sos_; - LOG(INFO) << "\teos " << eos_; - LOG(INFO) << "\tis bidirectional decoder " << is_bidirectional_decoder_; - LOG(INFO) << "\tchunk_size " << chunk_size_; - LOG(INFO) << "\tnum_left_chunks " << num_left_chunks_; - - // 3. Read model nodes - LOG(INFO) << "Onnx Encoder:"; - GetInputOutputInfo(encoder_session_, &encoder_in_names_, &encoder_out_names_); - LOG(INFO) << "Onnx CTC:"; - GetInputOutputInfo(ctc_session_, &ctc_in_names_, &ctc_out_names_); - LOG(INFO) << "Onnx Rescore:"; - GetInputOutputInfo(rescore_session_, &rescore_in_names_, &rescore_out_names_); -} - -OnnxAsrModel::OnnxAsrModel(const OnnxAsrModel& other) { - // metadatas - encoder_output_size_ = other.encoder_output_size_; - num_blocks_ = other.num_blocks_; - head_ = other.head_; - cnn_module_kernel_ = other.cnn_module_kernel_; - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - - // sessions - encoder_session_ = other.encoder_session_; - ctc_session_ = other.ctc_session_; - rescore_session_ = other.rescore_session_; - - // node names - encoder_in_names_ = other.encoder_in_names_; - encoder_out_names_ = other.encoder_out_names_; - ctc_in_names_ = other.ctc_in_names_; - ctc_out_names_ = other.ctc_out_names_; - rescore_in_names_ = other.rescore_in_names_; - rescore_out_names_ = other.rescore_out_names_; -} - -std::shared_ptr OnnxAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void OnnxAsrModel::Reset() { - offset_ = 0; - encoder_outs_.clear(); - cached_feature_.clear(); - // Reset att_cache - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - if (num_left_chunks_ > 0) { - int required_cache_size = chunk_size_ * 
num_left_chunks_; - offset_ = required_cache_size; - att_cache_.resize(num_blocks_ * head_ * required_cache_size * - encoder_output_size_ / head_ * 2, - 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, required_cache_size, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } else { - att_cache_.resize(0, 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, 0, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } - - // Reset cnn_cache - cnn_cache_.resize( - num_blocks_ * encoder_output_size_ * (cnn_module_kernel_ - 1), 0.0); - const int64_t cnn_cache_shape[] = {num_blocks_, 1, encoder_output_size_, - cnn_module_kernel_ - 1}; - cnn_cache_ort_ = Ort::Value::CreateTensor( - memory_info, cnn_cache_.data(), cnn_cache_.size(), cnn_cache_shape, 4); -} - -void OnnxAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - // 1. Prepare onnx required data, splice cached_feature_ and chunk_feats - // chunk - int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - std::vector feats; - for (size_t i = 0; i < cached_feature_.size(); ++i) { - feats.insert(feats.end(), cached_feature_[i].begin(), - cached_feature_[i].end()); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - feats.insert(feats.end(), chunk_feats[i].begin(), chunk_feats[i].end()); - } - const int64_t feats_shape[3] = {1, num_frames, feature_dim}; - Ort::Value feats_ort = Ort::Value::CreateTensor( - memory_info, feats.data(), feats.size(), feats_shape, 3); - // offset - int64_t offset_int64 = static_cast(offset_); - Ort::Value offset_ort = Ort::Value::CreateTensor( - memory_info, &offset_int64, 1, std::vector{}.data(), 0); - // required_cache_size - int64_t required_cache_size = chunk_size_ * num_left_chunks_; - Ort::Value required_cache_size_ort = Ort::Value::CreateTensor( - memory_info, &required_cache_size, 1, std::vector{}.data(), 0); - // att_mask - Ort::Value att_mask_ort{nullptr}; - std::vector att_mask(required_cache_size + chunk_size_, 1); - if (num_left_chunks_ > 0) { - int chunk_idx = offset_ / chunk_size_ - num_left_chunks_; - if (chunk_idx < num_left_chunks_) { - for (int i = 0; i < (num_left_chunks_ - chunk_idx) * chunk_size_; ++i) { - att_mask[i] = 0; - } - } - const int64_t att_mask_shape[] = {1, 1, required_cache_size + chunk_size_}; - att_mask_ort = Ort::Value::CreateTensor( - memory_info, reinterpret_cast(att_mask.data()), att_mask.size(), - att_mask_shape, 3); - } - - // 2. 
Encoder chunk forward - std::vector inputs; - for (auto name : encoder_in_names_) { - if (!strcmp(name, "chunk")) { - inputs.emplace_back(std::move(feats_ort)); - } else if (!strcmp(name, "offset")) { - inputs.emplace_back(std::move(offset_ort)); - } else if (!strcmp(name, "required_cache_size")) { - inputs.emplace_back(std::move(required_cache_size_ort)); - } else if (!strcmp(name, "att_cache")) { - inputs.emplace_back(std::move(att_cache_ort_)); - } else if (!strcmp(name, "cnn_cache")) { - inputs.emplace_back(std::move(cnn_cache_ort_)); - } else if (!strcmp(name, "att_mask")) { - inputs.emplace_back(std::move(att_mask_ort)); - } - } - - std::vector ort_outputs = encoder_session_->Run( - Ort::RunOptions{nullptr}, encoder_in_names_.data(), inputs.data(), - inputs.size(), encoder_out_names_.data(), encoder_out_names_.size()); - - offset_ += static_cast( - ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[1]); - att_cache_ort_ = std::move(ort_outputs[1]); - cnn_cache_ort_ = std::move(ort_outputs[2]); - - std::vector ctc_inputs; - ctc_inputs.emplace_back(std::move(ort_outputs[0])); - - std::vector ctc_ort_outputs = ctc_session_->Run( - Ort::RunOptions{nullptr}, ctc_in_names_.data(), ctc_inputs.data(), - ctc_inputs.size(), ctc_out_names_.data(), ctc_out_names_.size()); - encoder_outs_.push_back(std::move(ctc_inputs[0])); - - float* logp_data = ctc_ort_outputs[0].GetTensorMutableData(); - auto type_info = ctc_ort_outputs[0].GetTensorTypeAndShapeInfo(); - - int num_outputs = type_info.GetShape()[1]; - int output_dim = type_info.GetShape()[2]; - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), logp_data + i * output_dim, - sizeof(float) * output_dim); - } -} - -float OnnxAsrModel::ComputeAttentionScore(const float* prob, - const std::vector& hyp, int eos, - int decode_out_len) { - float score = 0.0f; - for (size_t j = 0; j < hyp.size(); ++j) { - score += *(prob + j * decode_out_len + hyp[j]); - } - score += *(prob + hyp.size() * decode_out_len + eos); - return score; -} - -void OnnxAsrModel::AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - std::vector hyps_lens; - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_lens.emplace_back(static_cast(length)); - } - - std::vector rescore_input; - int encoder_len = 0; - for (int i = 0; i < encoder_outs_.size(); i++) { - float* encoder_outs_data = encoder_outs_[i].GetTensorMutableData(); - auto type_info = encoder_outs_[i].GetTensorTypeAndShapeInfo(); - for (int j = 0; j < type_info.GetElementCount(); j++) { - rescore_input.emplace_back(encoder_outs_data[j]); - } - encoder_len += type_info.GetShape()[1]; - } - - const int64_t decode_input_shape[] = {1, encoder_len, encoder_output_size_}; - - std::vector hyps_pad; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_pad.emplace_back(sos_); - size_t j = 0; - for (; j < hyp.size(); ++j) { - hyps_pad.emplace_back(hyp[j]); - } - if (j == max_hyps_len - 1) { - continue; - } - for (; j < max_hyps_len - 1; ++j) { 
- hyps_pad.emplace_back(0); - } - } - - const int64_t hyps_pad_shape[] = {num_hyps, max_hyps_len}; - - const int64_t hyps_lens_shape[] = {num_hyps}; - - Ort::Value decode_input_tensor_ = Ort::Value::CreateTensor( - memory_info, rescore_input.data(), rescore_input.size(), - decode_input_shape, 3); - Ort::Value hyps_pad_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_pad.data(), hyps_pad.size(), hyps_pad_shape, 2); - Ort::Value hyps_lens_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_lens.data(), hyps_lens.size(), hyps_lens_shape, 1); - - std::vector rescore_inputs; - - rescore_inputs.emplace_back(std::move(hyps_pad_tensor_)); - rescore_inputs.emplace_back(std::move(hyps_lens_tensor_)); - rescore_inputs.emplace_back(std::move(decode_input_tensor_)); - - std::vector rescore_outputs = rescore_session_->Run( - Ort::RunOptions{nullptr}, rescore_in_names_.data(), rescore_inputs.data(), - rescore_inputs.size(), rescore_out_names_.data(), - rescore_out_names_.size()); - - float* decoder_outs_data = rescore_outputs[0].GetTensorMutableData(); - float* r_decoder_outs_data = rescore_outputs[1].GetTensorMutableData(); - - auto type_info = rescore_outputs[0].GetTensorTypeAndShapeInfo(); - int decode_out_len = type_info.GetShape()[2]; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left to right decoder score - score = ComputeAttentionScore( - decoder_outs_data + max_hyps_len * decode_out_len * i, hyp, eos_, - decode_out_len); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore( - r_decoder_outs_data + max_hyps_len * decode_out_len * i, r_hyp, eos_, - decode_out_len); - } - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/onnx_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/onnx_asr_model.h deleted file mode 100644 index f5d9e9a0c61d728f2fb6d45d1428234abae98c90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/onnx_asr_model.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_ONNX_ASR_MODEL_H_ -#define DECODER_ONNX_ASR_MODEL_H_ - -#include -#include -#include - -#include "onnxruntime_cxx_api.h" // NOLINT - -#include "decoder/asr_model.h" -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -class OnnxAsrModel : public AsrModel { - public: - static void InitEngineThreads(int num_threads = 1); - - public: - OnnxAsrModel() = default; - OnnxAsrModel(const OnnxAsrModel& other); - void Read(const std::string& model_dir); - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - void GetInputOutputInfo(const std::shared_ptr& session, - std::vector* in_names, - std::vector* out_names); - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const float* prob, const std::vector& hyp, - int eos, int decode_out_len); - - private: - int encoder_output_size_ = 0; - int num_blocks_ = 0; - int cnn_module_kernel_ = 0; - int head_ = 0; - - // sessions - // NOTE(Mddct): The Env holds the logging state used by all other objects. - // One Env must be created before using any other Onnxruntime functionality. - static Ort::Env env_; // shared environment across threads. - static Ort::SessionOptions session_options_; - std::shared_ptr encoder_session_ = nullptr; - std::shared_ptr rescore_session_ = nullptr; - std::shared_ptr ctc_session_ = nullptr; - - // node names - std::vector encoder_in_names_, encoder_out_names_; - std::vector ctc_in_names_, ctc_out_names_; - std::vector rescore_in_names_, rescore_out_names_; - - // caches - Ort::Value att_cache_ort_{nullptr}; - Ort::Value cnn_cache_ort_{nullptr}; - std::vector encoder_outs_; - // NOTE: Instead of making a copy of the xx_cache, ONNX only maintains - // its data pointer when initializing xx_cache_ort (see https://github.com/ - // microsoft/onnxruntime/blob/master/onnxruntime/core/framework - // /tensor.cc#L102-L129), so we need the following variables to keep - // our data "alive" during the lifetime of decoder. - std::vector att_cache_; - std::vector cnn_cache_; -}; - -} // namespace wenet - -#endif // DECODER_ONNX_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/params.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/params.h deleted file mode 100644 index 3edc877f1bb6d876ca087cab8e4ed00d42e97e63..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/params.h +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
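// The NOTE at the end of onnx_asr_model.h above is the reason att_cache_ and
// cnn_cache_ are kept as plain std::vector<float> members: Ort::Value::CreateTensor
// only wraps the caller's buffer, it does not copy it, so the backing vector must
// outlive the tensor. A minimal sketch of that keep-alive pattern (an illustration,
// not code from this diff).
#include <cstdint>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

// `backing` must stay alive for as long as the returned Ort::Value is used,
// because CreateTensor stores a pointer to its data instead of copying it.
static Ort::Value WrapAsTensor(std::vector<float>& backing,
                               const std::vector<int64_t>& shape) {
  Ort::MemoryInfo memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
  return Ort::Value::CreateTensor<float>(memory_info, backing.data(),
                                         backing.size(), shape.data(),
                                         shape.size());
}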
- -#ifndef DECODER_PARAMS_H_ -#define DECODER_PARAMS_H_ - -#include -#include -#include -#include - -#include "decoder/asr_decoder.h" -#ifdef USE_ONNX -#include "decoder/onnx_asr_model.h" -#endif -#ifdef USE_TORCH -#include "decoder/torch_asr_model.h" -#endif -#ifdef USE_XPU -#include "xpu/xpu_asr_model.h" -#endif -#ifdef USE_BPU -#include "bpu/bpu_asr_model.h" -#endif -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/flags.h" -#include "utils/string.h" - -DEFINE_int32(device_id, 0, "set XPU DeviceID for ASR model"); - -// TorchAsrModel flags -DEFINE_string(model_path, "", "pytorch exported model path"); -// OnnxAsrModel flags -DEFINE_string(onnx_dir, "", "directory where the onnx model is saved"); -// XPUAsrModel flags -DEFINE_string(xpu_model_dir, "", - "directory where the XPU model and weights is saved"); -// BPUAsrModel flags -DEFINE_string(bpu_model_dir, "", - "directory where the HORIZON BPU model is saved"); - -// FeaturePipelineConfig flags -DEFINE_int32(num_bins, 80, "num mel bins for fbank feature"); -DEFINE_int32(sample_rate, 16000, "sample rate for audio"); - -// TLG fst -DEFINE_string(fst_path, "", "TLG fst path"); - -// DecodeOptions flags -DEFINE_int32(chunk_size, 16, "decoding chunk size"); -DEFINE_int32(num_left_chunks, -1, "left chunks in decoding"); -DEFINE_double(ctc_weight, 0.5, - "ctc weight when combining ctc score and rescoring score"); -DEFINE_double(rescoring_weight, 1.0, - "rescoring weight when combining ctc score and rescoring score"); -DEFINE_double(reverse_weight, 0.0, - "used for bitransformer rescoring. it must be 0.0 if decoder is" - "conventional transformer decoder, and only reverse_weight > 0.0" - "dose the right to left decoder will be calculated and used"); -DEFINE_int32(max_active, 7000, "max active states in ctc wfst search"); -DEFINE_int32(min_active, 200, "min active states in ctc wfst search"); -DEFINE_double(beam, 16.0, "beam in ctc wfst search"); -DEFINE_double(lattice_beam, 10.0, "lattice beam in ctc wfst search"); -DEFINE_double(acoustic_scale, 1.0, "acoustic scale for ctc wfst search"); -DEFINE_double(blank_skip_thresh, 1.0, - "blank skip thresh for ctc wfst search, 1.0 means no skip"); -DEFINE_double(blank_scale, 1.0, "blank scale for ctc wfst search"); -DEFINE_double(length_penalty, 0.0, - "length penalty ctc wfst search, will not" - "apply on self-loop arc, for balancing the del/ins ratio, " - "suggest set to -3.0"); -DEFINE_int32(nbest, 10, "nbest for ctc wfst or prefix search"); - -// SymbolTable flags -DEFINE_string(dict_path, "", - "dict symbol table path, required when LM is enabled"); -DEFINE_string(unit_path, "", - "e2e model unit symbol table, it is used in both " - "with/without LM scenarios for context/timestamp"); - -// Context flags -DEFINE_string(context_path, "", "context path, is used to build context graph"); -DEFINE_double(context_score, 3.0, "is used to rescore the decoded result"); - -// PostProcessOptions flags -DEFINE_int32(language_type, 0, - "remove spaces according to language type" - "0x00 = kMandarinEnglish, " - "0x01 = kIndoEuropean"); -DEFINE_bool(lowercase, true, "lowercase final result if needed"); - -namespace wenet { -std::shared_ptr InitFeaturePipelineConfigFromFlags() { - auto feature_config = std::make_shared( - FLAGS_num_bins, FLAGS_sample_rate); - return feature_config; -} - -std::shared_ptr InitDecodeOptionsFromFlags() { - auto decode_config = std::make_shared(); - decode_config->chunk_size = FLAGS_chunk_size; - decode_config->num_left_chunks = 
FLAGS_num_left_chunks; - decode_config->ctc_weight = FLAGS_ctc_weight; - decode_config->reverse_weight = FLAGS_reverse_weight; - decode_config->rescoring_weight = FLAGS_rescoring_weight; - decode_config->ctc_wfst_search_opts.max_active = FLAGS_max_active; - decode_config->ctc_wfst_search_opts.min_active = FLAGS_min_active; - decode_config->ctc_wfst_search_opts.beam = FLAGS_beam; - decode_config->ctc_wfst_search_opts.lattice_beam = FLAGS_lattice_beam; - decode_config->ctc_wfst_search_opts.acoustic_scale = FLAGS_acoustic_scale; - decode_config->ctc_wfst_search_opts.blank_skip_thresh = - FLAGS_blank_skip_thresh; - decode_config->ctc_wfst_search_opts.blank_scale = FLAGS_blank_scale; - decode_config->ctc_wfst_search_opts.length_penalty = FLAGS_length_penalty; - decode_config->ctc_wfst_search_opts.nbest = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.first_beam_size = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.second_beam_size = FLAGS_nbest; - return decode_config; -} - -std::shared_ptr InitDecodeResourceFromFlags() { - auto resource = std::make_shared(); - const int kNumGemmThreads = 1; - if (!FLAGS_onnx_dir.empty()) { -#ifdef USE_ONNX - LOG(INFO) << "Reading onnx model "; - OnnxAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_onnx_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DONNX=ON'."; -#endif - } else if (!FLAGS_model_path.empty()) { -#ifdef USE_TORCH - LOG(INFO) << "Reading torch model " << FLAGS_model_path; - TorchAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_model_path); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DTORCH=ON'."; -#endif - } else if (!FLAGS_xpu_model_dir.empty()) { -#ifdef USE_XPU - LOG(INFO) << "Reading XPU WeNet model weight from " << FLAGS_xpu_model_dir; - auto model = std::make_shared(); - model->SetEngineThreads(kNumGemmThreads); - model->SetDeviceId(FLAGS_device_id); - model->Read(FLAGS_xpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DXPU=ON'."; -#endif - } else if (!FLAGS_bpu_model_dir.empty()) { -#ifdef USE_BPU - LOG(INFO) << "Reading Horizon BPU model from " << FLAGS_bpu_model_dir; - auto model = std::make_shared(); - model->Read(FLAGS_bpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DBPU=ON'."; -#endif - } else { - LOG(FATAL) << "Please set ONNX, TORCH, XPU or BPU model path!!!"; - } - - LOG(INFO) << "Reading unit table " << FLAGS_unit_path; - auto unit_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_unit_path)); - CHECK(unit_table != nullptr); - resource->unit_table = unit_table; - - if (!FLAGS_fst_path.empty()) { // With LM - CHECK(!FLAGS_dict_path.empty()); - LOG(INFO) << "Reading fst " << FLAGS_fst_path; - auto fst = std::shared_ptr>( - fst::Fst::Read(FLAGS_fst_path)); - CHECK(fst != nullptr); - resource->fst = fst; - - LOG(INFO) << "Reading symbol table " << FLAGS_dict_path; - auto symbol_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_dict_path)); - CHECK(symbol_table != nullptr); - resource->symbol_table = symbol_table; - } else { // Without LM, symbol_table is the same as unit_table - resource->symbol_table = unit_table; - } - - if (!FLAGS_context_path.empty()) { - LOG(INFO) << "Reading context " << FLAGS_context_path; - std::vector contexts; - std::ifstream infile(FLAGS_context_path); - std::string context; - 
while (getline(infile, context)) { - contexts.emplace_back(Trim(context)); - } - ContextConfig config; - config.context_score = FLAGS_context_score; - resource->context_graph = std::make_shared(config); - resource->context_graph->BuildContextGraph(contexts, - resource->symbol_table); - } - - PostProcessOptions post_process_opts; - post_process_opts.language_type = - FLAGS_language_type == 0 ? kMandarinEnglish : kIndoEuropean; - post_process_opts.lowercase = FLAGS_lowercase; - resource->post_processor = - std::make_shared(std::move(post_process_opts)); - return resource; -} - -} // namespace wenet - -#endif // DECODER_PARAMS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/search_interface.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/search_interface.h deleted file mode 100644 index 25bad26705f8be44561d2c686f50a63035b14bbf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/search_interface.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_SEARCH_INTERFACE_H_ -#define DECODER_SEARCH_INTERFACE_H_ - -namespace wenet { - -#include - -enum SearchType { - kPrefixBeamSearch = 0x00, - kWfstBeamSearch = 0x01, -}; - -class SearchInterface { - public: - virtual ~SearchInterface() {} - virtual void Search(const std::vector>& logp) = 0; - virtual void Reset() = 0; - virtual void FinalizeSearch() = 0; - - virtual SearchType Type() const = 0; - // N-best inputs id - virtual const std::vector>& Inputs() const = 0; - // N-best outputs id - virtual const std::vector>& Outputs() const = 0; - // N-best likelihood - virtual const std::vector& Likelihood() const = 0; - // N-best timestamp - virtual const std::vector>& Times() const = 0; -}; - -} // namespace wenet - -#endif // DECODER_SEARCH_INTERFACE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/torch_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/torch_asr_model.cc deleted file mode 100644 index 3abca283e12f5c173c9511707229ea82b31f26d8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/torch_asr_model.cc +++ /dev/null @@ -1,278 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
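// search_interface.h above is the contract both CtcPrefixBeamSearch and
// CtcWfstBeamSearch implement: feed Search() chunks of per-frame CTC
// log-posteriors as they arrive, call FinalizeSearch() once the stream ends, then
// read the n-best Outputs(), Likelihood() and Times(). A minimal driver sketch
// under the assumption of some posterior source; GetNextChunkLogp is a
// hypothetical stand-in, not part of the diff.
#include <vector>

#include "decoder/search_interface.h"

bool GetNextChunkLogp(std::vector<std::vector<float>>* chunk_logp);  // hypothetical

void DecodeUtterance(wenet::SearchInterface* search) {
  search->Reset();
  std::vector<std::vector<float>> chunk_logp;
  while (GetNextChunkLogp(&chunk_logp)) {  // one chunk of log-posteriors
    search->Search(chunk_logp);            // incremental beam update
  }
  search->FinalizeSearch();                // settle n-best and timestamps
  if (!search->Outputs().empty()) {
    const std::vector<int>& best = search->Outputs()[0];  // top hypothesis ids
    float best_score = search->Likelihood()[0];
    (void)best;
    (void)best_score;
  }
}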
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/torch_asr_model.h" - -#include -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -namespace wenet { - -#ifndef IOS -void TorchAsrModel::InitEngineThreads(int num_threads) { - // For multi-thread performance - at::set_num_threads(num_threads); - VLOG(1) << "Num intra-op threads: " << at::get_num_threads(); -} -#endif - -void TorchAsrModel::Read(const std::string& model_path) { - torch::DeviceType device = at::kCPU; -#ifdef USE_GPU - if (!torch::cuda::is_available()) { - VLOG(1) << "CUDA is not available! Please check your GPU settings"; - throw std::runtime_error("CUDA is not available!"); - } else { - VLOG(1) << "CUDA available! Running on GPU"; - device = at::kCUDA; - } -#endif - torch::jit::script::Module model = torch::jit::load(model_path, device); - model_ = std::make_shared(std::move(model)); - torch::NoGradGuard no_grad; - model_->eval(); - torch::jit::IValue o1 = model_->run_method("subsampling_rate"); - CHECK_EQ(o1.isInt(), true); - subsampling_rate_ = o1.toInt(); - torch::jit::IValue o2 = model_->run_method("right_context"); - CHECK_EQ(o2.isInt(), true); - right_context_ = o2.toInt(); - torch::jit::IValue o3 = model_->run_method("sos_symbol"); - CHECK_EQ(o3.isInt(), true); - sos_ = o3.toInt(); - torch::jit::IValue o4 = model_->run_method("eos_symbol"); - CHECK_EQ(o4.isInt(), true); - eos_ = o4.toInt(); - torch::jit::IValue o5 = model_->run_method("is_bidirectional_decoder"); - CHECK_EQ(o5.isBool(), true); - is_bidirectional_decoder_ = o5.toBool(); - - VLOG(1) << "Torch Model Info:"; - VLOG(1) << "\tsubsampling_rate " << subsampling_rate_; - VLOG(1) << "\tright context " << right_context_; - VLOG(1) << "\tsos " << sos_; - VLOG(1) << "\teos " << eos_; - VLOG(1) << "\tis bidirectional decoder " << is_bidirectional_decoder_; -} - -TorchAsrModel::TorchAsrModel(const TorchAsrModel& other) { - // 1. Init the model info - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - // 2. Model copy, just copy the model ptr since: - // PyTorch allows using multiple CPU threads during TorchScript model - // inference, please see https://pytorch.org/docs/stable/notes/cpu_ - // threading_torchscript_inference.html - model_ = other.model_; - - // NOTE(Binbin Zhang): - // inner states for forward are not copied here. -} - -std::shared_ptr TorchAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void TorchAsrModel::Reset() { - offset_ = 0; - att_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - cnn_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - encoder_outs_.clear(); - cached_feature_.clear(); -} - -void TorchAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - // 1. Prepare libtorch required data, splice cached_feature_ and chunk_feats - // The first dimension is for batchsize, which is 1. 
- int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - torch::Tensor feats = - torch::zeros({1, num_frames, feature_dim}, torch::kFloat); - for (size_t i = 0; i < cached_feature_.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(cached_feature_[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][i] = std::move(row); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(chunk_feats[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][cached_feature_.size() + i] = std::move(row); - } - - // 2. Encoder chunk forward -#ifdef USE_GPU - feats = feats.to(at::kCUDA); - att_cache_ = att_cache_.to(at::kCUDA); - cnn_cache_ = cnn_cache_.to(at::kCUDA); -#endif - int required_cache_size = chunk_size_ * num_left_chunks_; - torch::NoGradGuard no_grad; - std::vector inputs = {feats, offset_, required_cache_size, - att_cache_, cnn_cache_}; - - // Refer interfaces in wenet/transformer/asr_model.py - auto outputs = - model_->get_method("forward_encoder_chunk")(inputs).toTuple()->elements(); - CHECK_EQ(outputs.size(), 3); -#ifdef USE_GPU - torch::Tensor chunk_out = outputs[0].toTensor().to(at::kCPU); - att_cache_ = outputs[1].toTensor().to(at::kCPU); - cnn_cache_ = outputs[2].toTensor().to(at::kCPU); -#else - torch::Tensor chunk_out = outputs[0].toTensor(); - att_cache_ = outputs[1].toTensor(); - cnn_cache_ = outputs[2].toTensor(); -#endif - offset_ += chunk_out.size(1); - - // The first dimension of returned value is for batchsize, which is 1 -#ifdef USE_GPU - chunk_out = chunk_out.to(at::kCUDA); - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor(); - ctc_log_probs = ctc_log_probs.to(at::kCPU)[0]; - encoder_outs_.push_back(std::move(chunk_out.to(at::kCPU))); -#else - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor()[0]; - encoder_outs_.push_back(std::move(chunk_out)); -#endif - - // Copy to output - int num_outputs = ctc_log_probs.size(0); - int output_dim = ctc_log_probs.size(1); - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), ctc_log_probs[i].data_ptr(), - sizeof(float) * output_dim); - } -} - -float TorchAsrModel::ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, - int eos) { - float score = 0.0f; - auto accessor = prob.accessor(); - for (size_t j = 0; j < hyp.size(); ++j) { - score += accessor[j][hyp[j]]; - } - score += accessor[hyp.size()][eos]; - return score; -} - -void TorchAsrModel::AttentionRescoring( - const std::vector>& hyps, float reverse_weight, - std::vector* rescoring_score) { - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - torch::NoGradGuard no_grad; - // Step 1: Prepare input for libtorch - torch::Tensor hyps_length = torch::zeros({num_hyps}, torch::kLong); - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_length[i] = static_cast(length); - } - torch::Tensor hyps_tensor = - torch::zeros({num_hyps, max_hyps_len}, torch::kLong); - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_tensor[i][0] = sos_; - for (size_t j = 0; j < 
hyp.size(); ++j) { - hyps_tensor[i][j + 1] = hyp[j]; - } - } - - // Step 2: Forward attention decoder by hyps and corresponding encoder_outs_ - torch::Tensor encoder_out = torch::cat(encoder_outs_, 1); -#ifdef USE_GPU - hyps_tensor = hyps_tensor.to(at::kCUDA); - hyps_length = hyps_length.to(at::kCUDA); - encoder_out = encoder_out.to(at::kCUDA); -#endif - auto outputs = model_ - ->run_method("forward_attention_decoder", hyps_tensor, - hyps_length, encoder_out, reverse_weight) - .toTuple() - ->elements(); -#ifdef USE_GPU - auto probs = outputs[0].toTensor().to(at::kCPU); - auto r_probs = outputs[1].toTensor().to(at::kCPU); -#else - auto probs = outputs[0].toTensor(); - auto r_probs = outputs[1].toTensor(); -#endif - CHECK_EQ(probs.size(0), num_hyps); - CHECK_EQ(probs.size(1), max_hyps_len); - - // Step 3: Compute rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left-to-right decoder score - score = ComputeAttentionScore(probs[i], hyp, eos_); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - // right-to-left score - CHECK_EQ(r_probs.size(0), num_hyps); - CHECK_EQ(r_probs.size(1), max_hyps_len); - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore(r_probs[i], r_hyp, eos_); - } - - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/torch_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/torch_asr_model.h deleted file mode 100644 index a3cebe08798f1cad60ca4cd73c7b2488173b6114..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/decoder/torch_asr_model.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
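// AttentionRescoring() above scores a hypothesis by summing the decoder's
// per-step log-probabilities along its tokens plus the end-of-sentence step, then
// mixes the left-to-right and optional right-to-left passes. As a small made-up
// example: for hyp = {5, 9}, ComputeAttentionScore adds
// probs[0][5] + probs[1][9] + probs[2][eos_]. The mixing itself is the weighted
// sum below (illustrative helper, not from the diff).

// reverse_weight = 0 keeps only the left-to-right decoder score, which is the
// required setting for a conventional (non-bidirectional) transformer decoder.
inline float CombineRescoringScores(float l2r_score, float r2l_score,
                                    float reverse_weight) {
  return l2r_score * (1.0f - reverse_weight) + r2l_score * reverse_weight;
}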
- -#ifndef DECODER_TORCH_ASR_MODEL_H_ -#define DECODER_TORCH_ASR_MODEL_H_ - -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -#include "decoder/asr_model.h" -#include "utils/utils.h" - -namespace wenet { - -class TorchAsrModel : public AsrModel { - public: -#ifndef IOS - static void InitEngineThreads(int num_threads = 1); -#endif - - public: - using TorchModule = torch::jit::script::Module; - TorchAsrModel() = default; - TorchAsrModel(const TorchAsrModel& other); - void Read(const std::string& model_path); - std::shared_ptr torch_model() const { return model_; } - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, int eos); - - private: - std::shared_ptr model_ = nullptr; - std::vector encoder_outs_; - // transformer/conformer attention cache - torch::Tensor att_cache_ = torch::zeros({0, 0, 0, 0}); - // conformer-only conv_module cache - torch::Tensor cnn_cache_ = torch::zeros({0, 0, 0, 0}); -}; - -} // namespace wenet - -#endif // DECODER_TORCH_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/docker/Dockerfile b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/docker/Dockerfile deleted file mode 100644 index 5c6865faa1a07719d913e9b36abe41a9ba1041d1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/docker/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM ubuntu:latest -MAINTAINER -ENV DEBIAN_FRONTEND=noninteractive -RUN sed -i s@/archive.ubuntu.com/@/mirrors.tuna.tsinghua.edu.cn/@g /etc/apt/sources.list -RUN apt-get update && apt-get install -y git cmake wget build-essential -RUN git clone https://github.com/wenet-e2e/wenet.git /home/wenet -ARG model=20210618_u2pp_conformer_libtorch.tar.gz -RUN wget -P /home https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell2/$model -RUN tar -xzf /home/$model -C /home -ARG build=/home/wenet/runtime/libtorch/build -RUN cmake -B $build -DCMAKE_BUILD_TYPE=Release -DGRAPH_TOOLS=ON && cmake --build $build diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/CMakeLists.txt deleted file mode 100644 index 78872257e43bb9a6ffcedaae977bf0173817ae50..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library(frontend STATIC - feature_pipeline.cc - fft.cc -) -target_link_libraries(frontend PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/fbank.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/fbank.h deleted file mode 100644 index 5a650dc035b8e244388cc1f2e0b9512654de7fda..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/fbank.h +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use 
this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef FRONTEND_FBANK_H_ -#define FRONTEND_FBANK_H_ - -#include -#include -#include -#include -#include - -#include "frontend/fft.h" -#include "utils/log.h" - -namespace wenet { - -// This code is based on kaldi Fbank implementation, please see -// https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.cc -class Fbank { - public: - Fbank(int num_bins, int sample_rate, int frame_length, int frame_shift) - : num_bins_(num_bins), - sample_rate_(sample_rate), - frame_length_(frame_length), - frame_shift_(frame_shift), - use_log_(true), - remove_dc_offset_(true), - generator_(0), - distribution_(0, 1.0), - dither_(0.0) { - fft_points_ = UpperPowerOfTwo(frame_length_); - // generate bit reversal table and trigonometric function table - const int fft_points_4 = fft_points_ / 4; - bitrev_.resize(fft_points_); - sintbl_.resize(fft_points_ + fft_points_4); - make_sintbl(fft_points_, sintbl_.data()); - make_bitrev(fft_points_, bitrev_.data()); - - int num_fft_bins = fft_points_ / 2; - float fft_bin_width = static_cast(sample_rate_) / fft_points_; - int low_freq = 20, high_freq = sample_rate_ / 2; - float mel_low_freq = MelScale(low_freq); - float mel_high_freq = MelScale(high_freq); - float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1); - bins_.resize(num_bins_); - center_freqs_.resize(num_bins_); - for (int bin = 0; bin < num_bins; ++bin) { - float left_mel = mel_low_freq + bin * mel_freq_delta, - center_mel = mel_low_freq + (bin + 1) * mel_freq_delta, - right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; - center_freqs_[bin] = InverseMelScale(center_mel); - std::vector this_bin(num_fft_bins); - int first_index = -1, last_index = -1; - for (int i = 0; i < num_fft_bins; ++i) { - float freq = (fft_bin_width * i); // Center frequency of this fft - // bin. 
- float mel = MelScale(freq); - if (mel > left_mel && mel < right_mel) { - float weight; - if (mel <= center_mel) - weight = (mel - left_mel) / (center_mel - left_mel); - else - weight = (right_mel - mel) / (right_mel - center_mel); - this_bin[i] = weight; - if (first_index == -1) first_index = i; - last_index = i; - } - } - CHECK(first_index != -1 && last_index >= first_index); - bins_[bin].first = first_index; - int size = last_index + 1 - first_index; - bins_[bin].second.resize(size); - for (int i = 0; i < size; ++i) { - bins_[bin].second[i] = this_bin[first_index + i]; - } - } - - // povey window - povey_window_.resize(frame_length_); - double a = M_2PI / (frame_length - 1); - for (int i = 0; i < frame_length; ++i) { - povey_window_[i] = pow(0.5 - 0.5 * cos(a * i), 0.85); - } - } - - void set_use_log(bool use_log) { use_log_ = use_log; } - - void set_remove_dc_offset(bool remove_dc_offset) { - remove_dc_offset_ = remove_dc_offset; - } - - void set_dither(float dither) { dither_ = dither; } - - int num_bins() const { return num_bins_; } - - static inline float InverseMelScale(float mel_freq) { - return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f); - } - - static inline float MelScale(float freq) { - return 1127.0f * logf(1.0f + freq / 700.0f); - } - - static int UpperPowerOfTwo(int n) { - return static_cast(pow(2, ceil(log(n) / log(2)))); - } - - // pre emphasis - void PreEmphasis(float coeff, std::vector* data) const { - if (coeff == 0.0) return; - for (int i = data->size() - 1; i > 0; i--) - (*data)[i] -= coeff * (*data)[i - 1]; - (*data)[0] -= coeff * (*data)[0]; - } - - // Apply povey window on data in place - void Povey(std::vector* data) const { - CHECK_GE(data->size(), povey_window_.size()); - for (size_t i = 0; i < povey_window_.size(); ++i) { - (*data)[i] *= povey_window_[i]; - } - } - - // Compute fbank feat, return num frames - int Compute(const std::vector& wave, - std::vector>* feat) { - int num_samples = wave.size(); - if (num_samples < frame_length_) return 0; - int num_frames = 1 + ((num_samples - frame_length_) / frame_shift_); - feat->resize(num_frames); - std::vector fft_real(fft_points_, 0), fft_img(fft_points_, 0); - std::vector power(fft_points_ / 2); - for (int i = 0; i < num_frames; ++i) { - std::vector data(wave.data() + i * frame_shift_, - wave.data() + i * frame_shift_ + frame_length_); - // optional add noise - if (dither_ != 0.0) { - for (size_t j = 0; j < data.size(); ++j) - data[j] += dither_ * distribution_(generator_); - } - // optinal remove dc offset - if (remove_dc_offset_) { - float mean = 0.0; - for (size_t j = 0; j < data.size(); ++j) mean += data[j]; - mean /= data.size(); - for (size_t j = 0; j < data.size(); ++j) data[j] -= mean; - } - - PreEmphasis(0.97, &data); - Povey(&data); - // copy data to fft_real - memset(fft_img.data(), 0, sizeof(float) * fft_points_); - memset(fft_real.data() + frame_length_, 0, - sizeof(float) * (fft_points_ - frame_length_)); - memcpy(fft_real.data(), data.data(), sizeof(float) * frame_length_); - fft(bitrev_.data(), sintbl_.data(), fft_real.data(), fft_img.data(), - fft_points_); - // power - for (int j = 0; j < fft_points_ / 2; ++j) { - power[j] = fft_real[j] * fft_real[j] + fft_img[j] * fft_img[j]; - } - - (*feat)[i].resize(num_bins_); - // cepstral coefficients, triangle filter array - for (int j = 0; j < num_bins_; ++j) { - float mel_energy = 0.0; - int s = bins_[j].first; - for (size_t k = 0; k < bins_[j].second.size(); ++k) { - mel_energy += bins_[j].second[k] * power[s + k]; - } - // optional use log - if 
(use_log_) { - if (mel_energy < std::numeric_limits::epsilon()) - mel_energy = std::numeric_limits::epsilon(); - mel_energy = logf(mel_energy); - } - - (*feat)[i][j] = mel_energy; - } - } - return num_frames; - } - - private: - int num_bins_; - int sample_rate_; - int frame_length_, frame_shift_; - int fft_points_; - bool use_log_; - bool remove_dc_offset_; - std::vector center_freqs_; - std::vector>> bins_; - std::vector povey_window_; - std::default_random_engine generator_; - std::normal_distribution distribution_; - float dither_; - - // bit reversal table - std::vector bitrev_; - // trigonometric function table - std::vector sintbl_; -}; - -} // namespace wenet - -#endif // FRONTEND_FBANK_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/feature_pipeline.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/feature_pipeline.cc deleted file mode 100644 index ab450b15cd35ebd8101a3bcdec4f963a73bed10c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/feature_pipeline.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
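
The `Fbank` constructor deleted above precomputes, for every mel bin, a triangular weight vector over the FFT magnitude bins: each triangle spans two mel-spaced intervals and peaks at the bin's center frequency. A minimal standalone sketch of that construction follows; it is not taken from these sources, and the 80-bin / 16 kHz / 512-point values simply follow the defaults noted in `feature_pipeline.h` (25 ms frames rounded up to the next power of two).

```cpp
// Standalone sketch of the triangular mel filter-bank construction used by
// the deleted Fbank class. Values are illustrative defaults, not repo code.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

static float MelScale(float freq) { return 1127.0f * std::log(1.0f + freq / 700.0f); }

int main() {
  const int sample_rate = 16000, fft_points = 512, num_bins = 80;
  const int num_fft_bins = fft_points / 2;
  const float fft_bin_width = static_cast<float>(sample_rate) / fft_points;
  const float mel_low = MelScale(20.0f);                // low cutoff at 20 Hz
  const float mel_high = MelScale(sample_rate / 2.0f);  // Nyquist frequency
  const float mel_delta = (mel_high - mel_low) / (num_bins + 1);

  std::vector<std::vector<float>> bins(num_bins, std::vector<float>(num_fft_bins, 0.0f));
  for (int bin = 0; bin < num_bins; ++bin) {
    const float left = mel_low + bin * mel_delta;
    const float center = left + mel_delta;
    const float right = center + mel_delta;
    for (int i = 0; i < num_fft_bins; ++i) {
      const float mel = MelScale(fft_bin_width * i);
      if (mel > left && mel < right) {
        // Rise linearly up to the center frequency, then fall linearly.
        bins[bin][i] = mel <= center ? (mel - left) / (center - left)
                                     : (right - mel) / (right - center);
      }
    }
  }
  std::printf("peak weight of bin 0: %.3f\n",
              *std::max_element(bins[0].begin(), bins[0].end()));
  return 0;
}
```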
- -#include "frontend/feature_pipeline.h" - -#include -#include - -namespace wenet { - -FeaturePipeline::FeaturePipeline(const FeaturePipelineConfig& config) - : config_(config), - feature_dim_(config.num_bins), - fbank_(config.num_bins, config.sample_rate, config.frame_length, - config.frame_shift), - num_frames_(0), - input_finished_(false) {} - -void FeaturePipeline::AcceptWaveform(const float* pcm, const int size) { - std::vector> feats; - std::vector waves; - waves.insert(waves.end(), remained_wav_.begin(), remained_wav_.end()); - waves.insert(waves.end(), pcm, pcm + size); - int num_frames = fbank_.Compute(waves, &feats); - feature_queue_.Push(std::move(feats)); - num_frames_ += num_frames; - - int left_samples = waves.size() - config_.frame_shift * num_frames; - remained_wav_.resize(left_samples); - std::copy(waves.begin() + config_.frame_shift * num_frames, waves.end(), - remained_wav_.begin()); - // We are still adding wave, notify input is not finished - finish_condition_.notify_one(); -} - -void FeaturePipeline::AcceptWaveform(const int16_t* pcm, const int size) { - auto* float_pcm = new float[size]; - for (size_t i = 0; i < size; i++) { - float_pcm[i] = static_cast(pcm[i]); - } - this->AcceptWaveform(float_pcm, size); - delete[] float_pcm; -} - -void FeaturePipeline::set_input_finished() { - CHECK(!input_finished_); - { - std::lock_guard lock(mutex_); - input_finished_ = true; - } - finish_condition_.notify_one(); -} - -bool FeaturePipeline::ReadOne(std::vector* feat) { - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or set_input_finished() - finish_condition_.wait(lock); - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - return false; - } - } -} - -bool FeaturePipeline::Read(int num_frames, - std::vector>* feats) { - feats->clear(); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or set_input_finished() - finish_condition_.wait(lock); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. 
- if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - *feats = std::move(feature_queue_.Pop(feature_queue_.Size())); - return false; - } - } -} - -void FeaturePipeline::Reset() { - input_finished_ = false; - num_frames_ = 0; - remained_wav_.clear(); - feature_queue_.Clear(); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/feature_pipeline.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/feature_pipeline.h deleted file mode 100644 index 9918d6b573255795e0e665f0a9598c44be625c19..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/feature_pipeline.h +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef FRONTEND_FEATURE_PIPELINE_H_ -#define FRONTEND_FEATURE_PIPELINE_H_ - -#include -#include -#include -#include - -#include "frontend/fbank.h" -#include "utils/blocking_queue.h" -#include "utils/log.h" - -namespace wenet { - -struct FeaturePipelineConfig { - int num_bins; - int sample_rate; - int frame_length; - int frame_shift; - FeaturePipelineConfig(int num_bins, int sample_rate) - : num_bins(num_bins), // 80 dim fbank - sample_rate(sample_rate) { // 16k sample rate - frame_length = sample_rate / 1000 * 25; // frame length 25ms - frame_shift = sample_rate / 1000 * 10; // frame shift 10ms - } - - void Info() const { - LOG(INFO) << "feature pipeline config" - << " num_bins " << num_bins << " frame_length " << frame_length - << " frame_shift " << frame_shift; - } -}; - -// Typically, FeaturePipeline is used in two threads: one thread A calls -// AcceptWaveform() to add raw wav data and set_input_finished() to notice -// the end of input wav, another thread B (decoder thread) calls Read() to -// consume features.So a BlockingQueue is used to make this class thread safe. - -// The Read() is designed as a blocking method when there is no feature -// in feature_queue_ and the input is not finished. - -// See bin/decoder_main.cc, websocket/websocket_server.cc and -// decoder/torch_asr_decoder.cc for usage - -class FeaturePipeline { - public: - explicit FeaturePipeline(const FeaturePipelineConfig& config); - - // The feature extraction is done in AcceptWaveform(). - void AcceptWaveform(const float* pcm, const int size); - void AcceptWaveform(const int16_t* pcm, const int size); - - // Current extracted frames number. - int num_frames() const { return num_frames_; } - int feature_dim() const { return feature_dim_; } - const FeaturePipelineConfig& config() const { return config_; } - - // The caller should call this method when speech input is end. - // Never call AcceptWaveform() after calling set_input_finished() ! 
- void set_input_finished(); - bool input_finished() const { return input_finished_; } - - // Return False if input is finished and no feature could be read. - // Return True if a feature is read. - // This function is a blocking method. It will block the thread when - // there is no feature in feature_queue_ and the input is not finished. - bool ReadOne(std::vector* feat); - - // Read #num_frames frame features. - // Return False if less than #num_frames features are read and the - // input is finished. - // Return True if #num_frames features are read. - // This function is a blocking method when there is no feature - // in feature_queue_ and the input is not finished. - bool Read(int num_frames, std::vector>* feats); - - void Reset(); - bool IsLastFrame(int frame) const { - return input_finished_ && (frame == num_frames_ - 1); - } - - int NumQueuedFrames() const { return feature_queue_.Size(); } - - private: - const FeaturePipelineConfig& config_; - int feature_dim_; - Fbank fbank_; - - BlockingQueue> feature_queue_; - int num_frames_; - bool input_finished_; - - // The feature extraction is done in AcceptWaveform(). - // This waveform sample points are consumed by frame size. - // The residual waveform sample points after framing are - // kept to be used in next AcceptWaveform() calling. - std::vector remained_wav_; - - // Used to block the Read when there is no feature in feature_queue_ - // and the input is not finished. - mutable std::mutex mutex_; - std::condition_variable finish_condition_; -}; - -} // namespace wenet - -#endif // FRONTEND_FEATURE_PIPELINE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/fft.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/fft.cc deleted file mode 100644 index 9e05f854e79ea733d0411045385e924c2670b7f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/fft.cc +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
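
The usage comment in the deleted `feature_pipeline.h` describes a producer/consumer split: one thread feeds waveform chunks and finally calls `set_input_finished()`, while the decoder thread blocks in `Read()`/`ReadOne()` until features arrive or the input is declared finished. The sketch below reproduces just that pattern with a small blocking queue; it is illustrative only and does not use the repository's classes.

```cpp
// Standalone sketch of the two-thread usage pattern documented in the
// deleted feature_pipeline.h: thread A feeds data and signals end-of-input,
// thread B blocks on reads until data arrives or the input is finished.
#include <condition_variable>
#include <cstdio>
#include <deque>
#include <mutex>
#include <thread>
#include <vector>

class BlockingFeed {
 public:
  void Push(std::vector<float> frame) {
    { std::lock_guard<std::mutex> lk(mu_); q_.push_back(std::move(frame)); }
    cv_.notify_one();
  }
  void SetInputFinished() {
    { std::lock_guard<std::mutex> lk(mu_); finished_ = true; }
    cv_.notify_one();
  }
  // Returns false only when input is finished and the queue is drained.
  bool ReadOne(std::vector<float>* frame) {
    std::unique_lock<std::mutex> lk(mu_);
    cv_.wait(lk, [this] { return !q_.empty() || finished_; });
    if (q_.empty()) return false;
    *frame = std::move(q_.front());
    q_.pop_front();
    return true;
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  std::deque<std::vector<float>> q_;
  bool finished_ = false;
};

int main() {
  BlockingFeed feed;
  std::thread producer([&] {
    for (int i = 0; i < 5; ++i) feed.Push(std::vector<float>(80, 0.0f));
    feed.SetInputFinished();  // never push after this, as the header warns
  });
  std::vector<float> frame;
  int n = 0;
  while (feed.ReadOne(&frame)) ++n;  // decoder-thread side
  producer.join();
  std::printf("consumed %d frames\n", n);
  return 0;
}
```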
- - -#include -#include -#include - -#include "frontend/fft.h" - -namespace wenet { - -void make_sintbl(int n, float* sintbl) { - int i, n2, n4, n8; - float c, s, dc, ds, t; - - n2 = n / 2; - n4 = n / 4; - n8 = n / 8; - t = sin(M_PI / n); - dc = 2 * t * t; - ds = sqrt(dc * (2 - dc)); - t = 2 * dc; - c = sintbl[n4] = 1; - s = sintbl[0] = 0; - for (i = 1; i < n8; ++i) { - c -= dc; - dc += t * c; - s += ds; - ds -= t * s; - sintbl[i] = s; - sintbl[n4 - i] = c; - } - if (n8 != 0) sintbl[n8] = sqrt(0.5); - for (i = 0; i < n4; ++i) sintbl[n2 - i] = sintbl[i]; - for (i = 0; i < n2 + n4; ++i) sintbl[i + n2] = -sintbl[i]; -} - -void make_bitrev(int n, int* bitrev) { - int i, j, k, n2; - - n2 = n / 2; - i = j = 0; - for (;;) { - bitrev[i] = j; - if (++i >= n) break; - k = n2; - while (k <= j) { - j -= k; - k /= 2; - } - j += k; - } -} - -// bitrev: bit reversal table -// sintbl: trigonometric function table -// x:real part -// y:image part -// n: fft length -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n) { - int i, j, k, ik, h, d, k2, n4, inverse; - float t, s, c, dx, dy; - - /* preparation */ - if (n < 0) { - n = -n; - inverse = 1; /* inverse transform */ - } else { - inverse = 0; - } - n4 = n / 4; - if (n == 0) { - return 0; - } - - /* bit reversal */ - for (i = 0; i < n; ++i) { - j = bitrev[i]; - if (i < j) { - t = x[i]; - x[i] = x[j]; - x[j] = t; - t = y[i]; - y[i] = y[j]; - y[j] = t; - } - } - - /* transformation */ - for (k = 1; k < n; k = k2) { - h = 0; - k2 = k + k; - d = n / k2; - for (j = 0; j < k; ++j) { - c = sintbl[h + n4]; - if (inverse) - s = -sintbl[h]; - else - s = sintbl[h]; - for (i = j; i < n; i += k2) { - ik = i + k; - dx = s * y[ik] + c * x[ik]; - dy = c * y[ik] - s * x[ik]; - x[ik] = x[i] - dx; - x[i] += dx; - y[ik] = y[i] - dy; - y[i] += dy; - } - h += d; - } - } - if (inverse) { - /* divide by n in case of the inverse transformation */ - for (i = 0; i < n; ++i) { - x[i] /= n; - y[i] /= n; - } - } - return 0; /* finished successfully */ -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/fft.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/fft.h deleted file mode 100644 index 6b92e406c44b4768eaee6e734f55bb39cd9af28b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/fft.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
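
The deleted `fft.cc` implements an in-place radix-2 FFT: `make_bitrev()` builds the index permutation applied before the butterfly passes, and `make_sintbl()` caches the sine/cosine values those passes reuse. A small standalone sketch of the bit-reversal table (same algorithm at a trivial length; not repository code):

```cpp
// Sketch of the bit-reversal permutation built by the deleted make_bitrev():
// the in-place radix-2 FFT first reorders the input by this table, then runs
// log2(n) butterfly passes over it.
#include <cstdio>

int main() {
  const int n = 8;  // FFT length (must be a power of two)
  int bitrev[n];
  int j = 0;
  for (int i = 0;;) {
    bitrev[i] = j;
    if (++i >= n) break;
    int k = n / 2;
    while (k <= j) { j -= k; k /= 2; }
    j += k;
  }
  for (int i = 0; i < n; ++i) std::printf("bitrev[%d] = %d\n", i, bitrev[i]);
  // Expected for n = 8: 0 4 2 6 1 5 3 7 (each index with its bits reversed).
  return 0;
}
```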
- - -#ifndef FRONTEND_FFT_H_ -#define FRONTEND_FFT_H_ - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -namespace wenet { - -// Fast Fourier Transform - -void make_sintbl(int n, float* sintbl); - -void make_bitrev(int n, int* bitrev); - -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n); - -} // namespace wenet - -#endif // FRONTEND_FFT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/wav.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/wav.h deleted file mode 100644 index 688a049a940ebbdc83f24e59134fff22b7b09bfd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/frontend/wav.h +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright (c) 2016 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef FRONTEND_WAV_H_ -#define FRONTEND_WAV_H_ - -#include -#include -#include -#include -#include - -#include - -#include "utils/log.h" - -namespace wenet { - -struct WavHeader { - char riff[4] = {'R', 'I', 'F', 'F'}; - unsigned int size = 0; - char wav[4] = {'W', 'A', 'V', 'E'}; - char fmt[4] = {'f', 'm', 't', ' '}; - unsigned int fmt_size = 16; - uint16_t format = 1; - uint16_t channels = 0; - unsigned int sample_rate = 0; - unsigned int bytes_per_second = 0; - uint16_t block_size = 0; - uint16_t bit = 0; - char data[4] = {'d', 'a', 't', 'a'}; - unsigned int data_size = 0; - - WavHeader() {} - - WavHeader(int num_samples, int num_channel, int sample_rate, - int bits_per_sample) { - data_size = num_samples * num_channel * (bits_per_sample / 8); - size = sizeof(WavHeader) - 8 + data_size; - channels = num_channel; - this->sample_rate = sample_rate; - bytes_per_second = sample_rate * num_channel * (bits_per_sample / 8); - block_size = num_channel * (bits_per_sample / 8); - bit = bits_per_sample; - } -}; - -class WavReader { - public: - WavReader() : data_(nullptr) {} - explicit WavReader(const std::string& filename) { Open(filename); } - - bool Open(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "rb"); - if (NULL == fp) { - LOG(WARNING) << "Error in read " << filename; - return false; - } - - WavHeader header; - fread(&header, 1, sizeof(header), fp); - if (header.fmt_size < 16) { - fprintf(stderr, - "WaveData: expect PCM format data " - "to have fmt chunk of at least size 16.\n"); - return false; - } else if (header.fmt_size > 16) { - int offset = 44 - 8 + header.fmt_size - 16; - fseek(fp, offset, SEEK_SET); - fread(header.data, 8, sizeof(char), fp); - } - // check "RIFF" "WAVE" "fmt " "data" - - // Skip any sub-chunks between "fmt" and "data". Usually there will - // be a single "fact" sub chunk, but on Windows there can also be a - // "list" sub chunk. - while (0 != strncmp(header.data, "data", 4)) { - // We will just ignore the data in these chunks. 
- fseek(fp, header.data_size, SEEK_CUR); - // read next sub chunk - fread(header.data, 8, sizeof(char), fp); - } - - num_channel_ = header.channels; - sample_rate_ = header.sample_rate; - bits_per_sample_ = header.bit; - int num_data = header.data_size / (bits_per_sample_ / 8); - data_ = new float[num_data]; - num_samples_ = num_data / num_channel_; - - for (int i = 0; i < num_data; ++i) { - switch (bits_per_sample_) { - case 8: { - char sample; - fread(&sample, 1, sizeof(char), fp); - data_[i] = static_cast(sample); - break; - } - case 16: { - int16_t sample; - fread(&sample, 1, sizeof(int16_t), fp); - data_[i] = static_cast(sample); - break; - } - case 32: { - int sample; - fread(&sample, 1, sizeof(int), fp); - data_[i] = static_cast(sample); - break; - } - default: - fprintf(stderr, "unsupported quantization bits"); - exit(1); - } - } - fclose(fp); - return true; - } - - int num_channel() const { return num_channel_; } - int sample_rate() const { return sample_rate_; } - int bits_per_sample() const { return bits_per_sample_; } - int num_samples() const { return num_samples_; } - - ~WavReader() { - delete[] data_; - } - - const float* data() const { return data_; } - - private: - int num_channel_; - int sample_rate_; - int bits_per_sample_; - int num_samples_; // sample points per channel - float* data_; -}; - -class WavWriter { - public: - WavWriter(const float* data, int num_samples, int num_channel, - int sample_rate, int bits_per_sample) - : data_(data), - num_samples_(num_samples), - num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample) {} - - void Write(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "wb"); - WavHeader header(num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fwrite(&header, 1, sizeof(header), fp); - - for (int i = 0; i < num_samples_; ++i) { - for (int j = 0; j < num_channel_; ++j) { - switch (bits_per_sample_) { - case 8: { - char sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 16: { - int16_t sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 32: { - int sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - } - } - } - fclose(fp); - } - - private: - const float* data_; - int num_samples_; // total float points in data_ - int num_channel_; - int sample_rate_; - int bits_per_sample_; -}; - -class StreamWavWriter { - public: - StreamWavWriter(int num_channel, int sample_rate, int bits_per_sample) - : num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample), - total_num_samples_(0) {} - - StreamWavWriter(const std::string& filename, int num_channel, - int sample_rate, int bits_per_sample) - : StreamWavWriter(num_channel, sample_rate, bits_per_sample) { - Open(filename); - } - - void Open(const std::string& filename) { - fp_ = fopen(filename.c_str(), "wb"); - fseek(fp_, sizeof(WavHeader), SEEK_SET); - } - - void Write(const int16_t* sample_data, size_t num_samples) { - fwrite(sample_data, sizeof(int16_t), num_samples, fp_); - total_num_samples_ += num_samples; - } - - void Close() { - WavHeader header(total_num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fseek(fp_, 0L, SEEK_SET); - fwrite(&header, 1, sizeof(header), fp_); - fclose(fp_); - } - - private: - FILE* fp_; - int num_channel_; - int sample_rate_; - int bits_per_sample_; - size_t total_num_samples_; -}; - -} 
// namespace wenet - -#endif // FRONTEND_WAV_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/CMakeLists.txt deleted file mode 100644 index 2a152dd0d38cdc17d2758d7dbd542cd974d5f0c6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -# compile wenet.proto -set(PROTO_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -add_custom_command( - OUTPUT ${PROTO_DIR}/wenet.pb.cc - ${PROTO_DIR}/wenet.pb.h - ${PROTO_DIR}/wenet.grpc.pb.cc - ${PROTO_DIR}/wenet.grpc.pb.h - COMMAND ${protobuf_BINARY_DIR}/protoc - ARGS --grpc_out "${PROTO_DIR}" - --cpp_out "${PROTO_DIR}" - -I "${PROTO_DIR}" - --plugin=protoc-gen-grpc=${grpc_BINARY_DIR}/grpc_cpp_plugin - wenet.proto) - -# grpc_server/client -link_directories(${protobuf_BINARY_DIR}/lib) -add_library(wenet_grpc STATIC - grpc_client.cc - grpc_server.cc - wenet.pb.cc - wenet.grpc.pb.cc -) -target_link_libraries(wenet_grpc PUBLIC grpc++ grpc++_reflection decoder) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_client.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_client.cc deleted file mode 100644 index 7a2e3f6f384980b6566468213d3eead43a404070..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_client.cc +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
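
The deleted `wav.h` reads and writes canonical 44-byte RIFF/WAVE headers (`WavHeader`) around raw PCM samples; `WavWriter::Write()` simply emits the header followed by the sample data. Below is a self-contained sketch of that layout, writing one second of 16-bit mono silence at 16 kHz; the packed struct and output file name are assumptions for illustration, not repository code.

```cpp
// Sketch of the 44-byte RIFF header layout declared by the deleted wav.h,
// used here to write 1 s of 16-bit mono PCM silence at 16 kHz.
#include <cstdint>
#include <cstdio>
#include <vector>

#pragma pack(push, 1)
struct WavHeader {
  char riff[4] = {'R', 'I', 'F', 'F'};
  uint32_t size = 0;  // total file size minus 8 bytes
  char wave[4] = {'W', 'A', 'V', 'E'};
  char fmt[4] = {'f', 'm', 't', ' '};
  uint32_t fmt_size = 16;
  uint16_t format = 1;  // PCM
  uint16_t channels = 1;
  uint32_t sample_rate = 16000;
  uint32_t bytes_per_second = 16000 * 2;
  uint16_t block_size = 2;
  uint16_t bit = 16;
  char data[4] = {'d', 'a', 't', 'a'};
  uint32_t data_size = 0;
};
#pragma pack(pop)

int main() {
  std::vector<int16_t> samples(16000, 0);  // 1 second of silence
  WavHeader h;
  h.data_size = static_cast<uint32_t>(samples.size() * sizeof(int16_t));
  h.size = sizeof(WavHeader) - 8 + h.data_size;
  FILE* fp = std::fopen("silence.wav", "wb");
  if (!fp) return 1;
  std::fwrite(&h, 1, sizeof(h), fp);
  std::fwrite(samples.data(), sizeof(int16_t), samples.size(), fp);
  std::fclose(fp);
  return 0;
}
```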
- -#include "grpc/grpc_client.h" - -#include "utils/log.h" - -namespace wenet { -using grpc::Channel; -using grpc::ClientContext; -using grpc::ClientReaderWriter; -using grpc::Status; -using wenet::Request; -using wenet::Response; - -GrpcClient::GrpcClient(const std::string& host, int port, int nbest, - bool continuous_decoding) - : host_(host), - port_(port), - nbest_(nbest), - continuous_decoding_(continuous_decoding) { - Connect(); - t_.reset(new std::thread(&GrpcClient::ReadLoopFunc, this)); -} - -void GrpcClient::Connect() { - channel_ = grpc::CreateChannel(host_ + ":" + std::to_string(port_), - grpc::InsecureChannelCredentials()); - stub_ = ASR::NewStub(channel_); - context_ = std::make_shared(); - stream_ = stub_->Recognize(context_.get()); - request_ = std::make_shared(); - response_ = std::make_shared(); - request_->mutable_decode_config()->set_nbest_config(nbest_); - request_->mutable_decode_config()->set_continuous_decoding_config( - continuous_decoding_); - stream_->Write(*request_); -} - -void GrpcClient::SendBinaryData(const void* data, size_t size) { - const int16_t* pdata = reinterpret_cast(data); - request_->set_audio_data(pdata, size); - stream_->Write(*request_); -} - -void GrpcClient::ReadLoopFunc() { - try { - while (stream_->Read(response_.get())) { - for (int i = 0; i < response_->nbest_size(); i++) { - // you can also traverse wordpieces like demonstrated above - LOG(INFO) << i + 1 << "best " << response_->nbest(i).sentence(); - } - if (response_->status() != Response_Status_ok) { - break; - } - if (response_->type() == Response_Type_speech_end) { - done_ = true; - break; - } - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void GrpcClient::Join() { - stream_->WritesDone(); - t_->join(); - Status status = stream_->Finish(); - if (!status.ok()) { - LOG(INFO) << "Recognize rpc failed."; - } -} -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_client.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_client.h deleted file mode 100644 index 36e36a0f5f5ec5bbb818009fe931e863eaa7fd60..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_client.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef GRPC_GRPC_CLIENT_H_ -#define GRPC_GRPC_CLIENT_H_ - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "grpc/wenet.grpc.pb.h" -#include "utils/utils.h" - -namespace wenet { - -using grpc::Channel; -using grpc::ClientContext; -using grpc::ClientReaderWriter; -using wenet::ASR; -using wenet::Request; -using wenet::Response; - -class GrpcClient { - public: - GrpcClient(const std::string& host, int port, int nbest, - bool continuous_decoding); - - void SendBinaryData(const void* data, size_t size); - void ReadLoopFunc(); - void Join(); - bool done() const { return done_; } - - private: - void Connect(); - std::string host_; - int port_; - std::shared_ptr channel_{nullptr}; - std::unique_ptr stub_{nullptr}; - std::shared_ptr context_{nullptr}; - std::unique_ptr> stream_{nullptr}; - std::shared_ptr request_{nullptr}; - std::shared_ptr response_{nullptr}; - int nbest_ = 1; - bool continuous_decoding_ = false; - bool done_ = false; - std::unique_ptr t_{nullptr}; - - WENET_DISALLOW_COPY_AND_ASSIGN(GrpcClient); -}; - -} // namespace wenet - -#endif // GRPC_GRPC_CLIENT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_server.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_server.cc deleted file mode 100644 index 26268bc02a2f2ea56bb24a1eb379a565f693429a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_server.cc +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "grpc/grpc_server.h" - -namespace wenet { - -using grpc::ServerReaderWriter; -using wenet::Request; -using wenet::Response; - -GrpcConnectionHandler::GrpcConnectionHandler( - ServerReaderWriter* stream, - std::shared_ptr request, std::shared_ptr response, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : stream_(std::move(stream)), - request_(std::move(request)), - response_(std::move(response)), - feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - -void GrpcConnectionHandler::OnSpeechStart() { - LOG(INFO) << "Received speech start signal, start reading speech"; - got_start_tag_ = true; - response_->set_status(Response::ok); - response_->set_type(Response::server_ready); - stream_->Write(*response_); - feature_pipeline_ = std::make_shared(*feature_config_); - decoder_ = std::make_shared(feature_pipeline_, decode_resource_, - *decode_config_); - // Start decoder thread - decode_thread_ = std::make_shared( - &GrpcConnectionHandler::DecodeThreadFunc, this); -} - -void GrpcConnectionHandler::OnSpeechEnd() { - LOG(INFO) << "Received speech end signal"; - CHECK(feature_pipeline_ != nullptr); - feature_pipeline_->set_input_finished(); - got_end_tag_ = true; -} - -void GrpcConnectionHandler::OnPartialResult() { - LOG(INFO) << "Partial result"; - response_->set_status(Response::ok); - response_->set_type(Response::partial_result); - stream_->Write(*response_); -} - -void GrpcConnectionHandler::OnFinalResult() { - LOG(INFO) << "Final result"; - response_->set_status(Response::ok); - response_->set_type(Response::final_result); - stream_->Write(*response_); -} - -void GrpcConnectionHandler::OnFinish() { - // Send finish tag - response_->set_status(Response::ok); - response_->set_type(Response::speech_end); - stream_->Write(*response_); -} - -void GrpcConnectionHandler::OnSpeechData() { - // Read binary PCM data - const int16_t* pcm_data = - reinterpret_cast(request_->audio_data().c_str()); - int num_samples = request_->audio_data().length() / sizeof(int16_t); - VLOG(2) << "Received " << num_samples << " samples"; - CHECK(feature_pipeline_ != nullptr); - CHECK(decoder_ != nullptr); - feature_pipeline_->AcceptWaveform(pcm_data, num_samples); -} - -void GrpcConnectionHandler::SerializeResult(bool finish) { - for (const DecodeResult& path : decoder_->result()) { - Response_OneBest* one_best_ = response_->add_nbest(); - one_best_->set_sentence(path.sentence); - if (finish) { - for (const WordPiece& word_piece : path.word_pieces) { - Response_OnePiece* one_piece_ = one_best_->add_wordpieces(); - one_piece_->set_word(word_piece.word); - one_piece_->set_start(word_piece.start); - one_piece_->set_end(word_piece.end); - } - } - if (response_->nbest_size() == nbest_) { - break; - } - } - return; -} - -void GrpcConnectionHandler::DecodeThreadFunc() { - while (true) { - DecodeState state = decoder_->Decode(); - response_->clear_status(); - response_->clear_type(); - response_->clear_nbest(); - if (state == DecodeState::kEndFeats) { - decoder_->Rescoring(); - SerializeResult(true); - OnFinalResult(); - OnFinish(); - stop_recognition_ = true; - break; - } else if (state == DecodeState::kEndpoint) { - decoder_->Rescoring(); - SerializeResult(true); - OnFinalResult(); - // If it's not continuous decoding, continue to do next recognition - // otherwise stop the recognition - if (continuous_decoding_) { - decoder_->ResetContinuousDecoding(); - } else { - 
OnFinish(); - stop_recognition_ = true; - break; - } - } else { - if (decoder_->DecodedSomething()) { - SerializeResult(false); - OnPartialResult(); - } - } - } -} - -void GrpcConnectionHandler::operator()() { - try { - while (stream_->Read(request_.get())) { - if (!got_start_tag_) { - nbest_ = request_->decode_config().nbest_config(); - continuous_decoding_ = - request_->decode_config().continuous_decoding_config(); - OnSpeechStart(); - } else { - OnSpeechData(); - } - } - OnSpeechEnd(); - LOG(INFO) << "Read all pcm data, wait for decoding thread"; - if (decode_thread_ != nullptr) { - decode_thread_->join(); - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -Status GrpcServer::Recognize(ServerContext* context, - ServerReaderWriter* stream) { - LOG(INFO) << "Get Recognize request" << std::endl; - auto request = std::make_shared(); - auto response = std::make_shared(); - GrpcConnectionHandler handler(stream, request, response, feature_config_, - decode_config_, decode_resource_); - std::thread t(std::move(handler)); - t.join(); - return Status::OK; -} -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_server.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_server.h deleted file mode 100644 index 3ab47ce5b15897c2a596d8ef27f2e7c4f8d26a3f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/grpc_server.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef GRPC_GRPC_SERVER_H_ -#define GRPC_GRPC_SERVER_H_ - -#include -#include -#include -#include -#include -#include - -#include "decoder/asr_decoder.h" -#include "frontend/feature_pipeline.h" -#include "utils/log.h" - -#include "grpc/wenet.grpc.pb.h" - -namespace wenet { - -using grpc::ServerContext; -using grpc::ServerReaderWriter; -using grpc::Status; -using wenet::ASR; -using wenet::Request; -using wenet::Response; - -class GrpcConnectionHandler { - public: - GrpcConnectionHandler(ServerReaderWriter* stream, - std::shared_ptr request, - std::shared_ptr response, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource); - void operator()(); - - private: - void OnSpeechStart(); - void OnSpeechEnd(); - void OnFinish(); - void OnSpeechData(); - void OnPartialResult(); - void OnFinalResult(); - void DecodeThreadFunc(); - void SerializeResult(bool finish); - - bool continuous_decoding_ = false; - int nbest_ = 1; - ServerReaderWriter* stream_; - std::shared_ptr request_; - std::shared_ptr response_; - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - - bool got_start_tag_ = false; - bool got_end_tag_ = false; - // When endpoint is detected, stop recognition, and stop receiving data. 
- bool stop_recognition_ = false; - std::shared_ptr feature_pipeline_ = nullptr; - std::shared_ptr decoder_ = nullptr; - std::shared_ptr decode_thread_ = nullptr; -}; - -class GrpcServer final : public ASR::Service { - public: - GrpcServer(std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - Status Recognize(ServerContext* context, - ServerReaderWriter* reader) override; - - private: - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - DISALLOW_COPY_AND_ASSIGN(GrpcServer); -}; - -} // namespace wenet - -#endif // GRPC_GRPC_SERVER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/wenet.proto b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/wenet.proto deleted file mode 100644 index 4c3033c034c513611c9159ff9db42b225be2cc98..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/grpc/wenet.proto +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
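
`DecodeThreadFunc()` in the deleted `grpc_server.cc` is a small state machine: emit partial results while decoding, rescore and emit a final result on an endpoint or at end of features, and keep going after an endpoint only when continuous decoding is enabled. The sketch below mirrors that control flow against a mock decoder; `MockDecoder` and the `kEndBatch` state name are stand-ins for illustration, not the repository's `AsrDecoder` API.

```cpp
// Standalone sketch of the decode loop used by the deleted grpc_server.cc
// DecodeThreadFunc(), driven here by a mock decoder instead of AsrDecoder.
#include <cstdio>

enum class DecodeState { kEndBatch, kEndpoint, kEndFeats };

struct MockDecoder {
  int step = 0;
  DecodeState Decode() {
    ++step;
    if (step == 3) return DecodeState::kEndpoint;  // pretend silence detected
    if (step >= 6) return DecodeState::kEndFeats;  // pretend input exhausted
    return DecodeState::kEndBatch;
  }
  void Rescoring() { std::puts("rescoring n-best"); }
  void ResetContinuousDecoding() { std::puts("reset for next utterance"); }
  bool DecodedSomething() const { return true; }
};

int main() {
  const bool continuous_decoding = true;
  MockDecoder decoder;
  while (true) {
    DecodeState state = decoder.Decode();
    if (state == DecodeState::kEndFeats) {
      decoder.Rescoring();
      std::puts("final result, speech_end");
      break;
    } else if (state == DecodeState::kEndpoint) {
      decoder.Rescoring();
      std::puts("final result");
      if (continuous_decoding) {
        decoder.ResetContinuousDecoding();  // keep recognizing next utterance
      } else {
        std::puts("speech_end");
        break;
      }
    } else if (decoder.DecodedSomething()) {
      std::puts("partial result");
    }
  }
  return 0;
}
```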
-syntax = "proto3"; - -option java_package = "ex.grpc"; -option objc_class_prefix = "wenet"; - -package wenet; - -service ASR { - rpc Recognize (stream Request) returns (stream Response) {} -} - -message Request { - - message DecodeConfig { - int32 nbest_config = 1; - bool continuous_decoding_config = 2; - } - - oneof RequestPayload { - DecodeConfig decode_config = 1; - bytes audio_data = 2; - } -} - -message Response { - - message OneBest { - string sentence = 1; - repeated OnePiece wordpieces = 2; - } - - message OnePiece { - string word = 1; - int32 start = 2; - int32 end = 3; - } - - enum Status { - ok = 0; - failed = 1; - } - - enum Type { - server_ready = 0; - partial_result = 1; - final_result = 2; - speech_end = 3; - } - - Status status = 1; - Type type = 2; - repeated OneBest nbest = 3; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/CMakeLists.txt deleted file mode 100644 index 4ba414e25bd577575b1baec2eba4bf1c3062b211..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library(http STATIC - http_client.cc - http_server.cc -) -target_link_libraries(http PUBLIC decoder) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_client.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_client.cc deleted file mode 100644 index 50bace0d4e40b073586c744cd85799f7414655e8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_client.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "http/http_client.h" - -#include "boost/json/src.hpp" - -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace net = boost::asio; // from -using tcp = net::ip::tcp; // from -namespace json = boost::json; - -HttpClient::HttpClient(const std::string& hostname, int port) - : hostname_(hostname), port_(port) { - Connect(); -} - -void HttpClient::Connect() { - tcp::resolver resolver{ioc_}; - // Look up the domain name - auto const results = resolver.resolve(hostname_, std::to_string(port_)); - stream_.connect(results); -} - -void HttpClient::SendBinaryData(const void* data, size_t size) { - try { - json::value start_tag = {{"nbest", nbest_}, - {"continuous_decoding", continuous_decoding_}}; - std::string config = json::serialize(start_tag); - req_.set("config", config); - std::size_t encode_size = beast::detail::base64::encoded_size(size); - char encode_data[encode_size]; // NOLINT - beast::detail::base64::encode(encode_data, data, size); - req_.body() = encode_data; - req_.prepare_payload(); - http::write(stream_, req_, ec_); - - http::read(stream_, buffer_, res_); - std::string message = res_.body(); - json::object obj = json::parse(message).as_object(); - LOG(INFO) << message; - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } - stream_.socket().shutdown(tcp::socket::shutdown_both, ec_); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_client.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_client.h deleted file mode 100644 index 803af26a4ef2b5a236570476fb89003014bc0280..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_client.h +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef HTTP_HTTP_CLIENT_H_ -#define HTTP_HTTP_CLIENT_H_ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace net = boost::asio; // from -using tcp = net::ip::tcp; // from - -class HttpClient { - public: - HttpClient(const std::string& host, int port); - - void SendBinaryData(const void* data, size_t size); - void set_nbest(int nbest) { nbest_ = nbest; } - - private: - void Connect(); - std::string hostname_; - int port_; - std::string target_ = "/"; - int version_ = 11; - int nbest_ = 1; - const bool continuous_decoding_ = false; - net::io_context ioc_; - beast::tcp_stream stream_{ioc_}; - beast::flat_buffer buffer_; - http::request req_{http::verb::get, target_, version_}; - http::response res_{http::status::ok, version_}; - beast::error_code ec_; - - WENET_DISALLOW_COPY_AND_ASSIGN(HttpClient); -}; - -} // namespace wenet - -#endif // HTTP_HTTP_CLIENT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_server.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_server.cc deleted file mode 100644 index c839757647554235a9e70a3dfc886a02b4e2cd79..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_server.cc +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "http/http_server.h" - -#include -#include -#include - -#include "boost/json/src.hpp" -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace net = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from -namespace json = boost::json; - -ConnectionHandler::ConnectionHandler( - tcp::socket&& socket, std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : socket_(std::move(socket)), - feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)), - req_(std::make_shared>( - http::verb::post, target_, version_)), - res_(std::make_shared>(http::status::ok, - version_)) {} - -void ConnectionHandler::OnSpeechStart() { - feature_pipeline_ = std::make_shared(*feature_config_); - decoder_ = std::make_shared(feature_pipeline_, decode_resource_, - *decode_config_); - // Start decoder thread - decode_thread_ = - std::make_shared(&ConnectionHandler::DecodeThreadFunc, this); -} - -void ConnectionHandler::OnSpeechEnd() { - if (feature_pipeline_ != nullptr) { - feature_pipeline_->set_input_finished(); - } -} - -void ConnectionHandler::OnFinalResult(const std::string& result) { - LOG(INFO) << "Final result: " << result; - json::value rv = { - {"status", "ok"}, {"type", "final_result"}, {"nbest", result}}; - std::string message = json::serialize(rv); - res_.get()->body() = message; - http::write(socket_, *res_.get(), ec_); -} - -void ConnectionHandler::OnSpeechData(const std::string& message) { - std::size_t decode_size = - beast::detail::base64::decoded_size(message.length()); - int num_samples = decode_size / sizeof(int16_t); - int16_t decode_data[num_samples]; // NOLINT - beast::detail::base64::decode(decode_data, message.c_str(), message.length()); - - // Read binary PCM data - VLOG(2) << "Received " << num_samples << " samples"; - CHECK(feature_pipeline_ != nullptr); - CHECK(decoder_ != nullptr); - feature_pipeline_->AcceptWaveform(decode_data, num_samples); -} - -std::string ConnectionHandler::SerializeResult(bool finish) { - json::array nbest; - for (const DecodeResult& path : decoder_->result()) { - json::object jpath({{"sentence", path.sentence}}); - if (finish) { - json::array word_pieces; - for (const WordPiece& word_piece : path.word_pieces) { - json::object jword_piece({{"word", word_piece.word}, - {"start", word_piece.start}, - {"end", word_piece.end}}); - word_pieces.emplace_back(jword_piece); - } - jpath.emplace("word_pieces", word_pieces); - } - nbest.emplace_back(jpath); - - if (nbest.size() == nbest_) { - break; - } - } - return json::serialize(nbest); -} - -void ConnectionHandler::DecodeThreadFunc() { - try { - while (true) { - DecodeState state = decoder_->Decode(); - if (state == DecodeState::kEndFeats || state == DecodeState::kEndpoint) { - decoder_->Rescoring(); - std::string result = SerializeResult(true); - OnFinalResult(result); - break; - } - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void ConnectionHandler::OnError(const std::string& message) { - json::value rv = {{"status", "failed"}, {"message", message}}; - res_.get()->body() = json::serialize(rv); - http::write(socket_, *res_.get(), ec_); - // Send a TCP shutdown - socket_.shutdown(tcp::socket::shutdown_send, ec_); -} - -void ConnectionHandler::OnText(const std::string& message) { - LOG(INFO) << message; - json::value v = json::parse(message); - if (v.is_object()) { - 
json::object obj = v.get_object(); - if (obj.find("nbest") != obj.end()) { - if (obj["nbest"].is_int64()) { - nbest_ = obj["nbest"].as_int64(); - } else { - OnError("integer is expected for nbest option"); - } - } - } else { - OnError("Wrong protocol"); - } -} - -void ConnectionHandler::operator()() { - try { - http::read(socket_, buffer_, *req_.get(), ec_); - if (ec_) { - LOG(ERROR) << ec_; - } else { - OnText(req_.get()->base()["config"].to_string()); - OnSpeechStart(); - OnSpeechData(req_.get()->body()); - OnSpeechEnd(); - } - LOG(INFO) << "Read all pcm data, wait for decoding thread"; - if (decode_thread_ != nullptr) { - decode_thread_->join(); - } - } catch (beast::system_error const& se) { - LOG(INFO) << se.code().message(); - if (decode_thread_ != nullptr) { - decode_thread_->join(); - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } - socket_.shutdown(tcp::socket::shutdown_send, ec_); -} - -void HttpServer::Start() { - try { - auto const address = net::ip::make_address("0.0.0.0"); - tcp::acceptor acceptor{ioc_, {address, static_cast(port_)}}; - for (;;) { - // This will receive the new connection - tcp::socket socket{ioc_}; - // Block until we get a connection - acceptor.accept(socket); - // Launch the session, transferring ownership of the socket - ConnectionHandler handler(std::move(socket), feature_config_, - decode_config_, decode_resource_); - std::thread t(std::move(handler)); - t.detach(); - } - } catch (const std::exception& e) { - LOG(FATAL) << e.what(); - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_server.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_server.h deleted file mode 100644 index f7304475e0c374dfb2b5308864b5e08dce71ae12..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/http/http_server.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef HTTP_HTTP_SERVER_H_ -#define HTTP_HTTP_SERVER_H_ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "decoder/asr_decoder.h" -#include "frontend/feature_pipeline.h" -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace net = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from - -class ConnectionHandler { - public: - ConnectionHandler(tcp::socket&& socket, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource_); - void operator()(); - - private: - void OnSpeechStart(); - void OnSpeechEnd(); - void OnText(const std::string& message); - void OnSpeechData(const std::string& message); - void OnError(const std::string& message); - void OnFinalResult(const std::string& result); - void DecodeThreadFunc(); - std::string SerializeResult(bool finish); - - std::string target_ = "/"; - int version_ = 11; - const bool continuous_decoding_ = false; - int nbest_ = 1; - tcp::socket socket_; - beast::flat_buffer buffer_; - beast::error_code ec_; - std::shared_ptr> req_; - std::shared_ptr> res_; - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - - std::shared_ptr feature_pipeline_ = nullptr; - std::shared_ptr decoder_ = nullptr; - std::shared_ptr decode_thread_ = nullptr; -}; - -class HttpServer { - public: - HttpServer(int port, std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : port_(port), - feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - - void Start(); - - private: - int port_; - // The io_context is required for all I/O - net::io_context ioc_{1}; - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - WENET_DISALLOW_COPY_AND_ASSIGN(HttpServer); -}; - -} // namespace wenet - -#endif // HTTP_HTTP_SERVER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/CMakeLists.txt deleted file mode 100644 index b072309e44b90dcee44ea31e9bcbc1741e73f151..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -cmake_minimum_required(VERSION 3.10 FATAL_ERROR) - -project(kaldi) - -# include_directories() is called in the root CMakeLists.txt - -add_library(kaldi-util - base/kaldi-error.cc - base/kaldi-math.cc - util/kaldi-io.cc - util/parse-options.cc - util/simple-io-funcs.cc - util/text-utils.cc -) -target_link_libraries(kaldi-util PUBLIC utils) - -add_library(kaldi-decoder - lat/determinize-lattice-pruned.cc - lat/lattice-functions.cc - decoder/lattice-faster-decoder.cc - decoder/lattice-faster-online-decoder.cc -) -target_link_libraries(kaldi-decoder PUBLIC kaldi-util) - -if(GRAPH_TOOLS) - # Arpa binary - add_executable(arpa2fst - lm/arpa-file-parser.cc - lm/arpa-lm-compiler.cc - lmbin/arpa2fst.cc - ) - target_link_libraries(arpa2fst PUBLIC kaldi-util) - - # FST tools binary - set(FST_BINS - fstaddselfloops - fstdeterminizestar - fstisstochastic - fstminimizeencoded - fsttablecompose - ) - - if(NOT MSVC) - # dl is for dynamic linking, otherwise there is a linking error on linux - link_libraries(dl) - 
endif() - foreach(name IN LISTS FST_BINS) - add_executable(${name} - fstbin/${name}.cc - fstext/kaldi-fst-io.cc - ) - target_link_libraries(${name} PUBLIC kaldi-util) - endforeach() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/README.md deleted file mode 100644 index 4eb9c9173b747686f00b658afc5e1e0dfdc17e68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/README.md +++ /dev/null @@ -1,21 +0,0 @@ -We use Kaldi decoder to implement TLG based language model integration, -so we copied related files to this directory. -The main changes are: - -1. To minimize the change, we use the same directories tree as Kaldi. - -2. We replace Kaldi log system with glog in the following way. - -``` c++ -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_INFO \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) -``` - -3. We lint all the files to satisfy the lint in WeNet. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/io-funcs-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/io-funcs-inl.h deleted file mode 100644 index 9397400833676b323492321183c989cec2f41c3f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/io-funcs-inl.h +++ /dev/null @@ -1,329 +0,0 @@ -// base/io-funcs-inl.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian; -// Johns Hopkins University (Author: Daniel Povey) -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_INL_H_ -#define KALDI_BASE_IO_FUNCS_INL_H_ 1 - -// Do not include this file directly. It is included by base/io-funcs.h - -#include -#include -#include - -namespace kaldi { - -// Template that covers integers. -template -void WriteBasicType(std::ostream &os, bool binary, T t) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char len_c = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(t)); - os.put(len_c); - os.write(reinterpret_cast(&t), sizeof(t)); - } else { - if (sizeof(t) == 1) - os << static_cast(t) << " "; - else - os << t << " "; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteBasicType."; - } -} - -// Template that covers integers. 
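The integer writer above encodes each value in binary mode as a one-byte size marker (positive for signed types, negative for unsigned), followed by the raw bytes of the value in host byte order. A self-contained re-implementation of that encoding, included only to make the on-stream layout concrete (the `Demo` suffix marks editorial names, not Kaldi symbols):

```cpp
#include <cstdint>
#include <iostream>
#include <limits>
#include <sstream>

template <class T>
void WriteBasicTypeDemo(std::ostream &os, T t) {
  // One signed marker byte: +sizeof(T) for signed integer types,
  // -sizeof(T) for unsigned ones, then the raw bytes of the value.
  char len_c = (std::numeric_limits<T>::is_signed ? 1 : -1) *
               static_cast<char>(sizeof(t));
  os.put(len_c);
  os.write(reinterpret_cast<const char *>(&t), sizeof(t));
}

int main() {
  std::ostringstream bin;
  WriteBasicTypeDemo<int32_t>(bin, 42);
  // int32 => 1 marker byte (value 4) + 4 value bytes = 5 bytes in the stream.
  std::cout << bin.str().size() << " bytes, marker = "
            << static_cast<int>(bin.str()[0]) << "\n";  // prints: 5 bytes, marker = 4
  return 0;
}
```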
-template -inline void ReadBasicType(std::istream &is, bool binary, T *t) { - KALDI_PARANOID_ASSERT(t != NULL); - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - int len_c_in = is.get(); - if (len_c_in == -1) - KALDI_ERR << "ReadBasicType: encountered end of stream."; - char len_c = static_cast(len_c_in), - len_c_expected = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(*t)); - if (len_c != len_c_expected) { - KALDI_ERR << "ReadBasicType: did not get expected integer type, " - << static_cast(len_c) << " vs. " - << static_cast(len_c_expected) - << ". You can change this code to successfully" - << " read it later, if needed."; - // insert code here to read "wrong" type. Might have a switch statement. - } - is.read(reinterpret_cast(t), sizeof(*t)); - } else { - if (sizeof(*t) == 1) { - int16 i; - is >> i; - *t = i; - } else { - is >> *t; - } - } - if (is.fail()) { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << is.peek(); - } -} - -// Template that covers integers. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. - os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz * 2); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector >::const_iterator iter = v.begin(), - end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(iter->first) << ',' - << static_cast(iter->second) << ' '; - else - os << iter->first << ',' << iter->second << ' '; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerPairVector."; - } -} - -// Template that covers integers. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerPairVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz * 2); - } - } else { - std::vector > tmp_v; // use temporary so v doesn't use - // extra memory due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. 
- int16 next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::make_pair((T)next_t1, (T)next_t2)); - } else { - T next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::pair(next_t1, next_t2)); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. - } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerPairVector: read failure at file position " - << is.tellg(); -} - -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. - os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(*iter) << " "; - else - os << *iter << " "; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerVector."; - } -} - -template -inline void ReadIntegerVector(std::istream &is, bool binary, - std::vector *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz); - } - } else { - std::vector tmp_v; // use temporary so v doesn't use extra memory - // due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. - int16 next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back((T)next_t); - } else { - T next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(next_t); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. 
- } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerVector: read failure at file position " - << is.tellg(); -} - -// Initialize an opened stream for writing by writing an optional binary -// header and modifying the floating-point precision. -inline void InitKaldiOutputStream(std::ostream &os, bool binary) { - // This does not throw exceptions (does not check for errors). - if (binary) { - os.put('\0'); - os.put('B'); - } - // Note, in non-binary mode we may at some point want to mess with - // the precision a bit. - // 7 is a bit more than the precision of float.. - if (os.precision() < 7) os.precision(7); -} - -/// Initialize an opened stream for reading by detecting the binary header and -// setting the "binary" value appropriately. -inline bool InitKaldiInputStream(std::istream &is, bool *binary) { - // Sets the 'binary' variable. - // Throws exception in the very unusual situation that stream - // starts with '\0' but not then 'B'. - - if (is.peek() == '\0') { // seems to be binary - is.get(); - if (is.peek() != 'B') { - return false; - } - is.get(); - *binary = true; - return true; - } else { - *binary = false; - return true; - } -} - -} // end namespace kaldi. - -#endif // KALDI_BASE_IO_FUNCS_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/io-funcs.cc deleted file mode 100644 index bd6c350780d1096ff8c452fd00864aa07a30ac65..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/io-funcs.cc +++ /dev/null @@ -1,215 +0,0 @@ -// base/io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" - -namespace kaldi { - -template <> -void WriteBasicType(std::ostream &os, bool binary, bool b) { - os << (b ? "T" : "F"); - if (!binary) os << " "; - if (os.fail()) KALDI_ERR << "Write failure in WriteBasicType"; -} - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b) { - KALDI_PARANOID_ASSERT(b != NULL); - if (!binary) is >> std::ws; // eat up whitespace. 
- char c = is.peek(); - if (c == 'T') { - *b = true; - is.get(); - } else if (c == 'F') { - *b = false; - is.get(); - } else { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << CharToString(c); - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, float f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f) { - KALDI_PARANOID_ASSERT(f != NULL); - if (binary) { - double d; - int c = is.peek(); - if (c == sizeof(*f)) { - is.get(); - is.read(reinterpret_cast(f), sizeof(*f)); - } else if (c == sizeof(d)) { - ReadBasicType(is, binary, &d); - *f = d; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *f; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at file position " - << is.tellg(); - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, double *d) { - KALDI_PARANOID_ASSERT(d != NULL); - if (binary) { - float f; - int c = is.peek(); - if (c == sizeof(*d)) { - is.get(); - is.read(reinterpret_cast(d), sizeof(*d)); - } else if (c == sizeof(f)) { - ReadBasicType(is, binary, &f); - *d = f; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *d; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at file position " - << is.tellg(); - } -} - -void CheckToken(const char *token) { - if (*token == '\0') KALDI_ERR << "Token is empty (not a valid token)"; - const char *orig_token = token; - while (*token != '\0') { - if (::isspace(*token)) - KALDI_ERR << "Token is not a valid token (contains space): '" - << orig_token << "'"; - token++; - } -} - -void WriteToken(std::ostream &os, bool binary, const char *token) { - // binary mode is ignored; - // we use space as termination character in either case. - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - os << token << " "; - if (os.fail()) { - KALDI_ERR << "Write failure in WriteToken."; - } -} - -int Peek(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // eat up whitespace. - return is.peek(); -} - -void WriteToken(std::ostream &os, bool binary, const std::string &token) { - WriteToken(os, binary, token.c_str()); -} - -void ReadToken(std::istream &is, bool binary, std::string *str) { - KALDI_ASSERT(str != NULL); - if (!binary) is >> std::ws; // consume whitespace. - is >> *str; - if (is.fail()) { - KALDI_ERR << "ReadToken, failed to read token at file position " - << is.tellg(); - } - if (!isspace(is.peek())) { - KALDI_ERR << "ReadToken, expected space after token, saw instead " - << CharToString(static_cast(is.peek())) - << ", at file position " << is.tellg(); - } - is.get(); // consume the space. -} - -int PeekToken(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // consume whitespace. - bool read_bracket; - if (static_cast(is.peek()) == '<') { - read_bracket = true; - is.get(); - } else { - read_bracket = false; - } - int ans = is.peek(); - if (read_bracket) { - if (!is.unget()) { - // Clear the bad bit. 
This code can be (and is in fact) reached, since the - // C++ standard does not guarantee that a call to unget() must succeed. - is.clear(); - } - } - return ans; -} - -void ExpectToken(std::istream &is, bool binary, const char *token) { - int pos_at_start = is.tellg(); - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - if (!binary) is >> std::ws; // consume whitespace. - std::string str; - is >> str; - is.get(); // consume the space. - if (is.fail()) { - KALDI_ERR << "Failed to read token [started at file position " - << pos_at_start << "], expected " << token; - } - // The second half of the '&&' expression below is so that if we're expecting - // "", we will accept "Foo>" instead. This is so that the model-reading - // code will tolerate errors in PeekToken where is.unget() failed; search for - // is.clear() in PeekToken() for an explanation. - if (strcmp(str.c_str(), token) != 0 && - !(token[0] == '<' && strcmp(str.c_str(), token + 1) == 0)) { - KALDI_ERR << "Expected token \"" << token << "\", got instead \"" << str - << "\"."; - } -} - -void ExpectToken(std::istream &is, bool binary, const std::string &token) { - ExpectToken(is, binary, token.c_str()); -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/io-funcs.h deleted file mode 100644 index 06ad1e3d2d8dc8385886a7c6653f620642c7c05a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/io-funcs.h +++ /dev/null @@ -1,246 +0,0 @@ -// base/io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_H_ -#define KALDI_BASE_IO_FUNCS_H_ - -// This header only contains some relatively low-level I/O functions. -// The full Kaldi I/O declarations are in ../util/kaldi-io.h -// and ../util/kaldi-table.h -// They were put in util/ in order to avoid making the Matrix library -// dependent on them. - -#include -#include -#include -#include - -#include "base/io-funcs-inl.h" -#include "base/kaldi-common.h" - -namespace kaldi { - -/* - This comment describes the Kaldi approach to I/O. All objects can be written - and read in two modes: binary and text. In addition we want to make the I/O - work if we redefine the typedef "BaseFloat" between floats and doubles. - We also want to have control over whitespace in text mode without affecting - the meaning of the file, for pretty-printing purposes. - - Errors are handled by throwing a KaldiFatalError exception. 
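On disk, the binary and text modes discussed here are told apart by the two-byte `'\0' 'B'` header written by `InitKaldiOutputStream` and detected by `InitKaldiInputStream` (both shown earlier in io-funcs-inl.h). A standalone sketch of that convention, with the helpers re-implemented locally so the example compiles without the Kaldi headers:

```cpp
#include <iostream>
#include <sstream>

// Write the optional binary marker: a NUL byte followed by 'B'.
void WriteKaldiHeaderDemo(std::ostream &os, bool binary) {
  if (binary) {
    os.put('\0');
    os.put('B');
  }
}

// Detect the marker and report whether the stream is binary or text.
bool DetectBinaryDemo(std::istream &is) {
  if (is.peek() == '\0') {
    is.get();  // consume '\0'
    is.get();  // consume 'B'
    return true;
  }
  return false;
}

int main() {
  std::stringstream ss;
  WriteKaldiHeaderDemo(ss, /*binary=*/true);
  ss << "payload";
  std::cout << (DetectBinaryDemo(ss) ? "binary" : "text") << " stream\n";  // binary stream
  return 0;
}
```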
- - For integer and floating-point types (and boolean values): - - WriteBasicType(std::ostream &, bool binary, const T&); - ReadBasicType(std::istream &, bool binary, T*); - - and we expect these functions to be defined in such a way that they work when - the type T changes between float and double, so you can read float into double - and vice versa]. Note that for efficiency and space-saving reasons, the - Vector and Matrix classes do not use these functions [but they preserve the - type interchangeability in their own way] - - For a class (or struct) C: - class C { - .. - Write(std::ostream &, bool binary, [possibly extra optional args for - specific classes]) const; Read(std::istream &, bool binary, [possibly extra - optional args for specific classes]); - .. - } - NOTE: The only actual optional args we used are the "add" arguments in - Vector/Matrix classes, which specify whether we should sum the data already - in the class with the data being read. - - For types which are typedef's involving stl classes, I/O is as follows: - typedef std::vector > MyTypedefName; - - The user should define something like: - - WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t); - ReadMyTypedefName(std::ostream &, bool binary, MyTypedefName *t); - - The user would have to write these functions. - - For a type std::vector: - - void WriteIntegerVector(std::ostream &os, bool binary, const std::vector - &v); void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - - For other types, e.g. vectors of pairs, the user should create a routine of - the type WriteMyTypedefName. This is to avoid introducing confusing templated - functions; we could easily create templated functions to handle most of these - cases but they would have to share the same name. - - It also often happens that the user needs to write/read special tokens as part - of a file. These might be class headers, or separators/identifiers in the - class. We provide special functions for manipulating these. These special - tokens must be nonempty and must not contain any whitespace. - - void WriteToken(std::ostream &os, bool binary, const char*); - void WriteToken(std::ostream &os, bool binary, const std::string & token); - int Peek(std::istream &is, bool binary); - void ReadToken(std::istream &is, bool binary, std::string *str); - void PeekToken(std::istream &is, bool binary, std::string *str); - - WriteToken writes the token and one space (whether in binary or text mode). - - Peek returns the first character of the next token, by consuming whitespace - (in text mode) and then returning the peek() character. It returns -1 at EOF; - it doesn't throw. It's useful if a class can have various forms based on - typedefs and virtual classes, and wants to know which version to read. - - ReadToken allows the caller to obtain the next token. PeekToken works just - like ReadToken, but seeks back to the beginning of the token. A subsequent - call to ReadToken will read the same token again. This is useful when - different object types are written to the same file; using PeekToken one can - decide which of the objects to read. - - There is currently no special functionality for writing/reading strings (where - the strings contain data rather than "special tokens" that are whitespace-free - and nonempty). This is because Kaldi is structured in such a way that strings - don't appear, except as OpenFst symbol table entries (and these have their own - format). 
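The object convention described in the comment above, a `Write(std::ostream&, bool binary)` / `Read(std::istream&, bool binary)` pair with a token marking the object, can be illustrated with a hypothetical struct. The struct, its `<Point>` token, and the text-only serialisation are editorial examples, not types from the tree being removed:

```cpp
#include <iostream>
#include <sstream>
#include <string>

struct Point {
  int x = 0, y = 0;
  void Write(std::ostream &os, bool binary) const {
    // Text form only, for brevity; a real implementation branches on 'binary'.
    os << "<Point> " << x << " " << y << " ";
  }
  void Read(std::istream &is, bool binary) {
    std::string tok;
    is >> tok;       // expect the "<Point>" marker
    is >> x >> y;
  }
};

int main() {
  std::stringstream ss;
  Point a{3, 4};
  a.Write(ss, /*binary=*/false);
  Point b;
  b.Read(ss, /*binary=*/false);
  std::cout << b.x << " " << b.y << "\n";  // prints: 3 4
  return 0;
}
```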
- - - NOTE: you should not call ReadIntegerType and WriteIntegerType with types, - such as int and size_t, that are machine-independent -- at least not - if you want your file formats to port between machines. Use int32 and - int64 where necessary. There is no way to detect this using compile-time - assertions because C++ only keeps track of the internal representation of - the type. -*/ - -/// \addtogroup io_funcs_basic -/// @{ - -/// WriteBasicType is the name of the write function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void WriteBasicType(std::ostream &os, bool binary, T t); - -/// ReadBasicType is the name of the read function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void ReadBasicType(std::istream &is, bool binary, T *t); - -// Declare specialization for bool. -template <> -void WriteBasicType(std::ostream &os, bool binary, bool b); - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b); - -// Declare specializations for float and double. -template <> -void WriteBasicType(std::ostream &os, bool binary, float f); - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f); - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f); - -template <> -void ReadBasicType(std::istream &is, bool binary, double *f); - -// Define ReadBasicType that accepts an "add" parameter to add to -// the destination. Caution: if used in Read functions, be careful -// to initialize the parameters concerned to zero in the default -// constructor. -template -inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) { - if (!add) { - ReadBasicType(is, binary, t); - } else { - T tmp = T(0); - ReadBasicType(is, binary, &tmp); - *t += tmp; - } -} - -/// Function for writing STL vectors of integer types. -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v); - -/// Function for reading STL vector of integer types. -template -inline void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - -/// Function for writing STL vectors of pairs of integer types. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v); - -/// Function for reading STL vector of pairs of integer types. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v); - -/// The WriteToken functions are for writing nonempty sequences of non-space -/// characters. They are not for general strings. -void WriteToken(std::ostream &os, bool binary, const char *token); -void WriteToken(std::ostream &os, bool binary, const std::string &token); - -/// Peek consumes whitespace (if binary == false) and then returns the peek() -/// value of the stream. -int Peek(std::istream &is, bool binary); - -/// ReadToken gets the next token and puts it in str (exception on failure). If -/// PeekToken() had been previously called, it is possible that the stream had -/// failed to unget the starting '<' character. In this case ReadToken() returns -/// the token string without the leading '<'. You must be prepared to handle -/// this case. ExpectToken() handles this internally, and is not affected. -void ReadToken(std::istream &is, bool binary, std::string *token); - -/// PeekToken will return the first character of the next token, or -1 if end of -/// file. 
It's the same as Peek(), except if the first character is '<' it will -/// skip over it and will return the next character. It will attempt to unget -/// the '<' so the stream is where it was before you did PeekToken(), however, -/// this is not guaranteed (see ReadToken()). -int PeekToken(std::istream &is, bool binary); - -/// ExpectToken tries to read in the given token, and throws an exception -/// on failure. -void ExpectToken(std::istream &is, bool binary, const char *token); -void ExpectToken(std::istream &is, bool binary, const std::string &token); - -/// ExpectPretty attempts to read the text in "token", but only in non-binary -/// mode. Throws exception on failure. It expects an exact match except that -/// arbitrary whitespace matches arbitrary whitespace. -void ExpectPretty(std::istream &is, bool binary, const char *token); -void ExpectPretty(std::istream &is, bool binary, const std::string &token); - -/// @} end "addtogroup io_funcs_basic" - -/// InitKaldiOutputStream initializes an opened stream for writing by writing an -/// optional binary header and modifying the floating-point precision; it will -/// typically not be called by users directly. -inline void InitKaldiOutputStream(std::ostream &os, bool binary); - -/// InitKaldiInputStream initializes an opened stream for reading by detecting -/// the binary header and setting the "binary" value appropriately; -/// It will typically not be called by users directly. -inline bool InitKaldiInputStream(std::istream &is, bool *binary); - -} // end namespace kaldi. -#endif // KALDI_BASE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-common.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-common.h deleted file mode 100644 index eee5f34d7234e7c029e6bb59584d3ee65ff5a875..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-common.h +++ /dev/null @@ -1,41 +0,0 @@ -// base/kaldi-common.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
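The token helpers declared above write a nonempty, whitespace-free token followed by a single space, and read it back by skipping leading whitespace and consuming that trailing separator. A minimal local stand-in demonstrating the round trip (the `Demo` names and the `<TransitionModel>` token are illustrative only):

```cpp
#include <iostream>
#include <sstream>
#include <string>

void WriteTokenDemo(std::ostream &os, const std::string &token) {
  os << token << " ";        // a single space terminates the token in both modes
}

void ReadTokenDemo(std::istream &is, std::string *token) {
  is >> std::ws >> *token;   // skip whitespace, read up to the next whitespace
  is.get();                  // consume the trailing space
}

int main() {
  std::stringstream ss;
  WriteTokenDemo(ss, "<TransitionModel>");
  std::string tok;
  ReadTokenDemo(ss, &tok);
  std::cout << tok << "\n";  // prints: <TransitionModel>
  return 0;
}
```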
- -#ifndef KALDI_BASE_KALDI_COMMON_H_ -#define KALDI_BASE_KALDI_COMMON_H_ 1 - -#include -#include -#include // C string stuff like strcpy -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-utils.h" -#include "base/kaldi-error.h" -#include "base/kaldi-types.h" -// #include "base/io-funcs.h" -#include "base/kaldi-math.h" -// #include "base/timer.h" - -#endif // KALDI_BASE_KALDI_COMMON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-error.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-error.cc deleted file mode 100644 index 77edc6af6e56bb8fa3431d519e58fda9ee0bac6a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-error.cc +++ /dev/null @@ -1,42 +0,0 @@ -// base/kaldi-error.cc - -// Copyright 2019 LAIX (Yi Sun) -// Copyright 2019 SmartAction LLC (kkm) -// Copyright 2016 Brno University of Technology (author: Karel Vesely) -// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-error.h" - -#include - -namespace kaldi { - -/***** GLOBAL VARIABLES FOR LOGGING *****/ - -int32 g_kaldi_verbose_level = 0; -static std::string program_name; // NOLINT - -void SetProgramName(const char *basename) { - // Using the 'static std::string' for the program name is mostly harmless, - // because (a) Kaldi logging is undefined before main(), and (b) no stdc++ - // string implementation has been found in the wild that would not be just - // an empty string when zero-initialized but not yet constructed. - program_name = basename; -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-error.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-error.h deleted file mode 100644 index 0f65db372b5f05a8017433eed7c95badc819a0a6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-error.h +++ /dev/null @@ -1,57 +0,0 @@ -// base/kaldi-error.h - -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
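As the kaldi/README.md earlier in this diff notes, these files route Kaldi logging through glog. A short sketch of what the `KALDI_*` macros amount to in plain glog terms (assumes glog is installed and linked; only standard glog calls are used):

```cpp
#include <glog/logging.h>

int main(int argc, char *argv[]) {
  google::InitGoogleLogging(argv[0]);  // analogous to SetProgramName() above
  LOG(INFO) << "what KALDI_LOG prints";
  LOG(WARNING) << "what KALDI_WARN prints";
  CHECK(argc >= 1) << "KALDI_ASSERT maps to CHECK";
  VLOG(1) << "KALDI_VLOG(1): emitted only when the verbose level is >= 1";
  return 0;
}
```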
- -#ifndef KALDI_BASE_KALDI_ERROR_H_ -#define KALDI_BASE_KALDI_ERROR_H_ 1 - -#include "utils/log.h" - -namespace kaldi { - -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_LOG \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) - - -/***** PROGRAM NAME AND VERBOSITY LEVEL *****/ - -/// Called by ParseOptions to set base name (no directory) of the executing -/// program. The name is printed in logging code along with every message, -/// because in our scripts, we often mix together the stderr of many programs. -/// This function is very thread-unsafe. -void SetProgramName(const char *basename); - -/// This is set by util/parse-options.{h,cc} if you set --verbose=? option. -/// Do not use directly, prefer {Get,Set}VerboseLevel(). -extern int32 g_kaldi_verbose_level; - -/// Get verbosity level, usually set via command line '--verbose=' switch. -inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; } - -/// This should be rarely used, except by programs using Kaldi as library; -/// command-line programs set the verbose level automatically from ParseOptions. -inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; } - -} // namespace kaldi - -#endif // KALDI_BASE_KALDI_ERROR_H_ - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-math.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-math.cc deleted file mode 100644 index 175d9f49b6c5216645e90e146f4e2eab5572c342..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-math.cc +++ /dev/null @@ -1,164 +0,0 @@ -// base/kaldi-math.cc - -// Copyright 2009-2011 Microsoft Corporation; Yanmin Qian; -// Saarland University; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-math.h" -#ifndef _MSC_VER -#include -#include -#endif -#include -#include - -namespace kaldi { -// These routines are tested in matrix/matrix-test.cc - -int32 RoundUpToNearestPowerOfTwo(int32 n) { - KALDI_ASSERT(n > 0); - n--; - n |= n >> 1; - n |= n >> 2; - n |= n >> 4; - n |= n >> 8; - n |= n >> 16; - return n+1; -} - -static std::mutex _RandMutex; - -int Rand(struct RandomState* state) { -#if !defined(_POSIX_THREAD_SAFE_FUNCTIONS) - // On Windows and Cygwin, just call Rand() - return rand(); -#else - if (state) { - return rand_r(&(state->seed)); - } else { - std::lock_guard lock(_RandMutex); - return rand(); - } -#endif -} - -RandomState::RandomState() { - // we initialize it as Rand() + 27437 instead of just Rand(), because on some - // systems, e.g. 
at the very least Mac OSX Yosemite and later, it seems to be - // the case that rand_r when initialized with rand() will give you the exact - // same sequence of numbers that rand() will give if you keep calling rand() - // after that initial call. This can cause problems with repeated sequences. - // For example if you initialize two RandomState structs one after the other - // without calling rand() in between, they would give you the same sequence - // offset by one (if we didn't have the "+ 27437" in the code). 27437 is just - // a randomly chosen prime number. - seed = unsigned(Rand()) + 27437; -} - -bool WithProb(BaseFloat prob, struct RandomState* state) { - KALDI_ASSERT(prob >= 0 && prob <= 1.1); // prob should be <= 1.0, - // but we allow slightly larger values that could arise from roundoff in - // previous calculations. - KALDI_COMPILE_TIME_ASSERT(RAND_MAX > 128 * 128); - if (prob == 0) { - return false; - } else if (prob == 1.0) { - return true; - } else if (prob * RAND_MAX < 128.0) { - // prob is very small but nonzero, and the "main algorithm" - // wouldn't work that well. So: with probability 1/128, we - // return WithProb (prob * 128), else return false. - if (Rand(state) < RAND_MAX / 128) { // with probability 128... - // Note: we know that prob * 128.0 < 1.0, because - // we asserted RAND_MAX > 128 * 128. - return WithProb(prob * 128.0); - } else { - return false; - } - } else { - return (Rand(state) < ((RAND_MAX + static_cast(1.0)) * prob)); - } -} - -int32 RandInt(int32 min_val, int32 max_val, struct RandomState* state) { - // This is not exact. - KALDI_ASSERT(max_val >= min_val); - if (max_val == min_val) return min_val; - -#ifdef _MSC_VER - // RAND_MAX is quite small on Windows -> may need to handle larger numbers. - if (RAND_MAX > (max_val-min_val)*8) { - // *8 to avoid large inaccuracies in probability, from the modulus... - return min_val + - ((unsigned int)Rand(state) % (unsigned int)(max_val+1-min_val)); - } else { - if ((unsigned int)(RAND_MAX*RAND_MAX) > - (unsigned int)((max_val+1-min_val)*8)) { - // *8 to avoid inaccuracies in probability, from the modulus... - return min_val + ( (unsigned int)( (Rand(state)+RAND_MAX*Rand(state))) - % (unsigned int)(max_val+1-min_val)); - } else { - KALDI_ERR << "rand_int failed because we do not support such large " - "random numbers. (Extend this function)."; - } - } -#else - return min_val + - (static_cast(Rand(state)) % static_cast(max_val+1-min_val)); -#endif -} - -// Returns poisson-distributed random number. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state) { - // Knuth's algorithm. - KALDI_ASSERT(lambda >= 0); - float L = expf(-lambda), p = 1.0; - int32 k = 0; - do { - k++; - float u = RandUniform(state); - p *= u; - } while (p > L); - return k-1; -} - -void RandGauss2(float *a, float *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float u1 = RandUniform(state); - float u2 = RandUniform(state); - u1 = sqrtf(-2.0f * logf(u1)); - u2 = 2.0f * M_PI * u2; - *a = u1 * cosf(u2); - *b = u1 * sinf(u2); -} - -void RandGauss2(double *a, double *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float a_float, b_float; - // Just because we're using doubles doesn't mean we need super-high-quality - // random numbers, so we just use the floating-point version internally. 
- RandGauss2(&a_float, &b_float, state); - *a = a_float; - *b = b_float; -} - - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-math.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-math.h deleted file mode 100644 index 93c265ee96e704893da26b9083a44a9e60c6c192..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-math.h +++ /dev/null @@ -1,363 +0,0 @@ -// base/kaldi-math.h - -// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Yanmin Qian; -// Jan Silovsky; Saarland University -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_MATH_H_ -#define KALDI_BASE_KALDI_MATH_H_ 1 - -#ifdef _MSC_VER -#include -#endif - -#include -#include -#include - -#include "base/kaldi-types.h" -#include "base/kaldi-common.h" - - -#ifndef DBL_EPSILON -#define DBL_EPSILON 2.2204460492503131e-16 -#endif -#ifndef FLT_EPSILON -#define FLT_EPSILON 1.19209290e-7f -#endif - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif - -#ifndef M_SQRT2 -#define M_SQRT2 1.4142135623730950488016887 -#endif - -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -#ifndef M_SQRT1_2 -#define M_SQRT1_2 0.7071067811865475244008443621048490 -#endif - -#ifndef M_LOG_2PI -#define M_LOG_2PI 1.8378770664093454835606594728112 -#endif - -#ifndef M_LN2 -#define M_LN2 0.693147180559945309417232121458 -#endif - -#ifndef M_LN10 -#define M_LN10 2.302585092994045684017991454684 -#endif - - -#define KALDI_ISNAN std::isnan -#define KALDI_ISINF std::isinf -#define KALDI_ISFINITE(x) std::isfinite(x) - -#if !defined(KALDI_SQR) -# define KALDI_SQR(x) ((x) * (x)) -#endif - -namespace kaldi { - -#if !defined(_MSC_VER) || (_MSC_VER >= 1900) -inline double Exp(double x) { return exp(x); } -#ifndef KALDI_NO_EXPF -inline float Exp(float x) { return expf(x); } -#else -inline float Exp(float x) { return exp(static_cast(x)); } -#endif // KALDI_NO_EXPF -#else -inline double Exp(double x) { return exp(x); } -#if !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -// Microsoft CL v18.0 buggy 64-bit implementation of -// expf() incorrectly returns -inf for exp(-inf). 
-inline float Exp(float x) { return exp(static_cast(x)); } -#else -inline float Exp(float x) { return expf(x); } -#endif // !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) - -inline double Log(double x) { return log(x); } -inline float Log(float x) { return logf(x); } - -#if !defined(_MSC_VER) || (_MSC_VER >= 1700) -inline double Log1p(double x) { return log1p(x); } -inline float Log1p(float x) { return log1pf(x); } -#else -inline double Log1p(double x) { - const double cutoff = 1.0e-08; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} - -inline float Log1p(float x) { - const float cutoff = 1.0e-07; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} -#endif - -static const double kMinLogDiffDouble = Log(DBL_EPSILON); // negative! -static const float kMinLogDiffFloat = Log(FLT_EPSILON); // negative! - -// -infinity -const float kLogZeroFloat = -std::numeric_limits::infinity(); -const double kLogZeroDouble = -std::numeric_limits::infinity(); -const BaseFloat kLogZeroBaseFloat = -std::numeric_limits::infinity(); - -// Returns a random integer between 0 and RAND_MAX, inclusive -int Rand(struct RandomState* state = NULL); - -// State for thread-safe random number generator -struct RandomState { - RandomState(); - unsigned seed; -}; - -// Returns a random integer between first and last inclusive. -int32 RandInt(int32 first, int32 last, struct RandomState* state = NULL); - -// Returns true with probability "prob", -bool WithProb(BaseFloat prob, struct RandomState* state = NULL); -// with 0 <= prob <= 1 [we check this]. -// Internally calls Rand(). This function is carefully implemented so -// that it should work even if prob is very small. - -/// Returns a random number strictly between 0 and 1. -inline float RandUniform(struct RandomState* state = NULL) { - return static_cast((Rand(state) + 1.0) / (RAND_MAX+2.0)); -} - -inline float RandGauss(struct RandomState* state = NULL) { - return static_cast(sqrtf (-2 * Log(RandUniform(state))) - * cosf(2*M_PI*RandUniform(state))); -} - -// Returns poisson-distributed random number. Uses Knuth's algorithm. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state = NULL); - -// Returns a pair of gaussian random numbers. Uses Box-Muller transform -void RandGauss2(float *a, float *b, RandomState *state = NULL); -void RandGauss2(double *a, double *b, RandomState *state = NULL); - -// Also see Vector::RandCategorical(). - -// This is a randomized pruning mechanism that preserves expectations, -// that we typically use to prune posteriors. -template -inline Float RandPrune(Float post, BaseFloat prune_thresh, - struct RandomState* state = NULL) { - KALDI_ASSERT(prune_thresh >= 0.0); - if (post == 0.0 || std::abs(post) >= prune_thresh) - return post; - return (post >= 0 ? 1.0 : -1.0) * - (RandUniform(state) <= fabs(post)/prune_thresh ? prune_thresh : 0.0); -} - -// returns log(exp(x) + exp(y)). -inline double LogAdd(double x, double y) { - double diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffDouble) { - double res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) + exp(y)). 
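`LogAdd` above relies on the standard log-sum trick: with `x` the larger argument, log(e^x + e^y) = x + log1p(e^(y - x)), which never overflows and degenerates to `x` once the difference drops below machine precision. A standalone numerical check of that identity (the `Demo` function is a local re-derivation, not the Kaldi symbol):

```cpp
#include <cmath>
#include <cstdio>
#include <utility>

double LogAddDemo(double x, double y) {
  if (x < y) std::swap(x, y);            // make x the larger value
  double diff = y - x;                   // diff <= 0, so exp(diff) <= 1
  return x + std::log1p(std::exp(diff));
}

int main() {
  // exp(1000) overflows a double, but the log-domain sum stays finite:
  std::printf("%.6f\n", LogAddDemo(1000.0, 1000.0));                // 1000.693147 (= 1000 + ln 2)
  // Agrees with the naive formula whenever that formula does not overflow:
  std::printf("%.6f\n", std::log(std::exp(2.0) + std::exp(1.0)));   // 2.313262
  std::printf("%.6f\n", LogAddDemo(2.0, 1.0));                      // 2.313262
  return 0;
}
```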
-inline float LogAdd(float x, float y) { - float diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffFloat) { - float res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) - exp(y)). -inline double LogSub(double x, double y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - double diff = y - x; // Will be negative. - double res = x + Log(1.0 - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroDouble; - return res; -} - - -// returns log(exp(x) - exp(y)). -inline float LogSub(float x, float y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - float diff = y - x; // Will be negative. - float res = x + Log(1.0f - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroFloat; - return res; -} - -/// return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)). -static inline bool ApproxEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. - if (a == b) return true; - float diff = std::abs(a-b); - if (diff == std::numeric_limits::infinity() - || diff != diff) return false; // diff is +inf or nan. - return (diff <= relative_tolerance*(std::abs(a)+std::abs(b))); -} - -/// assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b)) -static inline void AssertEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. - KALDI_ASSERT(ApproxEqual(a, b, relative_tolerance)); -} - - -// RoundUpToNearestPowerOfTwo does the obvious thing. It crashes if n <= 0. -int32 RoundUpToNearestPowerOfTwo(int32 n); - -/// Returns a / b, rounding towards negative infinity in all cases. -static inline int32 DivideRoundingDown(int32 a, int32 b) { - KALDI_ASSERT(b != 0); - if (a * b >= 0) - return a / b; - else if (a < 0) - return (a - b + 1) / b; - else - return (a - b - 1) / b; -} - -template I Gcd(I m, I n) { - if (m == 0 || n == 0) { - if (m == 0 && n == 0) { // gcd not defined, as all integers are divisors. - KALDI_ERR << "Undefined GCD since m = 0, n = 0."; - } - return (m == 0 ? (n > 0 ? n : -n) : ( m > 0 ? m : -m)); - // return absolute value of whichever is nonzero - } - // could use compile-time assertion - // but involves messing with complex template stuff. - KALDI_ASSERT(std::numeric_limits::is_integer); - while (1) { - m %= n; - if (m == 0) return (n > 0 ? n : -n); - n %= m; - if (n == 0) return (m > 0 ? m : -m); - } -} - -/// Returns the least common multiple of two integers. Will -/// crash unless the inputs are positive. -template I Lcm(I m, I n) { - KALDI_ASSERT(m > 0 && n > 0); - I gcd = Gcd(m, n); - return gcd * (m/gcd) * (n/gcd); -} - - -template void Factorize(I m, std::vector *factors) { - // Splits a number into its prime factors, in sorted order from - // least to greatest, with duplication. A very inefficient - // algorithm, which is mainly intended for use in the - // mixed-radix FFT computation (where we assume most factors - // are small). - KALDI_ASSERT(factors != NULL); - KALDI_ASSERT(m >= 1); // Doesn't work for zero or negative numbers. 
- factors->clear(); - I small_factors[10] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 }; - - // First try small factors. - for (I i = 0; i < 10; i++) { - if (m == 1) return; // We're done. - while (m % small_factors[i] == 0) { - m /= small_factors[i]; - factors->push_back(small_factors[i]); - } - } - // Next try all odd numbers starting from 31. - for (I j = 31;; j += 2) { - if (m == 1) return; - while (m % j == 0) { - m /= j; - factors->push_back(j); - } - } -} - -inline double Hypot(double x, double y) { return hypot(x, y); } -inline float Hypot(float x, float y) { return hypotf(x, y); } - - - - -} // namespace kaldi - - -#endif // KALDI_BASE_KALDI_MATH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-types.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-types.h deleted file mode 100644 index 7ebf4f85386192a65e176d8f0ecde9bb348af4a0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-types.h +++ /dev/null @@ -1,75 +0,0 @@ -// base/kaldi-types.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_TYPES_H_ -#define KALDI_BASE_KALDI_TYPES_H_ 1 - -namespace kaldi { -// TYPEDEFS .................................................................. 
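The `Factorize` template just above (end of kaldi-math.h) performs simple trial division and returns the prime factors sorted with duplicates. A standalone version of the same routine, shown only to make the output format concrete (the `Demo` name is editorial):

```cpp
#include <cstdio>
#include <vector>

void FactorizeDemo(int m, std::vector<int> *factors) {
  factors->clear();
  int p = 2;
  while (m > 1) {
    while (m % p == 0) {        // divide out each prime as many times as it fits
      m /= p;
      factors->push_back(p);
    }
    p = (p == 2) ? 3 : p + 2;   // try 2 first, then only odd candidates
  }
}

int main() {
  std::vector<int> f;
  FactorizeDemo(360, &f);
  for (int p : f) std::printf("%d ", p);  // prints: 2 2 2 3 3 5
  std::printf("\n");
  return 0;
}
```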
-#if (KALDI_DOUBLEPRECISION != 0) -typedef double BaseFloat; -#else -typedef float BaseFloat; -#endif -} - -#ifdef _MSC_VER -#include -#define ssize_t SSIZE_T -#endif - -// we can do this a different way if some platform -// we find in the future lacks stdint.h -#include - -// for discussion on what to do if you need compile kaldi -// without OpenFST, see the bottom of this this file -#include - -namespace kaldi { - using ::int16; - using ::int32; - using ::int64; - using ::uint16; - using ::uint32; - using ::uint64; - typedef float float32; - typedef double double64; -} // end namespace kaldi - -// In a theoretical case you decide compile Kaldi without the OpenFST -// comment the previous namespace statement and uncomment the following -/* -namespace kaldi { - typedef int8_t int8; - typedef int16_t int16; - typedef int32_t int32; - typedef int64_t int64; - - typedef uint8_t uint8; - typedef uint16_t uint16; - typedef uint32_t uint32; - typedef uint64_t uint64; - typedef float float32; - typedef double double64; -} // end namespace kaldi -*/ - -#endif // KALDI_BASE_KALDI_TYPES_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-utils.h deleted file mode 100644 index bd434d09ed92ec94bc4208f53a4416f941edfdb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/base/kaldi-utils.h +++ /dev/null @@ -1,155 +0,0 @@ -// base/kaldi-utils.h - -// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; -// Saarland University; Karel Vesely; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_UTILS_H_ -#define KALDI_BASE_KALDI_UTILS_H_ 1 - -#if defined(_MSC_VER) -# define WIN32_LEAN_AND_MEAN -# define NOMINMAX -# include -#endif - -#ifdef _MSC_VER -#include -#define unlink _unlink -#else -#include -#endif - -#include -#include - -#if defined(_MSC_VER) -#pragma warning(disable: 4244 4056 4305 4800 4267 4996 4756 4661) -#if _MSC_VER < 1400 -#define __restrict__ -#else -#define __restrict__ __restrict -#endif -#endif - -#if defined(_MSC_VER) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = _aligned_malloc(size, align)) -# define KALDI_MEMALIGN_FREE(x) _aligned_free(x) -#elif defined(__CYGWIN__) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = aligned_alloc(align, size)) -# define KALDI_MEMALIGN_FREE(x) free(x) -#else -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (!posix_memalign(pp_orig, align, size) ? *(pp_orig) : NULL) -# define KALDI_MEMALIGN_FREE(x) free(x) -#endif - -#ifdef __ICC -#pragma warning(disable: 383) // ICPC remark we don't want. -#pragma warning(disable: 810) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. 
-#pragma warning(disable: 1418) // ICPC remark we don't want. -#pragma warning(disable: 444) // ICPC remark we don't want. -#pragma warning(disable: 869) // ICPC remark we don't want. -#pragma warning(disable: 1287) // ICPC remark we don't want. -#pragma warning(disable: 279) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. -#endif - - -namespace kaldi { - - -// CharToString prints the character in a human-readable form, for debugging. -std::string CharToString(const char &c); - - -inline int MachineIsLittleEndian() { - int check = 1; - return (*reinterpret_cast(&check) != 0); -} - -// This function kaldi::Sleep() provides a portable way -// to sleep for a possibly fractional -// number of seconds. On Windows it's only accurate to microseconds. -void Sleep(float seconds); -} // namespace kaldi - -#define KALDI_SWAP8(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[7];\ - (reinterpret_cast(&a))[7] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[6];\ - (reinterpret_cast(&a))[6] = t;\ - t = (reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=(reinterpret_cast(&a))[5];\ - (reinterpret_cast(&a))[5] = t;\ - t = (reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3]=(reinterpret_cast(&a))[4];\ - (reinterpret_cast(&a))[4] = t;} while (0) -#define KALDI_SWAP4(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=t;} while (0) -#define KALDI_SWAP2(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1] = t;} while (0) - - -// Makes copy constructor and operator= private. 
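The `KALDI_SWAP{2,4,8}` macros above byte-reverse values in place so that data written on a machine of one endianness can be read on the other, with `MachineIsLittleEndian()` supplying the runtime check. A self-contained equivalent for the 4-byte case (the `Demo` names are editorial, not part of the sources):

```cpp
#include <cstdint>
#include <cstdio>

bool MachineIsLittleEndianDemo() {
  int check = 1;
  return *reinterpret_cast<char *>(&check) != 0;  // low byte first => little-endian
}

uint32_t Swap4Demo(uint32_t v) {
  // Reverse the four bytes, mirroring what KALDI_SWAP4 does in place.
  return (v >> 24) | ((v >> 8) & 0x0000FF00u) |
         ((v << 8) & 0x00FF0000u) | (v << 24);
}

int main() {
  std::printf("little-endian host: %d\n", MachineIsLittleEndianDemo());
  std::printf("0x%08X -> 0x%08X\n", 0x12345678u, Swap4Demo(0x12345678u));  // 0x78563412
  return 0;
}
```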
-#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \ - type(const type&); \ - void operator = (const type&) - -template class KaldiCompileTimeAssert { }; -template<> class KaldiCompileTimeAssert { - public: - static inline void Check() { } -}; - -#define KALDI_COMPILE_TIME_ASSERT(b) KaldiCompileTimeAssert<(b)>::Check() - -#define KALDI_ASSERT_IS_INTEGER_TYPE(I) \ - KaldiCompileTimeAssert::is_specialized \ - && std::numeric_limits::is_integer>::Check() - -#define KALDI_ASSERT_IS_FLOATING_TYPE(F) \ - KaldiCompileTimeAssert::is_specialized \ - && !std::numeric_limits::is_integer>::Check() - -#if defined(_MSC_VER) -#define KALDI_STRCASECMP _stricmp -#elif defined(__CYGWIN__) -#include -#define KALDI_STRCASECMP strcasecmp -#else -#define KALDI_STRCASECMP strcasecmp -#endif -#ifdef _MSC_VER -# define KALDI_STRTOLL(cur_cstr, end_cstr) _strtoi64(cur_cstr, end_cstr, 10); -#else -# define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10); -#endif - -#endif // KALDI_BASE_KALDI_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-decoder.cc deleted file mode 100644 index 06f77557fa49a23f6a44d07c327a1b3b081c6dec..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-decoder.cc +++ /dev/null @@ -1,1101 +0,0 @@ -// decoder/lattice-faster-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2018 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "decoder/lattice-faster-decoder.h" -// #include "lat/lattice-functions.h" - -namespace kaldi { - -// instantiate this class once for each thing you have to decode. -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : fst_(&fst), - delete_fst_(false), - config_(config), - num_toks_(0), - context_graph_(context_graph) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. -} - -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const LatticeFasterDecoderConfig &config, FST *fst) - : fst_(fst), delete_fst_(true), config_(config), num_toks_(0) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. 
-} - -template -LatticeFasterDecoderTpl::~LatticeFasterDecoderTpl() { - DeleteElems(toks_.Clear()); - ClearActiveTokens(); - if (delete_fst_) delete fst_; -} - -template -void LatticeFasterDecoderTpl::InitDecoding() { - // clean up from last time: - DeleteElems(toks_.Clear()); - cost_offsets_.clear(); - ClearActiveTokens(); - warned_ = false; - num_toks_ = 0; - decoding_finalized_ = false; - final_costs_.clear(); - StateId start_state = fst_->Start(); - KALDI_ASSERT(start_state != fst::kNoStateId); - active_toks_.resize(1); - Token *start_tok = new Token(0.0, 0.0, NULL, NULL, NULL); - active_toks_[0].toks = start_tok; - toks_.Insert(start_state, start_tok); - num_toks_++; - ProcessNonemitting(config_.beam); -} - -// Returns true if any kind of traceback is available (not necessarily from -// a final state). It should only very rarely return false; this indicates -// an unusual search error. -template -bool LatticeFasterDecoderTpl::Decode( - DecodableInterface *decodable) { - InitDecoding(); - // We use 1-based indexing for frames in this decoder (if you view it in - // terms of features), but note that the decodable object uses zero-based - // numbering, which we have to correct for when we call it. - AdvanceDecoding(decodable); - FinalizeDecoding(); - - // Returns true if we have any kind of traceback available (not necessarily - // to the end state; query ReachedFinal() for that). - return !active_toks_.empty() && active_toks_.back().toks != NULL; -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - Lattice raw_lat; - GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, olat); - return (olat->NumStates() != 0); -} - -// Outputs an FST corresponding to the raw, state-level lattice -template -bool LatticeFasterDecoderTpl::GetRawLattice( - Lattice *ofst, bool use_final_probs) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (decoding_finalized_ ? final_costs_ : final_costs_local); - if (!decoding_finalized_ && use_final_probs) - ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - const int32 bucket_count = num_toks_ / 2 + 3; - unordered_map tok_map(bucket_count); - // First create all states. - std::vector token_list; - for (int32 f = 0; f <= num_frames; f++) { - if (active_toks_[f].toks == NULL) { - KALDI_WARN << "GetRawLattice: no tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - TopSortTokens(active_toks_[f].toks, &token_list); - for (size_t i = 0; i < token_list.size(); i++) - if (token_list[i] != NULL) tok_map[token_list[i]] = ofst->AddState(); - } - // The next statement sets the start state of the output FST. Because we - // topologically sorted the tokens, state zero must be the start-state. 
- ofst->SetStart(0); - - KALDI_VLOG(4) << "init:" << num_toks_ / 2 + 3 - << " buckets:" << tok_map.bucket_count() - << " load:" << tok_map.load_factor() - << " max:" << tok_map.max_load_factor(); - // Now create all arcs. - for (int32 f = 0; f <= num_frames; f++) { - for (Token *tok = active_toks_[f].toks; tok != NULL; tok = tok->next) { - StateId cur_state = tok_map[tok]; - for (ForwardLinkT *l = tok->links; l != NULL; l = l->next) { - typename unordered_map::const_iterator iter = - tok_map.find(l->next_tok); - StateId nextstate = iter->second; - KALDI_ASSERT(iter != tok_map.end()); - BaseFloat cost_offset = 0.0; - if (l->ilabel != 0) { // emitting.. - KALDI_ASSERT(f >= 0 && f < cost_offsets_.size()); - cost_offset = cost_offsets_[f]; - } - - StateId state = cur_state; - if (l->is_start_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->start_tag_id(), Weight(0, 0), tmp); - ofst->AddArc(state, arc); - state = tmp; - } - if (l->is_end_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->end_tag_id(), Weight(0, 0), nextstate); - ofst->AddArc(tmp, arc); - nextstate = tmp; - } - - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(state, arc); - } - if (f == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - } - - fst::TopSort(ofst); - return (ofst->NumStates() > 0); -} - -// This function is now deprecated, since now we do determinization from outside -// the LatticeFasterDecoder class. Outputs an FST corresponding to the -// lattice-determinized lattice (one path per word sequence). -template -bool LatticeFasterDecoderTpl::GetLattice( - CompactLattice *ofst, bool use_final_probs) const { - Lattice raw_fst; - GetRawLattice(&raw_fst, use_final_probs); - Invert(&raw_fst); // make it so word labels are on the input. - // (in phase where we get backward-costs). - fst::ILabelCompare ilabel_comp; - ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes - // lattice-determinization more efficient. - - fst::DeterminizeLatticePrunedOptions lat_opts; - lat_opts.max_mem = config_.det_opts.max_mem; - - DeterminizeLatticePruned(raw_fst, config_.lattice_beam, ofst, lat_opts); - raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed. - Connect(ofst); // Remove unreachable states... there might be - // a small number of these, in some cases. - // Note: if something went wrong and the raw lattice was empty, - // we should still get to this point in the code without warnings or failures. - return (ofst->NumStates() != 0); -} - -template -void LatticeFasterDecoderTpl::PossiblyResizeHash(size_t num_toks) { - size_t new_sz = static_cast(static_cast(num_toks) * - config_.hash_ratio); - if (new_sz > toks_.Size()) { - toks_.SetSize(new_sz); - } -} - -/* - A note on the definition of extra_cost. - - extra_cost is used in pruning tokens, to save memory. - - extra_cost can be thought of as a beta (backward) cost assuming - we had set the betas on currently-active tokens to all be the negative - of the alphas for those tokens. (So all currently active tokens would - be on (tied) best paths). - - We can use the extra_cost to accurately prune away tokens that we know will - never appear in the lattice. 
If the extra_cost is greater than the desired - lattice beam, the token would provably never appear in the lattice, so we can - prune away the token. - - (Note: we don't update all the extra_costs every time we update a frame; we - only do it every 'config_.prune_interval' frames). - */ - -// FindOrAddToken either locates a token in hash of toks_, -// or if necessary inserts a new, empty token (i.e. with no forward links) -// for the current frame. [note: it's inserted if necessary into hash toks_ -// and also into the singly linked list of tokens active on this frame -// (whose head is at active_toks_[frame]). -template -inline typename LatticeFasterDecoderTpl::Elem * -LatticeFasterDecoderTpl::FindOrAddToken(StateId state, - int32 frame_plus_one, - BaseFloat tot_cost, - Token *backpointer, - bool *changed) { - // Returns the Token pointer. Sets "changed" (if non-NULL) to true - // if the token was newly created or the cost changed. - KALDI_ASSERT(frame_plus_one < active_toks_.size()); - Token *&toks = active_toks_[frame_plus_one].toks; - Elem *e_found = toks_.Insert(state, NULL); - if (e_found->val == NULL) { // no such token presently. - const BaseFloat extra_cost = 0.0; - // tokens on the currently final frame have zero extra_cost - // as any of them could end up - // on the winning path. - Token *new_tok = new Token(tot_cost, extra_cost, NULL, toks, backpointer); - // NULL: no forward links yet - toks = new_tok; - num_toks_++; - e_found->val = new_tok; - if (changed) *changed = true; - return e_found; - } else { - Token *tok = e_found->val; // There is an existing Token for this state. - if (tok->tot_cost > tot_cost) { // replace old token - tok->tot_cost = tot_cost; - // SetBackpointer() just does tok->backpointer = backpointer in - // the case where Token == BackpointerToken, else nothing. - tok->SetBackpointer(backpointer); - // we don't allocate a new token, the old stays linked in active_toks_ - // we only replace the tot_cost - // in the current frame, there are no forward links (and no extra_cost) - // only in ProcessNonemitting we have to delete forward links - // in case we visit a state for the second time - // those forward links, that lead to this replaced token before: - // they remain and will hopefully be pruned later (PruneForwardLinks...) - if (changed) *changed = true; - } else { - if (changed) *changed = false; - } - return e_found; - } -} - -// prunes outgoing links for all tokens in active_toks_[frame] -// it's called by PruneActiveTokens -// all links, that have link_extra_cost > lattice_beam are pruned -template -void LatticeFasterDecoderTpl::PruneForwardLinks( - int32 frame_plus_one, bool *extra_costs_changed, bool *links_pruned, - BaseFloat delta) { - // delta is the amount by which the extra_costs must change - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - - *extra_costs_changed = false; - *links_pruned = false; - KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size()); - if (active_toks_[frame_plus_one].toks == - NULL) { // empty list; should not happen. - if (!warned_) { - KALDI_WARN << "No tokens alive [doing pruning].. warning first " - "time only for each utterance\n"; - warned_ = true; - } - } - - // We have to iterate until there is no more change, because the links - // are not guaranteed to be in topological order. 
- bool changed = true; // difference new minus old extra cost >= delta ? - while (changed) { - changed = false; - for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL; - tok = tok->next) { - ForwardLinkT *link, *prev_link = NULL; - // will recompute tok_extra_cost for tok. - BaseFloat tok_extra_cost = std::numeric_limits::infinity(); - // tok_extra_cost is the best (min) of link_extra_cost of outgoing links - for (link = tok->links; link != NULL;) { - // See if we need to excise this link... - Token *next_tok = link->next_tok; - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); // difference in brackets is >= 0 - // link_exta_cost is the difference in score between the best paths - // through link source state and through link destination state - KALDI_ASSERT(link_extra_cost == link_extra_cost); // check for NaN - // the graph_cost contatins the context score - // if it's the score of the backoff arc, it should be removed. - if (link->context_score < 0) { - link_extra_cost += link->context_score; - } - if (link_extra_cost > config_.lattice_beam) { // excise link - ForwardLinkT *next_link = link->next; - if (prev_link != NULL) - prev_link->next = next_link; - else - tok->links = next_link; - delete link; - link = next_link; // advance link but leave prev_link the same. - *links_pruned = true; - } else { // keep the link and update the tok_extra_cost if needed. - if (link_extra_cost < 0.0) { // this is just a precaution. - // if (link_extra_cost < -0.01) - // KALDI_WARN << "Negative extra_cost: " << link_extra_cost; - link_extra_cost = 0.0; - } - if (link_extra_cost < tok_extra_cost) - tok_extra_cost = link_extra_cost; - prev_link = link; // move to next link - link = link->next; - } - } // for all outgoing links - if (fabs(tok_extra_cost - tok->extra_cost) > delta) - changed = true; // difference new minus old is bigger than delta - tok->extra_cost = tok_extra_cost; - // will be +infinity or <= lattice_beam_. - // infinity indicates, that no forward link survived pruning - } // for all Token on active_toks_[frame] - if (changed) *extra_costs_changed = true; - - // Note: it's theoretically possible that aggressive compiler - // optimizations could cause an infinite loop here for small delta and - // high-dynamic-range scores. - } // while changed -} - -// PruneForwardLinksFinal is a version of PruneForwardLinks that we call -// on the final frame. If there are final tokens active, it uses -// the final-probs for pruning, otherwise it treats all tokens as final. -template -void LatticeFasterDecoderTpl::PruneForwardLinksFinal() { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame_plus_one = active_toks_.size() - 1; - - if (active_toks_[frame_plus_one].toks == - NULL) // empty list; should not happen. - KALDI_WARN << "No tokens alive at end of file"; - - typedef typename unordered_map::const_iterator IterType; - ComputeFinalCosts(&final_costs_, &final_relative_cost_, &final_best_cost_); - decoding_finalized_ = true; - // We call DeleteElems() as a nicety, not because it's really necessary; - // otherwise there would be a time, after calling PruneTokensForFrame() on the - // final frame, when toks_.GetList() or toks_.Clear() would contain pointers - // to nonexistent tokens. - DeleteElems(toks_.Clear()); - - // Now go through tokens on this frame, pruning forward links... may have to - // iterate a few times until there is no more change, because the list is not - // in topological order. 
This is a modified version of the code in - // PruneForwardLinks, but here we also take account of the final-probs. - bool changed = true; - BaseFloat delta = 1.0e-05; - while (changed) { - changed = false; - for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL; - tok = tok->next) { - ForwardLinkT *link, *prev_link = NULL; - // will recompute tok_extra_cost. It has a term in it that corresponds - // to the "final-prob", so instead of initializing tok_extra_cost to - // infinity below we set it to the difference between the - // (score+final_prob) of this token, and the best such (score+final_prob). - BaseFloat final_cost; - if (final_costs_.empty()) { - final_cost = 0.0; - } else { - IterType iter = final_costs_.find(tok); - if (iter != final_costs_.end()) - final_cost = iter->second; - else - final_cost = std::numeric_limits::infinity(); - } - BaseFloat tok_extra_cost = tok->tot_cost + final_cost - final_best_cost_; - // tok_extra_cost will be a "min" over either directly being final, or - // being indirectly final through other links, and the loop below may - // decrease its value: - for (link = tok->links; link != NULL;) { - // See if we need to excise this link... - Token *next_tok = link->next_tok; - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); - if (link_extra_cost > config_.lattice_beam) { // excise link - ForwardLinkT *next_link = link->next; - if (prev_link != NULL) - prev_link->next = next_link; - else - tok->links = next_link; - delete link; - link = next_link; // advance link but leave prev_link the same. - } else { // keep the link and update the tok_extra_cost if needed. - if (link_extra_cost < 0.0) { // this is just a precaution. - // if (link_extra_cost < -0.01) - // KALDI_WARN << "Negative extra_cost: " << link_extra_cost; - link_extra_cost = 0.0; - } - if (link_extra_cost < tok_extra_cost) - tok_extra_cost = link_extra_cost; - prev_link = link; - link = link->next; - } - } - // prune away tokens worse than lattice_beam above best path. This step - // was not necessary in the non-final case because then, this case - // showed up as having no forward links. Here, the tok_extra_cost has - // an extra component relating to the final-prob. - if (tok_extra_cost > config_.lattice_beam) - tok_extra_cost = std::numeric_limits::infinity(); - // to be pruned in PruneTokensForFrame - - if (!ApproxEqual(tok->extra_cost, tok_extra_cost, delta)) changed = true; - tok->extra_cost = - tok_extra_cost; // will be +infinity or <= lattice_beam_. - } - } // while changed -} - -template -BaseFloat LatticeFasterDecoderTpl::FinalRelativeCost() const { - if (!decoding_finalized_) { - BaseFloat relative_cost; - ComputeFinalCosts(NULL, &relative_cost, NULL); - return relative_cost; - } else { - // we're not allowed to call that function if FinalizeDecoding() has - // been called; return a cached value. - return final_relative_cost_; - } -} - -// Prune away any tokens on this frame that have no forward links. -// [we don't do this in PruneForwardLinks because it would give us -// a problem with dangling pointers]. 
-// It's called by PruneActiveTokens if any forward links have been pruned -template -void LatticeFasterDecoderTpl::PruneTokensForFrame( - int32 frame_plus_one) { - KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size()); - Token *&toks = active_toks_[frame_plus_one].toks; - if (toks == NULL) KALDI_WARN << "No tokens alive [doing pruning]"; - Token *tok, *next_tok, *prev_tok = NULL; - for (tok = toks; tok != NULL; tok = next_tok) { - next_tok = tok->next; - if (tok->extra_cost == std::numeric_limits::infinity()) { - // token is unreachable from end of graph; (no forward links survived) - // excise tok from list and delete tok. - if (prev_tok != NULL) - prev_tok->next = tok->next; - else - toks = tok->next; - delete tok; - num_toks_--; - } else { // fetch next Token - prev_tok = tok; - } - } -} - -// Go backwards through still-alive tokens, pruning them, starting not from -// the current frame (where we want to keep all tokens) but from the frame -// before that. We go backwards through the frames and stop when we reach a -// point where the delta-costs are not changing (and the delta controls when we -// consider a cost to have "not changed"). -template -void LatticeFasterDecoderTpl::PruneActiveTokens(BaseFloat delta) { - int32 cur_frame_plus_one = NumFramesDecoded(); - int32 num_toks_begin = num_toks_; - // The index "f" below represents a "frame plus one", i.e. you'd have to - // subtract one to get the corresponding index for the decodable object. - for (int32 f = cur_frame_plus_one - 1; f >= 0; f--) { - // Reason why we need to prune forward links in this situation: - // (1) we have never pruned them (new TokenList) - // (2) we have not yet pruned the forward links to the next f, - // after any of those tokens have changed their extra_cost. 
- if (active_toks_[f].must_prune_forward_links) { - bool extra_costs_changed = false, links_pruned = false; - PruneForwardLinks(f, &extra_costs_changed, &links_pruned, delta); - if (extra_costs_changed && f > 0) // any token has changed extra_cost - active_toks_[f - 1].must_prune_forward_links = true; - if (links_pruned) // any link was pruned - active_toks_[f].must_prune_tokens = true; - active_toks_[f].must_prune_forward_links = false; // job done - } - if (f + 1 < cur_frame_plus_one && // except for last f (no forward links) - active_toks_[f + 1].must_prune_tokens) { - PruneTokensForFrame(f + 1); - active_toks_[f + 1].must_prune_tokens = false; - } - } - KALDI_VLOG(4) << "PruneActiveTokens: pruned tokens from " << num_toks_begin - << " to " << num_toks_; -} - -template -void LatticeFasterDecoderTpl::ComputeFinalCosts( - unordered_map *final_costs, - BaseFloat *final_relative_cost, BaseFloat *final_best_cost) const { - KALDI_ASSERT(!decoding_finalized_); - if (final_costs != NULL) final_costs->clear(); - const Elem *final_toks = toks_.GetList(); - BaseFloat infinity = std::numeric_limits::infinity(); - BaseFloat best_cost = infinity, best_cost_with_final = infinity; - - while (final_toks != NULL) { - StateId state = final_toks->key; - Token *tok = final_toks->val; - const Elem *next = final_toks->tail; - BaseFloat final_cost = fst_->Final(state).Value(); - BaseFloat cost = tok->tot_cost, cost_with_final = cost + final_cost; - best_cost = std::min(cost, best_cost); - best_cost_with_final = std::min(cost_with_final, best_cost_with_final); - if (final_costs != NULL && final_cost != infinity) - (*final_costs)[tok] = final_cost; - final_toks = next; - } - if (final_relative_cost != NULL) { - if (best_cost == infinity && best_cost_with_final == infinity) { - // Likely this will only happen if there are no tokens surviving. - // This seems the least bad way to handle it. - *final_relative_cost = infinity; - } else { - *final_relative_cost = best_cost_with_final - best_cost; - } - } - if (final_best_cost != NULL) { - if (best_cost_with_final != infinity) { // final-state exists. - *final_best_cost = best_cost_with_final; - } else { // no final-state exists. - *final_best_cost = best_cost; - } - } -} - -template -void LatticeFasterDecoderTpl::AdvanceDecoding( - DecodableInterface *decodable, int32 max_num_frames) { - if (std::is_same >::value) { - // if the type 'FST' is the FST base-class, then see if the FST type of fst_ - // is actually VectorFst or ConstFst. If so, call the AdvanceDecoding() - // function after casting *this to the more specific type. - if (fst_->Type() == "const") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } else if (fst_->Type() == "vector") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } - } - - KALDI_ASSERT(!active_toks_.empty() && !decoding_finalized_ && - "You must call InitDecoding() before AdvanceDecoding"); - int32 num_frames_ready = decodable->NumFramesReady(); - // num_frames_ready must be >= num_frames_decoded, or else - // the number of frames ready must have decreased (which doesn't - // make sense) or the decodable object changed between calls - // (which isn't allowed). 
- KALDI_ASSERT(num_frames_ready >= NumFramesDecoded()); - int32 target_frames_decoded = num_frames_ready; - if (max_num_frames >= 0) - target_frames_decoded = - std::min(target_frames_decoded, NumFramesDecoded() + max_num_frames); - while (NumFramesDecoded() < target_frames_decoded) { - if (NumFramesDecoded() % config_.prune_interval == 0) { - PruneActiveTokens(config_.lattice_beam * config_.prune_scale); - } - BaseFloat cost_cutoff = ProcessEmitting(decodable); - ProcessNonemitting(cost_cutoff); - } -} - -// FinalizeDecoding() is a version of PruneActiveTokens that we call -// (optionally) on the final frame. Takes into account the final-prob of -// tokens. This function used to be called PruneActiveTokensFinal(). -template -void LatticeFasterDecoderTpl::FinalizeDecoding() { - int32 final_frame_plus_one = NumFramesDecoded(); - int32 num_toks_begin = num_toks_; - // PruneForwardLinksFinal() prunes final frame (with final-probs), and - // sets decoding_finalized_. - PruneForwardLinksFinal(); - for (int32 f = final_frame_plus_one - 1; f >= 0; f--) { - bool b1, b2; // values not used. - BaseFloat dontcare = 0.0; // delta of zero means we must always update - PruneForwardLinks(f, &b1, &b2, dontcare); - PruneTokensForFrame(f + 1); - } - PruneTokensForFrame(0); - KALDI_VLOG(4) << "pruned tokens from " << num_toks_begin << " to " - << num_toks_; -} - -/// Gets the weight cutoff. Also counts the active tokens. -template -BaseFloat LatticeFasterDecoderTpl::GetCutoff( - Elem *list_head, size_t *tok_count, BaseFloat *adaptive_beam, - Elem **best_elem) { - BaseFloat best_weight = std::numeric_limits::infinity(); - // positive == high cost == bad. - size_t count = 0; - if (config_.max_active == std::numeric_limits::max() && - config_.min_active == 0) { - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = static_cast(e->val->tot_cost); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - if (adaptive_beam != NULL) *adaptive_beam = config_.beam; - return best_weight + config_.beam; - } else { - tmp_array_.clear(); - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = e->val->tot_cost; - tmp_array_.push_back(w); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - - BaseFloat beam_cutoff = best_weight + config_.beam, - min_active_cutoff = std::numeric_limits::infinity(), - max_active_cutoff = std::numeric_limits::infinity(); - - KALDI_VLOG(6) << "Number of tokens active on frame " << NumFramesDecoded() - << " is " << tmp_array_.size(); - - if (tmp_array_.size() > static_cast(config_.max_active)) { - std::nth_element(tmp_array_.begin(), - tmp_array_.begin() + config_.max_active, - tmp_array_.end()); - max_active_cutoff = tmp_array_[config_.max_active]; - } - if (max_active_cutoff < beam_cutoff) { // max_active is tighter than beam. - if (adaptive_beam) - *adaptive_beam = max_active_cutoff - best_weight + config_.beam_delta; - return max_active_cutoff; - } - if (tmp_array_.size() > static_cast(config_.min_active)) { - if (config_.min_active == 0) { - min_active_cutoff = best_weight; - } else { - std::nth_element( - tmp_array_.begin(), tmp_array_.begin() + config_.min_active, - tmp_array_.size() > static_cast(config_.max_active) - ? 
tmp_array_.begin() + config_.max_active - : tmp_array_.end()); - min_active_cutoff = tmp_array_[config_.min_active]; - } - } - if (min_active_cutoff > beam_cutoff) { // min_active is looser than beam. - if (adaptive_beam) - *adaptive_beam = min_active_cutoff - best_weight + config_.beam_delta; - return min_active_cutoff; - } else { - *adaptive_beam = config_.beam; - return beam_cutoff; - } - } -} - -template -BaseFloat LatticeFasterDecoderTpl::ProcessEmitting( - DecodableInterface *decodable) { - KALDI_ASSERT(active_toks_.size() > 0); - int32 frame = - active_toks_.size() - 1; // frame is the frame-index - // (zero-based) used to get likelihoods - // from the decodable object. - active_toks_.resize(active_toks_.size() + 1); - - Elem *final_toks = - toks_.Clear(); // analogous to swapping prev_toks_ / cur_toks_ - // in simple-decoder.h. Removes the Elems from - // being indexed in the hash in toks_. - Elem *best_elem = NULL; - BaseFloat adaptive_beam; - size_t tok_cnt; - BaseFloat cur_cutoff = - GetCutoff(final_toks, &tok_cnt, &adaptive_beam, &best_elem); - KALDI_VLOG(6) << "Adaptive beam on frame " << NumFramesDecoded() << " is " - << adaptive_beam; - - PossiblyResizeHash( - tok_cnt); // This makes sure the hash is always big enough. - - BaseFloat next_cutoff = std::numeric_limits::infinity(); - // pruning "online" before having seen all tokens - - BaseFloat cost_offset = 0.0; // Used to keep probabilities in a good - // dynamic range. - - // First process the best token to get a hopefully - // reasonably tight bound on the next cutoff. The only - // products of the next block are "next_cutoff" and "cost_offset". - if (best_elem) { - StateId state = best_elem->key; - Token *tok = best_elem->val; - cost_offset = -tok->tot_cost; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. - BaseFloat new_weight = arc.weight.Value() + cost_offset - - decodable->LogLikelihood(frame, arc.ilabel) + - tok->tot_cost; - if (state != arc.nextstate) { - new_weight += config_.length_penalty; - } - if (new_weight + adaptive_beam < next_cutoff) - next_cutoff = new_weight + adaptive_beam; - } - } - } - - // Store the offset on the acoustic likelihoods that we're applying. - // Could just do cost_offsets_.push_back(cost_offset), but we - // do it this way as it's more robust to future code changes. - cost_offsets_.resize(frame + 1, 0.0); - cost_offsets_[frame] = cost_offset; - - // the tokens are now owned here, in final_toks, and the hash is empty. - // 'owned' is a complex thing here; the point is we need to call DeleteElem - // on each elem 'e' to let toks_ know we're done with them. - for (Elem *e = final_toks, *e_tail; e != NULL; e = e_tail) { - // loop this way because we delete "e" as we go. - StateId state = e->key; - Token *tok = e->val; - if (tok->tot_cost <= cur_cutoff) { - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. 
- BaseFloat ac_cost = cost_offset - - decodable->LogLikelihood(frame, arc.ilabel), - graph_cost = arc.weight.Value(); - if (state != arc.nextstate) { - graph_cost += config_.length_penalty; - } - BaseFloat cur_cost = tok->tot_cost, - tot_cost = cur_cost + ac_cost + graph_cost; - if (tot_cost >= next_cutoff) - continue; - else if (tot_cost + adaptive_beam < next_cutoff) - next_cutoff = - tot_cost + adaptive_beam; // prune by best current token - // Note: the frame indexes into active_toks_ are one-based, - // hence the + 1. - Elem *e_next = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, NULL); - // NULL: no change indicator needed - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_next->val->context_state = tok->context_state; - } else { - e_next->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - // Add ForwardLink from tok to next_tok (put on head of list - // tok->links) - tok->links = new ForwardLinkT(e_next->val, arc.ilabel, arc.olabel, - graph_cost, ac_cost, is_start_boundary, - is_end_boundary, tok->links); - tok->links->context_score = context_score; - } - } // for all arcs - } - e_tail = e->tail; - toks_.Delete(e); // delete Elem - } - return next_cutoff; -} - -// static inline -template -void LatticeFasterDecoderTpl::DeleteForwardLinks(Token *tok) { - ForwardLinkT *l = tok->links, *m; - while (l != NULL) { - m = l->next; - delete l; - l = m; - } - tok->links = NULL; -} - -template -void LatticeFasterDecoderTpl::ProcessNonemitting(BaseFloat cutoff) { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame = static_cast(active_toks_.size()) - 2; - // Note: "frame" is the time-index we just processed, or -1 if - // we are processing the nonemitting transitions before the - // first frame (called from InitDecoding()). - - // Processes nonemitting arcs for one frame. Propagates within toks_. - // Note-- this queue structure is not very optimal as - // it may cause us to process states unnecessarily (e.g. more than once), - // but in the baseline code, turning this vector into a set to fix this - // problem did not improve overall speed. - - KALDI_ASSERT(queue_.empty()); - - if (toks_.GetList() == NULL) { - if (!warned_) { - KALDI_WARN << "Error, no surviving tokens: frame is " << frame; - warned_ = true; - } - } - - int before = 0, after = 0; - for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail) { - StateId state = e->key; - if (fst_->NumInputEpsilons(state) != 0) queue_.push_back(e); - ++before; - } - - while (!queue_.empty()) { - ++after; - const Elem *e = queue_.back(); - queue_.pop_back(); - - StateId state = e->key; - Token *tok = - e->val; // would segfault if e is a NULL pointer but this can't happen. - BaseFloat cur_cost = tok->tot_cost; - if (cur_cost >= cutoff) // Don't bother processing successors. - continue; - // If "tok" has any existing forward links, delete them, - // because we're about to regenerate them. This is a kind - // of non-optimality (remember, this is the simple decoder), - // but since most states are emitting it's not a huge issue. - DeleteForwardLinks(tok); // necessary when re-visiting - tok->links = NULL; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel == 0) { // propagate nonemitting only... 
- BaseFloat graph_cost = arc.weight.Value(), - tot_cost = cur_cost + graph_cost; - if (tot_cost < cutoff) { - bool changed; - - Elem *e_new = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, &changed); - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_new->val->context_state = tok->context_state; - } else { - e_new->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - - tok->links = - new ForwardLinkT(e_new->val, 0, arc.olabel, graph_cost, 0, - is_start_boundary, is_end_boundary, tok->links); - tok->links->context_score = context_score; - - // "changed" tells us whether the new token has a different - // cost from before, or is new [if so, add into queue]. - if (changed && fst_->NumInputEpsilons(arc.nextstate) != 0) - queue_.push_back(e_new); - } - } - } // for all arcs - } // while queue not empty - KALDI_VLOG(3) << "ProcessNonemitting " << before << " " << after; -} - -template -void LatticeFasterDecoderTpl::DeleteElems(Elem *list) { - for (Elem *e = list, *e_tail; e != NULL; e = e_tail) { - e_tail = e->tail; - toks_.Delete(e); - } -} - -template -void LatticeFasterDecoderTpl< - FST, Token>::ClearActiveTokens() { // a cleanup routine, at utt end/begin - for (size_t i = 0; i < active_toks_.size(); i++) { - // Delete all tokens alive on this frame, and any forward - // links they may have. - for (Token *tok = active_toks_[i].toks; tok != NULL;) { - DeleteForwardLinks(tok); - Token *next_tok = tok->next; - delete tok; - num_toks_--; - tok = next_tok; - } - } - active_toks_.clear(); - KALDI_ASSERT(num_toks_ == 0); -} - -// static -template -void LatticeFasterDecoderTpl::TopSortTokens( - Token *tok_list, std::vector *topsorted_list) { - unordered_map token2pos; - using std::unordered_set; - typedef typename unordered_map::iterator IterType; - int32 num_toks = 0; - for (Token *tok = tok_list; tok != NULL; tok = tok->next) num_toks++; - int32 cur_pos = 0; - // We assign the tokens numbers num_toks - 1, ... , 2, 1, 0. - // This is likely to be in closer to topological order than - // if we had given them ascending order, because of the way - // new tokens are put at the front of the list. - for (Token *tok = tok_list; tok != NULL; tok = tok->next) - token2pos[tok] = num_toks - ++cur_pos; - - unordered_set reprocess; - - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) { - Token *tok = iter->first; - int32 pos = iter->second; - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - // We only need to consider epsilon links, since non-epsilon links - // transition between frames and this function only needs to sort a list - // of tokens from a single frame. - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { // another token on this - // frame, so must consider it. - int32 next_pos = following_iter->second; - if (next_pos < pos) { // reassign the position of the next Token. - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - // In case we had previously assigned this token to be reprocessed, we can - // erase it from that set because it's "happy now" (we just processed it). - reprocess.erase(tok); - } - - size_t max_loop = 1000000, - loop_count; // max_loop is to detect epsilon cycles. 
- for (loop_count = 0; !reprocess.empty() && loop_count < max_loop; - ++loop_count) { - std::vector reprocess_vec; - for (typename unordered_set::iterator iter = reprocess.begin(); - iter != reprocess.end(); ++iter) - reprocess_vec.push_back(*iter); - reprocess.clear(); - for (typename std::vector::iterator iter = reprocess_vec.begin(); - iter != reprocess_vec.end(); ++iter) { - Token *tok = *iter; - int32 pos = token2pos[tok]; - // Repeat the processing we did above (for comments, see above). - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { - int32 next_pos = following_iter->second; - if (next_pos < pos) { - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - } - } - KALDI_ASSERT(loop_count < max_loop && - "Epsilon loops exist in your decoding " - "graph (this is not allowed!)"); - - topsorted_list->clear(); - topsorted_list->resize(cur_pos, - NULL); // create a list with NULLs in between. - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) - (*topsorted_list)[iter->second] = iter->first; -} - -// Instantiate the template for the combination of token types and FST types -// that we'll need. -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; - -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-decoder.h deleted file mode 100644 index 0152b85447e354b770745b748d266b1ca2d57024..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-decoder.h +++ /dev/null @@ -1,558 +0,0 @@ -// decoder/lattice-faster-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef KALDI_DECODER_LATTICE_FASTER_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_DECODER_H_ - -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "decoder/context_graph.h" -#include "fst/fstlib.h" -#include "fstext/fstext-lib.h" -#include "itf/decodable-itf.h" -#include "lat/determinize-lattice-pruned.h" -#include "lat/kaldi-lattice.h" -#include "util/hash-list.h" - -namespace kaldi { - -struct LatticeFasterDecoderConfig { - BaseFloat beam; - int32 max_active; - int32 min_active; - BaseFloat lattice_beam; - int32 prune_interval; - bool determinize_lattice; // not inspected by this class... used in - // command-line program. - BaseFloat beam_delta; - BaseFloat hash_ratio; - // Note: we don't make prune_scale configurable on the command line, it's not - // a very important parameter. It affects the algorithm that prunes the - // tokens as we go. - BaseFloat prune_scale; - BaseFloat length_penalty; // for balancing the del/ins ratio, suggested -3.0 - - // Most of the options inside det_opts are not actually queried by the - // LatticeFasterDecoder class itself, but by the code that calls it, for - // example in the function DecodeUtteranceLatticeFaster. - fst::DeterminizeLatticePhonePrunedOptions det_opts; - - LatticeFasterDecoderConfig() - : beam(16.0), - max_active(std::numeric_limits::max()), - min_active(200), - lattice_beam(10.0), - prune_interval(25), - determinize_lattice(true), - beam_delta(0.5), - hash_ratio(2.0), - prune_scale(0.1), - length_penalty(0.0) {} - void Register(OptionsItf *opts) { - det_opts.Register(opts); - opts->Register("beam", &beam, - "Decoding beam. Larger->slower, more accurate."); - opts->Register("max-active", &max_active, - "Decoder max active states. Larger->slower; " - "more accurate"); - opts->Register("min-active", &min_active, - "Decoder minimum #active states."); - opts->Register("lattice-beam", &lattice_beam, - "Lattice generation beam. Larger->slower, " - "and deeper lattices"); - opts->Register("prune-interval", &prune_interval, - "Interval (in frames) at " - "which to prune tokens"); - opts->Register( - "determinize-lattice", &determinize_lattice, - "If true, " - "determinize the lattice (lattice-determinization, keeping only " - "best pdf-sequence for each word-sequence)."); - opts->Register( - "beam-delta", &beam_delta, - "Increment used in decoding-- this " - "parameter is obscure and relates to a speedup in the way the " - "max-active constraint is applied. Larger is more accurate."); - opts->Register("hash-ratio", &hash_ratio, - "Setting used in decoder to " - "control hash behavior"); - } - void Check() const { - KALDI_ASSERT(beam > 0.0 && max_active > 1 && lattice_beam > 0.0 && - min_active <= max_active && prune_interval > 0 && - beam_delta > 0.0 && hash_ratio >= 1.0 && prune_scale > 0.0 && - prune_scale < 1.0); - } -}; - -namespace decoder { -// We will template the decoder on the token type as well as the FST type; this -// is a mechanism so that we can use the same underlying decoder code for -// versions of the decoder that support quickly getting the best path -// (LatticeFasterOnlineDecoder, see lattice-faster-online-decoder.h) and also -// those that do not (LatticeFasterDecoder). - -// ForwardLinks are the links from a token to a token on the next frame. -// or sometimes on the current frame (for input-epsilon links). 
-template -struct ForwardLink { - using Label = fst::StdArc::Label; - - Token *next_tok; // the next token [or NULL if represents final-state] - Label ilabel; // ilabel on arc - Label olabel; // olabel on arc - BaseFloat graph_cost; // graph cost of traversing arc (contains LM, etc.) - BaseFloat acoustic_cost; // acoustic cost (pre-scaled) of traversing arc - bool is_start_boundary; - bool is_end_boundary; - float context_score; - ForwardLink *next; // next in singly-linked list of forward arcs (arcs - // in the state-level lattice) from a token. - inline ForwardLink(Token *next_tok, Label ilabel, Label olabel, - BaseFloat graph_cost, BaseFloat acoustic_cost, - bool is_start_boundary, bool is_end_boundary, - ForwardLink *next) - : next_tok(next_tok), - ilabel(ilabel), - olabel(olabel), - graph_cost(graph_cost), - acoustic_cost(acoustic_cost), - is_start_boundary(is_start_boundary), - is_end_boundary(is_end_boundary), - context_score(0), - next(next) {} -}; - -struct StdToken { - using ForwardLinkT = ForwardLink; - using Token = StdToken; - - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. After calling PruneForwardLinks, this equals the - // minimum difference between the cost of the best path that this link is a - // part of, and the cost of the absolute best path, under the assumption that - // any of the currently active states at the decoding front may eventually - // succeed (e.g. if you were to take the currently active states one by one - // and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - Token *next; - - // This function does nothing and should be optimized out; it's needed - // so we can share the regular LatticeFasterDecoderTpl code and the code - // for LatticeFasterOnlineDecoder that supports fast traceback. - inline void SetBackpointer(Token *backpointer) {} - - // This constructor just ignores the 'backpointer' argument. That argument is - // needed so that we can use the same decoder code for LatticeFasterDecoderTpl - // and LatticeFasterOnlineDecoderTpl (which needs backpointers to support a - // fast way to obtain the best path). - inline StdToken(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLinkT *links, - Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - context_state(0), - next(next) {} -}; - -struct BackpointerToken { - using ForwardLinkT = ForwardLink; - using Token = BackpointerToken; - - // BackpointerToken is like Token but also - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. 
After calling PruneForwardLinks, this equals - // the minimum difference between the cost of the best path, and the cost of - // this is on, and the cost of the absolute best path, under the assumption - // that any of the currently active states at the decoding front may - // eventually succeed (e.g. if you were to take the currently active states - // one by one and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - BackpointerToken *next; - - // Best preceding BackpointerToken (could be a on this frame, connected to - // this via an epsilon transition, or on a previous frame). This is only - // required for an efficient GetBestPath function in - // LatticeFasterOnlineDecoderTpl; it plays no part in the lattice generation - // (the "links" list is what stores the forward links, for that). - Token *backpointer; - - inline void SetBackpointer(Token *backpointer) { - this->backpointer = backpointer; - } - - inline BackpointerToken(BaseFloat tot_cost, BaseFloat extra_cost, - ForwardLinkT *links, Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - next(next), - backpointer(backpointer), - context_state(0) {} -}; - -} // namespace decoder - -/** This is the "normal" lattice-generating decoder. - See \ref lattices_generation \ref decoders_faster and \ref decoders_simple - for more information. - - The decoder is templated on the FST type and the token type. The token type - will normally be StdToken, but also may be BackpointerToken which is to - support quick lookup of the current best path (see - lattice-faster-online-decoder.h) - - The FST you invoke this decoder which is expected to equal - Fst::Fst, a.k.a. StdFst, or GrammarFst. If you invoke it with - FST == StdFst and it notices that the actual FST type is - fst::VectorFst or fst::ConstFst, the decoder object - will internally cast itself to one that is templated on those more specific - types; this is an optimization for speed. - */ -template -class LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph); - - // This version of the constructor takes ownership of the fst, and will delete - // it when this object is destroyed. - LatticeFasterDecoderTpl(const LatticeFasterDecoderConfig &config, FST *fst); - - void SetOptions(const LatticeFasterDecoderConfig &config) { - config_ = config; - } - - const LatticeFasterDecoderConfig &GetOptions() const { return config_; } - - ~LatticeFasterDecoderTpl(); - - /// Decodes until there are no more frames left in the "decodable" object.. - /// note, this may block waiting for input if the "decodable" object blocks. - /// Returns true if any kind of traceback is available (not necessarily from a - /// final state). - bool Decode(DecodableInterface *decodable); - - /// says whether a final-state was active on the last frame. 
If it was not, - /// the lattice (or traceback) will end with states that are not final-states. - bool ReachedFinal() const { - return FinalRelativeCost() != std::numeric_limits::infinity(); - } - - /// Outputs an FST corresponding to the single best path through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. Note: this just calls - /// GetRawLattice() and figures out the shortest path. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// Outputs an FST corresponding to the raw, state-level - /// tracebacks. Returns true if result is nonempty. - /// If "use_final_probs" is true AND we reached the final-state - /// of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - /// The raw lattice will be topologically sorted. - /// - /// See also GetRawLatticePruned in lattice-faster-online-decoder.h, - /// which also supports a pruning beam, in case for some reason - /// you want it pruned tighter than the regular lattice beam. - /// We could put that here in future needed. - bool GetRawLattice(Lattice *ofst, bool use_final_probs = true) const; - - /// [Deprecated, users should now use GetRawLattice and determinize it - /// themselves, e.g. using DeterminizeLatticePhonePrunedWrapper]. - /// Outputs an FST corresponding to the lattice-determinized - /// lattice (one path per word sequence). Returns true if result is - /// nonempty. If "use_final_probs" is true AND we reached the final-state of - /// the graph then it will include those as final-probs, else it will treat - /// all final-probs as one. - bool GetLattice(CompactLattice *ofst, bool use_final_probs = true) const; - - /// InitDecoding initializes the decoding, and should only be used if you - /// intend to call AdvanceDecoding(). If you call Decode(), you don't need to - /// call this. You can also call InitDecoding if you have already decoded an - /// utterance and want to start with a new utterance. - void InitDecoding(); - - /// This will decode until there are no more frames ready in the decodable - /// object. You can keep calling it each time more frames become available. - /// If max_num_frames is specified, it specifies the maximum number of frames - /// the function will decode before returning. - void AdvanceDecoding(DecodableInterface *decodable, - int32 max_num_frames = -1); - - /// This function may be optionally called after AdvanceDecoding(), when you - /// do not plan to decode any further. It does an extra pruning step that - /// will help to prune the lattices output by GetLattice and (particularly) - /// GetRawLattice more completely, particularly toward the end of the - /// utterance. If you call this, you cannot call AdvanceDecoding again (it - /// will fail), and you cannot call GetLattice() and related functions with - /// use_final_probs = false. Used to be called PruneActiveTokensFinal(). - void FinalizeDecoding(); - - /// FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives - /// more information. It returns the difference between the best (final-cost - /// plus cost) of any token on the final frame, and the best cost of any token - /// on the final frame. If it is infinity it means no final-states were - /// present on the final frame. It will usually be nonnegative. 
If it not - /// too positive (e.g. < 5 is my first guess, but this is not tested) you can - /// take it as a good indication that we reached the final-state with - /// reasonable likelihood. - BaseFloat FinalRelativeCost() const; - - // Returns the number of frames decoded so far. The value returned changes - // whenever we call ProcessEmitting(). - inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; } - - protected: - // we make things protected instead of private, as code in - // LatticeFasterOnlineDecoderTpl, which inherits from this, also uses the - // internals. - - // Deletes the elements of the singly linked list tok->links. - inline static void DeleteForwardLinks(Token *tok); - - // head of per-frame list of Tokens (list is in topological order), - // and something saying whether we ever pruned it using PruneForwardLinks. - struct TokenList { - Token *toks; - bool must_prune_forward_links; - bool must_prune_tokens; - TokenList() - : toks(NULL), must_prune_forward_links(true), must_prune_tokens(true) {} - }; - - using Elem = typename HashList::Elem; - // Equivalent to: - // struct Elem { - // StateId key; - // Token *val; - // Elem *tail; - // }; - - void PossiblyResizeHash(size_t num_toks); - - // FindOrAddToken either locates a token in hash of toks_, or if necessary - // inserts a new, empty token (i.e. with no forward links) for the current - // frame. [note: it's inserted if necessary into hash toks_ and also into the - // singly linked list of tokens active on this frame (whose head is at - // active_toks_[frame]). The frame_plus_one argument is the acoustic frame - // index plus one, which is used to index into the active_toks_ array. - // Returns the Token pointer. Sets "changed" (if non-NULL) to true if the - // token was newly created or the cost changed. - // If Token == StdToken, the 'backpointer' argument has no purpose (and will - // hopefully be optimized out). - inline Elem *FindOrAddToken(StateId state, int32 frame_plus_one, - BaseFloat tot_cost, Token *backpointer, - bool *changed); - - // prunes outgoing links for all tokens in active_toks_[frame] - // it's called by PruneActiveTokens - // all links, that have link_extra_cost > lattice_beam are pruned - // delta is the amount by which the extra_costs must change - // before we set *extra_costs_changed = true. - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - void PruneForwardLinks(int32 frame_plus_one, bool *extra_costs_changed, - bool *links_pruned, BaseFloat delta); - - // This function computes the final-costs for tokens active on the final - // frame. It outputs to final-costs, if non-NULL, a map from the Token* - // pointer to the final-prob of the corresponding state, for all Tokens - // that correspond to states that have final-probs. This map will be - // empty if there were no final-probs. It outputs to - // final_relative_cost, if non-NULL, the difference between the best - // forward-cost including the final-prob cost, and the best forward-cost - // without including the final-prob cost (this will usually be positive), or - // infinity if there were no final-probs. [c.f. FinalRelativeCost(), which - // outputs this quanitity]. 
It outputs to final_best_cost, if - // non-NULL, the lowest for any token t active on the final frame, of - // forward-cost[t] + final-cost[t], where final-cost[t] is the final-cost in - // the graph of the state corresponding to token t, or the best of - // forward-cost[t] if there were no final-probs active on the final frame. - // You cannot call this after FinalizeDecoding() has been called; in that - // case you should get the answer from class-member variables. - void ComputeFinalCosts(unordered_map *final_costs, - BaseFloat *final_relative_cost, - BaseFloat *final_best_cost) const; - - // PruneForwardLinksFinal is a version of PruneForwardLinks that we call - // on the final frame. If there are final tokens active, it uses - // the final-probs for pruning, otherwise it treats all tokens as final. - void PruneForwardLinksFinal(); - - // Prune away any tokens on this frame that have no forward links. - // [we don't do this in PruneForwardLinks because it would give us - // a problem with dangling pointers]. - // It's called by PruneActiveTokens if any forward links have been pruned - void PruneTokensForFrame(int32 frame_plus_one); - - // Go backwards through still-alive tokens, pruning them if the - // forward+backward cost is more than lat_beam away from the best path. It's - // possible to prove that this is "correct" in the sense that we won't lose - // anything outside of lat_beam, regardless of what happens in the future. - // delta controls when it considers a cost to have changed enough to continue - // going backward and propagating the change. larger delta -> will recurse - // less far. - void PruneActiveTokens(BaseFloat delta); - - /// Gets the weight cutoff. Also counts the active tokens. - BaseFloat GetCutoff(Elem *list_head, size_t *tok_count, - BaseFloat *adaptive_beam, Elem **best_elem); - - /// Processes emitting arcs for one frame. Propagates from prev_toks_ to - /// cur_toks_. Returns the cost cutoff for subsequent ProcessNonemitting() to - /// use. - BaseFloat ProcessEmitting(DecodableInterface *decodable); - - /// Processes nonemitting (epsilon) arcs for one frame. Called after - /// ProcessEmitting() on each frame. The cost cutoff is computed by the - /// preceding ProcessEmitting(). - void ProcessNonemitting(BaseFloat cost_cutoff); - - // HashList defined in ../util/hash-list.h. It actually allows us to maintain - // more than one list (e.g. for current and previous frames), but only one of - // them at a time can be indexed by StateId. It is indexed by frame-index - // plus one, where the frame-index is zero-based, as used in decodable object. - // That is, the emitting probs of frame t are accounted for in tokens at - // toks_[t+1]. The zeroth frame is for nonemitting transition at the start of - // the graph. - HashList toks_; - - std::vector active_toks_; // Lists of tokens, indexed by - // frame (members of TokenList are toks, must_prune_forward_links, - // must_prune_tokens). - std::vector - queue_; // temp variable used in ProcessNonemitting, - std::vector tmp_array_; // used in GetCutoff. - - // fst_ is a pointer to the FST we are decoding from. - const FST *fst_; - // delete_fst_ is true if the pointer fst_ needs to be deleted when this - // object is destroyed. - bool delete_fst_; - - std::vector cost_offsets_; // This contains, for each - // frame, an offset that was added to the acoustic log-likelihoods on that - // frame in order to keep everything in a nice dynamic range i.e. close to - // zero, to reduce roundoff errors. 
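The three outputs of ComputeFinalCosts() described above can be illustrated with a few lines of standalone arithmetic. This is only a sketch with made-up forward costs, not the Kaldi code; tokens whose states are not final simply carry no final cost here.

```python
import math

def compute_final_costs(tokens):
    """Toy version of the ComputeFinalCosts() bookkeeping described above.

    tokens: list of (forward_cost, final_cost) pairs, with final_cost None
    for tokens whose state has no final-prob.
    """
    inf = math.inf
    final_costs = {i: fc for i, (_, fc) in enumerate(tokens) if fc is not None}

    best_forward = min(fwd for fwd, _ in tokens)
    best_with_final = min((fwd + fc for fwd, fc in tokens if fc is not None),
                          default=inf)

    # FinalRelativeCost(): infinity if nothing on the last frame was final.
    final_relative_cost = best_with_final - best_forward
    # final_best_cost falls back to the plain forward cost when nothing is final.
    final_best_cost = best_with_final if final_costs else best_forward
    return final_costs, final_relative_cost, final_best_cost

# Two of the three tokens reach a final state.
print(compute_final_costs([(10.0, 2.5), (9.0, None), (11.0, 0.5)]))
# -> ({0: 2.5, 2: 0.5}, 2.5, 11.5)
```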
- LatticeFasterDecoderConfig config_; - int32 num_toks_; // current total #toks allocated... - bool warned_; - - /// decoding_finalized_ is true if someone called FinalizeDecoding(). [note, - /// calling this is optional]. If true, it's forbidden to decode more. Also, - /// if this is set, then the output of ComputeFinalCosts() is in the next - /// three variables. The reason we need to do this is that after - /// FinalizeDecoding() calls PruneTokensForFrame() for the final frame, some - /// of the tokens on the last frame are freed, so we free the list from toks_ - /// to avoid having dangling pointers hanging around. - bool decoding_finalized_; - /// For the meaning of the next 3 variables, see the comment for - /// decoding_finalized_ above., and ComputeFinalCosts(). - unordered_map final_costs_; - BaseFloat final_relative_cost_; - BaseFloat final_best_cost_; - - std::shared_ptr context_graph_ = nullptr; - - // There are various cleanup tasks... the toks_ structure contains - // singly linked lists of Token pointers, where Elem is the list type. - // It also indexes them in a hash, indexed by state (this hash is only - // maintained for the most recent frame). toks_.Clear() - // deletes them from the hash and returns the list of Elems. The - // function DeleteElems calls toks_.Delete(elem) for each elem in - // the list, which returns ownership of the Elem to the toks_ structure - // for reuse, but does not delete the Token pointer. The Token pointers - // are reference-counted and are ultimately deleted in PruneTokensForFrame, - // but are also linked together on each frame by their own linked-list, - // using the "next" pointer. We delete them manually. - void DeleteElems(Elem *list); - - // This function takes a singly linked list of tokens for a single frame, and - // outputs a list of them in topological order (it will crash if no such order - // can be found, which will typically be due to decoding graphs with epsilon - // cycles, which are not allowed). Note: the output list may contain NULLs, - // which the caller should pass over; it just happens to be more efficient for - // the algorithm to output a list that contains NULLs. - static void TopSortTokens(Token *tok_list, - std::vector *topsorted_list); - - void ClearActiveTokens(); - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterDecoderTpl); -}; - -typedef LatticeFasterDecoderTpl - LatticeFasterDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-online-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-online-decoder.cc deleted file mode 100644 index 2345b4d129ff905784762e973bad279f2fb55d31..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-online-decoder.cc +++ /dev/null @@ -1,278 +0,0 @@ -// decoder/lattice-faster-online-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2014 IMSL, PKU-HKUST (author: Wei Shi) -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -// see note at the top of lattice-faster-decoder.cc, about how to maintain this -// file in sync with lattice-faster-decoder.cc - -#include -#include -#include -#include - -#include "decoder/lattice-faster-online-decoder.h" - -namespace kaldi { - -template -bool LatticeFasterOnlineDecoderTpl::TestGetBestPath( - bool use_final_probs) const { - Lattice lat1; - { - Lattice raw_lat; - this->GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, &lat1); - } - Lattice lat2; - GetBestPath(&lat2, use_final_probs); - BaseFloat delta = 0.1; - int32 num_paths = 1; - if (!fst::RandEquivalent(lat1, lat2, num_paths, delta, rand())) { - KALDI_WARN << "Best-path test failed"; - return false; - } else { - return true; - } -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterOnlineDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - olat->DeleteStates(); - BaseFloat final_graph_cost; - BestPathIterator iter = BestPathEnd(use_final_probs, &final_graph_cost); - if (iter.Done()) return false; // would have printed warning. - StateId state = olat->AddState(); - olat->SetFinal(state, LatticeWeight(final_graph_cost, 0.0)); - while (!iter.Done()) { - LatticeArc arc; - iter = TraceBackBestPath(iter, &arc); - arc.nextstate = state; - StateId new_state = olat->AddState(); - olat->AddArc(new_state, arc); - state = new_state; - } - olat->SetStart(state); - return true; -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::BestPathEnd( - bool use_final_probs, BaseFloat *final_cost_out) const { - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "BestPathEnd() with use_final_probs == false"; - KALDI_ASSERT(this->NumFramesDecoded() > 0 && - "You cannot call BestPathEnd if no frames were decoded."); - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - // Singly linked list of tokens on last frame (access list through "next" - // pointer). - BaseFloat best_cost = std::numeric_limits::infinity(); - BaseFloat best_final_cost = 0; - Token *best_tok = NULL; - for (Token *tok = this->active_toks_.back().toks; tok != NULL; - tok = tok->next) { - BaseFloat cost = tok->tot_cost, final_cost = 0.0; - if (use_final_probs && !final_costs.empty()) { - // if we are instructed to use final-probs, and any final tokens were - // active on final frame, include the final-prob in the cost of the token. 
- typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) { - final_cost = iter->second; - cost += final_cost; - } else { - cost = std::numeric_limits::infinity(); - } - } - if (cost < best_cost) { - best_cost = cost; - best_tok = tok; - best_final_cost = final_cost; - } - } - if (best_tok == - NULL) { // this should not happen, and is likely a code error or - // caused by infinities in likelihoods, but I'm not making - // it a fatal error for now. - KALDI_WARN << "No final token found."; - } - if (final_cost_out) *final_cost_out = best_final_cost; - return BestPathIterator(best_tok, this->NumFramesDecoded() - 1); -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::TraceBackBestPath(BestPathIterator iter, - LatticeArc *oarc) const { - KALDI_ASSERT(!iter.Done() && oarc != NULL); - Token *tok = static_cast(iter.tok); - int32 cur_t = iter.frame, step_t = 0; - if (tok->backpointer != NULL) { - // retrieve the correct forward link(with the best link cost) - BaseFloat best_cost = std::numeric_limits::infinity(); - ForwardLinkT *link; - for (link = tok->backpointer->links; link != NULL; link = link->next) { - if (link->next_tok == tok) { // this is a link to "tok" - BaseFloat graph_cost = link->graph_cost, - acoustic_cost = link->acoustic_cost; - BaseFloat cost = graph_cost + acoustic_cost; - if (cost < best_cost) { - oarc->ilabel = link->ilabel; - oarc->olabel = link->olabel; - if (link->ilabel != 0) { - KALDI_ASSERT(static_cast(cur_t) < - this->cost_offsets_.size()); - acoustic_cost -= this->cost_offsets_[cur_t]; - step_t = -1; - } else { - step_t = 0; - } - oarc->weight = LatticeWeight(graph_cost, acoustic_cost); - best_cost = cost; - } - } - } - if (link == NULL && - best_cost == - std::numeric_limits::infinity()) { // Did not find - // correct link. - KALDI_ERR << "Error tracing best-path back (likely " - << "bug in token-pruning algorithm)"; - } - } else { - oarc->ilabel = 0; - oarc->olabel = 0; - oarc->weight = LatticeWeight::One(); // zero costs. - } - return BestPathIterator(tok->backpointer, cur_t + step_t); -} - -template -bool LatticeFasterOnlineDecoderTpl::GetRawLatticePruned( - Lattice *ofst, bool use_final_probs, BaseFloat beam) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = this->active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - for (int32 f = 0; f <= num_frames; f++) { - if (this->active_toks_[f].toks == NULL) { - KALDI_WARN << "No tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - } - unordered_map tok_map; - std::queue > tok_queue; - // First initialize the queue and states. 
Put the initial state on the queue; - // this is the last token in the list active_toks_[0].toks. - for (Token *tok = this->active_toks_[0].toks; tok != NULL; tok = tok->next) { - if (tok->next == NULL) { - tok_map[tok] = ofst->AddState(); - ofst->SetStart(tok_map[tok]); - std::pair tok_pair(tok, 0); // #frame = 0 - tok_queue.push(tok_pair); - } - } - - // Next create states for "good" tokens - while (!tok_queue.empty()) { - std::pair cur_tok_pair = tok_queue.front(); - tok_queue.pop(); - Token *cur_tok = cur_tok_pair.first; - int32 cur_frame = cur_tok_pair.second; - KALDI_ASSERT(cur_frame >= 0 && cur_frame <= this->cost_offsets_.size()); - - typename unordered_map::const_iterator iter = - tok_map.find(cur_tok); - KALDI_ASSERT(iter != tok_map.end()); - StateId cur_state = iter->second; - - for (ForwardLinkT *l = cur_tok->links; l != NULL; l = l->next) { - Token *next_tok = l->next_tok; - if (next_tok->extra_cost < beam) { - // so both the current and the next token are good; create the arc - int32 next_frame = l->ilabel == 0 ? cur_frame : cur_frame + 1; - StateId nextstate; - if (tok_map.find(next_tok) == tok_map.end()) { - nextstate = tok_map[next_tok] = ofst->AddState(); - tok_queue.push(std::pair(next_tok, next_frame)); - } else { - nextstate = tok_map[next_tok]; - } - BaseFloat cost_offset = - (l->ilabel != 0 ? this->cost_offsets_[cur_frame] : 0); - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(cur_state, arc); - } - } - if (cur_frame == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(cur_tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - return (ofst->NumStates() != 0); -} - -// Instantiate the template for the FST types that we'll need. -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-online-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-online-decoder.h deleted file mode 100644 index dc50cfa73e6574e9625eda9045c47f674fcbc1e3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/decoder/lattice-faster-online-decoder.h +++ /dev/null @@ -1,131 +0,0 @@ -// decoder/lattice-faster-online-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
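GetBestPath(), BestPathEnd() and TraceBackBestPath() above avoid building the raw lattice: they pick the cheapest token on the last frame and then walk backpointers, re-finding the cheapest forward link into each token. Below is a rough standalone sketch of that walk using plain dictionaries; the names and data layout are illustrative, and the per-frame acoustic cost offsets and frame bookkeeping from the real code are left out.

```python
import math

def trace_best_path(last_tokens, backpointer, links, final_costs):
    """Sketch of the BestPathEnd()/TraceBackBestPath() logic above.

    last_tokens : {tok: forward_cost} for tokens alive on the final frame
    backpointer : {tok: predecessor token, or None at the start of the utterance}
    links       : {tok: [(next_tok, ilabel, olabel, graph_cost, acoustic_cost), ...]}
    final_costs : {tok: final_cost}; empty if no final state was reached
    """
    inf = math.inf

    # BestPathEnd(): if any final tokens exist, rank by forward + final cost,
    # treating tokens without a final-prob as infinitely bad; else forward cost only.
    def end_cost(tok):
        if final_costs:
            return last_tokens[tok] + final_costs.get(tok, inf)
        return last_tokens[tok]

    tok = min(last_tokens, key=end_cost)
    end_tok = tok

    # TraceBackBestPath(): walk backpointers; at each step pick the cheapest
    # forward link of the predecessor that lands on the current token.
    path = []
    while backpointer.get(tok) is not None:
        prev = backpointer[tok]
        best = min((l for l in links[prev] if l[0] == tok),
                   key=lambda l: l[3] + l[4])          # graph + acoustic cost
        _, ilabel, olabel, graph_cost, acoustic_cost = best
        path.append((ilabel, olabel, graph_cost + acoustic_cost))
        tok = prev
    path.reverse()
    return end_tok, path

# Tiny example: t0 -> t1 -> t2, with a worse alternative link into t2.
last_tokens = {"t2": 4.0}
backpointer = {"t0": None, "t1": "t0", "t2": "t1"}
links = {"t0": [("t1", 7, 7, 0.5, 1.0)],
         "t1": [("t2", 3, 3, 0.2, 2.3), ("t2", 4, 4, 1.0, 3.0)]}
print(trace_best_path(last_tokens, backpointer, links, {"t2": 0.0}))
# -> ('t2', [(7, 7, 1.5), (3, 3, 2.5)])
```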
- -// see note at the top of lattice-faster-decoder.h, about how to maintain this -// file in sync with lattice-faster-decoder.h - -#ifndef KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ - -#include "decoder/lattice-faster-decoder.h" - -#include - -namespace kaldi { - -/** LatticeFasterOnlineDecoderTpl is as LatticeFasterDecoderTpl but also - supports an efficient way to get the best path (see the function - BestPathEnd()), which is useful in endpointing and in situations where you - might want to frequently access the best path. - - This is only templated on the FST type, since the Token type is required to - be BackpointerToken. Actually it only makes sense to instantiate - LatticeFasterDecoderTpl with Token == BackpointerToken if you do so - indirectly via this child class. - */ -template -class LatticeFasterOnlineDecoderTpl - : public LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using Token = decoder::BackpointerToken; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterOnlineDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : LatticeFasterDecoderTpl(fst, config, context_graph) {} - - // This version of the initializer takes ownership of 'fst', and will delete - // it when this object is destroyed. - LatticeFasterOnlineDecoderTpl(const LatticeFasterDecoderConfig &config, - FST *fst) - : LatticeFasterDecoderTpl(config, fst) {} - - struct BestPathIterator { - void *tok; - int32 frame; - // note, "frame" is the frame-index of the frame you'll get the - // transition-id for next time, if you call TraceBackBestPath on this - // iterator (assuming it's not an epsilon transition). Note that this - // is one less than you might reasonably expect, e.g. it's -1 for - // the nonemitting transitions before the first frame. - BestPathIterator(void *t, int32 f) : tok(t), frame(f) {} - bool Done() const { return tok == NULL; } - }; - - /// Outputs an FST corresponding to the single best path through the lattice. - /// This is quite efficient because it doesn't get the entire raw lattice and - /// find the best path through it; instead, it uses the BestPathEnd and - /// BestPathIterator so it basically traces it back through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// This function does a self-test of GetBestPath(). Returns true on - /// success; returns false and prints a warning on failure. - bool TestGetBestPath(bool use_final_probs = true) const; - - /// This function returns an iterator that can be used to trace back - /// the best path. If use_final_probs == true and at least one final state - /// survived till the end, it will use the final-probs in working out the best - /// final Token, and will output the final cost to *final_cost (if non-NULL), - /// else it will use only the forward likelihood, and will put zero in - /// *final_cost (if non-NULL). 
- /// Requires that NumFramesDecoded() > 0. - BestPathIterator BestPathEnd(bool use_final_probs, - BaseFloat *final_cost = NULL) const; - - /// This function can be used in conjunction with BestPathEnd() to trace back - /// the best path one link at a time (e.g. this can be useful in endpoint - /// detection). By "link" we mean a link in the graph; not all links cross - /// frame boundaries, but each time you see a nonzero ilabel you can interpret - /// that as a frame. The return value is the updated iterator. It outputs - /// the ilabel and olabel, and the (graph and acoustic) weight to the "arc" - /// pointer, while leaving its "nextstate" variable unchanged. - BestPathIterator TraceBackBestPath(BestPathIterator iter, - LatticeArc *arc) const; - - /// Behaves the same as GetRawLattice but only processes tokens whose - /// extra_cost is smaller than the best-cost plus the specified beam. - /// It is only worthwhile to call this function if beam is less than - /// the lattice_beam specified in the config; otherwise, it would - /// return essentially the same thing as GetRawLattice, but more slowly. - bool GetRawLatticePruned(Lattice *ofst, bool use_final_probs, - BaseFloat beam) const; - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterOnlineDecoderTpl); -}; - -typedef LatticeFasterOnlineDecoderTpl LatticeFasterOnlineDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstaddselfloops.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstaddselfloops.cc deleted file mode 100644 index 145bf006f2324136c5fea4a8d0012a7a4126c646..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstaddselfloops.cc +++ /dev/null @@ -1,100 +0,0 @@ -// fstbin/fstaddselfloops.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#include "util/simple-io-funcs.h" - -/* some test examples: - pushd ~/tmpdir - ( echo 3; echo 4) > in.list - ( echo 5; echo 6) > out.list - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstaddselfloops in.list out.list - | fstprint ( echo "0 1 0 1"; echo " 0 2 1 0"; echo "1 0"; echo "2 0"; ) | - fstcompile | fstaddselfloops in.list out.list | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Adds self-loops to states of an FST to propagate disambiguation " - "symbols through it\n" - "They are added on each final state and each state with non-epsilon " - "output symbols\n" - "on at least one arc out of the state. Useful in conjunction with " - "predeterminize\n" - "\n" - "Usage: fstaddselfloops in-disambig-list out-disambig-list [in.fst " - "[out.fst] ]\n" - "E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst\n" - "in.list and out.list are lists of integers, one per line, of the\n" - "same length.\n"; - - ParseOptions po(usage); - po.Read(argc, argv); - - if (po.NumArgs() < 2 || po.NumArgs() > 4) { - po.PrintUsage(); - exit(1); - } - - std::string disambig_in_rxfilename = po.GetArg(1), - disambig_out_rxfilename = po.GetArg(2), - fst_in_filename = po.GetOptArg(3), - fst_out_filename = po.GetOptArg(4); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - std::vector disambig_in; - if (!ReadIntegerVectorSimple(disambig_in_rxfilename, &disambig_in)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_in_rxfilename); - - std::vector disambig_out; - if (!ReadIntegerVectorSimple(disambig_out_rxfilename, &disambig_out)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_out_rxfilename); - - if (disambig_in.size() != disambig_out.size()) - KALDI_ERR - << "fstaddselfloops: mismatch in size of disambiguation symbols"; - - AddSelfLoops(fst, disambig_in, disambig_out); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstdeterminizestar.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstdeterminizestar.cc deleted file mode 100644 index e818143025c0fd5d389c28c77715d65711fe63f1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstdeterminizestar.cc +++ /dev/null @@ -1,114 +0,0 @@ -// fstbin/fstdeterminizestar.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#if !defined(_MSC_VER) && !defined(__APPLE__) -#include // Comment this line and the call to signal below if -// it causes compilation problems. It is only to enable a debugging procedure -// when determinization does not terminate. We are disabling this code if -// compiling on Windows because signal.h is not available there, and on -// MacOS due to a problem with in the initial release of Sierra. -#endif - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 1 1 0"; echo "0 0" ) | fstcompile | - fstdeterminizestar | fstprint # this last one fails [correctly]: ( echo "0 0 0 - 1"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - - cd ~/tmpdir - while true; do - fstrand > 1.fst - fstpredeterminize out.lst 1.fst | fstdeterminizestar | fstrmsymbols out.lst - > 2.fst fstequivalent --random=true 1.fst 2.fst || echo "Test failed" echo -n - "." done - - Test of debugging [with non-determinizable input]: - ( echo " 0 0 1 0 1.0"; echo "0 1 1 0"; echo "1 1 1 0 0"; echo "0 2 2 0"; echo - "2"; echo "1" ) | fstcompile | fstdeterminizestar kill -SIGUSR1 [the process-id - of fstdeterminizestar] # prints out a bunch of debugging output showing the - mess it got itself into. -*/ - -bool debug_location = false; -void signal_handler(int) { debug_location = true; } - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Removes epsilons and determinizes in one step\n" - "\n" - "Usage: fstdeterminizestar [in.fst [out.fst] ]\n" - "\n" - "See also: fstdeterminizelog, lattice-determinize\n"; - - float delta = kDelta; - int max_states = -1; - bool use_log = false; - ParseOptions po(usage); - po.Register("use-log", &use_log, "Determinize in log semiring."); - po.Register("delta", &delta, - "Delta value used to determine equivalence of weights."); - po.Register( - "max-states", &max_states, - "Maximum number of states in determinized FST before it will abort."); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_str = po.GetOptArg(1), fst_out_str = po.GetOptArg(2); - - // This enables us to get traceback info from determinization that is - // not seeming to terminate. -#if !defined(_MSC_VER) && !defined(__APPLE__) - signal(SIGUSR1, signal_handler); -#endif - // Normal case: just files. - VectorFst *fst = ReadFstKaldi(fst_in_str); - - ArcSort(fst, ILabelCompare()); // improves speed. 
- if (use_log) { - DeterminizeStarInLog(fst, delta, &debug_location, max_states); - } else { - VectorFst det_fst; - DeterminizeStar(*fst, &det_fst, delta, &debug_location, max_states); - *fst = det_fst; // will do shallow copy and then det_fst goes - // out of scope anyway. - } - WriteFstKaldi(*fst, fst_out_str); - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstisstochastic.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstisstochastic.cc deleted file mode 100644 index 468ed0daa7d37cb9a25cf25264f86e48e137b975..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstisstochastic.cc +++ /dev/null @@ -1,91 +0,0 @@ -// fstbin/fstisstochastic.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -// e.g. of test: -// echo " 0 0" | fstcompile | fstisstochastic -// should return 0 and print "0 0" [meaning, min and -// max weight are one = exp(0)] -// echo " 0 1" | fstcompile | fstisstochastic -// should return 1, not stochastic, and print 1 1 -// (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic should return 0, stochastic; it prints "0 -// -1.78e-07" for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo -// "1 0" ) | fstcompile | fstisstochastic --test-in-log=false should return 1, -// not stochastic in tropical; it prints "0 0.693147" for me (echo "0 0 0 0 0 "; -// echo "0 1 0 0 0 "; echo "1 0" ) | fstcompile | fstisstochastic -// --test-in-log=false should return 0, stochastic in tropical; it prints "0 0" -// for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic --test-in-log=false --delta=1 returns 0 even -// though not stochastic because we gave it an absurdly large delta. 
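The expected outputs listed in the fstisstochastic examples above follow from a simple per-state check: in the log semiring a state is stochastic when the negative log of its total outgoing probability mass (arcs plus final weight) is within delta of zero, and the tool reports the extreme per-state values. The sketch below is standalone arithmetic inferred from those examples, not the OpenFst implementation, and the real tool's min/max ordering conventions may differ slightly.

```python
import math

def stochasticity_range(arcs, finals, num_states):
    """Per-state -log(total outgoing probability mass), min and max over states.

    arcs   : list of (state, next_state, weight) with weight = -log(prob)
    finals : dict state -> final weight, also a -log probability
    An FST counts as stochastic when both returned values are within delta of 0.
    """
    per_state = []
    for s in range(num_states):
        mass = sum(math.exp(-w) for src, _, w in arcs if src == s)
        if s in finals:
            mass += math.exp(-finals[s])
        per_state.append(-math.log(mass) if mass > 0 else math.inf)
    return min(per_state), max(per_state)

# Third example from the comment above: two arcs of weight 0.693147 (= -log 0.5)
# out of state 0, and a free final weight on state 1.
arcs = [(0, 0, 0.693147), (0, 1, 0.693147)]
print(stochasticity_range(arcs, finals={1: 0.0}, num_states=2))
# -> roughly (-1e-07, 0.0): stochastic to within the default delta
```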
- -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Checks whether an FST is stochastic and exits with success if so.\n" - "Prints out maximum error (in log units).\n" - "\n" - "Usage: fstisstochastic [ in.fst ]\n"; - - float delta = 0.01; - bool test_in_log = true; - - ParseOptions po(usage); - po.Register("delta", &delta, "Maximum error to accept."); - po.Register("test-in-log", &test_in_log, - "Test stochasticity in log semiring."); - po.Read(argc, argv); - - if (po.NumArgs() > 1) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1); - - Fst *fst = ReadFstKaldiGeneric(fst_in_filename); - - bool ans; - StdArc::Weight min, max; - if (test_in_log) - ans = IsStochasticFstInLog(*fst, delta, &min, &max); - else - ans = IsStochasticFst(*fst, delta, &min, &max); - - std::cout << min.Value() << " " << max.Value() << '\n'; - delete fst; - if (ans) - return 0; // success; - else - return 1; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstminimizeencoded.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstminimizeencoded.cc deleted file mode 100644 index ae9ca6d75abe67d9a195572dd6d91ec3c7b44851..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fstminimizeencoded.cc +++ /dev/null @@ -1,74 +0,0 @@ -// fstbin/fstminimizeencoded.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstminimizeencoded | fstprint - ( echo "0 1 0 0"; echo " 0 2 0 0"; echo "1 0"; echo "2 0"; ) | fstcompile | - fstminimizeencoded | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Minimizes FST after encoding [similar to fstminimize, but no " - "weight-pushing]\n" - "\n" - "Usage: fstminimizeencoded [in.fst [out.fst] ]\n"; - - float delta = kDelta; - ParseOptions po(usage); - po.Register("delta", &delta, - "Delta likelihood used for quantization of weights"); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1), - fst_out_filename = po.GetOptArg(2); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - MinimizeEncoded(fst, delta); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fsttablecompose.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fsttablecompose.cc deleted file mode 100644 index bdd476da78b8cb8823c60abf33b5278e05bfd92c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstbin/fsttablecompose.cc +++ /dev/null @@ -1,133 +0,0 @@ -// fstbin/fsttablecompose.cc - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "fstext/table-matcher.h" -#include "util/parse-options.h" - -/* - cd ~/tmpdir - while true; do - fstrand | fstarcsort --sort_type=olabel > 1.fst; fstrand | fstarcsort - > 2.fst fstcompose 1.fst 2.fst > 3a.fst fsttablecompose 1.fst 2.fst > 3b.fst - fstequivalent --random=true 3a.fst 3b.fst || echo "Test failed" - echo -n "." - done - -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - /* - fsttablecompose should always give equivalent results to compose, - but it is more efficient for certain kinds of inputs. 
- In particular, it is useful when, say, the left FST has states - that typically either have epsilon olabels, or - one transition out for each of the possible symbols (as the - olabel). The same with the input symbols of the right-hand FST - is possible. - */ - - const char *usage = - "Composition algorithm [between two FSTs of standard type, in " - "tropical\n" - "semiring] that is more efficient for certain cases-- in particular,\n" - "where one of the FSTs (the left one, if --match-side=left) has large\n" - "out-degree\n" - "\n" - "Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) " - "(fst2-rxfilename|fst2-rspecifier) [(out-rxfilename|out-rspecifier)]\n"; - - ParseOptions po(usage); - - TableComposeOptions opts; - std::string match_side = "left"; - std::string compose_filter = "sequence"; - - po.Register("connect", &opts.connect, "If true, trim FST before output."); - po.Register("match-side", &match_side, - "Side of composition to do table " - "match, one of: \"left\" or \"right\"."); - po.Register("compose-filter", &compose_filter, - "Composition filter to use, " - "one of: \"alt_sequence\", \"auto\", \"match\", \"sequence\""); - - po.Read(argc, argv); - - if (match_side == "left") { - opts.table_match_type = MATCH_OUTPUT; - } else if (match_side == "right") { - opts.table_match_type = MATCH_INPUT; - } else { - KALDI_ERR << "Invalid match-side option: " << match_side; - } - - if (compose_filter == "alt_sequence") { - opts.filter_type = ALT_SEQUENCE_FILTER; - } else if (compose_filter == "auto") { - opts.filter_type = AUTO_FILTER; - } else if (compose_filter == "match") { - opts.filter_type = MATCH_FILTER; - } else if (compose_filter == "sequence") { - opts.filter_type = SEQUENCE_FILTER; - } else { - KALDI_ERR << "Invalid compose-filter option: " << compose_filter; - } - - if (po.NumArgs() < 2 || po.NumArgs() > 3) { - po.PrintUsage(); - exit(1); - } - - std::string fst1_in_str = po.GetArg(1), fst2_in_str = po.GetArg(2), - fst_out_str = po.GetOptArg(3); - - VectorFst *fst1 = ReadFstKaldi(fst1_in_str); - - VectorFst *fst2 = ReadFstKaldi(fst2_in_str); - - // Checks if is olabel sorted and is ilabel sorted. - if (fst1->Properties(fst::kOLabelSorted, true) == 0) { - KALDI_WARN << "The first FST is not olabel sorted."; - } - if (fst2->Properties(fst::kILabelSorted, true) == 0) { - KALDI_WARN << "The second FST is not ilabel sorted."; - } - - VectorFst composed_fst; - - TableCompose(*fst1, *fst2, &composed_fst, opts); - - delete fst1; - delete fst2; - - WriteFstKaldi(composed_fst, fst_out_str); - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstext/determinize-lattice-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstext/determinize-lattice-inl.h deleted file mode 100644 index 0bfbc8f41c7e439b1fac037f60490e04fdcbdd8b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/fstext/determinize-lattice-inl.h +++ /dev/null @@ -1,1357 +0,0 @@ -// fstext/determinize-lattice-inl.h - -// Copyright 2009-2012 Microsoft Corporation -// 2012-2013 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -#define KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -// Do not include this file directly. It is included by determinize-lattice.h - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fst { - -// This class maps back and forth from/to integer id's to sequences of strings. -// used in determinization algorithm. It is constructed in such a way that -// finding the string-id of the successor of (string, next-label) has constant -// time. - -// Note: class IntType, typically int32, is the type of the element in the -// string (typically a template argument of the CompactLatticeWeightTpl). - -template -class LatticeStringRepository { - public: - struct Entry { - const Entry *parent; // NULL for empty string. - IntType i; - inline bool operator==(const Entry &other) const { - return (parent == other.parent && i == other.i); - } - Entry() {} - Entry(const Entry &e) : parent(e.parent), i(e.i) {} - }; - // Note: all Entry* pointers returned in function calls are - // owned by the repository itself, not by the caller! - - // Interface guarantees empty string is NULL. - inline const Entry *EmptyString() { return NULL; } - - // Returns string of "parent" with i appended. Pointer - // owned by repository - const Entry *Successor(const Entry *parent, IntType i) { - new_entry_->parent = parent; - new_entry_->i = i; - - std::pair pr = set_.insert(new_entry_); - if (pr.second) { // Was successfully inserted (was not there). We need to - // replace the element we inserted, which resides on the - // stack, with one from the heap. - const Entry *ans = new_entry_; - new_entry_ = new Entry(); - return ans; - } else { // Was not inserted because an equivalent Entry already - // existed. - return *pr.first; - } - } - - const Entry *Concatenate(const Entry *a, const Entry *b) { - if (a == NULL) - return b; - else if (b == NULL) - return a; - std::vector v; - ConvertToVector(b, &v); - const Entry *ans = a; - for (size_t i = 0; i < v.size(); i++) ans = Successor(ans, v[i]); - return ans; - } - const Entry *CommonPrefix(const Entry *a, const Entry *b) { - std::vector a_vec, b_vec; - ConvertToVector(a, &a_vec); - ConvertToVector(b, &b_vec); - const Entry *ans = NULL; - for (size_t i = 0; - i < a_vec.size() && i < b_vec.size() && a_vec[i] == b_vec[i]; i++) - ans = Successor(ans, a_vec[i]); - return ans; - } - - // removes any elements from b that are not part of - // a common prefix with a. - void ReduceToCommonPrefix(const Entry *a, std::vector *b) { - size_t a_size = Size(a), b_size = b->size(); - while (a_size > b_size) { - a = a->parent; - a_size--; - } - if (b_size > a_size) b_size = a_size; - typename std::vector::iterator b_begin = b->begin(); - while (a_size != 0) { - if (a->i != *(b_begin + a_size - 1)) b_size = a_size - 1; - a = a->parent; - a_size--; - } - if (b_size != b->size()) b->resize(b_size); - } - - // removes the first n elements of a. 
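LatticeStringRepository above interns each string as an Entry pointing at its prefix, so appending a symbol (Successor) is a single hash lookup and common prefixes are shared. A dictionary-based analog, using integer ids instead of interned pointers and illustrative names, shows the idea:

```python
class StringRepository:
    """Toy analog of LatticeStringRepository above: strings are interned as
    (parent, last_symbol) entries, so successor() is one dictionary lookup."""

    def __init__(self):
        self._ids = {}           # (parent_id, symbol) -> entry id
        self._entries = [None]   # id -> (parent_id, symbol); id 0 is the empty string

    def successor(self, parent_id, symbol):
        """Id of the string `parent` with `symbol` appended (constant time)."""
        key = (parent_id, symbol)
        if key not in self._ids:
            self._ids[key] = len(self._entries)
            self._entries.append(key)
        return self._ids[key]

    def to_list(self, entry_id):
        out = []
        while entry_id != 0:
            parent_id, symbol = self._entries[entry_id]
            out.append(symbol)
            entry_id = parent_id
        return out[::-1]

    def common_prefix(self, a, b):
        """Longest shared prefix of two interned strings, as a new entry id."""
        ans = 0
        for x, y in zip(self.to_list(a), self.to_list(b)):
            if x != y:
                break
            ans = self.successor(ans, x)
        return ans

repo = StringRepository()
abc = repo.successor(repo.successor(repo.successor(0, 1), 2), 3)   # [1, 2, 3]
abd = repo.successor(repo.successor(repo.successor(0, 1), 2), 4)   # [1, 2, 4]
print(repo.to_list(repo.common_prefix(abc, abd)))                  # [1, 2]
```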
- const Entry *RemovePrefix(const Entry *a, size_t n) { - if (n == 0) return a; - std::vector a_vec; - ConvertToVector(a, &a_vec); - assert(a_vec.size() >= n); - const Entry *ans = NULL; - for (size_t i = n; i < a_vec.size(); i++) ans = Successor(ans, a_vec[i]); - return ans; - } - - // Returns true if a is a prefix of b. If a is prefix of b, - // time taken is |b| - |a|. Else, time taken is |b|. - bool IsPrefixOf(const Entry *a, const Entry *b) const { - if (a == NULL) return true; // empty string prefix of all. - if (a == b) return true; - if (b == NULL) return false; - return IsPrefixOf(a, b->parent); - } - - inline size_t Size(const Entry *entry) const { - size_t ans = 0; - while (entry != NULL) { - ans++; - entry = entry->parent; - } - return ans; - } - - void ConvertToVector(const Entry *entry, std::vector *out) const { - size_t length = Size(entry); - out->resize(length); - if (entry != NULL) { - typename std::vector::reverse_iterator iter = out->rbegin(); - while (entry != NULL) { - *iter = entry->i; - entry = entry->parent; - ++iter; - } - } - } - - const Entry *ConvertFromVector(const std::vector &vec) { - const Entry *e = NULL; - for (size_t i = 0; i < vec.size(); i++) e = Successor(e, vec[i]); - return e; - } - - LatticeStringRepository() { new_entry_ = new Entry; } - - void Destroy() { - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) - delete *iter; - SetType tmp; - tmp.swap(set_); - if (new_entry_) { - delete new_entry_; - new_entry_ = NULL; - } - } - - // Rebuild will rebuild this object, guaranteeing only - // to preserve the Entry values that are in the vector pointed - // to (this list does not have to be unique). The point of - // this is to save memory. - void Rebuild(const std::vector &to_keep) { - SetType tmp_set; - for (typename std::vector::const_iterator iter = - to_keep.begin(); - iter != to_keep.end(); ++iter) - RebuildHelper(*iter, &tmp_set); - // Now delete all elems not in tmp_set. - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) { - if (tmp_set.count(*iter) == 0) - delete (*iter); // delete the Entry; not needed. - } - set_.swap(tmp_set); - } - - ~LatticeStringRepository() { Destroy(); } - int32 MemSize() const { - return set_.size() * sizeof(Entry) * 2; // this is a lower bound - // on the size this structure might take. - } - - private: - class EntryKey { // Hash function object. - public: - inline size_t operator()(const Entry *entry) const { - size_t prime = 49109; - return static_cast(entry->i) + - prime * reinterpret_cast(entry->parent); - } - }; - class EntryEqual { - public: - inline bool operator()(const Entry *e1, const Entry *e2) const { - return (*e1 == *e2); - } - }; - typedef std::unordered_set SetType; - - void RebuildHelper(const Entry *to_add, SetType *tmp_set) { - while (true) { - if (to_add == NULL) return; - typename SetType::iterator iter = tmp_set->find(to_add); - if (iter == tmp_set->end()) { // not in tmp_set. - tmp_set->insert(to_add); - to_add = to_add->parent; // and loop. - } else { - return; - } - } - } - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeStringRepository); - Entry *new_entry_; // We always have a pre-allocated Entry ready to use, - // to avoid unnecessary news and deletes. - SetType set_; -}; - -// class LatticeDeterminizer is templated on the same types that -// CompactLatticeWeight is templated on: the base weight (Weight), typically -// LatticeWeightTpl etc. but could also be e.g. 
TropicalWeight, and the -// IntType, typically int32, used for the output symbols in the compact -// representation of strings [note: the output symbols would usually be -// p.d.f. id's in the anticipated use of this code] It has a special requirement -// on the Weight type: that there should be a Compare function on the weights -// such that Compare(w1, w2) returns -1 if w1 < w2, 0 if w1 == w2, and +1 if w1 -// > w2. This requires that there be a total order on the weights. - -template -class LatticeDeterminizer { - public: - // Output to Gallic acceptor (so the strings go on weights, and there is a 1-1 - // correspondence between our states and the states in ofst. If destroy == - // true, release memory as we go (but we cannot output again). - - typedef CompactLatticeWeightTpl CompactWeight; - typedef ArcTpl - CompactArc; // arc in compact, acceptor form of lattice - typedef ArcTpl Arc; // arc in non-compact version of lattice - - // Output to standard FST with CompactWeightTpl as its weight type - // (the weight stores the original output-symbol strings). If destroy == - // true, release memory as we go (but we cannot output again). - void Output(MutableFst *ofst, bool destroy = true) { - assert(determinized_); - typedef typename Arc::StateId StateId; - StateId nStates = static_cast(output_arcs_.size()); - if (destroy) FreeMostMemory(); - ofst->DeleteStates(); - ofst->SetStart(kNoStateId); - if (nStates == 0) { - return; - } - for (StateId s = 0; s < nStates; s++) { - OutputStateId news = ofst->AddState(); - assert(news == s); - } - ofst->SetStart(0); - // now process transitions. - for (StateId this_state = 0; this_state < nStates; this_state++) { - std::vector &this_vec(output_arcs_[this_state]); - typename std::vector::const_iterator iter = this_vec.begin(), - end = this_vec.end(); - - for (; iter != end; ++iter) { - const TempArc &temp_arc(*iter); - CompactArc new_arc; - std::vector is not treated as epsilon, create a common end state for - // all transitions accepting the , since they do not back off. This small - // optimization saves about 2% states in an average grammar. - if (sub_eps_ == 0) { - eos_state_ = fst_->AddState(); - fst_->SetFinal(eos_state_, 0); - } -} - -template -void ArpaLmCompilerImpl::ConsumeNGram(const NGram& ngram, - bool is_highest) { - // Generally, we do the following. Suppose we are adding an n-gram "A B - // C". Then find the node for "A B", add a new node for "A B C", and connect - // them with the arc accepting "C" with the specified weight. Also, add a - // backoff arc from the new "A B C" node to its backoff state "B C". - // - // Two notable exceptions are the highest order n-grams, and final n-grams. - // - // When adding a highest order n-gram (e. g., our "A B C" is in a 3-gram LM), - // the following optimization is performed. There is no point adding a node - // for "A B C" with a "C" arc from "A B", since there will be no other - // arcs ingoing to this node, and an epsilon backoff arc into the backoff - // model "B C", with the weight of \bar{1}. To save a node, create an arc - // accepting "C" directly from "A B" to "B C". This saves as many nodes - // as there are the highest order n-grams, which is typically about half - // the size of a large 3-gram model. - // - // Indeed, this does not apply to n-grams ending in EOS, since they do not - // back off. These are special, as they do not have a back-off state, and - // the node for "(..anything..) " is always final. 
These are handled - // in one of the two possible ways, If symbols and are being - // replaced by epsilons, neither node nor arc is created, and the logprob - // of the n-gram is applied to its source node as final weight. If and - // are preserved, then a special final node for is allocated and - // used as the destination of the "" acceptor arc. - HistKey heads(ngram.words.begin(), ngram.words.end() - 1); - typename HistoryMap::iterator source_it = history_.find(heads); - if (source_it == history_.end()) { - // There was no "A B", therefore the probability of "A B C" is zero. - // Print a warning and discard current n-gram. - if (parent_->ShouldWarn()) - KALDI_WARN << parent_->LineReference() - << " skipped: no parent (n-1)-gram exists"; - return; - } - - StateId source = source_it->second; - StateId dest; - Symbol sym = ngram.words.back(); - float weight = -ngram.logprob; - if (sym == sub_eps_ || sym == 0) { - KALDI_ERR << " or disambiguation symbol " << sym - << "found in the ARPA file. "; - } - if (sym == eos_symbol_) { - if (sub_eps_ == 0) { - // Keep as a real symbol when not substituting. - dest = eos_state_; - } else { - // Treat as if it was epsilon: mark source final, with the weight - // of the n-gram. - fst_->SetFinal(source, weight); - return; - } - } else { - // For the highest order n-gram, this may find an existing state, for - // non-highest, will create one (unless there are duplicate n-grams - // in the grammar, which cannot be reliably detected if highest order, - // so we better do not do that at all). - dest = AddStateWithBackoff( - HistKey(ngram.words.begin() + (is_highest ? 1 : 0), ngram.words.end()), - -ngram.backoff); - } - - if (sym == bos_symbol_) { - weight = 0; // Accepting is always free. - if (sub_eps_ == 0) { - // is as a real symbol, only accepted in the start state. - source = fst_->AddState(); - fst_->SetStart(source); - } else { - // The new state for unigram history *is* the start state. - fst_->SetStart(dest); - return; - } - } - - // Add arc from source to dest, whichever way it was found. - fst_->AddArc(source, fst::StdArc(sym, sym, weight, dest)); - return; -} - -// Find or create a new state for n-gram defined by key, and ensure it has a -// backoff transition. The key is either the current n-gram for all but -// highest orders, or the tails of the n-gram for the highest order. The -// latter arises from the chain-collapsing optimization described above. -template -StateId ArpaLmCompilerImpl::AddStateWithBackoff(HistKey key, - float backoff) { - typename HistoryMap::iterator dest_it = history_.find(key); - if (dest_it != history_.end()) { - // Found an existing state in the history map. Invariant: if the state in - // the map, then its backoff arc is in the FST. We are done. - return dest_it->second; - } - // Otherwise create a new state and its backoff arc, and register in the map. - StateId dest = fst_->AddState(); - history_[key] = dest; - CreateBackoff(key.Tails(), dest, backoff); - return dest; -} - -// Create a backoff arc for a state. Key is a backoff destination that may or -// may not exist. When the destination is not found, naturally fall back to -// the lower order model, and all the way down until one is found (since the -// 0-gram model is always present, the search is guaranteed to terminate). 
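The long comment above explains the core of ConsumeNGram()/AddStateWithBackoff(): the n-gram's history picks the source state, the n-gram itself gets (or creates) a destination state with a backoff arc to a shortened history, and a word arc connects the two. The toy builder below sketches that flow over plain tuples with made-up weights; the sentence start/end special cases and the highest-order chain-collapsing optimization described above are deliberately omitted.

```python
def build_backoff_fst(ngrams):
    """Toy version of the ConsumeNGram()/AddStateWithBackoff() flow above.

    ngrams: list of (words, logprob, backoff) tuples, lowest order first,
    e.g. (("a",), -0.7, -0.3) for a unigram.  States are keyed by word
    history; backoff arcs carry the word None.
    """
    states = {(): 0}                 # empty history = the always-present 0-gram state
    arcs = []                        # (src_state, dst_state, word, cost)

    def state_with_backoff(hist, backoff_logprob):
        if hist in states:
            return states[hist]
        states[hist] = s = len(states)
        # Back off to progressively shorter histories until one exists;
        # the 0-gram state () is always there, so this terminates.
        tail = hist[1:]
        while tail not in states:
            tail = tail[1:]
        arcs.append((s, states[tail], None, -backoff_logprob))
        return s

    for words, logprob, backoff in ngrams:
        hist = words[:-1]
        if hist not in states:       # no parent (n-1)-gram: skip, as the compiler does
            continue
        src = states[hist]
        dst = state_with_backoff(words, backoff)
        arcs.append((src, dst, words[-1], -logprob))
    return states, arcs

# Unigrams "a" and "b", plus the bigram "a b".
states, arcs = build_backoff_fst([(("a",), -0.7, -0.3),
                                  (("b",), -1.0, -0.2),
                                  (("a", "b"), -0.4, 0.0)])
print(states)   # {(): 0, ('a',): 1, ('b',): 2, ('a', 'b'): 3}
print(arcs)     # word arcs out of states 0 and 1, backoff arcs into shorter histories
```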
-template -inline void ArpaLmCompilerImpl::CreateBackoff(HistKey key, - StateId state, - float weight) { - typename HistoryMap::iterator dest_it = history_.find(key); - while (dest_it == history_.end()) { - key = key.Tails(); - dest_it = history_.find(key); - } - - // The arc should transduce either or #0 to , depending on the - // epsilon substitution mode. This is the only case when input and output - // label may differ. - fst_->AddArc(state, fst::StdArc(sub_eps_, 0, weight, dest_it->second)); -} - -ArpaLmCompiler::~ArpaLmCompiler() { - if (impl_ != NULL) delete impl_; -} - -void ArpaLmCompiler::HeaderAvailable() { - KALDI_ASSERT(impl_ == NULL); - // Use optimized implementation if the grammar is 4-gram or less, and the - // maximum attained symbol id will fit into the optimized range. - int64 max_symbol = 0; - if (Symbols() != NULL) max_symbol = Symbols()->AvailableKey() - 1; - // If augmenting the symbol table, assume the worst case when all words in - // the model being read are novel. - if (Options().oov_handling == ArpaParseOptions::kAddToSymbols) - max_symbol += NgramCounts()[0]; - - if (NgramCounts().size() <= 4 && max_symbol < OptimizedHistKey::kMaxData) { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - } else { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - KALDI_LOG << "Reverting to slower state tracking because model is large: " - << NgramCounts().size() << "-gram with symbols up to " - << max_symbol; - } -} - -void ArpaLmCompiler::ConsumeNGram(const NGram& ngram) { - // is invalid in tails, in heads of an n-gram. - for (int i = 0; i < ngram.words.size(); ++i) { - if ((i > 0 && ngram.words[i] == Options().bos_symbol) || - (i + 1 < ngram.words.size() && - ngram.words[i] == Options().eos_symbol)) { - if (ShouldWarn()) - KALDI_WARN << LineReference() - << " skipped: n-gram has invalid BOS/EOS placement"; - return; - } - } - - bool is_highest = ngram.words.size() == NgramCounts().size(); - impl_->ConsumeNGram(ngram, is_highest); -} - -void ArpaLmCompiler::RemoveRedundantStates() { - fst::StdArc::Label backoff_symbol = sub_eps_; - if (backoff_symbol == 0) { - // The method of removing redundant states implemented in this function - // leads to slow determinization of L o G when people use the older style of - // usage of arpa2fst where the --disambig-symbol option was not specified. - // The issue seems to be that it creates a non-deterministic FST, while G is - // supposed to be deterministic. By 'return'ing below, we just disable this - // method if people were using an older script. This method isn't really - // that consequential anyway, and people will move to the newer-style - // scripts (see current utils/format_lm.sh), so this isn't much of a - // problem. - return; - } - - fst::StdArc::StateId num_states = fst_.NumStates(); - - // replace the #0 symbols on the input of arcs out of redundant states (states - // that are not final and have only a backoff arc leaving them), with . 
- for (fst::StdArc::StateId state = 0; state < num_states; state++) { - if (fst_.NumArcs(state) == 1 && - fst_.Final(state) == fst::TropicalWeight::Zero()) { - fst::MutableArcIterator iter(&fst_, state); - fst::StdArc arc = iter.Value(); - if (arc.ilabel == backoff_symbol) { - arc.ilabel = 0; - iter.SetValue(arc); - } - } - } - - // we could call fst::RemoveEps, and it would have the same effect in normal - // cases, where backoff_symbol != 0 and there are no epsilons in unexpected - // places, but RemoveEpsLocal is a bit safer in case something weird is going - // on; it guarantees not to blow up the FST. - fst::RemoveEpsLocal(&fst_); - KALDI_LOG << "Reduced num-states from " << num_states << " to " - << fst_.NumStates(); -} - -void ArpaLmCompiler::Check() const { - if (fst_.Start() == fst::kNoStateId) { - KALDI_ERR << "Arpa file did not contain the beginning-of-sentence symbol " - << Symbols()->Find(Options().bos_symbol) << "."; - } -} - -void ArpaLmCompiler::ReadComplete() { - fst_.SetInputSymbols(Symbols()); - fst_.SetOutputSymbols(Symbols()); - RemoveRedundantStates(); - Check(); -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/lm/arpa-lm-compiler.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/lm/arpa-lm-compiler.h deleted file mode 100644 index 069c71bd0e6f5acf0b9521ec1ef46796eb31fe4d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/lm/arpa-lm-compiler.h +++ /dev/null @@ -1,63 +0,0 @@ -// lm/arpa-lm-compiler.h - -// Copyright 2009-2011 Gilles Boulianne -// Copyright 2016 Smart Action LLC (kkm) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_LM_ARPA_LM_COMPILER_H_ -#define KALDI_LM_ARPA_LM_COMPILER_H_ - -#include - -#include "lm/arpa-file-parser.h" - -namespace kaldi { - -class ArpaLmCompilerImplInterface; - -class ArpaLmCompiler : public ArpaFileParser { - public: - ArpaLmCompiler(const ArpaParseOptions& options, int sub_eps, - fst::SymbolTable* symbols) - : ArpaFileParser(options, symbols), sub_eps_(sub_eps), impl_(NULL) {} - ~ArpaLmCompiler(); - - const fst::StdVectorFst& Fst() const { return fst_; } - fst::StdVectorFst* MutableFst() { return &fst_; } - - protected: - // ArpaFileParser overrides. - virtual void HeaderAvailable(); - virtual void ConsumeNGram(const NGram& ngram); - virtual void ReadComplete(); - - private: - // this function removes states that only have a backoff arc coming - // out of them. - void RemoveRedundantStates(); - void Check() const; - - int sub_eps_; - ArpaLmCompilerImplInterface* impl_; // Owned. 
- fst::StdVectorFst fst_; - template - friend class ArpaLmCompilerImpl; -}; - -} // namespace kaldi - -#endif // KALDI_LM_ARPA_LM_COMPILER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/lmbin/arpa2fst.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/lmbin/arpa2fst.cc deleted file mode 100644 index 881a45c5b37810247ea38dae56237f59b5554a9c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/lmbin/arpa2fst.cc +++ /dev/null @@ -1,145 +0,0 @@ -// bin/arpa2fst.cc -// -// Copyright 2009-2011 Gilles Boulianne. -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABILITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "lm/arpa-lm-compiler.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; // NOLINT - try { - const char *usage = - "Convert an ARPA format language model into an FST\n" - "Usage: arpa2fst [opts] \n" - " e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table=" - "data/lang/words.txt lm/input.arpa G.fst\n\n" - "Note: When called without switches, the output G.fst will contain\n" - "an embedded symbol table. This is compatible with the way a previous\n" - "version of arpa2fst worked.\n"; - - ParseOptions po(usage); - - ArpaParseOptions options; - options.Register(&po); - - // Option flags. - std::string bos_symbol = ""; - std::string eos_symbol = ""; - std::string disambig_symbol; - std::string read_syms_filename; - std::string write_syms_filename; - bool keep_symbols = false; - bool ilabel_sort = true; - - po.Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol"); - po.Register("eos-symbol", &eos_symbol, "End of sentence symbol"); - po.Register("disambig-symbol", &disambig_symbol, - "Disambiguator. If provided (e. g. #0), used on input side of " - "backoff links, and and are replaced with epsilons"); - po.Register("read-symbol-table", &read_syms_filename, - "Use existing symbol table"); - po.Register("write-symbol-table", &write_syms_filename, - "Write generated symbol table to a file"); - po.Register("keep-symbols", &keep_symbols, - "Store symbol table with FST. Symbols always saved to FST if " - "symbol tables are neither read or written (otherwise symbols " - "would be lost entirely)"); - po.Register("ilabel-sort", &ilabel_sort, "Ilabel-sort the output FST"); - - po.Read(argc, argv); - - if (po.NumArgs() != 1 && po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - std::string arpa_rxfilename = po.GetArg(1), - fst_wxfilename = po.GetOptArg(2); - - int64 disambig_symbol_id = 0; - - fst::SymbolTable *symbols; - if (!read_syms_filename.empty()) { - // Use existing symbols. Required symbols must be in the table. 
- kaldi::Input kisym(read_syms_filename); - symbols = fst::SymbolTable::ReadText( - kisym.Stream(), PrintableWxfilename(read_syms_filename)); - if (symbols == NULL) - KALDI_ERR << "Could not read symbol table from file " - << read_syms_filename; - - options.oov_handling = ArpaParseOptions::kSkipNGram; - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->Find(disambig_symbol); - if (disambig_symbol_id == -1) // fst::kNoSymbol - KALDI_ERR << "Symbol table " << read_syms_filename - << " has no symbol for " << disambig_symbol; - } - } else { - // Create a new symbol table and populate it from ARPA file. - symbols = new fst::SymbolTable(PrintableWxfilename(fst_wxfilename)); - options.oov_handling = ArpaParseOptions::kAddToSymbols; - symbols->AddSymbol("", 0); - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->AddSymbol(disambig_symbol); - } - } - - // Add or use existing BOS and EOS. - options.bos_symbol = symbols->AddSymbol(bos_symbol); - options.eos_symbol = symbols->AddSymbol(eos_symbol); - - // If producing new (not reading existing) symbols and not saving them, - // need to keep symbols with FST, otherwise they would be lost. - if (read_syms_filename.empty() && write_syms_filename.empty()) - keep_symbols = true; - - // Actually compile LM. - KALDI_ASSERT(symbols != NULL); - ArpaLmCompiler lm_compiler(options, disambig_symbol_id, symbols); - { - Input ki(arpa_rxfilename); - lm_compiler.Read(ki.Stream()); - } - - // Sort the FST in-place if requested by options. - if (ilabel_sort) { - fst::ArcSort(lm_compiler.MutableFst(), fst::StdILabelCompare()); - } - - // Write symbols if requested. - if (!write_syms_filename.empty()) { - kaldi::Output kosym(write_syms_filename, false); - symbols->WriteText(kosym.Stream()); - } - - // Write LM FST. - bool write_binary = true, write_header = false; - kaldi::Output kofst(fst_wxfilename, write_binary, write_header); - fst::FstWriteOptions wopts(PrintableWxfilename(fst_wxfilename)); - wopts.write_isymbols = wopts.write_osymbols = keep_symbols; - lm_compiler.Fst().Write(kofst.Stream(), wopts); - - delete symbols; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/basic-filebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/basic-filebuf.h deleted file mode 100644 index 22ec891064d5955c8b1d255e0d34781a9f505a38..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/basic-filebuf.h +++ /dev/null @@ -1,952 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// This is a modified version of the std::basic_filebuf from libc++ -// Copyright 20XX LLVM -// (http://libcxx.llvm.org/). -// It allows one to create basic_filebuf from an existing FILE* handle or file -// descriptor. -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source License licenses. See LICENSE.TXT for details (included at the -// bottom). 
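As the header comment above notes, the point of this class (unlike std::basic_filebuf) is that it can be attached to an already-open FILE* handle or file descriptor. A minimal usage sketch against the interface declared below; the file path is a placeholder, and in this implementation close() also closes the wrapped handle.

```cpp
// Minimal sketch: wrap an existing FILE* in kaldi::basic_filebuf so it can be
// read through a std::istream. The path is hypothetical.
#include <cstdio>
#include <istream>
#include <string>
#include "util/basic-filebuf.h"

bool ReadFirstLine(std::string *line) {
  FILE *f = std::fopen("/tmp/example.txt", "r");  // hypothetical path
  if (f == NULL) return false;

  kaldi::basic_filebuf<char> buf;
  buf.open(f, std::ios_base::in);   // attach to the existing handle
  std::istream is(&buf);            // any streambuf-based API works from here

  bool ok = static_cast<bool>(std::getline(is, *line));
  buf.close();                      // close() fcloses the wrapped FILE* as well
  return ok;
}
```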
-/////////////////////////////////////////////////////////////////////////////// -#ifndef KALDI_UTIL_BASIC_FILEBUF_H_ -#define KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include - -/////////////////////////////////////////////////////////////////////////////// -namespace kaldi { -/////////////////////////////////////////////////////////////////////////////// -template > -class basic_filebuf : public std::basic_streambuf { - public: - typedef CharT char_type; - typedef Traits traits_type; - typedef typename traits_type::int_type int_type; - typedef typename traits_type::pos_type pos_type; - typedef typename traits_type::off_type off_type; - typedef typename traits_type::state_type state_type; - - basic_filebuf(); - basic_filebuf(basic_filebuf&& rhs); - virtual ~basic_filebuf(); - - basic_filebuf& operator=(basic_filebuf&& rhs); - void swap(basic_filebuf& rhs); - - bool is_open() const; - basic_filebuf* open(const char* s, std::ios_base::openmode mode); - basic_filebuf* open(const std::string& s, std::ios_base::openmode mode); - basic_filebuf* open(int fd, std::ios_base::openmode mode); - basic_filebuf* open(FILE* f, std::ios_base::openmode mode); - basic_filebuf* close(); - - FILE* file() { return this->_M_file; } - int fd() { return fileno(this->_M_file); } - - protected: - int_type underflow() override; - int_type pbackfail(int_type c = traits_type::eof()) override; - int_type overflow(int_type c = traits_type::eof()) override; - std::basic_streambuf* setbuf( - char_type* s, std::streamsize n) override; - pos_type seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - pos_type seekpos(pos_type sp, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - int sync() override; - void imbue(const std::locale& loc) override; - - protected: - char* _M_extbuf; - const char* _M_extbufnext; - const char* _M_extbufend; - char _M_extbuf_min[8]; - size_t _M_ebs; - char_type* _M_intbuf; - size_t _M_ibs; - FILE* _M_file; - const std::codecvt* _M_cv; - state_type _M_st; - state_type _M_st_last; - std::ios_base::openmode _M_om; - std::ios_base::openmode _M_cm; - bool _M_owns_eb; - bool _M_owns_ib; - bool _M_always_noconv; - - const char* _M_get_mode(std::ios_base::openmode mode); - bool _M_read_mode(); - void _M_write_mode(); -}; - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf() - : _M_extbuf(nullptr), - _M_extbufnext(nullptr), - _M_extbufend(nullptr), - _M_ebs(0), - _M_intbuf(nullptr), - _M_ibs(0), - _M_file(nullptr), - _M_cv(nullptr), - _M_st(), - _M_st_last(), - _M_om(std::ios_base::openmode(0)), - _M_cm(std::ios_base::openmode(0)), - _M_owns_eb(false), - _M_owns_ib(false), - _M_always_noconv(false) { - if (std::has_facet >( - this->getloc())) { - _M_cv = &std::use_facet >( - this->getloc()); - _M_always_noconv = _M_cv->always_noconv(); - } - setbuf(0, 4096); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf(basic_filebuf&& rhs) - : std::basic_streambuf(rhs) { - if (rhs._M_extbuf == rhs._M_extbuf_min) { - _M_extbuf = _M_extbuf_min; - _M_extbufnext = _M_extbuf + (rhs._M_extbufnext - rhs._M_extbuf); - _M_extbufend = _M_extbuf + (rhs._M_extbufend - rhs._M_extbuf); - } else { - _M_extbuf = rhs._M_extbuf; - _M_extbufnext = 
rhs._M_extbufnext; - _M_extbufend = rhs._M_extbufend; - } - _M_ebs = rhs._M_ebs; - _M_intbuf = rhs._M_intbuf; - _M_ibs = rhs._M_ibs; - _M_file = rhs._M_file; - _M_cv = rhs._M_cv; - _M_st = rhs._M_st; - _M_st_last = rhs._M_st_last; - _M_om = rhs._M_om; - _M_cm = rhs._M_cm; - _M_owns_eb = rhs._M_owns_eb; - _M_owns_ib = rhs._M_owns_ib; - _M_always_noconv = rhs._M_always_noconv; - if (rhs.pbase()) { - if (rhs.pbase() == rhs._M_intbuf) - this->setp(_M_intbuf, _M_intbuf + (rhs.epptr() - rhs.pbase())); - else - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + - (rhs.epptr() - rhs.pbase())); - this->pbump(rhs.pptr() - rhs.pbase()); - } else if (rhs.eback()) { - if (rhs.eback() == rhs._M_intbuf) - this->setg(_M_intbuf, _M_intbuf + (rhs.gptr() - rhs.eback()), - _M_intbuf + (rhs.egptr() - rhs.eback())); - else - this->setg( - reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (rhs.gptr() - rhs.eback()), - reinterpret_cast(_M_extbuf) + - (rhs.egptr() - rhs.eback())); - } - rhs._M_extbuf = nullptr; - rhs._M_extbufnext = nullptr; - rhs._M_extbufend = nullptr; - rhs._M_ebs = 0; - rhs._M_intbuf = nullptr; - rhs._M_ibs = 0; - rhs._M_file = nullptr; - rhs._M_st = state_type(); - rhs._M_st_last = state_type(); - rhs._M_om = std::ios_base::openmode(0); - rhs._M_cm = std::ios_base::openmode(0); - rhs._M_owns_eb = false; - rhs._M_owns_ib = false; - rhs.setg(0, 0, 0); - rhs.setp(0, 0); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf& basic_filebuf::operator=( - basic_filebuf&& rhs) { - close(); - swap(rhs); - return *this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::~basic_filebuf() { - // try - // { - // close(); - // } - // catch (...) 
- // { - // } - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::swap(basic_filebuf& rhs) { - std::basic_streambuf::swap(rhs); - if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - std::swap(_M_extbuf, rhs._M_extbuf); - std::swap(_M_extbufnext, rhs._M_extbufnext); - std::swap(_M_extbufend, rhs._M_extbufend); - } else { - ptrdiff_t ln = _M_extbufnext - _M_extbuf; - ptrdiff_t le = _M_extbufend - _M_extbuf; - ptrdiff_t rn = rhs._M_extbufnext - rhs._M_extbuf; - ptrdiff_t re = rhs._M_extbufend - rhs._M_extbuf; - if (_M_extbuf == _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - _M_extbuf = rhs._M_extbuf; - rhs._M_extbuf = rhs._M_extbuf_min; - } else if (_M_extbuf != _M_extbuf_min && - rhs._M_extbuf == rhs._M_extbuf_min) { - rhs._M_extbuf = _M_extbuf; - _M_extbuf = _M_extbuf_min; - } - _M_extbufnext = _M_extbuf + rn; - _M_extbufend = _M_extbuf + re; - rhs._M_extbufnext = rhs._M_extbuf + ln; - rhs._M_extbufend = rhs._M_extbuf + le; - } - std::swap(_M_ebs, rhs._M_ebs); - std::swap(_M_intbuf, rhs._M_intbuf); - std::swap(_M_ibs, rhs._M_ibs); - std::swap(_M_file, rhs._M_file); - std::swap(_M_cv, rhs._M_cv); - std::swap(_M_st, rhs._M_st); - std::swap(_M_st_last, rhs._M_st_last); - std::swap(_M_om, rhs._M_om); - std::swap(_M_cm, rhs._M_cm); - std::swap(_M_owns_eb, rhs._M_owns_eb); - std::swap(_M_owns_ib, rhs._M_owns_ib); - std::swap(_M_always_noconv, rhs._M_always_noconv); - if (this->eback() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->gptr() - this->eback(); - ptrdiff_t e = this->egptr() - this->eback(); - this->setg(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + n, - reinterpret_cast(_M_extbuf_min) + e); - } else if (this->pbase() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->pptr() - this->pbase(); - ptrdiff_t e = this->epptr() - this->pbase(); - this->setp(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + e); - this->pbump(n); - } - if (rhs.eback() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.gptr() - rhs.eback(); - ptrdiff_t e = rhs.egptr() - rhs.eback(); - rhs.setg(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + n, - reinterpret_cast(rhs._M_extbuf_min) + e); - } else if (rhs.pbase() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.pptr() - rhs.pbase(); - ptrdiff_t e = rhs.epptr() - rhs.pbase(); - rhs.setp(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + e); - rhs.pbump(n); - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline void swap(basic_filebuf& x, - basic_filebuf& y) { - x.swap(y); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline bool basic_filebuf::is_open() const { - return _M_file != nullptr; -} - -/////////////////////////////////////////////////////////////////////////////// -template -const char* basic_filebuf::_M_get_mode( - std::ios_base::openmode mode) { - switch ((mode & ~std::ios_base::ate) | 0) { - case std::ios_base::out: - case std::ios_base::out | std::ios_base::trunc: - return "w"; - case std::ios_base::out | std::ios_base::app: - case std::ios_base::app: - return "a"; - break; - case std::ios_base::in: - return "r"; - case std::ios_base::in | std::ios_base::out: - return "r+"; - case std::ios_base::in | std::ios_base::out | 
std::ios_base::trunc: - return "w+"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app: - case std::ios_base::in | std::ios_base::app: - return "a+"; - case std::ios_base::out | std::ios_base::binary: - case std::ios_base::out | std::ios_base::trunc | std::ios_base::binary: - return "wb"; - case std::ios_base::out | std::ios_base::app | std::ios_base::binary: - case std::ios_base::app | std::ios_base::binary: - return "ab"; - case std::ios_base::in | std::ios_base::binary: - return "rb"; - case std::ios_base::in | std::ios_base::out | std::ios_base::binary: - return "r+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::trunc | - std::ios_base::binary: - return "w+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app | - std::ios_base::binary: - case std::ios_base::in | std::ios_base::app | std::ios_base::binary: - return "a+b"; - default: - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - const char* s, std::ios_base::openmode mode) { - basic_filebuf* rt = nullptr; - if (_M_file == nullptr) { - const char* md = _M_get_mode(mode); - if (md) { - _M_file = fopen(s, md); - if (_M_file) { - rt = this; - _M_om = mode; - if (mode & std::ios_base::ate) { - if (fseek(_M_file, 0, SEEK_END)) { - fclose(_M_file); - _M_file = nullptr; - rt = nullptr; - } - } - } - } - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf* basic_filebuf::open( - const std::string& s, std::ios_base::openmode mode) { - return open(s.c_str(), mode); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - int fd, std::ios_base::openmode mode) { - const char* md = this->_M_get_mode(mode); - if (md) { - this->_M_file = fdopen(fd, md); - this->_M_om = mode; - return this; - } else { - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - FILE* f, std::ios_base::openmode mode) { - this->_M_file = f; - this->_M_om = mode; - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::close() { - basic_filebuf* rt = nullptr; - if (_M_file) { - rt = this; - std::unique_ptr h(_M_file, fclose); - if (sync()) rt = nullptr; - if (fclose(h.release()) == 0) - _M_file = nullptr; - else - rt = nullptr; - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::underflow() { - if (_M_file == nullptr) return traits_type::eof(); - bool initial = _M_read_mode(); - char_type buf; - if (this->gptr() == nullptr) this->setg(&buf, &buf + 1, &buf + 1); - const size_t unget_sz = - initial ? 
0 : std::min((this->egptr() - this->eback()) / 2, 4); - int_type c = traits_type::eof(); - if (this->gptr() == this->egptr()) { - memmove(this->eback(), this->egptr() - unget_sz, - unget_sz * sizeof(char_type)); - if (_M_always_noconv) { - size_t nmemb = - static_cast(this->egptr() - this->eback() - unget_sz); - nmemb = fread(this->eback() + unget_sz, 1, nmemb, _M_file); - if (nmemb != 0) { - this->setg(this->eback(), this->eback() + unget_sz, - this->eback() + unget_sz + nmemb); - c = traits_type::to_int_type(*this->gptr()); - } - } else { - memmove(_M_extbuf, _M_extbufnext, _M_extbufend - _M_extbufnext); - _M_extbufnext = _M_extbuf + (_M_extbufend - _M_extbufnext); - _M_extbufend = - _M_extbuf + - (_M_extbuf == _M_extbuf_min ? sizeof(_M_extbuf_min) : _M_ebs); - size_t nmemb = - std::min(static_cast(_M_ibs - unget_sz), - static_cast(_M_extbufend - _M_extbufnext)); - std::codecvt_base::result r; - _M_st_last = _M_st; - size_t nr = - fread(reinterpret_cast(const_cast(_M_extbufnext)), - 1, nmemb, _M_file); - if (nr != 0) { - if (!_M_cv) throw std::bad_cast(); - _M_extbufend = _M_extbufnext + nr; - char_type* inext; - r = _M_cv->in(_M_st, _M_extbuf, _M_extbufend, _M_extbufnext, - this->eback() + unget_sz, this->eback() + _M_ibs, inext); - if (r == std::codecvt_base::noconv) { - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf), - const_cast(_M_extbufend)); - c = traits_type::to_int_type(*this->gptr()); - } else if (inext != this->eback() + unget_sz) { - this->setg(this->eback(), this->eback() + unget_sz, inext); - c = traits_type::to_int_type(*this->gptr()); - } - } - } - } else { - c = traits_type::to_int_type(*this->gptr()); - } - if (this->eback() == &buf) this->setg(0, 0, 0); - return c; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::pbackfail(int_type c) { - if (_M_file && this->eback() < this->gptr()) { - if (traits_type::eq_int_type(c, traits_type::eof())) { - this->gbump(-1); - return traits_type::not_eof(c); - } - if ((_M_om & std::ios_base::out) || - traits_type::eq(traits_type::to_char_type(c), this->gptr()[-1])) { - this->gbump(-1); - *this->gptr() = traits_type::to_char_type(c); - return c; - } - } - return traits_type::eof(); -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::overflow(int_type c) { - if (_M_file == nullptr) return traits_type::eof(); - _M_write_mode(); - char_type buf; - char_type* pb_save = this->pbase(); - char_type* epb_save = this->epptr(); - if (!traits_type::eq_int_type(c, traits_type::eof())) { - if (this->pptr() == nullptr) this->setp(&buf, &buf + 1); - *this->pptr() = traits_type::to_char_type(c); - this->pbump(1); - } - if (this->pptr() != this->pbase()) { - if (_M_always_noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), sizeof(char_type), nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else { - char* extbe = _M_extbuf; - std::codecvt_base::result r; - do { - if (!_M_cv) throw std::bad_cast(); - const char_type* e; - r = _M_cv->out(_M_st, this->pbase(), this->pptr(), e, _M_extbuf, - _M_extbuf + _M_ebs, extbe); - if (e == this->pbase()) return traits_type::eof(); - if (r == std::codecvt_base::noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else if (r == std::codecvt_base::ok 
|| - r == std::codecvt_base::partial) { - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - if (r == std::codecvt_base::partial) { - this->setp(const_cast(e), this->pptr()); - this->pbump(this->epptr() - this->pbase()); - } - } else { - return traits_type::eof(); - } - } while (r == std::codecvt_base::partial); - } - this->setp(pb_save, epb_save); - } - return traits_type::not_eof(c); -} - -/////////////////////////////////////////////////////////////////////////////// -template -std::basic_streambuf* basic_filebuf::setbuf( - char_type* s, std::streamsize n) { - this->setg(0, 0, 0); - this->setp(0, 0); - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; - _M_ebs = n; - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv && s) { - _M_extbuf = reinterpret_cast(s); - _M_owns_eb = false; - } else { - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } - } else { - _M_extbuf = _M_extbuf_min; - _M_ebs = sizeof(_M_extbuf_min); - _M_owns_eb = false; - } - if (!_M_always_noconv) { - _M_ibs = std::max(n, sizeof(_M_extbuf_min)); - if (s && _M_ibs >= sizeof(_M_extbuf_min)) { - _M_intbuf = s; - _M_owns_ib = false; - } else { - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } else { - _M_ibs = 0; - _M_intbuf = 0; - _M_owns_ib = false; - } - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode) { - if (!_M_cv) throw std::bad_cast(); - int width = _M_cv->encoding(); - if (_M_file == nullptr || (width <= 0 && off != 0) || sync()) - return pos_type(off_type(-1)); - // width > 0 || off == 0 - int whence; - switch (way) { - case std::ios_base::beg: - whence = SEEK_SET; - break; - case std::ios_base::cur: - whence = SEEK_CUR; - break; - case std::ios_base::end: - whence = SEEK_END; - break; - default: - return pos_type(off_type(-1)); - } -#if _WIN32 - if (fseek(_M_file, width > 0 ? width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftell(_M_file); -#else - if (fseeko(_M_file, width > 0 ? 
width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftello(_M_file); -#endif - r.state(_M_st); - return r; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekpos(pos_type sp, std::ios_base::openmode) { - if (_M_file == nullptr || sync()) return pos_type(off_type(-1)); -#if _WIN32 - if (fseek(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#else - if (fseeko(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#endif - _M_st = sp.state(); - return sp; -} - -/////////////////////////////////////////////////////////////////////////////// -template -int basic_filebuf::sync() { - if (_M_file == nullptr) return 0; - if (!_M_cv) throw std::bad_cast(); - if (_M_cm & std::ios_base::out) { - if (this->pptr() != this->pbase()) - if (overflow() == traits_type::eof()) return -1; - std::codecvt_base::result r; - do { - char* extbe; - r = _M_cv->unshift(_M_st, _M_extbuf, _M_extbuf + _M_ebs, extbe); - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) return -1; - } while (r == std::codecvt_base::partial); - if (r == std::codecvt_base::error) return -1; - if (fflush(_M_file)) return -1; - } else if (_M_cm & std::ios_base::in) { - off_type c; - state_type state = _M_st_last; - bool update_st = false; - if (_M_always_noconv) { - c = this->egptr() - this->gptr(); - } else { - int width = _M_cv->encoding(); - c = _M_extbufend - _M_extbufnext; - if (width > 0) { - c += width * (this->egptr() - this->gptr()); - } else { - if (this->gptr() != this->egptr()) { - const int off = _M_cv->length(state, _M_extbuf, _M_extbufnext, - this->gptr() - this->eback()); - c += _M_extbufnext - _M_extbuf - off; - update_st = true; - } - } - } -#if _WIN32 - if (fseek(_M_file_, -c, SEEK_CUR)) return -1; -#else - if (fseeko(_M_file, -c, SEEK_CUR)) return -1; -#endif - if (update_st) _M_st = state; - _M_extbufnext = _M_extbufend = _M_extbuf; - this->setg(0, 0, 0); - _M_cm = std::ios_base::openmode(0); - } - return 0; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::imbue(const std::locale& loc) { - sync(); - _M_cv = &std::use_facet >(loc); - bool old_anc = _M_always_noconv; - _M_always_noconv = _M_cv->always_noconv(); - if (old_anc != _M_always_noconv) { - this->setg(0, 0, 0); - this->setp(0, 0); - // invariant, char_type is char, else we couldn't get here - // need to dump _M_intbuf - if (_M_always_noconv) { - if (_M_owns_eb) delete[] _M_extbuf; - _M_owns_eb = _M_owns_ib; - _M_ebs = _M_ibs; - _M_extbuf = reinterpret_cast(_M_intbuf); - _M_ibs = 0; - _M_intbuf = nullptr; - _M_owns_ib = false; - } else { // need to obtain an _M_intbuf. 
- // If _M_extbuf is user-supplied, use it, else new _M_intbuf - if (!_M_owns_eb && _M_extbuf != _M_extbuf_min) { - _M_ibs = _M_ebs; - _M_intbuf = reinterpret_cast(_M_extbuf); - _M_owns_ib = false; - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } else { - _M_ibs = _M_ebs; - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -bool basic_filebuf::_M_read_mode() { - if (!(_M_cm & std::ios_base::in)) { - this->setp(0, 0); - if (_M_always_noconv) - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + _M_ebs, - reinterpret_cast(_M_extbuf) + _M_ebs); - else - this->setg(_M_intbuf, _M_intbuf + _M_ibs, _M_intbuf + _M_ibs); - _M_cm = std::ios_base::in; - return true; - } - return false; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::_M_write_mode() { - if (!(_M_cm & std::ios_base::out)) { - this->setg(0, 0, 0); - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv) - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (_M_ebs - 1)); - else - this->setp(_M_intbuf, _M_intbuf + (_M_ibs - 1)); - } else { - this->setp(0, 0); - } - _M_cm = std::ios_base::out; - } -} - -/////////////////////////////////////////////////////////////////////////////// -} // namespace kaldi - -/////////////////////////////////////////////////////////////////////////////// -#endif // KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// - -/* - * ============================================================================ - * libc++ License - * ============================================================================ - * - * The libc++ library is dual licensed under both the University of Illinois - * "BSD-Like" license and the MIT license. As a user of this code you may - * choose to use it under either license. As a contributor, you agree to allow - * your code to be used under both. - * - * Full text of the relevant licenses is included below. - * - * ============================================================================ - * - * University of Illinois/NCSA - * Open Source License - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * All rights reserved. - * - * Developed by: - * - * LLVM Team - * - * University of Illinois at Urbana-Champaign - * - * http://llvm.org - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * with the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimers in the - * documentation and/or other materials provided with the distribution. 
- * - * * Neither the names of the LLVM Team, University of Illinois at - * Urbana-Champaign, nor the names of its contributors may be used to - * endorse or promote products derived from this Software without specific - * prior written permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH - * THE SOFTWARE. - * - * ============================================================================== - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * ============================================================================== - * - * This file is a partial list of people who have contributed to the LLVM/libc++ - * project. If you have contributed a patch or made some other contribution to - * LLVM/libc++, please submit a patch to this file to add yourself, and it will - * be done! - * - * The list is sorted by surname and formatted to allow easy grepping and - * beautification by scripts. The fields are: name (N), email (E), web-address - * (W), PGP key ID and fingerprint (P), description (D), and snail-mail address - * (S). - * - * N: Saleem Abdulrasool - * E: compnerd@compnerd.org - * D: Minor patches and Linux fixes. - * - * N: Dimitry Andric - * E: dimitry@andric.com - * D: Visibility fixes, minor FreeBSD portability patches. - * - * N: Holger Arnold - * E: holgerar@gmail.com - * D: Minor fix. - * - * N: Ruben Van Boxem - * E: vanboxem dot ruben at gmail dot com - * D: Initial Windows patches. - * - * N: David Chisnall - * E: theraven at theravensnest dot org - * D: FreeBSD and Solaris ports, libcxxrt support, some atomics work. - * - * N: Marshall Clow - * E: mclow.lists@gmail.com - * E: marshall@idio.com - * D: C++14 support, patches and bug fixes. - * - * N: Bill Fisher - * E: william.w.fisher@gmail.com - * D: Regex bug fixes. - * - * N: Matthew Dempsky - * E: matthew@dempsky.org - * D: Minor patches and bug fixes. - * - * N: Google Inc. 
- * D: Copyright owner and contributor of the CityHash algorithm - * - * N: Howard Hinnant - * E: hhinnant@apple.com - * D: Architect and primary author of libc++ - * - * N: Hyeon-bin Jeong - * E: tuhertz@gmail.com - * D: Minor patches and bug fixes. - * - * N: Argyrios Kyrtzidis - * E: kyrtzidis@apple.com - * D: Bug fixes. - * - * N: Bruce Mitchener, Jr. - * E: bruce.mitchener@gmail.com - * D: Emscripten-related changes. - * - * N: Michel Morin - * E: mimomorin@gmail.com - * D: Minor patches to is_convertible. - * - * N: Andrew Morrow - * E: andrew.c.morrow@gmail.com - * D: Minor patches and Linux fixes. - * - * N: Arvid Picciani - * E: aep at exys dot org - * D: Minor patches and musl port. - * - * N: Bjorn Reese - * E: breese@users.sourceforge.net - * D: Initial regex prototype - * - * N: Nico Rieck - * E: nico.rieck@gmail.com - * D: Windows fixes - * - * N: Jonathan Sauer - * D: Minor patches, mostly related to constexpr - * - * N: Craig Silverstein - * E: csilvers@google.com - * D: Implemented Cityhash as the string hash function on 64-bit machines - * - * N: Richard Smith - * D: Minor patches. - * - * N: Joerg Sonnenberger - * E: joerg@NetBSD.org - * D: NetBSD port. - * - * N: Stephan Tolksdorf - * E: st@quanttec.com - * D: Minor fix - * - * N: Michael van der Westhuizen - * E: r1mikey at gmail dot com - * - * N: Klaas de Vries - * E: klaas at klaasgaaf dot nl - * D: Minor bug fix. - * - * N: Zhang Xiongpang - * E: zhangxiongpang@gmail.com - * D: Minor patches and bug fixes. - * - * N: Xing Xue - * E: xingxue@ca.ibm.com - * D: AIX port - * - * N: Zhihao Yuan - * E: lichray@gmail.com - * D: Standard compatibility fixes. - * - * N: Jeffrey Yasskin - * E: jyasskin@gmail.com - * E: jyasskin@google.com - * D: Linux fixes. - */ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/const-integer-set-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/const-integer-set-inl.h deleted file mode 100644 index b93846148a3e4595774507f638396ce13393ac0e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/const-integer-set-inl.h +++ /dev/null @@ -1,87 +0,0 @@ -// util/const-integer-set-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_INL_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_INL_H_ - -// Do not include this file directly. It is included by const-integer-set.h - -namespace kaldi { - -template -void ConstIntegerSet::InitInternal() { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - quick_set_.clear(); // just in case we previously had data. 
- if (slow_set_.size() == 0) { - lowest_member_ = (I)1; - highest_member_ = (I)0; - contiguous_ = false; - quick_ = false; - } else { - lowest_member_ = slow_set_.front(); - highest_member_ = slow_set_.back(); - size_t range = highest_member_ + 1 - lowest_member_; - if (range == slow_set_.size()) { - contiguous_ = true; - quick_ = false; - } else { - contiguous_ = false; - // If it would be more compact to store as bool - if (range < slow_set_.size() * 8 * sizeof(I)) { - // (assuming 1 bit per element)... - quick_set_.resize(range, false); - for (size_t i = 0; i < slow_set_.size(); i++) - quick_set_[slow_set_[i] - lowest_member_] = true; - quick_ = true; - } else { - quick_ = false; - } - } - } -} - -template -int ConstIntegerSet::count(I i) const { - if (i < lowest_member_ || i > highest_member_) { - return 0; - } else { - if (contiguous_) return true; - if (quick_) { - return (quick_set_[i - lowest_member_] ? 1 : 0); - } else { - bool ans = std::binary_search(slow_set_.begin(), slow_set_.end(), i); - return (ans ? 1 : 0); - } - } -} - -template -void ConstIntegerSet::Write(std::ostream &os, bool binary) const { - WriteIntegerVector(os, binary, slow_set_); -} - -template -void ConstIntegerSet::Read(std::istream &is, bool binary) { - ReadIntegerVector(is, binary, &slow_set_); - InitInternal(); -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_CONST_INTEGER_SET_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/const-integer-set.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/const-integer-set.h deleted file mode 100644 index 809a56a7c83804bfaa4badb5e28059734bfcad1e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/const-integer-set.h +++ /dev/null @@ -1,96 +0,0 @@ -// util/const-integer-set.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_H_ -#include -#include -#include -#include -#include -#include "util/stl-utils.h" - -/* ConstIntegerSet is a way to efficiently test whether something is in a - supplied set of integers. It can be initialized from a vector or set, but - never changed after that. It either uses a sorted vector or an array of - bool, depending on the input. It behaves like a const version of an STL set, - with only a subset of the functionality, except all the member functions are - upper-case. - - Note that we could get rid of the member slow_set_, but we'd have to - do more work to implement an iterator type. This would save memory. 
-*/ - -namespace kaldi { - -template -class ConstIntegerSet { - public: - ConstIntegerSet() : lowest_member_(1), highest_member_(0) {} - - void Init(const std::vector &input) { - slow_set_ = input; - SortAndUniq(&slow_set_); - InitInternal(); - } - - void Init(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - - explicit ConstIntegerSet(const std::vector &input) : slow_set_(input) { - SortAndUniq(&slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const ConstIntegerSet &other) - : slow_set_(other.slow_set_) { - InitInternal(); - } - - int count(I i) const; // returns 1 or 0. - - typedef typename std::vector::const_iterator iterator; - iterator begin() const { return slow_set_.begin(); } - iterator end() const { return slow_set_.end(); } - size_t size() const { return slow_set_.size(); } - bool empty() const { return slow_set_.empty(); } - - void Write(std::ostream &os, bool binary) const; - void Read(std::istream &is, bool binary); - - private: - I lowest_member_; - I highest_member_; - bool contiguous_; - bool quick_; - std::vector quick_set_; - std::vector slow_set_; - void InitInternal(); -}; - -} // end namespace kaldi - -#include "util/const-integer-set-inl.h" - -#endif // KALDI_UTIL_CONST_INTEGER_SET_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/hash-list-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/hash-list-inl.h deleted file mode 100644 index 063fa7131ec618f0aae9dc30f4edd26c9dcce7fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/hash-list-inl.h +++ /dev/null @@ -1,193 +0,0 @@ -// util/hash-list-inl.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_INL_H_ -#define KALDI_UTIL_HASH_LIST_INL_H_ - -// Do not include this file directly. It is included by fast-hash.h - -namespace kaldi { - -template -HashList::HashList() { - list_head_ = NULL; - bucket_list_tail_ = static_cast(-1); // invalid. - hash_size_ = 0; - freed_head_ = NULL; -} - -template -void HashList::SetSize(size_t size) { - hash_size_ = size; - KALDI_ASSERT(list_head_ == NULL && - bucket_list_tail_ == - static_cast(-1)); // make sure empty. - if (size > buckets_.size()) buckets_.resize(size, HashBucket(0, NULL)); -} - -template -typename HashList::Elem *HashList::Clear() { - // Clears the hashtable and gives ownership of the currently contained list - // to the user. 
- for (size_t cur_bucket = bucket_list_tail_; - cur_bucket != static_cast(-1); - cur_bucket = buckets_[cur_bucket].prev_bucket) { - buckets_[cur_bucket].last_elem = NULL; // this is how we indicate "empty". - } - bucket_list_tail_ = static_cast(-1); - Elem *ans = list_head_; - list_head_ = NULL; - return ans; -} - -template -const typename HashList::Elem *HashList::GetList() const { - return list_head_; -} - -template -inline void HashList::Delete(Elem *e) { - e->tail = freed_head_; - freed_head_ = e; -} - -template -inline typename HashList::Elem *HashList::Find(I key) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - if (bucket.last_elem == NULL) { - return NULL; // empty bucket. - } else { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - return NULL; // Not found. - } -} - -template -inline typename HashList::Elem *HashList::New() { - if (freed_head_) { - Elem *ans = freed_head_; - freed_head_ = freed_head_->tail; - return ans; - } else { - Elem *tmp = new Elem[allocate_block_size_]; - for (size_t i = 0; i + 1 < allocate_block_size_; i++) - tmp[i].tail = tmp + i + 1; - tmp[allocate_block_size_ - 1].tail = NULL; - freed_head_ = tmp; - allocated_.push_back(tmp); - return this->New(); - } -} - -template -HashList::~HashList() { - // First test whether we had any memory leak within the - // HashList, i.e. things for which the user did not call Delete(). - size_t num_in_list = 0, num_allocated = 0; - for (Elem *e = freed_head_; e != NULL; e = e->tail) num_in_list++; - for (size_t i = 0; i < allocated_.size(); i++) { - num_allocated += allocate_block_size_; - delete[] allocated_[i]; - } - if (num_in_list != num_allocated) { - KALDI_WARN << "Possible memory leak: " << num_in_list - << " != " << num_allocated - << ": you might have forgotten to call Delete on " - << "some Elems"; - } -} - -template -inline typename HashList::Elem *HashList::Insert(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - // Check the element is existing or not. - if (bucket.last_elem != NULL) { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - } - - // This is a new element. Insert it. - Elem *elem = New(); - elem->key = key; - elem->val = val; - if (bucket.last_elem == NULL) { // Unoccupied bucket. Insert at - // head of bucket list (which is tail of regular list, they go in - // opposite directions). - if (bucket_list_tail_ == static_cast(-1)) { - // list was empty so this is the first elem. - KALDI_ASSERT(list_head_ == NULL); - list_head_ = elem; - } else { - // link in to the chain of Elems - buckets_[bucket_list_tail_].last_elem->tail = elem; - } - elem->tail = NULL; - bucket.last_elem = elem; - bucket.prev_bucket = bucket_list_tail_; - bucket_list_tail_ = index; - } else { - // Already-occupied bucket. Insert at tail of list of elements within - // the bucket. 
- elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - } - return elem; -} - -template -void HashList::InsertMore(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - Elem *elem = New(); - elem->key = key; - elem->val = val; - - KALDI_ASSERT(bucket.last_elem != NULL); // assume one element is already here - if (bucket.last_elem->key == key) { // standard behavior: add as last element - elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - return; - } - Elem *e = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail); - // find place to insert in linked list - while (e != bucket.last_elem->tail && e->key != key) e = e->tail; - KALDI_ASSERT(e->key == key); // not found? - should not happen - elem->tail = e->tail; - e->tail = elem; -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_HASH_LIST_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/hash-list.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/hash-list.h deleted file mode 100644 index 31cc9bdc4870773475f8c5139539e320746bf5fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/hash-list.h +++ /dev/null @@ -1,146 +0,0 @@ -// util/hash-list.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_H_ -#define KALDI_UTIL_HASH_LIST_H_ - -#include -#include -#include -#include -#include - -#include "base/kaldi-error.h" - -/* This header provides utilities for a structure that's used in a decoder (but - is quite generic in nature so we implement and test it separately). - Basically it's a singly-linked list, but implemented in such a way that we - can quickly search for elements in the list. We give it a slightly richer - interface than just a hash and a list. The idea is that we want to separate - the hash part and the list part: basically, in the decoder, we want to have a - single hash for the current frame and the next frame, because by the time we - need to access the hash for the next frame we no longer need the hash for the - previous frame. So we have an operation that clears the hash but leaves the - list structure intact. We also control memory management inside this object, - to avoid repeated new's/deletes. - - See hash-list-test.cc for an example of how to use this object. -*/ - -namespace kaldi { - -template -class HashList { - public: - struct Elem { - I key; - T val; - Elem *tail; - }; - - /// Constructor takes no arguments. - /// Call SetSize to inform it of the likely size. 
- HashList(); - - /// Clears the hash and gives the head of the current list to the user; - /// ownership is transferred to the user (the user must call Delete() - /// for each element in the list, at his/her leisure). - Elem *Clear(); - - /// Gives the head of the current list to the user. Ownership retained in the - /// class. Caution: in December 2013 the return type was changed to const - /// Elem* and this function was made const. You may need to change some types - /// of local Elem* variables to const if this produces compilation errors. - const Elem *GetList() const; - - /// Think of this like delete(). It is to be called for each Elem in turn - /// after you "obtained ownership" by doing Clear(). This is not the opposite - /// of. Insert, it is the opposite of New. It's really a memory operation. - inline void Delete(Elem *e); - - /// This should probably not be needed to be called directly by the user. - /// Think of it as opposite - /// to Delete(); - inline Elem *New(); - - /// Find tries to find this element in the current list using the hashtable. - /// It returns NULL if not present. The Elem it returns is not owned by the - /// user, it is part of the internal list owned by this object, but the user - /// is free to modify the "val" element. - inline Elem *Find(I key); - - /// Insert inserts a new element into the hashtable/stored list. - /// Because element keys in a hashtable are unique, this operation checks - /// whether each inserted element has a key equivalent to the one of an - /// element already in the hashtable. If so, the element is not inserted, - /// returning an pointer to this existing element. - inline Elem *Insert(I key, T val); - - /// Insert inserts another element with same key into the hashtable/ - /// stored list. - /// By calling this, the user asserts that one element with that key is - /// already present. - /// We insert it that way, that all elements with the same key - /// follow each other. - /// Find() will return the first one of the elements with the same key. - inline void InsertMore(I key, T val); - - /// SetSize tells the object how many hash buckets to allocate (should - /// typically be at least twice the number of objects we expect to go in the - /// structure, for fastest performance). It must be called while the hash - /// is empty (e.g. after Clear() or after initializing the object, but before - /// adding anything to the hash. - void SetSize(size_t sz); - - /// Returns current number of hash buckets. - inline size_t Size() { return hash_size_; } - - ~HashList(); - - private: - struct HashBucket { - size_t prev_bucket; // index to next bucket (-1 if list tail). Note: - // list of buckets goes in opposite direction to list of Elems. - Elem *last_elem; // pointer to last element in this bucket (NULL if empty) - inline HashBucket(size_t i, Elem *e) : prev_bucket(i), last_elem(e) {} - }; - - Elem *list_head_; // head of currently stored list. - size_t bucket_list_tail_; // tail of list of active hash buckets. - - size_t hash_size_; // number of hash buckets. - - std::vector buckets_; - - Elem *freed_head_; // head of list of currently freed elements. [ready for - // allocation] - - std::vector allocated_; // list of allocated blocks. - - static const size_t allocate_block_size_ = 1024; // Number of Elements to - // allocate in one block. Must be largish so storing allocated_ doesn't - // become a problem. 
-}; - -} // end namespace kaldi - -#include "util/hash-list-inl.h" - -#endif // KALDI_UTIL_HASH_LIST_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-io-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-io-inl.h deleted file mode 100644 index 8b0c92131c4af2113eb33da6f3cfa9dc4dee83e1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-io-inl.h +++ /dev/null @@ -1,40 +0,0 @@ -// util/kaldi-io-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_KALDI_IO_INL_H_ -#define KALDI_UTIL_KALDI_IO_INL_H_ - -#include - -namespace kaldi { - -bool Input::Open(const std::string &rxfilename, bool *binary) { - return OpenInternal(rxfilename, true, binary); -} - -bool Input::OpenTextMode(const std::string &rxfilename) { - return OpenInternal(rxfilename, false, NULL); -} - -bool Input::IsOpen() { return impl_ != NULL; } - -bool Output::IsOpen() { return impl_ != NULL; } - -} // end namespace kaldi. - -#endif // KALDI_UTIL_KALDI_IO_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-io.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-io.cc deleted file mode 100644 index 5f8ec4870138df32f6aca9c12383cf3885411741..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-io.cc +++ /dev/null @@ -1,898 +0,0 @@ -// util/kaldi-io.cc - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
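As a reading aid for the hash-list.h interface removed above (a singly linked list with a per-frame hash index, where Clear() hands the whole list back to the caller and Delete() recycles each element), here is a minimal C++ usage sketch. It only exercises the methods declared in that header; the key/value types and sizes are illustrative, not anything WeNet actually does.

```cpp
// Minimal sketch of the HashList<I, T> interface declared in the deleted
// util/hash-list.h (SetSize, Insert, InsertMore, Find, Clear, Delete).
#include <cstdio>
#include "util/hash-list.h"

void HashListSketch() {
  kaldi::HashList<int, double> h;
  h.SetSize(256);          // roughly 2x the expected element count, while empty
  h.Insert(42, 1.5);       // first element with key 42
  h.InsertMore(42, 2.5);   // InsertMore() assumes key 42 is already present
  if (kaldi::HashList<int, double>::Elem *e = h.Find(42))
    e->val += 1.0;         // Find() returns a modifiable element
  // Clear() empties the hash but transfers the list to the caller, who must
  // hand every element back to the pool via Delete().
  for (kaldi::HashList<int, double>::Elem *e = h.Clear(); e != NULL;) {
    kaldi::HashList<int, double>::Elem *next = e->tail;
    std::printf("key=%d val=%g\n", e->key, e->val);
    h.Delete(e);
    e = next;
  }
}
```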
-#include "util/kaldi-io.h" - -#include -#include -#include - -#include - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" -#include "util/kaldi-pipebuf.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -#ifdef KALDI_CYGWIN_COMPAT -#include "util/kaldi-cygwin-io-inl.h" -#define MapOsPath(x) MapCygwinPath(x) -#else // KALDI_CYGWIN_COMPAT -#define MapOsPath(x) x -#endif // KALDI_CYGWIN_COMPAT - -#if defined(_MSC_VER) -static FILE *popen(const char *command, const char *mode) { -#ifdef KALDI_CYGWIN_COMPAT - return kaldi::CygwinCompatPopen(command, mode); -#else // KALDI_CYGWIN_COMPAT - return _popen(command, mode); -#endif // KALDI_CYGWIN_COMPAT -} -#endif // _MSC_VER - -namespace kaldi { - -#ifndef _MSC_VER // on VS, we don't need this type. -// could replace basic_pipebuf with stdio_filebuf on some platforms. -// Would mean we could use less of our own code. -typedef basic_pipebuf PipebufType; -#endif -} // namespace kaldi - -namespace kaldi { - -std::string PrintableRxfilename(const std::string &rxfilename) { - if (rxfilename == "" || rxfilename == "-") { - return "standard input"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return rxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(rxfilename); - } -} - -std::string PrintableWxfilename(const std::string &wxfilename) { - if (wxfilename == "" || wxfilename == "-") { - return "standard output"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return wxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(wxfilename); - } -} - -OutputType ClassifyWxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardOutput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardOutput; - } else if (first_char == '|') { - return kPipeOutput; // An output pipe like "|blah". - } else if (isspace(first_char) || isspace(last_char) || last_char == '|') { - return kNoOutput; // Leading or trailing space: can't interpret this. - // Final '|' would represent an input pipe, not an - // output pipe. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoOutput; - } else if (isdigit(last_char)) { - // This could be a file, but we have to see if it's an offset into a file - // (like foo.ark:4314328), which is not allowed for writing (but is - // allowed for reaching). 
This eliminates some things which would be - // valid UNIX filenames but are not allowed by Kaldi. (Even if we allowed - // such filenames for writing, we woudln't be able to correctly read them). - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') return kNoOutput; - // else it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but we - // check for internal '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify wxfilename with pipe symbol in the" - " wrong place (pipe without | at the beginning?): " - << filename; - return kNoOutput; - } - return kFileOutput; // It matched no other pattern: assume it's a filename. -} - -InputType ClassifyRxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardInput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardInput; - } else if (first_char == '|') { - return kNoInput; // An output pipe like "|blah": not - // valid for input. - } else if (last_char == '|') { - return kPipeInput; - } else if (isspace(first_char) || isspace(last_char)) { - return kNoInput; // We don't allow leading or trailing space in a filename. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoInput; - } else if (isdigit(last_char)) { - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') - return kOffsetFileInput; // Filename is like - // some_file:12345 - // otherwise it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but - // we check for '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified in this case. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify rxfilename with pipe symbol in the" - " wrong place (pipe without | at the end?): " - << filename; - return kNoInput; - } - return kFileInput; // It matched no other pattern: assume it's a filename. -} - -class OutputImplBase { - public: - // Open will open it as a file (no header), and return true - // on success. It cannot be called on an already open stream. 
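ClassifyWxfilename() and ClassifyRxfilename() above encode Kaldi's extended-filename convention: "" or "-" means standard output/input, a leading or trailing '|' marks a pipe, and a trailing ':<digits>' on an rxfilename is a byte offset into a file (and is rejected for writing). A small self-checking sketch of that behaviour follows; the example strings are hypothetical, and the expected enum values are read directly off the logic above.

```cpp
// Illustrative calls to the classifiers defined in the deleted kaldi-io.cc.
#include <cassert>
#include "util/kaldi-io.h"

void ClassifySketch() {
  using namespace kaldi;
  assert(ClassifyWxfilename("-") == kStandardOutput);
  assert(ClassifyWxfilename("| gzip -c > /tmp/a.gz") == kPipeOutput);  // leading '|'
  assert(ClassifyWxfilename("/tmp/out.ark") == kFileOutput);
  assert(ClassifyWxfilename("/tmp/out.ark:1234") == kNoOutput);        // offsets are read-only

  assert(ClassifyRxfilename("") == kStandardInput);
  assert(ClassifyRxfilename("gunzip -c /tmp/a.gz |") == kPipeInput);   // trailing '|'
  assert(ClassifyRxfilename("/tmp/in.ark:1234") == kOffsetFileInput);  // file:byte-offset
  assert(ClassifyRxfilename(" /tmp/in.ark") == kNoInput);              // leading space rejected
}
```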
- virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::ostream &Stream() = 0; - virtual bool Close() = 0; - virtual ~OutputImplBase() {} -}; - -class FileOutputImpl : public OutputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (os_.is_open()) - KALDI_ERR << "FileOutputImpl::Open(), " - << "open called on already open file."; - filename_ = filename; - os_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out); - return os_.is_open(); - } - - virtual std::ostream &Stream() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return os_; - } - - virtual bool Close() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - os_.close(); - return !(os_.fail()); - } - virtual ~FileOutputImpl() { - if (os_.is_open()) { - os_.close(); - if (os_.fail()) KALDI_ERR << "Error closing output file " << filename_; - } - } - - private: - std::string filename_; - std::ofstream os_; -}; - -class StandardOutputImpl : public OutputImplBase { - public: - StandardOutputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardOutputImpl::Open(), " - "open called on already open file."; -#ifdef _MSC_VER - _setmode(_fileno(stdout), binary ? _O_BINARY : _O_TEXT); -#endif - is_open_ = std::cout.good(); - return is_open_; - } - - virtual std::ostream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return std::cout; - } - - virtual bool Close() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Close(), file is not open."; - is_open_ = false; - std::cout << std::flush; - return !(std::cout.fail()); - } - virtual ~StandardOutputImpl() { - if (is_open_) { - std::cout << std::flush; - if (std::cout.fail()) KALDI_ERR << "Error writing to standard output"; - } - } - - private: - bool is_open_; -}; - -class PipeOutputImpl : public OutputImplBase { - public: - PipeOutputImpl() : f_(NULL), os_(NULL) {} - - virtual bool Open(const std::string &wxfilename, bool binary) { - filename_ = wxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(wxfilename.length() != 0 && wxfilename[0] == '|'); // should - // start with '|' - std::string cmd_name(wxfilename, 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "wb" : "w")); -#else - f_ = popen(cmd_name.c_str(), "w"); -#endif - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for writing, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't make the - // destructor try to close the stream when - // we're done. - (binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - os_ = new std::ostream(fb_); -#else - os_ = new std::ofstream(f_); -#endif - return os_->good(); - } - } - - virtual std::ostream &Stream() { - if (os_ == NULL) - KALDI_ERR << "PipeOutputImpl::Stream()," - " object not initialized."; - // I believe this error can only arise from coding error. 
- return *os_; - } - - virtual bool Close() { - if (os_ == NULL) KALDI_ERR << "PipeOutputImpl::Close(), file is not open."; - bool ok = true; - os_->flush(); - if (os_->fail()) ok = false; - delete os_; - os_ = NULL; - int status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return ok; - } - virtual ~PipeOutputImpl() { - if (os_) { - if (!Close()) - KALDI_ERR << "Error writing to pipe " << PrintableWxfilename(filename_); - } - } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::ostream *os_; -}; - -class InputImplBase { - public: - // Open will open it as a file, and return true on success. - // May be called twice only for kOffsetFileInput (otherwise, - // if called twice, we just create a new Input object, to avoid - // having to deal with the extra hassle of reopening with the - // same object. - // Note that we will to call Open with true (binary) for - // for text-mode Kaldi files; the only actual text-mode input - // is for non-Kaldi files. - virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::istream &Stream() = 0; - virtual int32 Close() = 0; // We only need to check failure in the case of - // kPipeInput. - // on close for input streams. - virtual InputType MyType() = 0; // Because if it's kOffsetFileInput, we may - // call Open twice - // (has efficiency benefits). - - virtual ~InputImplBase() {} -}; - -class FileInputImpl : public InputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (is_.is_open()) - KALDI_ERR << "FileInputImpl::Open(), " - << "open called on already open file."; - is_.open( - MapOsPath(filename).c_str(), - binary ? std::ios_base::in | std::ios_base::binary : std::ios_base::in); - return is_.is_open(); - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kFileInput; } - - virtual ~FileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::ifstream is_; -}; - -class StandardInputImpl : public InputImplBase { - public: - StandardInputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardInputImpl::Open(), " - "open called on already open file."; - is_open_ = true; -#ifdef _MSC_VER - _setmode(_fileno(stdin), binary ? _O_BINARY : _O_TEXT); -#endif - return true; // Don't check good() because would be false if - // eof, which may be valid input. - } - - virtual std::istream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. 
- return std::cin; - } - - virtual InputType MyType() { return kStandardInput; } - - virtual int32 Close() { - if (!is_open_) KALDI_ERR << "StandardInputImpl::Close(), file is not open."; - is_open_ = false; - return 0; - } - virtual ~StandardInputImpl() {} - - private: - bool is_open_; -}; - -class PipeInputImpl : public InputImplBase { - public: - PipeInputImpl() : f_(NULL), is_(NULL) {} - - virtual bool Open(const std::string &rxfilename, bool binary) { - filename_ = rxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(rxfilename.length() != 0 && - rxfilename[rxfilename.length() - 1] == - '|'); // should end with '|' - std::string cmd_name(rxfilename, 0, rxfilename.length() - 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "rb" : "r")); -#else - f_ = popen(cmd_name.c_str(), "r"); -#endif - - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for reading, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't lead the - // destructor to close the stream. - (binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - is_ = new std::istream(fb_); -#else - is_ = new std::ifstream(f_); -#endif - if (is_->fail() || is_->bad()) return false; - if (is_->eof()) { - KALDI_WARN << "Pipe opened with command " - << PrintableRxfilename(rxfilename) << " is empty."; - // don't return false: empty may be valid. - } - return true; - } - } - - virtual std::istream &Stream() { - if (is_ == NULL) - KALDI_ERR << "PipeInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return *is_; - } - - virtual int32 Close() { - if (is_ == NULL) KALDI_ERR << "PipeInputImpl::Close(), file is not open."; - delete is_; - is_ = NULL; - int32 status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return status; - } - virtual ~PipeInputImpl() { - if (is_) Close(); - } - virtual InputType MyType() { return kPipeInput; } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::istream *is_; -}; - -/* -#else - -// Just have an empty implementation of the pipe input that crashes if -// called. -class PipeInputImpl: public InputImplBase { - public: - PipeInputImpl() { KALDI_ASSERT(0 && "Pipe input not yet supported on this - platform."); } - virtual bool Open(const std::string, bool) { return 0; } - virtual std::istream &Stream() const { return NULL; } - virtual void Close() {} - virtual InputType MyType() { return kPipeInput; } -}; - -#endif -*/ - -class OffsetFileInputImpl : public InputImplBase { - // This class is a bit more complicated than the - - public: - // splits a filename like /my/file:123 into /my/file and the - // number 123. Crashes if not this format. - static void SplitFilename(const std::string &rxfilename, - std::string *filename, size_t *offset) { - size_t pos = rxfilename.find_last_of(':'); - KALDI_ASSERT(pos != std::string::npos); // would indicate error in calling - // code, as the filename is supposed to be of the correct form at this - // point. 
- *filename = std::string(rxfilename, 0, pos); - std::string number(rxfilename, pos + 1); - bool ans = ConvertStringToInteger(number, offset); - if (!ans) - KALDI_ERR << "Cannot get offset from filename " << rxfilename - << " (possibly you compiled in 32-bit and have a >32-bit" - << " byte offset into a file; you'll have to compile 64-bit."; - } - - bool Seek(size_t offset) { - size_t cur_pos = is_.tellg(); - if (cur_pos == offset) { - return true; - } else if (cur_pos < offset && cur_pos + 100 > offset) { - // We're close enough that it may be faster to just - // read that data, rather than seek. - for (size_t i = cur_pos; i < offset; i++) is_.get(); - return (is_.tellg() == std::streampos(offset)); - } - // Try to actually seek. - is_.seekg(offset, std::ios_base::beg); - if (is_.fail()) { // failbit or badbit is set [error happened] - is_.close(); - return false; // failure. - } else { - is_.clear(); // Clear any failure bits (e.g. eof). - return true; // success. - } - } - - // This Open routine is unusual in that it is designed to work even - // if it was already open. This for efficiency when seeking multiple - // times. - virtual bool Open(const std::string &rxfilename, bool binary) { - if (is_.is_open()) { - // We are opening when we have an already-open file. - // We may have to seek within this file, or else close it and - // open a different one. - std::string tmp_filename; - size_t offset; - SplitFilename(rxfilename, &tmp_filename, &offset); - if (tmp_filename == filename_ && binary == binary_) { // Just seek - is_.clear(); // clear fail bit, etc. - return Seek(offset); - } else { - is_.close(); // don't bother checking error status of is_. - filename_ = tmp_filename; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } else { - size_t offset; - SplitFilename(rxfilename, &filename_, &offset); - binary_ = binary; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kOffsetFileInput; } - - virtual ~OffsetFileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::string filename_; // the actual filename - bool binary_; // true if was opened in binary mode. - std::ifstream is_; -}; - -Output::Output(const std::string &wxfilename, bool binary, bool write_header) - : impl_(NULL) { - if (!Open(wxfilename, binary, write_header)) { - if (impl_) { - delete impl_; - impl_ = NULL; - } - KALDI_ERR << "Error opening output stream " - << PrintableWxfilename(wxfilename); - } -} - -bool Output::Close() { - if (!impl_) { - return false; // error to call Close if not open. 
- } else { - bool ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } -} - -Output::~Output() { - if (impl_) { - bool ok = impl_->Close(); - delete impl_; - impl_ = NULL; - if (!ok) - KALDI_ERR << "Error closing output file " - << PrintableWxfilename(filename_) - << (ClassifyWxfilename(filename_) == kFileOutput - ? " (disk full?)" - : ""); - } -} - -std::ostream &Output::Stream() { // will throw if not open; else returns - // stream. - if (!impl_) KALDI_ERR << "Output::Stream() called but not open."; - return impl_->Stream(); -} - -bool Output::Open(const std::string &wxfn, bool binary, bool header) { - if (IsOpen()) { - if (!Close()) { // Throw here rather than return status, as it's an error - // about something else: if the user wanted to avoid the exception he/she - // could have called Close(). - KALDI_ERR << "Output::Open(), failed to close output stream: " - << PrintableWxfilename(filename_); - } - } - - filename_ = wxfn; - - OutputType type = ClassifyWxfilename(wxfn); - KALDI_ASSERT(impl_ == NULL); - - if (type == kFileOutput) { - impl_ = new FileOutputImpl(); - } else if (type == kStandardOutput) { - impl_ = new StandardOutputImpl(); - } else if (type == kPipeOutput) { - impl_ = new PipeOutputImpl(); - } else { // type == kNoOutput - KALDI_WARN << "Invalid output filename format " - << PrintableWxfilename(wxfn); - return false; - } - if (!impl_->Open(wxfn, binary)) { - delete impl_; - impl_ = NULL; - return false; // failed to open. - } else { // successfully opened it. - if (header) { - InitKaldiOutputStream(impl_->Stream(), binary); - bool ok = impl_->Stream().good(); // still OK? - if (!ok) { - delete impl_; - impl_ = NULL; - return false; - } - return true; - } else { - return true; - } - } -} - -Input::Input(const std::string &rxfilename, bool *binary) : impl_(NULL) { - if (!Open(rxfilename, binary)) { - KALDI_ERR << "Error opening input stream " - << PrintableRxfilename(rxfilename); - } -} - -int32 Input::Close() { - if (impl_) { - int32 ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } else { - return 0; - } -} - -bool Input::OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary) { - InputType type = ClassifyRxfilename(rxfilename); - if (IsOpen()) { - // May have to close the stream first. - if (type == kOffsetFileInput && impl_->MyType() == kOffsetFileInput) { - // We want to use the same object to Open... this is in case - // the files are the same, so we can just seek. - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always open in binary. - delete impl_; - impl_ = NULL; - return false; - } - // read the binary header, if requested. - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; - } else { - Close(); - // and fall through to code below which actually opens the file. - } - } - if (type == kFileInput) { - impl_ = new FileInputImpl(); - } else if (type == kStandardInput) { - impl_ = new StandardInputImpl(); - } else if (type == kPipeInput) { - impl_ = new PipeInputImpl(); - } else if (type == kOffsetFileInput) { - impl_ = new OffsetFileInputImpl(); - } else { // type == kNoInput - KALDI_WARN << "Invalid input filename format " - << PrintableRxfilename(rxfilename); - return false; - } - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always read in binary. 
- delete impl_; - impl_ = NULL; - return false; - } - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; -} - -Input::~Input() { - if (impl_) Close(); -} - -std::istream &Input::Stream() { - if (!IsOpen()) KALDI_ERR << "Input::Stream(), not open."; - return impl_->Stream(); -} - -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-io.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-io.h deleted file mode 100644 index 2175ca8f89ed5f3e3bade26528e924208df692c6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-io.h +++ /dev/null @@ -1,266 +0,0 @@ -// util/kaldi-io.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
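OffsetFileInputImpl above is the one input implementation whose Open() may be called while already open: if the new rxfilename points into the same file (and the binary mode matches), it simply seeks instead of reopening. A sketch of what that looks like through the public Input class, with a hypothetical archive path and offsets:

```cpp
// Illustrative only: two reads from byte offsets into the same (hypothetical)
// archive file; the second Open() on the same Input object only seeks.
#include "util/kaldi-io.h"

void OffsetReadSketch() {
  bool binary = false;
  kaldi::Input ki("/data/feats.ark:1024", &binary);  // kOffsetFileInput
  // ... read the first object from ki.Stream() ...
  ki.Open("/data/feats.ark:90210", &binary);         // same file: seek, no reopen
  // ... read the second object from ki.Stream() ...
  ki.Close();
}
```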
-#ifndef KALDI_UTIL_KALDI_IO_H_ -#define KALDI_UTIL_KALDI_IO_H_ - -#ifdef _MSC_VER -#include -#include -#endif -#include // For isspace. -#include -#include -#include "base/kaldi-common.h" -// #include "matrix/kaldi-matrix.h" - -namespace kaldi { - -class OutputImplBase; // Forward decl; defined in a .cc file -class InputImplBase; // Forward decl; defined in a .cc file - -/// \addtogroup io_group -/// @{ - -// The Output and Input classes handle stream-opening for "extended" filenames -// that include actual files, standard-input/standard-output, pipes, and -// offsets into actual files. They also handle reading and writing the -// binary-mode headers for Kaldi files, where applicable. The classes have -// versions of the Open routines that throw and do not throw, depending whether -// the calling code wants to catch the errors or not; there are also versions -// that write (or do not write) the Kaldi binary-mode header that says if it's -// binary mode. Generally files that contain Kaldi objects will have the header -// on, so we know upon reading them whether they have the header. So you would -// use the OpenWithHeader routines for these (or the constructor); but other -// types of objects (e.g. FSTs) would have files without a header so you would -// use OpenNoHeader. - -// We now document the types of extended filenames that we use. -// -// A "wxfilename" is an extended filename for writing. It can take three forms: -// (1) Filename: e.g. "/some/filename", "./a/b/c", "c:\Users\dpovey\My -// Documents\\boo" -// (whatever the actual file-system interprets) -// (2) Standard output: "" or "-" -// (3) A pipe: e.g. "| gzip -c > /tmp/abc.gz" -// -// -// A "rxfilename" is an extended filename for reading. It can take four forms: -// (1) An actual filename, whatever the file-system can read, e.g. "/my/file". -// (2) Standard input: "" or "-" -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) An offset into a file, e.g.: "/mnt/blah/data/1.ark:24871" -// [these are created by the Table and TableWriter classes; I may also write -// a program that creates them for arbitrary files] -// - -// Typical usage: -// ... -// bool binary; -// MyObject.Write(Output(some_filename, binary).Stream(), binary); -// -// ... more extensive example: -// { -// Output ko(some_filename, binary); -// MyObject1.Write(ko.Stream(), binary); -// MyObject2.Write(ko.Stream(), binary); -// } - -enum OutputType { kNoOutput, kFileOutput, kStandardOutput, kPipeOutput }; - -/// ClassifyWxfilename interprets filenames as follows: -/// - kNoOutput: invalid filenames (leading or trailing space, things that look -/// like wspecifiers and rspecifiers or like pipes to read from with leading -/// |. -/// - kFileOutput: Normal filenames -/// - kStandardOutput: The empty string or "-", interpreted as standard output -/// - kPipeOutput: pipes, e.g. "| gzip -c > /tmp/abc.gz" -OutputType ClassifyWxfilename(const std::string &wxfilename); - -enum InputType { - kNoInput, - kFileInput, - kStandardInput, - kOffsetFileInput, - kPipeInput -}; - -/// ClassifyRxfilenames interprets filenames for reading as follows: -/// - kNoInput: invalid filenames (leading or trailing space, things that -/// look like wspecifiers and rspecifiers or pipes to write to -/// with trailing |. -/// - kFileInput: normal filenames -/// - kStandardInput: the empty string or "-" -/// - kPipeInput: e.g. "gunzip -c /tmp/abc.gz |" -/// - kOffsetFileInput: offsets into files, e.g. 
/some/filename:12970 -InputType ClassifyRxfilename(const std::string &rxfilename); - -class Output { - public: - // The normal constructor, provided for convenience. - // Equivalent to calling with default constructor then Open() - // with these arguments. - Output(const std::string &filename, bool binary, bool write_header = true); - - Output() : impl_(NULL) {} - - /// This opens the stream, with the given mode (binary or text). It returns - /// true on success and false on failure. However, it will throw if something - /// was already open and could not be closed (to avoid this, call Close() - /// first. if write_header == true and binary == true, it writes the Kaldi - /// binary-mode header ('\0' then 'B'). You may call Open even if it is - /// already open; it will close the existing stream and reopen (however if - /// closing the old stream failed it will throw). - bool Open(const std::string &wxfilename, bool binary, bool write_header); - - inline bool IsOpen(); // return true if we have an open stream. Does not - // imply stream is good for writing. - - std::ostream &Stream(); // will throw if not open; else returns stream. - - // Close closes the stream. Calling Close is never necessary unless you - // want to avoid exceptions being thrown. There are times when calling - // Close will hurt efficiency (basically, when using offsets into files, - // and using the same Input object), - // but most of the time the user won't be doing this directly, it will - // be done in kaldi-table.{h, cc}, so you don't have to worry about it. - bool Close(); - - // This will throw if stream could not be closed (to check error status, - // call Close()). - ~Output(); - - private: - OutputImplBase *impl_; // non-NULL if open. - std::string filename_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Output); -}; - -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject.Read(ki.Stream(), binary_in); -// -// ... more extensive example: -// -// { -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject1.Read(ki.Stream(), &binary_in); -// MyObject2.Write(ki.Stream(), &binary_in); -// } -// Note that to catch errors you need to use try.. catch. -// Input communicates errors by throwing exceptions. - -// Input interprets four kinds of filenames: -// (1) Normal filenames -// (2) The empty string or "-", interpreted as standard output -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) Offsets into [real] files, e.g. "/my/filename:12049" -// The last one has no correspondence in Output. - -class Input { - public: - /// The normal constructor. Opens the stream in binary mode. - /// Equivalent to calling the default constructor followed by Open(); then, if - /// binary != NULL, it calls ReadHeader(), putting the output in "binary"; it - /// throws on error. - explicit Input(const std::string &rxfilename, bool *contents_binary = NULL); - - Input() : impl_(NULL) {} - - // Open opens the stream for reading (the mode, where relevant, is binary; use - // OpenTextMode for text-mode, we made this a separate function rather than a - // boolean argument, to avoid confusion with Kaldi's text/binary distinction, - // since reading in the file system's text mode is unusual.) If - // contents_binary != NULL, it reads the binary-mode header and puts it in the - // "binary" variable. Returns true on success. If it returns false it will - // not be open. 
You may call Open even if it is already open; it will close - // the existing stream and reopen (however if closing the old stream failed it - // will throw). - inline bool Open(const std::string &rxfilename, bool *contents_binary = NULL); - - // As Open but (if the file system has text/binary modes) opens in text mode; - // you shouldn't ever have to use this as in Kaldi we read even text files in - // binary mode (and ignore the \r). - inline bool OpenTextMode(const std::string &rxfilename); - - // Return true if currently open for reading and Stream() will - // succeed. Does not guarantee that the stream is good. - inline bool IsOpen(); - - // It is never necessary or helpful to call Close, except if - // you are concerned about to many filehandles being open. - // Close does not throw. It returns the exit code as int32 - // in the case of a pipe [kPipeInput], and always zero otherwise. - int32 Close(); - - // Returns the underlying stream. Throws if !IsOpen() - std::istream &Stream(); - - // Destructor does not throw: input streams may legitimately fail so we - // don't worry about the status when we close them. - ~Input(); - - private: - bool OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary); - InputImplBase *impl_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Input); -}; - -template -void ReadKaldiObject(const std::string &filename, C *c) { - bool binary_in; - Input ki(filename, &binary_in); - c->Read(ki.Stream(), binary_in); -} - -// Specialize the template for reading matrices, because we want to be able to -// support reading 'ranges' (row and column ranges), like foo.mat[10:20]. -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); -// -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); - -template -inline void WriteKaldiObject(const C &c, const std::string &filename, - bool binary) { - Output ko(filename, binary); - c.Write(ko.Stream(), binary); -} - -/// PrintableRxfilename turns the rxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard input". -std::string PrintableRxfilename(const std::string &rxfilename); - -/// PrintableWxfilename turns the wxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard output". -std::string PrintableWxfilename(const std::string &wxfilename); - -/// @} - -} // end namespace kaldi. - -#include "util/kaldi-io-inl.h" - -#endif // KALDI_UTIL_KALDI_IO_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-pipebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-pipebuf.h deleted file mode 100644 index bcee80ccb1a6fa8ce3195483ac144c5ff66d2f89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/kaldi-pipebuf.h +++ /dev/null @@ -1,86 +0,0 @@ -// util/kaldi-pipebuf.h - -// Copyright 2009-2011 Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -/** @file kaldi-pipebuf.h - * This is an Kaldi C++ Library header. - */ - -#ifndef KALDI_UTIL_KALDI_PIPEBUF_H_ -#define KALDI_UTIL_KALDI_PIPEBUF_H_ - -#include -#if !defined(_LIBCPP_VERSION) // libc++ -#include -#else -#include "util/basic-filebuf.h" -#endif - -namespace kaldi { -// This class provides a way to initialize a filebuf with a FILE* pointer -// directly; it will not close the file pointer when it is deleted. -// The C++ standard does not allow implementations of C++ to provide -// this constructor within basic_filebuf, which makes it hard to deal -// with pipes using completely native C++. This is a workaround - -#ifdef _MSC_VER -#elif defined(_LIBCPP_VERSION) // libc++ -template > -class basic_pipebuf : public basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : basic_filebuf() { - this->open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - } -}; // class basic_pipebuf -#else -template > -class basic_pipebuf : public std::basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : std::basic_filebuf() { - this->_M_file.sys_open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - this->_M_mode = mode; - this->_M_buf_size = BUFSIZ; - this->_M_allocate_internal_buffer(); - this->_M_reading = false; - this->_M_writing = false; - this->_M_set_buffer(-1); - } -}; // class basic_pipebuf -#endif // _MSC_VER - -} // namespace kaldi - -#endif // KALDI_UTIL_KALDI_PIPEBUF_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/parse-options.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/parse-options.cc deleted file mode 100644 index 1f2ef844d28d67ed58d2e0c9d7c7b674e8209df8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/parse-options.cc +++ /dev/null @@ -1,636 +0,0 @@ -// util/parse-options.cc - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey); -// Frantisek Skala; Arnab Ghoshal -// Copyright 2013 Tanel Alumae -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -namespace kaldi { - -ParseOptions::ParseOptions(const std::string &prefix, OptionsItf *other) - : print_args_(false), help_(false), usage_(""), argc_(0), argv_(NULL) { - ParseOptions *po = dynamic_cast(other); - if (po != NULL && po->other_parser_ != NULL) { - // we get here if this constructor is used twice, recursively. - other_parser_ = po->other_parser_; - } else { - other_parser_ = other; - } - if (po != NULL && po->prefix_ != "") { - prefix_ = po->prefix_ + std::string(".") + prefix; - } else { - prefix_ = prefix; - } -} - -void ParseOptions::Register(const std::string &name, bool *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, int32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, uint32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, float *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, double *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, std::string *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -// old-style, used for registering application-specific parameters -template -void ParseOptions::RegisterTmpl(const std::string &name, T *ptr, - const std::string &doc) { - if (other_parser_ == NULL) { - this->RegisterCommon(name, ptr, doc, false); - } else { - KALDI_ASSERT(prefix_ != "" && - "Cannot use empty prefix when registering with prefix."); - std::string new_name = prefix_ + '.' + name; // name becomes prefix.name - other_parser_->Register(new_name, ptr, doc); - } -} - -// does the common part of the job of registering a parameter -template -void ParseOptions::RegisterCommon(const std::string &name, T *ptr, - const std::string &doc, bool is_standard) { - KALDI_ASSERT(ptr != NULL); - std::string idx = name; - NormalizeArgName(&idx); - if (doc_map_.find(idx) != doc_map_.end()) - KALDI_WARN << "Registering option twice, ignoring second time: " << name; - this->RegisterSpecific(name, idx, ptr, doc, is_standard); -} - -// used to register standard parameters (those that are present in all of the -// applications) -template -void ParseOptions::RegisterStandard(const std::string &name, T *ptr, - const std::string &doc) { - this->RegisterCommon(name, ptr, doc, true); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, bool *b, - const std::string &doc, bool is_standard) { - bool_map_[idx] = b; - doc_map_[idx] = - DocInfo(name, doc + " (bool, default = " + ((*b) ? 
"true)" : "false)"), - is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, int32 *i, - const std::string &doc, bool is_standard) { - int_map_[idx] = i; - std::ostringstream ss; - ss << doc << " (int, default = " << *i << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, uint32 *u, - const std::string &doc, bool is_standard) { - uint_map_[idx] = u; - std::ostringstream ss; - ss << doc << " (uint, default = " << *u << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, float *f, - const std::string &doc, bool is_standard) { - float_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (float, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, double *f, - const std::string &doc, bool is_standard) { - double_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (double, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, std::string *s, - const std::string &doc, bool is_standard) { - string_map_[idx] = s; - doc_map_[idx] = - DocInfo(name, doc + " (string, default = \"" + *s + "\")", is_standard); -} -void ParseOptions::DisableOption(const std::string &name) { - if (argv_ != NULL) - KALDI_ERR << "DisableOption must not be called after calling Read()."; - if (doc_map_.erase(name) == 0) - KALDI_ERR << "Option " << name - << " was not registered so cannot be disabled: "; - bool_map_.erase(name); - int_map_.erase(name); - uint_map_.erase(name); - float_map_.erase(name); - double_map_.erase(name); - string_map_.erase(name); -} - -int ParseOptions::NumArgs() const { return positional_args_.size(); } - -std::string ParseOptions::GetArg(int i) const { - // use KALDI_ERR if code error - if (i < 1 || i > static_cast(positional_args_.size())) - KALDI_ERR << "ParseOptions::GetArg, invalid index " << i; - return positional_args_[i - 1]; -} - -// We currently do not support any other options. -enum ShellType { kBash = 0 }; - -// This can be changed in the code if it ever does need to be changed (as it's -// unlikely that one compilation of this tool-set would use both shells). -static ShellType kShellType = kBash; - -// Returns true if we need to escape a string before putting it into -// a shell (mainly thinking of bash shell, but should work for others) -// This is for the convenience of the user so command-lines that are -// printed out by ParseOptions::Read (with --print-args=true) are -// paste-able into the shell and will run. If you use a different type of -// shell, it might be necessary to change this function. -// But it's mostly a cosmetic issue as it basically affects how -// the program echoes its command-line arguments to the screen. -static bool MustBeQuoted(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - const char *c = str.c_str(); - if (*c == '\0') { - return true; // Must quote empty string - } else { - const char *ok_chars[2]; - - // These seem not to be interpreted as long as there are no other "bad" - // characters involved (e.g. 
"," would be interpreted as part of something - // like a{b,c}, but not on its own. - ok_chars[kBash] = "[]~#^_-+=:.,/"; - - // Just want to make sure that a space character doesn't get automatically - // inserted here via an automated style-checking script, like it did before. - KALDI_ASSERT(!strchr(ok_chars[kBash], ' ')); - - for (; *c != '\0'; c++) { - // For non-alphanumeric characters we have a list of characters which - // are OK. All others are forbidden (this is easier since the shell - // interprets most non-alphanumeric characters). - if (!isalnum(*c)) { - const char *d; - for (d = ok_chars[st]; *d != '\0'; d++) - if (*c == *d) break; - // If not alphanumeric or one of the "ok_chars", it must be escaped. - if (*d == '\0') return true; - } - } - return false; // The string was OK. No quoting or escaping. - } -} - -// Returns a quoted and escaped version of "str" -// which has previously been determined to need escaping. -// Our aim is to print out the command line in such a way that if it's -// pasted into a shell of ShellType "st" (only bash for now), it -// will get passed to the program in the same way. -static std::string QuoteAndEscape(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - // For now we use the following rules: - // In the normal case, we quote with single-quote "'", and to escape - // a single-quote we use the string: '\'' (interpreted as closing the - // single-quote, putting an escaped single-quote from the shell, and - // then reopening the single quote). - char quote_char = '\''; - const char *escape_str = "'\\''"; // e.g. echo 'a'\''b' returns a'b - - // If the string contains single-quotes that would need escaping this - // way, and we determine that the string could be safely double-quoted - // without requiring any escaping, then we double-quote the string. - // This is the case if the characters "`$\ do not appear in the string. - // e.g. see http://www.redhat.com/mirrors/LDP/LDP/abs/html/quotingvar.html - const char *c_str = str.c_str(); - if (strchr(c_str, '\'') && !strpbrk(c_str, "\"`$\\")) { - quote_char = '"'; - escape_str = "\\\""; // should never be accessed. - } - - char buf[2]; - buf[1] = '\0'; - - buf[0] = quote_char; - std::string ans = buf; - const char *c = str.c_str(); - for (; *c != '\0'; c++) { - if (*c == quote_char) { - ans += escape_str; - } else { - buf[0] = *c; - ans += buf; - } - } - buf[0] = quote_char; - ans += buf; - return ans; -} - -// static function -std::string ParseOptions::Escape(const std::string &str) { - return MustBeQuoted(str, kShellType) ? QuoteAndEscape(str, kShellType) : str; -} - -int ParseOptions::Read(int argc, const char *const argv[]) { - argc_ = argc; - argv_ = argv; - std::string key, value; - int i; - if (argc > 0) { - // set global "const char*" g_program_name (name of the program) - // so it can be printed out in error messages; - // it's useful because often the stderr of different programs will - // be mixed together in the same log file. -#ifdef _MSC_VER - const char *c = strrchr(argv[0], '\\'); -#else - const char *c = strrchr(argv[0], '/'); -#endif - SetProgramName(c == NULL ? 
argv[0] : c + 1); - } - // first pass: look for config parameter, look for priority - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // a lone "--" marks the end of named options - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (key.compare("config") == 0) { - ReadConfigFile(value); - } - if (key.compare("help") == 0) { - PrintUsage(); - exit(0); - } - } - } - bool double_dash_seen = false; - // second pass: add the command line options - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // A lone "--" marks the end of named options. - // Skip that option and break the processing of named options - i += 1; - double_dash_seen = true; - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << argv[i]; - } - } else { - break; - } - } - - // process remaining arguments as positional - for (; i < argc; i++) { - if ((std::strcmp(argv[i], "--") == 0) && !double_dash_seen) { - double_dash_seen = true; - } else { - positional_args_.push_back(std::string(argv[i])); - } - } - - // if the user did not suppress this with --print-args = false.... - if (print_args_) { - std::ostringstream strm; - for (int j = 0; j < argc; j++) strm << Escape(argv[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } - return i; -} - -void ParseOptions::PrintUsage(bool print_command_line) { - std::cerr << '\n' << usage_ << '\n'; - DocMapType::iterator it; - // first we print application-specific options - bool app_specific_header_printed = false; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == false) { // application-specific option - if (app_specific_header_printed == false) { // header was not yet printed - std::cerr << "Options:" << '\n'; - app_specific_header_printed = true; - } - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - if (app_specific_header_printed == true) { - std::cerr << '\n'; - } - - // then the standard options - std::cerr << "Standard options:" << '\n'; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == true) { // we have standard option - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - std::cerr << '\n'; - if (print_command_line) { - std::ostringstream strm; - strm << "Command line was: "; - for (int j = 0; j < argc_; j++) strm << Escape(argv_[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } -} - -void ParseOptions::PrintConfig(std::ostream &os) { - os << '\n' << "[[ Configuration of UI-Registered options ]]" << '\n'; - std::string key; - DocMapType::iterator it; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - key = it->first; - os << it->second.name_ << " = "; - if (bool_map_.end() != bool_map_.find(key)) { - os << (*bool_map_[key] ? 
"true" : "false"); - } else if (int_map_.end() != int_map_.find(key)) { - os << (*int_map_[key]); - } else if (uint_map_.end() != uint_map_.find(key)) { - os << (*uint_map_[key]); - } else if (float_map_.end() != float_map_.find(key)) { - os << (*float_map_[key]); - } else if (double_map_.end() != double_map_.find(key)) { - os << (*double_map_[key]); - } else if (string_map_.end() != string_map_.find(key)) { - os << "'" << *string_map_[key] << "'"; - } else { - KALDI_ERR << "PrintConfig: unrecognized option " << key << "[code error]"; - } - os << '\n'; - } - os << '\n'; -} - -void ParseOptions::ReadConfigFile(const std::string &filename) { - std::ifstream is(filename.c_str(), std::ifstream::in); - if (!is.good()) { - KALDI_ERR << "Cannot open config file: " << filename; - } - - std::string line, key, value; - int32 line_number = 0; - while (std::getline(is, line)) { - line_number++; - // trim out the comments - size_t pos; - if ((pos = line.find_first_of('#')) != std::string::npos) { - line.erase(pos); - } - // skip empty lines - Trim(&line); - if (line.length() == 0) continue; - - if (line.substr(0, 2) != "--") { - KALDI_ERR << "Reading config file " << filename << ": line " - << line_number << " does not look like a line " - << "from a Kaldi command-line program's config file: should " - << "be of the form --x=y. Note: config files intended to " - << "be sourced by shell scripts lack the '--'."; - } - - // parse option - bool has_equal_sign; - SplitLongArg(line, &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << line << " in config file " << filename; - } - } -} - -void ParseOptions::SplitLongArg(const std::string &in, std::string *key, - std::string *value, bool *has_equal_sign) { - KALDI_ASSERT(in.substr(0, 2) == "--"); // precondition. - size_t pos = in.find_first_of('=', 0); - if (pos == std::string::npos) { // we allow --option for bools - // defaults to empty. We handle this differently in different cases. - *key = in.substr(2, in.size() - 2); // 2 because starts with --. - *value = ""; - *has_equal_sign = false; - } else if (pos == 2) { // we also don't allow empty keys: --=value - PrintUsage(true); - KALDI_ERR << "Invalid option (no key): " << in; - } else { // normal case: --option=value - *key = in.substr(2, pos - 2); // 2 because starts with --. 
- *value = in.substr(pos + 1); - *has_equal_sign = true; - } -} - -void ParseOptions::NormalizeArgName(std::string *str) { - std::string out; - std::string::iterator it; - - for (it = str->begin(); it != str->end(); ++it) { - if (*it == '_') - out += '-'; // convert _ to - - else - out += std::tolower(*it); - } - *str = out; - - KALDI_ASSERT(str->length() > 0); -} - -bool ParseOptions::SetOption(const std::string &key, const std::string &value, - bool has_equal_sign) { - if (bool_map_.end() != bool_map_.find(key)) { - if (has_equal_sign && value == "") - KALDI_ERR << "Invalid option --" << key << "="; - *(bool_map_[key]) = ToBool(value); - } else if (int_map_.end() != int_map_.find(key)) { - *(int_map_[key]) = ToInt(value); - } else if (uint_map_.end() != uint_map_.find(key)) { - *(uint_map_[key]) = ToUint(value); - } else if (float_map_.end() != float_map_.find(key)) { - *(float_map_[key]) = ToFloat(value); - } else if (double_map_.end() != double_map_.find(key)) { - *(double_map_[key]) = ToDouble(value); - } else if (string_map_.end() != string_map_.find(key)) { - if (!has_equal_sign) - KALDI_ERR << "Invalid option --" << key << " (option format is --x=y)."; - *(string_map_[key]) = value; - } else { - return false; - } - return true; -} - -bool ParseOptions::ToBool(std::string str) { - std::transform(str.begin(), str.end(), str.begin(), ::tolower); - - // allow "" as a valid option for "true", so that --x is the same as --x=true - if ((str.compare("true") == 0) || (str.compare("t") == 0) || - (str.compare("1") == 0) || (str.compare("") == 0)) { - return true; - } - if ((str.compare("false") == 0) || (str.compare("f") == 0) || - (str.compare("0") == 0)) { - return false; - } - // if it is neither true nor false: - PrintUsage(true); - KALDI_ERR << "Invalid format for boolean argument [expected true or false]: " - << str; - return false; // never reached -} - -int32 ParseOptions::ToInt(const std::string &str) { - int32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -uint32 ParseOptions::ToUint(const std::string &str) { - uint32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -float ParseOptions::ToFloat(const std::string &str) { - float ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -double ParseOptions::ToDouble(const std::string &str) { - double ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -// instantiate templates -template void ParseOptions::RegisterTmpl(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, int32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, float *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, double *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterStandard(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - int32 *ptr, - const std::string &doc); 
-template void ParseOptions::RegisterStandard(const std::string &name, - uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - float *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - double *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterCommon(const std::string &name, bool *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, int32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, uint32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, float *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, double *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, - std::string *ptr, - const std::string &doc, - bool is_standard); - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/parse-options.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/parse-options.h deleted file mode 100644 index 93a060f4a411dfd63298a91bb313e0b66d337a75..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/parse-options.h +++ /dev/null @@ -1,265 +0,0 @@ -// util/parse-options.h - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Frantisek Skala; Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_PARSE_OPTIONS_H_ -#define KALDI_UTIL_PARSE_OPTIONS_H_ - -#include -#include -#include - -#include "base/kaldi-common.h" -#include "itf/options-itf.h" - -namespace kaldi { - -/// The class ParseOptions is for parsing command-line options; see -/// \ref parse_options for more documentation. -class ParseOptions : public OptionsItf { - public: - explicit ParseOptions(const char *usage) - : print_args_(true), - help_(false), - usage_(usage), - argc_(0), - argv_(NULL), - prefix_(""), - other_parser_(NULL) { -#if !defined(_MSC_VER) && \ - !defined(__CYGWIN__) // This is just a convenient place to set the stderr - // to line - setlinebuf(stderr); // buffering mode, since it's called at program start. -#endif // This helps ensure different programs' output is not mixed up. 
- RegisterStandard("config", &config_, - "Configuration file to read (this " - "option may be repeated)"); - RegisterStandard("print-args", &print_args_, - "Print the command line arguments (to stderr)"); - RegisterStandard("help", &help_, "Print out usage message"); - RegisterStandard("verbose", &g_kaldi_verbose_level, - "Verbose level (higher->more logging)"); - } - - /** - This is a constructor for the special case where some options are - registered with a prefix to avoid conflicts. The object thus created will - only be used temporarily to register an options class with the original - options parser (which is passed as the *other pointer) using the given - prefix. It should not be used for any other purpose, and the prefix must - not be the empty string. It seems to be the least bad way of implementing - options with prefixes at this point. - Example of usage is: - ParseOptions po; // original ParseOptions object - ParseOptions po_mfcc("mfcc", &po); // object with prefix. - MfccOptions mfcc_opts; - mfcc_opts.Register(&po_mfcc); - The options will now get registered as, e.g., --mfcc.frame-shift=10.0 - instead of just --frame-shift=10.0 - */ - ParseOptions(const std::string &prefix, OptionsItf *other); - - ~ParseOptions() {} - - // Methods from the interface - void Register(const std::string &name, bool *ptr, const std::string &doc); - void Register(const std::string &name, int32 *ptr, const std::string &doc); - void Register(const std::string &name, uint32 *ptr, const std::string &doc); - void Register(const std::string &name, float *ptr, const std::string &doc); - void Register(const std::string &name, double *ptr, const std::string &doc); - void Register(const std::string &name, std::string *ptr, - const std::string &doc); - - /// If called after registering an option and before calling - /// Read(), disables that option from being used. Will crash - /// at runtime if that option had not been registered. - void DisableOption(const std::string &name); - - /// This one is used for registering standard parameters of all the programs - template - void RegisterStandard(const std::string &name, T *ptr, - const std::string &doc); - - /** - Parses the command line options and fills the ParseOptions-registered - variables. This must be called after all the variables were registered!!! - - Initially the variables have implicit values, - then the config file values are set-up, - finally the command line values given. - Returns the first position in argv that was not used. - [typically not useful: use NumParams() and GetParam(). ] - */ - int Read(int argc, const char *const *argv); - - /// Prints the usage documentation [provided in the constructor]. - void PrintUsage(bool print_command_line = false); - /// Prints the actual configuration of all the registered variables - void PrintConfig(std::ostream &os); - - /// Reads the options values from a config file. Must be called after - /// registering all options. This is usually used internally after the - /// standard --config option is used, but it may also be called from a - /// program. - void ReadConfigFile(const std::string &filename); - - /// Number of positional parameters (c.f. argc-1). - int NumArgs() const; - - /// Returns one of the positional parameters; 1-based indexing for argc/argv - /// compatibility. Will crash if param is not >=1 and <=NumArgs(). - std::string GetArg(int param) const; - - std::string GetOptArg(int param) const { - return (param <= NumArgs() ? 
GetArg(param) : ""); - } - - /// The following function will return a possibly quoted and escaped - /// version of "str", according to the current shell. Currently - /// this is just hardwired to bash. It's useful for debug output. - static std::string Escape(const std::string &str); - - private: - /// Template to register various variable types, - /// used for program-specific parameters - template - void RegisterTmpl(const std::string &name, T *ptr, const std::string &doc); - - // Following functions do just the datatype-specific part of the job - /// Register boolean variable - void RegisterSpecific(const std::string &name, const std::string &idx, - bool *b, const std::string &doc, bool is_standard); - /// Register int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - int32 *i, const std::string &doc, bool is_standard); - /// Register unsinged int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - uint32 *u, const std::string &doc, bool is_standard); - /// Register float variable - void RegisterSpecific(const std::string &name, const std::string &idx, - float *f, const std::string &doc, bool is_standard); - /// Register double variable [useful as we change BaseFloat type]. - void RegisterSpecific(const std::string &name, const std::string &idx, - double *f, const std::string &doc, bool is_standard); - /// Register string variable - void RegisterSpecific(const std::string &name, const std::string &idx, - std::string *s, const std::string &doc, - bool is_standard); - - /// Does the actual job for both kinds of parameters - /// Does the common part of the job for all datatypes, - /// then calls RegisterSpecific - template - void RegisterCommon(const std::string &name, T *ptr, const std::string &doc, - bool is_standard); - - /// Set option with name "key" to "value"; will crash if can't do it. - /// "has_equal_sign" is used to allow --x for a boolean option x, - /// and --y=, for a string option y. - bool SetOption(const std::string &key, const std::string &value, - bool has_equal_sign); - - bool ToBool(std::string str); - int32 ToInt(const std::string &str); - uint32 ToUint(const std::string &str); - float ToFloat(const std::string &str); - double ToDouble(const std::string &str); - - // maps for option variables - std::map bool_map_; - std::map int_map_; - std::map uint_map_; - std::map float_map_; - std::map double_map_; - std::map string_map_; - - /** - Structure for options' documentation - */ - struct DocInfo { - DocInfo() {} - DocInfo(const std::string &name, const std::string &usemsg) - : name_(name), use_msg_(usemsg), is_standard_(false) {} - DocInfo(const std::string &name, const std::string &usemsg, - bool is_standard) - : name_(name), use_msg_(usemsg), is_standard_(is_standard) {} - - std::string name_; - std::string use_msg_; - bool is_standard_; - }; - typedef std::map DocMapType; - DocMapType doc_map_; ///< map for the documentation - - bool print_args_; ///< variable for the implicit --print-args parameter - bool help_; ///< variable for the implicit --help parameter - std::string config_; ///< variable for the implicit --config parameter - std::vector positional_args_; - const char *usage_; - int argc_; - const char *const *argv_; - - /// These members are not normally used. 
They are only used when the object - /// is constructed with a prefix - std::string prefix_; - OptionsItf *other_parser_; - - protected: - /// SplitLongArg parses an argument of the form --a=b, --a=, or --a, - /// and sets "has_equal_sign" to true if an equals-sign was parsed.. - /// this is needed in order to correctly allow --x for a boolean option - /// x, and --y= for a string option y, and to disallow --x= and --y. - void SplitLongArg(const std::string &in, std::string *key, std::string *value, - bool *has_equal_sign); - - void NormalizeArgName(std::string *str); -}; - -/// This template is provided for convenience in reading config classes from -/// files; this is not the standard way to read configuration options, but may -/// occasionally be needed. This function assumes the config has a function -/// "void Register(OptionsItf *opts)" which it can call to register the -/// ParseOptions object. -template -void ReadConfigFromFile(const std::string &config_filename, C *c) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << config_filename << "'"; - ParseOptions po(usage_str.str().c_str()); - c->Register(&po); - po.ReadConfigFile(config_filename); -} - -/// This variant of the template ReadConfigFromFile is for if you need to read -/// two config classes from the same file. -template -void ReadConfigsFromFile(const std::string &conf, C1 *c1, C2 *c2) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << conf << "'"; - ParseOptions po(usage_str.str().c_str()); - c1->Register(&po); - c2->Register(&po); - po.ReadConfigFile(conf); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_PARSE_OPTIONS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/simple-io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/simple-io-funcs.cc deleted file mode 100644 index 5ace601b6a2bb186dec78b0b25cb5a3227c48bc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/simple-io-funcs.cc +++ /dev/null @@ -1,80 +0,0 @@ -// util/simple-io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#include "util/simple-io-funcs.h" -#include "util/text-utils.h" - -namespace kaldi { - -bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. 
- if (!ko.Open(wxfilename, false, false)) return false; - for (size_t i = 0; i < list.size(); i++) ko.Stream() << list[i] << '\n'; - return ko.Close(); -} - -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - int32 i; - list->clear(); - while (!(is >> i).fail()) list->push_back(i); - is >> std::ws; - return is.eof(); // should be eof, or junk at end of file. -} - -bool WriteIntegerVectorVectorSimple( - const std::string &wxfilename, - const std::vector > &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. - if (!ko.Open(wxfilename, false, false)) return false; - std::ostream &os = ko.Stream(); - for (size_t i = 0; i < list.size(); i++) { - for (size_t j = 0; j < list[i].size(); j++) { - os << list[i][j]; - if (j + 1 < list[i].size()) os << ' '; - } - os << '\n'; - } - return ko.Close(); -} - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - list->clear(); - std::string line; - while (std::getline(is, line)) { - std::vector v; - if (!SplitStringToIntegers(line, " \t\r", true, &v)) { - list->clear(); - return false; - } - list->push_back(v); - } - return is.eof(); // if we're not at EOF, something weird happened. -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/simple-io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/simple-io-funcs.h deleted file mode 100644 index 1ead12790ba9bd6a44ccdff855918270191b8ebd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/simple-io-funcs.h +++ /dev/null @@ -1,61 +0,0 @@ -// util/simple-io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_SIMPLE_IO_FUNCS_H_ -#define KALDI_UTIL_SIMPLE_IO_FUNCS_H_ - -#include -#include -#include "util/kaldi-io.h" - -// This header contains some utilities for reading some common, simple text -// formats:integers in files, one per line, and integers in files, possibly -// multiple per line. these are not really fully native Kaldi formats; they are -// mostly for small files that might be generated by scripts, and can be read -// all at one time. for longer files of this type, we would probably use the -// Table code. - -namespace kaldi { - -/// WriteToList attempts to write this list of integers, one per line, -/// to the given file, in text format. -/// returns true if succeeded. 
-bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &v); - -/// ReadFromList attempts to read this list of integers, one per line, -/// from the given file, in text format. -/// returns true if succeeded. -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *v); - -// This is a file format like: -// 1 2 -// 3 -// -// 4 5 6 -// etc. -bool WriteIntegerVectorVectorSimple(const std::string &wxfilename, - const std::vector > &v); - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *v); - -} // end namespace kaldi. - -#endif // KALDI_UTIL_SIMPLE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/stl-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/stl-utils.h deleted file mode 100644 index 8a29cd582c77b3078277aa9713b8676032bbc5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/stl-utils.h +++ /dev/null @@ -1,310 +0,0 @@ -// util/stl-utils.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_STL_UTILS_H_ -#define KALDI_UTIL_STL_UTILS_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -using std::unordered_map; -using std::unordered_set; - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Sorts and uniq's (removes duplicates) from a vector. -template -inline void SortAndUniq(std::vector *vec) { - std::sort(vec->begin(), vec->end()); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Returns true if the vector is sorted. -template -inline bool IsSorted(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter < *iter) return false; - iter = next_iter; - } -} - -/// Returns true if the vector is sorted and contains each element -/// only once. -template -inline bool IsSortedAndUniq(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter <= *iter) return false; - iter = next_iter; - } -} - -/// Removes duplicate elements from a sorted list. -template -inline void Uniq(std::vector *vec) { // must be already sorted. 
- KALDI_PARANOID_ASSERT(IsSorted(*vec)); - KALDI_ASSERT(vec); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Copies the elements of a set to a vector. -template -void CopySetToVector(const std::set &s, std::vector *v) { - // copies members of s into v, in sorted order from lowest to highest - // (because the set was in sorted order). - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename std::set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -template -void CopySetToVector(const unordered_set &s, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename unordered_set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -/// Copies the (key, value) pairs in a map to a vector of pairs. -template -void CopyMapToVector(const std::map &m, - std::vector > *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector >::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = std::make_pair(miter->first, miter->second); - // do it like this because of const casting. - } -} - -/// Copies the keys in a map to a vector. -template -void CopyMapKeysToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->first; - } -} - -/// Copies the values in a map to a vector. -template -void CopyMapValuesToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->second; - } -} - -/// Copies the keys in a map to a set. -template -void CopyMapKeysToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) { - s->insert(s->end(), miter->first); - } -} - -/// Copies the values in a map to a set. -template -void CopyMapValuesToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) s->insert(s->end(), miter->second); -} - -/// Copies the contents of a vector to a set. -template -void CopyVectorToSet(const std::vector &v, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) s->insert(s->end(), *iter); - // s->end() is a hint in case v was sorted. will work regardless. -} - -/// Deletes any non-NULL pointers in the vector v, and sets -/// the corresponding entries of v to NULL -template -void DeletePointers(std::vector *v) { - KALDI_ASSERT(v != NULL); - typename std::vector::iterator iter = v->begin(), end = v->end(); - for (; iter != end; ++iter) { - if (*iter != NULL) { - delete *iter; - *iter = NULL; // set to NULL for extra safety. - } - } -} - -/// Returns true if the vector of pointers contains NULL pointers. 
-template -bool ContainsNullPointers(const std::vector &v) { - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) - if (*iter == static_cast(NULL)) return true; - return false; -} - -/// Copies the contents a vector of one type to a vector -/// of another type. -template -void CopyVectorToVector(const std::vector &vec_in, std::vector *vec_out) { - KALDI_ASSERT(vec_out != NULL); - vec_out->resize(vec_in.size()); - for (size_t i = 0; i < vec_in.size(); i++) - (*vec_out)[i] = static_cast(vec_in[i]); -} - -/// A hashing function-object for vectors. -template -struct VectorHasher { // hashing function for vector. - size_t operator()(const std::vector &x) const noexcept { - size_t ans = 0; - typename std::vector::const_iterator iter = x.begin(), end = x.end(); - for (; iter != end; ++iter) { - ans *= kPrime; - ans += *iter; - } - return ans; - } - VectorHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - } - - private: - static const int kPrime = 7853; -}; - -/// A hashing function-object for pairs of ints -template -struct PairHasher { // hashing function for pair - size_t operator()(const std::pair &x) const noexcept { - // 7853 was chosen at random from a list of primes. - return x.first + x.second * 7853; - } - PairHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int1); - KALDI_ASSERT_IS_INTEGER_TYPE(Int2); - } -}; - -/// A hashing function object for strings. -struct StringHasher { // hashing function for std::string - size_t operator()(const std::string &str) const noexcept { - size_t ans = 0, len = str.length(); - const char *c = str.c_str(), *end = c + len; - for (; c != end; c++) { - ans *= kPrime; - ans += *c; - } - return ans; - } - - private: - static const int kPrime = 7853; -}; - -/// Reverses the contents of a vector. -template -inline void ReverseVector(std::vector *vec) { - KALDI_ASSERT(vec != NULL); - size_t sz = vec->size(); - for (size_t i = 0; i < sz / 2; i++) std::swap((*vec)[i], (*vec)[sz - 1 - i]); -} - -/// Comparator object for pairs that compares only the first pair. -template -struct CompareFirstMemberOfPair { - inline bool operator()(const std::pair &p1, const std::pair &p2) { - return p1.first < p2.first; - } -}; - -/// For a vector of pair where I is an integer and F a floating-point or -/// integer type, this function sorts a vector of type vector > on -/// the I value and then merges elements with equal I values, summing these over -/// the F component and then removing any F component with zero value. This -/// is for where the vector of pairs represents a map from the integer to float -/// component, with an "adding" type of semantics for combining the elements. -template -inline void MergePairVectorSumming(std::vector > *vec) { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - CompareFirstMemberOfPair c; - std::sort(vec->begin(), vec->end(), c); // sort on 1st element. - typename std::vector >::iterator out = vec->begin(), - in = vec->begin(), - end = vec->end(); - // special case: while there is nothing to be changed, skip over - // initial input (avoids unnecessary copying). - while (in + 1 < end && in[0].first != in[1].first && in[0].second != 0.0) { - in++; - out++; - } - while (in < end) { - // We reach this point only at the first element of - // each stretch of identical .first elements. - *out = *in; - ++in; - while (in < end && in->first == out->first) { - out->second += in->second; // this is the merge operation. 
- ++in; - } - if (out->second != static_cast(0)) // Don't keep zero elements. - out++; - } - vec->erase(out, end); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_STL_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/text-utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/text-utils.cc deleted file mode 100644 index fd70889644f6b4e14793ddd4f5b0d71a66768699..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/text-utils.cc +++ /dev/null @@ -1,580 +0,0 @@ -// util/text-utils.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "util/text-utils.h" - -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out) { - KALDI_ASSERT(out != NULL); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - F f = 0; - if (!ConvertStringToReal(split[i], &f)) return false; - (*out)[i] = f; - } - return true; -} - -// Instantiate the template above for float and double. 
-template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); -template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out) { - std::string tmp_str; - for (size_t i = 0; i < vec_in.size(); i++) { - if (!omit_empty_strings || !vec_in[i].empty()) { - tmp_str.append(vec_in[i]); - if (i < vec_in.size() - 1) - if (!omit_empty_strings || !vec_in[i + 1].empty()) - tmp_str.append(delim); - } - } - str_out->swap(tmp_str); -} - -void Trim(std::string *str) { - const char *white_chars = " \t\n\r\f\v"; - - std::string::size_type pos = str->find_last_not_of(white_chars); - if (pos != std::string::npos) { - str->erase(pos + 1); - pos = str->find_first_not_of(white_chars); - if (pos != std::string::npos) str->erase(0, pos); - } else { - str->erase(str->begin(), str->end()); - } -} - -bool IsToken(const std::string &token) { - size_t l = token.length(); - if (l == 0) return false; - for (size_t i = 0; i < l; i++) { - unsigned char c = token[i]; - if ((!isprint(c) || isspace(c)) && (isascii(c) || c == (unsigned char)255)) - return false; - // The "&& (isascii(c) || c == 255)" was added so that we won't reject - // non-ASCII characters such as French characters with accents [except for - // 255 which is "nbsp", a form of space]. - } - return true; -} - -void SplitStringOnFirstSpace(const std::string &str, std::string *first, - std::string *rest) { - const char *white_chars = " \t\n\r\f\v"; - typedef std::string::size_type I; - const I npos = std::string::npos; - I first_nonwhite = str.find_first_not_of(white_chars); - if (first_nonwhite == npos) { - first->clear(); - rest->clear(); - return; - } - // next_white is first whitespace after first nonwhitespace. - I next_white = str.find_first_of(white_chars, first_nonwhite); - - if (next_white == npos) { // no more whitespace... - *first = std::string(str, first_nonwhite); - rest->clear(); - return; - } - I next_nonwhite = str.find_first_not_of(white_chars, next_white); - if (next_nonwhite == npos) { - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - rest->clear(); - return; - } - - I last_nonwhite = str.find_last_not_of(white_chars); - KALDI_ASSERT(last_nonwhite != npos); // or coding error. 
- - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - *rest = std::string(str, next_nonwhite, last_nonwhite + 1 - next_nonwhite); -} - -bool IsLine(const std::string &line) { - if (line.find('\n') != std::string::npos) return false; - if (line.empty()) return true; - if (isspace(*(line.begin()))) return false; - if (isspace(*(line.rbegin()))) return false; - std::string::const_iterator iter = line.begin(), end = line.end(); - for (; iter != end; iter++) - if (!isprint(*iter)) return false; - return true; -} - -template -class NumberIstream { - public: - explicit NumberIstream(std::istream &i) : in_(i) {} - - NumberIstream &operator>>(T &x) { - if (!in_.good()) return *this; - in_ >> x; - if (!in_.fail() && RemainderIsOnlySpaces()) return *this; - return ParseOnFail(&x); - } - - private: - std::istream &in_; - - bool RemainderIsOnlySpaces() { - if (in_.tellg() != std::istream::pos_type(-1)) { - std::string rem; - in_ >> rem; - - if (rem.find_first_not_of(' ') != std::string::npos) { - // there is not only spaces - return false; - } - } - - in_.clear(); - return true; - } - - NumberIstream &ParseOnFail(T *x) { - std::string str; - in_.clear(); - in_.seekg(0); - // If the stream is broken even before trying - // to read from it or if there are many tokens, - // it's pointless to try. - if (!(in_ >> str) || !RemainderIsOnlySpaces()) { - in_.setstate(std::ios_base::failbit); - return *this; - } - - std::map inf_nan_map; - // we'll keep just uppercase values. - inf_nan_map["INF"] = std::numeric_limits::infinity(); - inf_nan_map["+INF"] = std::numeric_limits::infinity(); - inf_nan_map["-INF"] = -std::numeric_limits::infinity(); - inf_nan_map["INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["+INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["-INFINITY"] = -std::numeric_limits::infinity(); - inf_nan_map["NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["+NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-NAN"] = -std::numeric_limits::quiet_NaN(); - // MSVC - inf_nan_map["1.#INF"] = std::numeric_limits::infinity(); - inf_nan_map["-1.#INF"] = -std::numeric_limits::infinity(); - inf_nan_map["1.#QNAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-1.#QNAN"] = -std::numeric_limits::quiet_NaN(); - - std::transform(str.begin(), str.end(), str.begin(), ::toupper); - - if (inf_nan_map.find(str) != inf_nan_map.end()) { - *x = inf_nan_map[str]; - } else { - in_.setstate(std::ios_base::failbit); - } - - return *this; - } -}; - -template -bool ConvertStringToReal(const std::string &str, T *out) { - std::istringstream iss(str); - - NumberIstream i(iss); - - i >> *out; - - if (iss.fail()) { - // Number conversion failed. - return false; - } - - return true; -} - -template bool ConvertStringToReal(const std::string &str, float *out); -template bool ConvertStringToReal(const std::string &str, double *out); - -/* - This function is a helper function of StringsApproxEqual. It should be - thought of as a recursive function-- it was designed that way-- but rather - than actually recursing (which would cause problems with stack overflow), we - just set the args and return to the start. - - The 'decimal_places_tolerance' argument is just passed in from outside, - see the documentation for StringsApproxEqual in text-utils.h to see an - explanation. The argument 'places_into_number' provides some information - about the strings 'a' and 'b' that precedes the current pointers. 
- For purposes of this comment, let's define the 'decimal' of a number - as the part that comes after the decimal point, e.g. in '99.123', - '123' would be the decimal. If 'places_into_number' is -1, it means - we're not currently inside some place like that (i.e. it's not the - case that we're pointing to the '1' or the '2' or the '3'). - If it's 0, then we'd be pointing to the first place after the decimal, - '1' in this case. Note if one of the numbers is shorter than the - other, like '99.123' versus '99.1234' and 'a' points to the first '3' - while 'b' points to the second '4', 'places_into_number' referes to the - shorter of the two, i.e. it would be 2 in this example. - - - */ -bool StringsApproxEqualInternal(const char *a, const char *b, - int32 decimal_places_tolerance, - int32 places_into_number) { -start: - char ca = *a, cb = *b; - if (ca == cb) { - if (ca == '\0') { - return true; - } else { - if (places_into_number >= 0) { - if (isdigit(ca)) { - places_into_number++; - } else { - places_into_number = -1; - } - } else { - if (ca == '.') { - places_into_number = 0; - } - } - a++; - b++; - goto start; - } - } else { - if (places_into_number >= decimal_places_tolerance && - (isdigit(ca) || isdigit(cb))) { - // we're potentially willing to accept this difference between the - // strings. - if (isdigit(ca)) a++; - if (isdigit(cb)) b++; - // we'll have advanced at least one of the two strings. - goto start; - } else if (places_into_number >= 0 && - ((ca == '0' && !isdigit(cb)) || (cb == '0' && !isdigit(ca)))) { - // this clause is designed to ensure that, for example, - // "0.1" would count the same as "0.100001". - if (ca == '0') - a++; - else - b++; - places_into_number++; - goto start; - } else { - return false; - } - } -} - -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_tolerance) { - return StringsApproxEqualInternal(a.c_str(), b.c_str(), - decimal_places_tolerance, -1); -} - -bool ConfigLine::ParseLine(const std::string &line) { - data_.clear(); - whole_line_ = line; - if (line.size() == 0) return false; // Empty line - size_t pos = 0, size = line.size(); - while (isspace(line[pos]) && pos < size) pos++; - if (pos == size) return false; // whitespace-only line - size_t first_token_start_pos = pos; - // first get first_token_. - while (!isspace(line[pos]) && pos < size) { - if (line[pos] == '=') { - // If the first block of non-whitespace looks like "foo-bar=...", - // then we ignore it: there is no initial token, and FirstToken() - // is empty. - pos = first_token_start_pos; - break; - } - pos++; - } - first_token_ = - std::string(line, first_token_start_pos, pos - first_token_start_pos); - // first_token_ is expected to be either empty or something like - // "component-node", which actually is a slightly more restrictive set of - // strings than IsValidName() checks for this is a convenient way to check it. - if (!first_token_.empty() && !IsValidName(first_token_)) return false; - - while (pos < size) { - if (isspace(line[pos])) { - pos++; - continue; - } - - // OK, at this point we know that we are pointing at nonspace. - size_t next_equals_sign = line.find_first_of("=", pos); - if (next_equals_sign == pos || next_equals_sign == std::string::npos) { - // we're looking for something like 'key=value'. If there is no equals - // sign, or it's not preceded by something, it's a parsing failure. - return false; - } - std::string key(line, pos, next_equals_sign - pos); - if (!IsValidName(key)) return false; - - // handle any quotes. 
we support key='blah blah' or key="foo bar". - // no escaping is supported. - if (line[next_equals_sign + 1] == '\'' || - line[next_equals_sign + 1] == '"') { - char my_quote = line[next_equals_sign + 1]; - size_t next_quote = line.find_first_of(my_quote, next_equals_sign + 2); - if (next_quote == std::string::npos) { // no matching quote was found. - KALDI_WARN << "No matching quote for " << my_quote - << " in config line '" << line << "'"; - return false; - } else { - std::string value(line, next_equals_sign + 2, - next_quote - next_equals_sign - 2); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = next_quote + 1; - continue; - } - } else { - // we want to be able to parse something like "... input=Offset(a, -1) - // foo=bar": in general, config values with spaces in them, even without - // quoting. - - size_t next_next_equals_sign = - line.find_first_of("=", next_equals_sign + 1), - terminating_space = size; - - if (next_next_equals_sign != - std::string::npos) { // found a later equals sign. - size_t preceding_space = - line.find_last_of(" \t", next_next_equals_sign); - if (preceding_space != std::string::npos && - preceding_space > next_equals_sign) - terminating_space = preceding_space; - } - while (isspace(line[terminating_space - 1]) && terminating_space > 0) - terminating_space--; - - std::string value(line, next_equals_sign + 1, - terminating_space - (next_equals_sign + 1)); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = terminating_space; - } - } - return true; -} - -bool ConfigLine::GetValue(const std::string &key, std::string *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - *value = (it->second).first; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, BaseFloat *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToReal((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, int32 *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToInteger((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, std::vector *value) { - KALDI_ASSERT(value != NULL); - value->clear(); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!SplitStringToIntegers((it->second).first, ":,", true, value)) { - // KALDI_WARN << "Bad option " << (it->second).first; - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, bool *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if ((it->second).first.size() == 0) return false; - switch (((it->second).first)[0]) { - case 'F': - case 'f': - *value = false; - break; - case 'T': - case 't': - *value = true; - break; - default: - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool 
ConfigLine::HasUnusedValues() const { - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) return true; - } - return false; -} - -std::string ConfigLine::UnusedValues() const { - std::string unused_str; - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) { - if (unused_str == "") - unused_str = it->first + "=" + (it->second).first; - else - unused_str += " " + it->first + "=" + (it->second).first; - } - } - return unused_str; -} - -// This is like ExpectToken but for two tokens, and it -// will either accept token1 and then token2, or just token2. -// This is useful in Read functions where the first token -// may already have been consumed. -// void ExpectOneOrTwoTokens(std::istream &is, bool binary, -// const std::string &token1, -// const std::string &token2) { -// KALDI_ASSERT(token1 != token2); -// std::string temp; -// ReadToken(is, binary, &temp); -// if (temp == token1) { -// ExpectToken(is, binary, token2); -// } else { -// if (temp != token2) { -// KALDI_ERR << "Expecting token " << token1 << " or " << token2 -// << " but got " << temp; -// } -// } -// } - -bool IsValidName(const std::string &name) { - if (name.size() == 0) return false; - for (size_t i = 0; i < name.size(); i++) { - if (i == 0 && !isalpha(name[i]) && name[i] != '_') return false; - if (!isalnum(name[i]) && name[i] != '_' && name[i] != '-' && name[i] != '.') - return false; - } - return true; -} - -void ReadConfigLines(std::istream &is, std::vector *lines) { - KALDI_ASSERT(lines != NULL); - std::string line; - while (std::getline(is, line)) { - if (line.size() == 0) continue; - size_t start = line.find_first_not_of(" \t"); - size_t end = line.find_first_of('#'); - if (start == std::string::npos || start == end) continue; - end = line.find_last_not_of(" \t", end - 1); - KALDI_ASSERT(end >= start); - lines->push_back(line.substr(start, end - start + 1)); - } -} - -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines) { - config_lines->resize(lines.size()); - for (size_t i = 0; i < lines.size(); i++) { - bool ret = (*config_lines)[i].ParseLine(lines[i]); - if (!ret) { - KALDI_ERR << "Error parsing config line: " << lines[i]; - } - } -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/text-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/text-utils.h deleted file mode 100644 index bc7763c4aff38214d97cbeda3b29c8717dd65318..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/kaldi/util/text-utils.h +++ /dev/null @@ -1,264 +0,0 @@ -// util/text-utils.h - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_TEXT_UTILS_H_ -#define KALDI_UTIL_TEXT_UTILS_H_ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Split a string using any of the single character delimiters. -/// If omit_empty_strings == true, the output will contain any -/// nonempty strings after splitting on any of the -/// characters in the delimiter. If omit_empty_strings == false, -/// the output will contain n+1 strings if there are n characters -/// in the set "delim" within the input string. In this case -/// the empty string is split to a single empty string. -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -/// Joins the elements of a vector of strings into a single string using -/// "delim" as the delimiter. If omit_empty_strings == true, any empty strings -/// in the vector are skipped. A vector of empty strings results in an empty -/// string on the output. -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out); - -/** - \brief Split a string (e.g. 1:2:3) into a vector of integers. - - \param [in] delim String containing a list of characters, any of which - is allowed as a delimiter. - \param [in] omit_empty_strings If true, empty strings between delimiters are - allowed and will not produce an output integer; if false, - instances of characters in 'delim' that are consecutive or - at the start or end of the string would be an error. - You'll normally want this to be true if 'delim' consists - of spaces, and false otherwise. - \param [out] out The output list of integers. -*/ -template -bool SplitStringToIntegers(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false [but - // should probably be true - // if "delim" is spaces]. - std::vector *out) { - KALDI_ASSERT(out != NULL); - KALDI_ASSERT_IS_INTEGER_TYPE(I); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - const char *this_str = split[i].c_str(); - char *end = NULL; - int64 j = 0; - j = KALDI_STRTOLL(this_str, &end); - if (end == this_str || *end != '\0') { - out->clear(); - return false; - } else { - I jI = static_cast(j); - if (static_cast(jI) != j) { - // output type cannot fit this integer. - out->clear(); - return false; - } - (*out)[i] = jI; - } - } - return true; -} - -// This is defined for F = float and double. -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out); - -/// Converts a string into an integer via strtoll and returns false if there was -/// any kind of problem (i.e. the string was not an integer or contained extra -/// non-whitespace junk, or the integer was too large to fit into the type it is -/// being converted into). Only sets *out if everything was OK and it returns -/// true. 
-template -bool ConvertStringToInteger(const std::string &str, Int *out) { - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - const char *this_str = str.c_str(); - char *end = NULL; - errno = 0; - int64 i = KALDI_STRTOLL(this_str, &end); - if (end != this_str) - while (isspace(*end)) end++; - if (end == this_str || *end != '\0' || errno != 0) return false; - Int iInt = static_cast(i); - if (static_cast(iInt) != i || - (i < 0 && !std::numeric_limits::is_signed)) { - return false; - } - *out = iInt; - return true; -} - -/// ConvertStringToReal converts a string into either float or double -/// and returns false if there was any kind of problem (i.e. the string -/// was not a floating point number or contained extra non-whitespace junk). -/// Be careful- this function will successfully read inf's or nan's. -template -bool ConvertStringToReal(const std::string &str, T *out); - -/// Removes the beginning and trailing whitespaces from a string -void Trim(std::string *str); - -/// Removes leading and trailing white space from the string, then splits on the -/// first section of whitespace found (if present), putting the part before the -/// whitespace in "first" and the rest in "rest". If there is no such space, -/// everything that remains after removing leading and trailing whitespace goes -/// in "first". -void SplitStringOnFirstSpace(const std::string &line, std::string *first, - std::string *rest); - -/// Returns true if "token" is nonempty, and all characters are -/// printable and whitespace-free. -bool IsToken(const std::string &token); - -/// Returns true if "line" is free of \n characters and unprintable -/// characters, and does not contain leading or trailing whitespace. -bool IsLine(const std::string &line); - -/** - This function returns true when two text strings are approximately equal, and - false when they are not. The definition of 'equal' is normal string - equality, except that two substrings like "0.31134" and "0.311341" would be - considered equal. 'decimal_places_tolerance' controls how many digits after - the '.' have to match up. - E.g. StringsApproxEqual("hello 0.23 there", "hello 0.24 there", 2) would - return false because there is a difference in the 2nd decimal, but with - an argument of 1 it would return true. - */ -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_check = 2); - -/** - This class is responsible for parsing input like - hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' - baz="a b c d='a b' e" and giving you access to the fields, in this case - - FirstToken() == "hi-there", and key->value pairs: - - xx->yyy, a->"b c", empty->"", f-oo->"Append(bar, sss)", ba_z->"123", - bing->"a b c", baz->"a b c d='a b' e" - - The first token is optional, if the line started with a key-value pair then - FirstValue() will be empty. - - Note: it can parse value fields with space inside them only if they are free - of the '=' character. If values are going to contain the '=' character, you - need to quote them with either single or double quotes. - - Key values may contain -_a-zA-Z0-9, but must begin with a-zA-Z_. - */ -class ConfigLine { - public: - // Tries to parse the line as a config-file line. Returns false - // if it could not for some reason, e.g. parsing failure. In most cases - // prints no warnings; the user should do this. Does not expect comments. - bool ParseLine(const std::string &line); - - // the GetValue functions are overloaded for various types. 
They return true - // if the key exists with value that can be converted to that type, and false - // otherwise. They also mark the key-value pair as having been read. It is - // not an error to read values twice. - bool GetValue(const std::string &key, std::string *value); - bool GetValue(const std::string &key, BaseFloat *value); - bool GetValue(const std::string &key, int32 *value); - // Values may be separated by ":" or by ",". - bool GetValue(const std::string &key, std::vector *value); - bool GetValue(const std::string &key, bool *value); - - bool HasUnusedValues() const; - /// returns e.g. foo=bar xxx=yyy if foo and xxx were not consumed by one - /// of the GetValue() functions. - std::string UnusedValues() const; - - const std::string &FirstToken() const { return first_token_; } - - const std::string WholeLine() { return whole_line_; } - // use default assignment operator and copy constructor. - private: - std::string whole_line_; - // the first token of the line, e.g. if line is - // foo-bar baz=bing - // then first_token_ would be "foo-bar". - std::string first_token_; - - // data_ maps from key to (value, is-this-value-consumed?). - std::map > data_; -}; - -/// This function is like ExpectToken but for two tokens, and it will either -/// accept token1 and then token2, or just token2. This is useful in Read -/// functions where the first token may already have been consumed. -void ExpectOneOrTwoTokens(std::istream &is, bool binary, - const std::string &token1, const std::string &token2); - -/** - This function reads in a config file and *appends* its contents to a vector - of lines; it is responsible for removing comments (anything after '#') and - stripping out any lines that contain only whitespace after comment removal. - */ -void ReadConfigLines(std::istream &is, std::vector *lines); - -/** - This function converts config-lines from a simple sequence of strings - as output by ReadConfigLines(), into a sequence of first-tokens and - name-value pairs. The general format is: - "command-type bar=baz xx=yyy" - etc., although there are subtleties as to what exactly is allowed, see - documentation for class ConfigLine for details. - This function will die if there was a parsing failure. - */ -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines); - -/// Returns true if 'name' would be a valid name for a component or node in a -/// nnet3Nnet. This is a nonempty string beginning with A-Za-z_, and containing -/// only -/// '-', '_', '.', A-Z, a-z, or 0-9. 
-bool IsValidName(const std::string &name); - -} // namespace kaldi - -#endif // KALDI_UTIL_TEXT_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/CPPLINT.cfg b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/CPPLINT.cfg deleted file mode 100644 index 51ff339c18435a6c3a3be03131080d7b8ab8de86..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/CPPLINT.cfg +++ /dev/null @@ -1 +0,0 @@ -exclude_files=.* diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/CMakeLists.txt deleted file mode 100644 index 04051ef5ae46c04a40c1ffccc98c37fa594ad13e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ - -#-DHAVE_CONFIG_H -I./../include -fno-exceptions -funsigned-char -std=c++11 -MT symbol-table.lo -MD -MP -MF .deps/symbol-table.Tpo -c symbol-table.cc -fno-common -DPIC -o .libs/symbol-table.o - -include_directories(./include/) -install(DIRECTORY include/ DESTINATION include/ - FILES_MATCHING PATTERN "*.h") - -add_subdirectory(lib) - -if(HAVE_SCRIPT) - add_subdirectory(script) -endif(HAVE_SCRIPT) - -if(HAVE_BIN) - add_subdirectory(bin) -endif(HAVE_BIN) - -add_subdirectory(extensions) - -if(BUILD_TESTING) - enable_testing() - add_subdirectory(test) -endif(BUILD_TESTING) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/extensions/special/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/extensions/special/CMakeLists.txt deleted file mode 100644 index 9c71b750a72ffe3c2dafde657273361c3dbae409..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/extensions/special/CMakeLists.txt +++ /dev/null @@ -1,67 +0,0 @@ -file(GLOB HEADER_FILES ../../include/fst/extensions/special/*.h) -message(STATUS "${HEADER_FILES}") - -if(HAVE_BIN) - add_executable(fstspecial-bin - ../../bin/fstconvert.cc - ../../bin/fstconvert-main.cc - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ) - - set_target_properties(fstspecial-bin PROPERTIES - FOLDER special/bin - OUTPUT_NAME fstspecial - ) - - target_link_libraries(fstspecial-bin - fstscript - fst - ${CMAKE_DL_LIBS} - ) -endif(HAVE_BIN) - - -add_library(fstspecial - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ${HEADER_FILES} -) - -set_target_properties(fstspecial PROPERTIES - SOVERSION "${SOVERSION}" - FOLDER special -) -target_link_libraries(fstspecial - fst -) - -set(FST_SPECIAL_INSTALL_TARGETS fstspecial) -if(HAVE_BIN) - list(APPEND FST_SPECIAL_INSTALL_TARGETS fstspecial-bin) -endif() - -install(TARGETS ${FST_SPECIAL_INSTALL_TARGETS} - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib -) - -function (add_module _name) - add_library(${ARGV}) - if (TARGET ${_name}) - target_link_libraries(${_name} fst) - set_target_properties(${_name} - PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true - FOLDER special/modules - ) - endif() - - install(TARGETS ${_name} LIBRARY DESTINATION lib/fst) -endfunction() - -add_module(phi-fst MODULE phi-fst.cc) -add_module(rho-fst MODULE rho-fst.cc) -add_module(sigma-fst MODULE sigma-fst.cc) diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/include/fst/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/include/fst/flags.h deleted file mode 100644 index b5ec8ff7416774a0612ae0fe7e008a630b289dd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/include/fst/flags.h +++ /dev/null @@ -1,228 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style flag handling declarations and inline definitions. - -#ifndef FST_LIB_FLAGS_H_ -#define FST_LIB_FLAGS_H_ - -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include "gflags/gflags.h" -#include "glog/logging.h" - -using std::string; - -// FLAGS USAGE: -// -// Definition example: -// -// DEFINE_int32(length, 0, "length"); -// -// This defines variable FLAGS_length, initialized to 0. -// -// Declaration example: -// -// DECLARE_int32(length); -// -// SET_FLAGS() can be used to set flags from the command line -// using, for example, '--length=2'. -// -// ShowUsage() can be used to print out command and flag usage. - -// #define DECLARE_bool(name) extern bool FLAGS_ ## name -// #define DECLARE_string(name) extern string FLAGS_ ## name -// #define DECLARE_int32(name) extern int32 FLAGS_ ## name -// #define DECLARE_int64(name) extern int64 FLAGS_ ## name -// #define DECLARE_double(name) extern double FLAGS_ ## name - -template -struct FlagDescription { - FlagDescription(T *addr, const char *doc, const char *type, - const char *file, const T val) - : address(addr), - doc_string(doc), - type_name(type), - file_name(file), - default_value(val) {} - - T *address; - const char *doc_string; - const char *type_name; - const char *file_name; - const T default_value; -}; - -template -class FlagRegister { - public: - static FlagRegister *GetRegister() { - static auto reg = new FlagRegister; - return reg; - } - - const FlagDescription &GetFlagDescription(const string &name) const { - fst::MutexLock l(&flag_lock_); - auto it = flag_table_.find(name); - return it != flag_table_.end() ? 
it->second : 0; - } - - void SetDescription(const string &name, - const FlagDescription &desc) { - fst::MutexLock l(&flag_lock_); - flag_table_.insert(make_pair(name, desc)); - } - - bool SetFlag(const string &val, bool *address) const { - if (val == "true" || val == "1" || val.empty()) { - *address = true; - return true; - } else if (val == "false" || val == "0") { - *address = false; - return true; - } - else { - return false; - } - } - - bool SetFlag(const string &val, string *address) const { - *address = val; - return true; - } - - bool SetFlag(const string &val, int32 *address) const { - char *p = 0; - *address = strtol(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, int64 *address) const { - char *p = 0; - *address = strtoll(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, double *address) const { - char *p = 0; - *address = strtod(val.c_str(), &p); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &arg, const string &val) const { - for (typename std::map< string, FlagDescription >::const_iterator it = - flag_table_.begin(); - it != flag_table_.end(); - ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - if (arg == name) - return SetFlag(val, desc.address); - } - return false; - } - - void GetUsage(std::set> *usage_set) const { - for (auto it = flag_table_.begin(); it != flag_table_.end(); ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - string usage = " --" + name; - usage += ": type = "; - usage += desc.type_name; - usage += ", default = "; - usage += GetDefault(desc.default_value) + "\n "; - usage += desc.doc_string; - usage_set->insert(make_pair(desc.file_name, usage)); - } - } - - private: - string GetDefault(bool default_value) const { - return default_value ? "true" : "false"; - } - - string GetDefault(const string &default_value) const { - return "\"" + default_value + "\""; - } - - template - string GetDefault(const V &default_value) const { - std::ostringstream strm; - strm << default_value; - return strm.str(); - } - - mutable fst::Mutex flag_lock_; // Multithreading lock. - std::map> flag_table_; -}; - -template -class FlagRegisterer { - public: - FlagRegisterer(const string &name, const FlagDescription &desc) { - auto registr = FlagRegister::GetRegister(); - registr->SetDescription(name, desc); - } - - private: - FlagRegisterer(const FlagRegisterer &) = delete; - FlagRegisterer &operator=(const FlagRegisterer &) = delete; -}; - - -#define DEFINE_VAR(type, name, value, doc) \ - type FLAGS_ ## name = value; \ - static FlagRegisterer \ - name ## _flags_registerer(#name, FlagDescription(&FLAGS_ ## name, \ - doc, \ - #type, \ - __FILE__, \ - value)) - -// #define DEFINE_bool(name, value, doc) DEFINE_VAR(bool, name, value, doc) -// #define DEFINE_string(name, value, doc) \ -// DEFINE_VAR(string, name, value, doc) -// #define DEFINE_int32(name, value, doc) DEFINE_VAR(int32, name, value, doc) -// #define DEFINE_int64(name, value, doc) DEFINE_VAR(int64, name, value, doc) -// #define DEFINE_double(name, value, doc) DEFINE_VAR(double, name, value, doc) - - -// Temporary directory. 
-DECLARE_string(tmpdir); - -void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags, - const char *src = ""); - -#define SET_FLAGS(usage, argc, argv, rmflags) \ -gflags::ParseCommandLineFlags(argc, argv, true) -// SetFlags(usage, argc, argv, rmflags, __FILE__) - -// Deprecated; for backward compatibility. -inline void InitFst(const char *usage, int *argc, char ***argv, bool rmflags) { - return SetFlags(usage, argc, argv, rmflags); -} - -void ShowUsage(bool long_usage = true); - -#endif // FST_LIB_FLAGS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/include/fst/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/include/fst/log.h deleted file mode 100644 index bf041c58ebfab73d03bb14adf28c7c7916a2217d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/patch/openfst/src/include/fst/log.h +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style logging declarations and inline definitions. - -#ifndef FST_LIB_LOG_H_ -#define FST_LIB_LOG_H_ - -#include -#include -#include - -#include -#include - -using std::string; - -DECLARE_int32(v); - -class LogMessage { - public: - LogMessage(const string &type) : fatal_(type == "FATAL") { - std::cerr << type << ": "; - } - ~LogMessage() { - std::cerr << std::endl; - if(fatal_) - exit(1); - } - std::ostream &stream() { return std::cerr; } - - private: - bool fatal_; -}; - -// #define LOG(type) LogMessage(#type).stream() -// #define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO) - -// Checks -inline void FstCheck(bool x, const char* expr, - const char *file, int line) { - if (!x) { - LOG(FATAL) << "Check failed: \"" << expr - << "\" file: " << file - << " line: " << line; - } -} - -// #define CHECK(x) FstCheck(static_cast(x), #x, __FILE__, __LINE__) -// #define CHECK_EQ(x, y) CHECK((x) == (y)) -// #define CHECK_LT(x, y) CHECK((x) < (y)) -// #define CHECK_GT(x, y) CHECK((x) > (y)) -// #define CHECK_LE(x, y) CHECK((x) <= (y)) -// #define CHECK_GE(x, y) CHECK((x) >= (y)) -// #define CHECK_NE(x, y) CHECK((x) != (y)) - -// Debug checks -// #define DCHECK(x) assert(x) -// #define DCHECK_EQ(x, y) DCHECK((x) == (y)) -// #define DCHECK_LT(x, y) DCHECK((x) < (y)) -// #define DCHECK_GT(x, y) DCHECK((x) > (y)) -// #define DCHECK_LE(x, y) DCHECK((x) <= (y)) -// #define DCHECK_GE(x, y) DCHECK((x) >= (y)) -// #define DCHECK_NE(x, y) DCHECK((x) != (y)) - - -// Ports -#define ATTRIBUTE_DEPRECATED __attribute__((deprecated)) - -#endif // FST_LIB_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/post_processor/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/post_processor/CMakeLists.txt deleted file mode 100644 index 
6113bbc26eb8fe35e4e17ffd1cab382f0fb0f1f8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/post_processor/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_library(post_processor STATIC - post_processor.cc -) -target_link_libraries(post_processor PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/post_processor/post_processor.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/post_processor/post_processor.cc deleted file mode 100644 index 315f62d34cbc441ecbaf7c07667eb35ee61c2c8d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/post_processor/post_processor.cc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#include "post_processor/post_processor.h" - -#include -#include - -#include "utils/string.h" - -namespace wenet { - -std::string PostProcessor::ProcessSpace(const std::string& str) { - std::string result = str; - // 1. remove ' ' if needed - // only spaces between mandarin words need to be removed, please note that - // if str contains '_', we assume that the decoding type must be - // `CtcPrefixBeamSearch` and this branch will do nothing since str must be - // obtained via "".join() (in function `AsrDecoder::UpdateResult()`) - if (opts_.language_type == kMandarinEnglish && !str.empty()) { - result.clear(); - // split str by ' ' - std::vector words; - std::stringstream ss(str); - std::string tmp; - while (ss >> tmp) { - words.push_back(tmp); - } - // check english word - bool is_englishword_prev = false; - bool is_englishword_now = false; - for (std::string& w : words) { - is_englishword_now = CheckEnglishWord(w); - if (is_englishword_prev && is_englishword_now) { - result += (' ' + w); - } else { - result += (w); - } - is_englishword_prev = is_englishword_now; - } - } - // 2. 
replace '_' with ' ' - // this should be done for all cases (both kMandarinEnglish and kIndoEuropean) - result = ProcessBlank(result, opts_.lowercase); - return result; -} - -std::string PostProcessor::Process(const std::string& str, bool finish) { - std::string result; - result = ProcessSpace(str); - // TODO(xcsong): do itn/punctuation if finish == true - return result; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/post_processor/post_processor.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/post_processor/post_processor.h deleted file mode 100644 index 54597845ebc88ad22e1244d2e693e2088cff6d21..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/post_processor/post_processor.h +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#ifndef POST_PROCESSOR_POST_PROCESSOR_H_ -#define POST_PROCESSOR_POST_PROCESSOR_H_ - -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -enum LanguageType { - // spaces between **mandarin words** should be removed. - // cases of processing spaces with mandarin-only, english-only - // and mandarin-english code-switch can be found in post_processor_test.cc - kMandarinEnglish = 0x00, - // spaces should be kept for most of the - // Indo-European languages (i.e., deutsch or english-deutsch code-switch). - // cases of those languages can be found in post_processor_test.cc - kIndoEuropean = 0x01 -}; - -struct PostProcessOptions { - // space options - // The decoded result may contain spaces (' ' or '_'), - // we will process those spaces according to language_type. 
More details can - // be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - LanguageType language_type = kMandarinEnglish; - // whether lowercase letters are required - bool lowercase = true; -}; - -// TODO(xcsong): add itn/punctuation related resource -struct PostProcessResource {}; - -// Post Processor -class PostProcessor { - public: - explicit PostProcessor(PostProcessOptions&& opts) : opts_(std::move(opts)) {} - explicit PostProcessor(const PostProcessOptions& opts) : opts_(opts) {} - // call other functions to do post processing - std::string Process(const std::string& str, bool finish); - // process spaces according to configurations - std::string ProcessSpace(const std::string& str); - // TODO(xcsong): add itn/punctuation - // void InverseTN(const std::string& str); - // void Punctuate(const std::string& str); - - private: - const PostProcessOptions opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(PostProcessor); -}; - -} // namespace wenet - -#endif // POST_PROCESSOR_POST_PROCESSOR_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/CMakeLists.txt deleted file mode 100644 index 145654105350e91a5f9121b47197f5fc60663f5c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -link_libraries(gtest_main gmock) - -add_executable(utils_test utils_test.cc) -target_link_libraries(utils_test PUBLIC utils) -add_test(UTILS_TEST utils_test) - -add_executable(ctc_prefix_beam_search_test ctc_prefix_beam_search_test.cc) -target_link_libraries(ctc_prefix_beam_search_test PUBLIC decoder) -add_test(CTC_PREFIX_BEAM_SEARCH_TEST ctc_prefix_beam_search_test) - -add_executable(post_processor_test post_processor_test.cc) -target_link_libraries(post_processor_test PUBLIC post_processor) -add_test(POST_PROCESSOR_TEST post_processor_test) - - -add_executable(feature_pipeline_test feature_pipeline_test.cc) -target_link_libraries(feature_pipeline_test PUBLIC frontend) -add_test(FEATURE_PIPELINE_TEST feature_pipeline_test) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/ctc_prefix_beam_search_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/ctc_prefix_beam_search_test.cc deleted file mode 100644 index d8f3b65693b934beb33f3a770795f0b6e7ce3456..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/ctc_prefix_beam_search_test.cc +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/ctc_prefix_beam_search.h" - -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include "utils/utils.h" - -TEST(CtcPrefixBeamSearchTest, CtcPrefixBeamSearchLogicTest) { - using ::testing::ElementsAre; - // See https://robin1001.github.io/2020/12/11/ctc-search for the - // graph demonstration of the data - std::vector> data = { - {0.25, 0.40, 0.35}, {0.40, 0.35, 0.25}, {0.10, 0.50, 0.40}}; - // Apply log - for (int i = 0; i < data.size(); i++) { - for (int j = 0; j < data[i].size(); j++) { - data[i][j] = std::log(data[i][j]); - } - } - wenet::CtcPrefixBeamSearchOptions option; - option.first_beam_size = 3; - option.second_beam_size = 3; - wenet::CtcPrefixBeamSearch prefix_beam_search(option); - prefix_beam_search.Search(data); - /* Test case info - | top k | result index | prefix score | viterbi score | timestamp | - |-------|--------------|--------------|---------------|-----------| - | top 1 | [2, 1] | 0.2185 | 0.07 | [0, 2] | - | top 2 | [1, 2] | 0.1550 | 0.064 | [0, 2] | - | top 3 | [1] | 0.1525 | 0.07 | [2] | - */ - const std::vector>& result = prefix_beam_search.Outputs(); - EXPECT_EQ(result.size(), 3); - ASSERT_THAT(result[0], ElementsAre(2, 1)); - ASSERT_THAT(result[1], ElementsAre(1, 2)); - ASSERT_THAT(result[2], ElementsAre(1)); - - const std::vector& likelihood = prefix_beam_search.Likelihood(); - EXPECT_EQ(likelihood.size(), 3); - EXPECT_FLOAT_EQ(std::exp(likelihood[0]), 0.2185); - EXPECT_FLOAT_EQ(std::exp(likelihood[1]), 0.1550); - EXPECT_FLOAT_EQ(std::exp(likelihood[2]), 0.1525); - - const std::vector& viterbi_likelihood = - prefix_beam_search.viterbi_likelihood(); - EXPECT_EQ(viterbi_likelihood.size(), 3); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[0]), 0.07); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[1]), 0.064); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[2]), 0.07); - - const std::vector>& times = prefix_beam_search.Times(); - EXPECT_EQ(times.size(), 3); - ASSERT_THAT(times[0], ElementsAre(0, 2)); - ASSERT_THAT(times[1], ElementsAre(0, 2)); - ASSERT_THAT(times[2], ElementsAre(2)); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/feature_pipeline_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/feature_pipeline_test.cc deleted file mode 100644 index 244ec0735b6086211b476e8d97569e1ee5959bc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/feature_pipeline_test.cc +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) 2022 Roney -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include - -#include "frontend/feature_pipeline.h" -#include "utils/blocking_queue.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -void pushQueue(const std::shared_ptr>& que, - std::vector vec) { - que->Push(vec); -} - -void popQueue(const std::shared_ptr>& que, int num, - int back_data) { - auto pop_data = que->Pop(num); - ASSERT_EQ(pop_data[num - 1], back_data); -} - -TEST(FeaturePipelineTest, BlockingQueueTest) { - auto capacity_queue = std::make_shared>(2); - std::vector test_data{1, 2, 3, 4, 5}; - std::thread push_thread(&pushQueue, capacity_queue, test_data); - ASSERT_EQ(capacity_queue->Pop(), 1); - ASSERT_LE(capacity_queue->Size(), 2); // capacity_queue: 2 or 2,3 - auto pop_data = capacity_queue->Pop(3); // 2,3,4 num > capacity - ASSERT_EQ(pop_data.size(), 3); - ASSERT_EQ(pop_data[2], 4); - push_thread.join(); - ASSERT_EQ(capacity_queue->Size(), 1); // capacity_queue:5 - - std::thread pop_thread(&popQueue, capacity_queue, 3, 0); // num > capacity - capacity_queue->Push(9); // capacity_queue:5,9 - capacity_queue->Push(0); // capacity_queue:5,9,0 - pop_thread.join(); // capacity_queue: - ASSERT_EQ(capacity_queue->Size(), 0); - - pop_data = capacity_queue->Pop(0); - ASSERT_TRUE(pop_data.empty()); -} - -TEST(FeaturePipelineTest, PipelineTest) { - wenet::FeaturePipelineConfig config(80, 8000); - wenet::FeaturePipeline feature_pipeline(config); - int audio_len = 8 * 55; // audio len 55ms,4 frames - std::vector pcm(audio_len, 0); - feature_pipeline.AcceptWaveform(pcm.data(), audio_len); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 4); - - std::vector> out_feats; - auto b = feature_pipeline.Read(2, &out_feats); - ASSERT_TRUE(b); - ASSERT_EQ(out_feats.size(), 2); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 2); - - std::vector out_feat; - b = feature_pipeline.ReadOne(&out_feat); - ASSERT_TRUE(b); - ASSERT_FALSE(out_feat.empty()); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 1); - - feature_pipeline.set_input_finished(); - b = feature_pipeline.Read(2, &out_feats); - ASSERT_FALSE(b); - ASSERT_EQ(out_feats.size(), 1); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 0); - - feature_pipeline.AcceptWaveform(pcm.data(), audio_len); - feature_pipeline.Read(2, &out_feats); - feature_pipeline.Reset(); - feature_pipeline.set_input_finished(); - b = feature_pipeline.Read(2, &out_feats); - ASSERT_FALSE(b); - ASSERT_EQ(out_feats.size(), 0); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 0); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/post_processor_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/post_processor_test.cc deleted file mode 100644 index fa11fa29231032d62389a93fd00b0ec782bf8a3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/post_processor_test.cc +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License - -#include "post_processor/post_processor.h" - -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include "utils/utils.h" - -TEST(PostProcessorTest, ProcessSpacekMandarinEnglishTest) { - wenet::PostProcessOptions opts_lowercase; - wenet::PostProcessor post_processor_lowercase(opts_lowercase); - - wenet::PostProcessOptions opts_uppercase; - opts_uppercase.lowercase = false; - wenet::PostProcessor post_processor_uppercase(opts_uppercase); - - std::vector input = { - // modeling unit: mandarin character - // decode type: CtcPrefixBeamSearch, "".join() - "震东好帅", - // modeling unit: mandarin word - // decode type: CtcWfstBeamSearch, " ".join() - " 吴迪 也 好帅", - // modeling unit: english wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "▁binbin▁is▁also▁handsome", - // modeling unit: english word - // decode type: CtcWfstBeamSearch, " ".join() - " life is short i use wenet", - // modeling unit: mandarin character + english wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "超哥▁is▁the▁most▁handsome", - // modeling unit: mandarin word + english word - // decode type: CtcWfstBeamSearch, " ".join() - " 人生 苦短 i use wenet", - }; - - std::vector result_lowercase = { - "震东好帅", - "吴迪也好帅", - "binbin is also handsome", - "life is short i use wenet", - "超哥 is the most handsome", - "人生苦短i use wenet", - }; - - std::vector result_uppercase = { - "震东好帅", - "吴迪也好帅", - "BINBIN IS ALSO HANDSOME", - "LIFE IS SHORT I USE WENET", - "超哥 IS THE MOST HANDSOME", - "人生苦短I USE WENET", - }; - - for (size_t i = 0; i < input.size(); ++i) { - EXPECT_EQ(post_processor_lowercase.ProcessSpace(input[i]), - result_lowercase[i]); - EXPECT_EQ(post_processor_uppercase.ProcessSpace(input[i]), - result_uppercase[i]); - } -} - -TEST(PostProcessorTest, ProcessSpacekIndoEuropeanTest) { - wenet::PostProcessOptions opts_lowercase; - opts_lowercase.language_type = wenet::kIndoEuropean; - wenet::PostProcessor post_processor_lowercase(opts_lowercase); - - wenet::PostProcessOptions opts_uppercase; - opts_uppercase.language_type = wenet::kIndoEuropean; - opts_uppercase.lowercase = false; - wenet::PostProcessor post_processor_uppercase(opts_uppercase); - - std::vector input = { - // modeling unit: wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "▁zhendong▁ist▁so▁schön", - // modeling unit: word - // decode type: CtcWfstBeamSearch, " ".join() - " zhendong ist so schön"}; - - std::vector result_lowercase = {"zhendong ist so schön", - "zhendong ist so schön"}; - - std::vector result_uppercase = {"ZHENDONG IST SO SCHÖN", - "ZHENDONG IST SO SCHÖN"}; - - for (size_t i = 0; i < input.size(); ++i) { - EXPECT_EQ(post_processor_lowercase.ProcessSpace(input[i]), - result_lowercase[i]); - EXPECT_EQ(post_processor_uppercase.ProcessSpace(input[i]), - result_uppercase[i]); - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/utils_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/utils_test.cc deleted file mode 100644 index 6b2bbac25e000ce854d5e55a50cb51109d62d758..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/test/utils_test.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "utils/utils.h" - -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -TEST(UtilsTest, TopKTest) { - using ::testing::ElementsAre; - using ::testing::FloatNear; - using ::testing::Pointwise; - std::vector data = {1, 3, 5, 7, 9, 2, 4, 6, 8, 10}; - std::vector values; - std::vector indices; - wenet::TopK(data, 3, &values, &indices); - EXPECT_THAT(values, Pointwise(FloatNear(1e-8), {10, 9, 8})); - ASSERT_THAT(indices, ElementsAre(9, 4, 8)); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/CMakeLists.txt deleted file mode 100644 index 686362688c050d48224ca0a01e0d24b03d94758a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_library(utils STATIC - string.cc - utils.cc -) - -if(NOT ANDROID) - if(MSVC) - target_link_libraries(utils PUBLIC fst) - else() - target_link_libraries(utils PUBLIC fst dl) - endif() -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/blocking_queue.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/blocking_queue.h deleted file mode 100644 index 9bf0127d9298fbfae2eeebb9431c680fc5dd7647..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/blocking_queue.h +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef UTILS_BLOCKING_QUEUE_H_ -#define UTILS_BLOCKING_QUEUE_H_ - -#include -#include -#include -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -template -class BlockingQueue { - public: - explicit BlockingQueue(size_t capacity = std::numeric_limits::max()) - : capacity_(capacity) {} - - void Push(const T& value) { - { - std::unique_lock lock(mutex_); - while (queue_.size() >= capacity_) { - not_full_condition_.wait(lock); - } - queue_.push(value); - } - not_empty_condition_.notify_one(); - } - - void Push(T&& value) { - { - std::unique_lock lock(mutex_); - while (queue_.size() >= capacity_) { - not_full_condition_.wait(lock); - } - queue_.push(std::move(value)); - } - not_empty_condition_.notify_one(); - } - - void Push(const std::vector& values) { - { - std::unique_lock lock(mutex_); - for (auto& value : values) { - while (queue_.size() >= capacity_) { - not_empty_condition_.notify_one(); - not_full_condition_.wait(lock); - } - queue_.push(value); - } - } - not_empty_condition_.notify_one(); - } - - void Push(std::vector&& values) { - std::unique_lock lock(mutex_); - for (auto& value : values) { - while (queue_.size() >= capacity_) { - not_empty_condition_.notify_one(); - not_full_condition_.wait(lock); - } - queue_.push(std::move(value)); - } - not_empty_condition_.notify_one(); - } - - T Pop() { - std::unique_lock lock(mutex_); - while (queue_.empty()) { - not_empty_condition_.wait(lock); - } - T t(std::move(queue_.front())); - queue_.pop(); - not_full_condition_.notify_one(); - return t; - } - - // num can be greater than capacity,but it needs to be used with care - std::vector Pop(size_t num) { - std::unique_lock lock(mutex_); - std::vector block_data; - while (block_data.size() < num) { - while (queue_.empty()) { - not_full_condition_.notify_one(); - not_empty_condition_.wait(lock); - } - block_data.push_back(std::move(queue_.front())); - queue_.pop(); - } - not_full_condition_.notify_one(); - return block_data; - } - - bool Empty() const { - std::lock_guard lock(mutex_); - return queue_.empty(); - } - - size_t Size() const { - std::lock_guard lock(mutex_); - return queue_.size(); - } - - void Clear() { - while (!Empty()) { - Pop(); - } - } - - private: - size_t capacity_; - mutable std::mutex mutex_; - std::condition_variable not_full_condition_; - std::condition_variable not_empty_condition_; - std::queue queue_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(BlockingQueue); -}; - -} // namespace wenet - -#endif // UTILS_BLOCKING_QUEUE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/file.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/file.h deleted file mode 100644 index 83ad9c8c52fecd334b3549285bf39cd4f59b9f2b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/file.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_FILE_H_ -#define UTILS_FILE_H_ - -#include -#include - -namespace wenet { - -inline bool FileExists(const std::string& path) { - std::ifstream f(path.c_str()); - return f.good(); -} - -} // namespace wenet - -#endif // UTILS_FILE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/flags.h deleted file mode 100644 index 3432aa78847322edec8d6d2aec59ed7ca5352fcd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/flags.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_FLAGS_H_ -#define UTILS_FLAGS_H_ - -// Because openfst is a dynamic library compiled with gflags/glog, we must use -// the gflags/glog from openfst to avoid them linked both statically and -// dynamically into the executable. -#include "fst/flags.h" - -#endif // UTILS_FLAGS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/json.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/json.h deleted file mode 100644 index bf8d94a3e42504139b10daa39b8f8e7a8b2d93cc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/json.h +++ /dev/null @@ -1,754 +0,0 @@ -// Copyright (c) From https://github.com/nbsdx/SimpleJSON -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef UTILS_JSON_H_ -#define UTILS_JSON_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace json { - -using std::deque; -using std::enable_if; -using std::initializer_list; -using std::is_convertible; -using std::is_floating_point; -using std::is_integral; -using std::is_same; -using std::map; -using std::string; - -namespace { // NOLINT -string json_escape(const string& str) { - string output; - for (unsigned i = 0; i < str.length(); ++i) switch (str[i]) { - case '\"': - output += "\\\""; - break; - case '\\': - output += "\\\\"; - break; - case '\b': - output += "\\b"; - break; - case '\f': - output += "\\f"; - break; - case '\n': - output += "\\n"; - break; - case '\r': - output += "\\r"; - break; - case '\t': - output += "\\t"; - break; - default: - output += str[i]; - break; - } - return std::move(output); -} -} // namespace - -class JSON { - union BackingData { - BackingData(double d) : Float(d) {} - BackingData(int l) : Int(l) {} - BackingData(bool b) : Bool(b) {} - BackingData(string s) : String(new string(s)) {} - BackingData() : Int(0) {} - - deque* List; - map* Map; - string* String; - double Float; - int Int; - bool Bool; - } Internal; - - public: - enum class Class { Null, Object, Array, String, Floating, Integral, Boolean }; - - template - class JSONWrapper { - Container* object; - - public: - explicit JSONWrapper(Container* val) : object(val) {} - explicit JSONWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::iterator begin() { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::iterator end() { - return object ? object->end() : typename Container::iterator(); - } - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::const_iterator end() const { - return object ? object->end() : typename Container::iterator(); - } - }; - - template - class JSONConstWrapper { - const Container* object; - - public: - explicit JSONConstWrapper(const Container* val) : object(val) {} - explicit JSONConstWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::const_iterator(); - } - typename Container::const_iterator end() const { - return object ? 
object->end() : typename Container::const_iterator(); - } - }; - - JSON() : Internal(), Type(Class::Null) {} - - explicit JSON(initializer_list list) : JSON() { - SetType(Class::Object); - for (auto i = list.begin(), e = list.end(); i != e; ++i, ++i) - operator[](i->ToString()) = *std::next(i); - } - - JSON(JSON&& other) : Internal(other.Internal), Type(other.Type) { - other.Type = Class::Null; - other.Internal.Map = nullptr; - } - - JSON& operator=(JSON&& other) { - ClearInternal(); - Internal = other.Internal; - Type = other.Type; - other.Internal.Map = nullptr; - other.Type = Class::Null; - return *this; - } - - JSON(const JSON& other) { - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - } - - JSON& operator=(const JSON& other) { - ClearInternal(); - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - return *this; - } - - ~JSON() { - switch (Type) { - case Class::Array: - delete Internal.List; - break; - case Class::Object: - delete Internal.Map; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - template - explicit JSON(T b, typename enable_if::value>::type* = 0) - : Internal(b), Type(Class::Boolean) {} - - template - explicit JSON(T i, typename enable_if::value && - !is_same::value>::type* = 0) - : Internal(static_cast(i)), Type(Class::Integral) {} - - template - explicit JSON(T f, typename enable_if::value>::type* = 0) - : Internal(static_cast(f)), Type(Class::Floating) {} - - template - explicit JSON(T s, - typename enable_if::value>::type* = 0) - : Internal(string(s)), Type(Class::String) {} - - explicit JSON(std::nullptr_t) : Internal(), Type(Class::Null) {} - - static JSON Make(Class type) { - JSON ret; - ret.SetType(type); - return ret; - } - - static JSON Load(const string&); - - template - void append(T arg) { - SetType(Class::Array); - Internal.List->emplace_back(arg); - } - - template - void append(T arg, U... 
args) { - append(arg); - append(args...); - } - - template - typename enable_if::value, JSON&>::type operator=(T b) { - SetType(Class::Boolean); - Internal.Bool = b; - return *this; - } - - template - typename enable_if::value && !is_same::value, - JSON&>::type - operator=(T i) { - SetType(Class::Integral); - Internal.Int = i; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=(T f) { - SetType(Class::Floating); - Internal.Float = f; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=( - T s) { - SetType(Class::String); - *Internal.String = string(s); - return *this; - } - - JSON& operator[](const string& key) { - SetType(Class::Object); - return Internal.Map->operator[](key); - } - - JSON& operator[](unsigned index) { - SetType(Class::Array); - if (index >= Internal.List->size()) Internal.List->resize(index + 1); - return Internal.List->operator[](index); - } - - JSON& at(const string& key) { return operator[](key); } - - const JSON& at(const string& key) const { return Internal.Map->at(key); } - - JSON& at(unsigned index) { return operator[](index); } - - const JSON& at(unsigned index) const { return Internal.List->at(index); } - - int length() const { - if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - bool hasKey(const string& key) const { - if (Type == Class::Object) - return Internal.Map->find(key) != Internal.Map->end(); - return false; - } - - int size() const { - if (Type == Class::Object) - return Internal.Map->size(); - else if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - Class JSONType() const { return Type; } - - /// Functions for getting primitives from the JSON object. - bool IsNull() const { return Type == Class::Null; } - - string ToString() const { - bool b; - return std::move(ToString(&b)); - } - string ToString(bool* ok) const { - *ok = (Type == Class::String); - return *ok ? std::move(json_escape(*Internal.String)) : string(""); - } - - double ToFloat() const { - bool b; - return ToFloat(&b); - } - double ToFloat(bool* ok) const { - *ok = (Type == Class::Floating); - return *ok ? Internal.Float : 0.0; - } - - int ToInt() const { - bool b; - return ToInt(&b); - } - int ToInt(bool* ok) const { - *ok = (Type == Class::Integral); - return *ok ? Internal.Int : 0; - } - - bool ToBool() const { - bool b; - return ToBool(&b); - } - bool ToBool(bool* ok) const { - *ok = (Type == Class::Boolean); - return *ok ? 
Internal.Bool : false; - } - - JSONWrapper> ObjectRange() { - if (Type == Class::Object) - return JSONWrapper>(Internal.Map); - return JSONWrapper>(nullptr); - } - - JSONWrapper> ArrayRange() { - if (Type == Class::Array) return JSONWrapper>(Internal.List); - return JSONWrapper>(nullptr); - } - - JSONConstWrapper> ObjectRange() const { - if (Type == Class::Object) - return JSONConstWrapper>(Internal.Map); - return JSONConstWrapper>(nullptr); - } - - JSONConstWrapper> ArrayRange() const { - if (Type == Class::Array) - return JSONConstWrapper>(Internal.List); - return JSONConstWrapper>(nullptr); - } - - string dump(int depth = 1, string tab = " ") const { - string pad = ""; - for (int i = 0; i < depth; ++i, pad += tab) { - } - - switch (Type) { - case Class::Null: - return "null"; - case Class::Object: { - string s = "{\n"; - bool skip = true; - for (auto& p : *Internal.Map) { - if (!skip) s += ",\n"; - s += (pad + "\"" + p.first + "\" : " + p.second.dump(depth + 1, tab)); - skip = false; - } - s += ("\n" + pad.erase(0, 2) + "}"); - return s; - } - case Class::Array: { - string s = "["; - bool skip = true; - for (auto& p : *Internal.List) { - if (!skip) s += ", "; - s += p.dump(depth + 1, tab); - skip = false; - } - s += "]"; - return s; - } - case Class::String: - return "\"" + json_escape(*Internal.String) + "\""; - case Class::Floating: - return std::to_string(Internal.Float); - case Class::Integral: - return std::to_string(Internal.Int); - case Class::Boolean: - return Internal.Bool ? "true" : "false"; - default: - return ""; - } - return ""; - } - - friend std::ostream& operator<<(std::ostream&, const JSON&); - - private: - void SetType(Class type) { - if (type == Type) return; - - ClearInternal(); - - switch (type) { - case Class::Null: - Internal.Map = nullptr; - break; - case Class::Object: - Internal.Map = new map(); - break; - case Class::Array: - Internal.List = new deque(); - break; - case Class::String: - Internal.String = new string(); - break; - case Class::Floating: - Internal.Float = 0.0; - break; - case Class::Integral: - Internal.Int = 0; - break; - case Class::Boolean: - Internal.Bool = false; - break; - } - - Type = type; - } - - private: - /* beware: only call if YOU know that Internal is allocated. No checks - performed here. This function should be called in a constructed JSON just - before you are going to overwrite Internal... -*/ - void ClearInternal() { - switch (Type) { - case Class::Object: - delete Internal.Map; - break; - case Class::Array: - delete Internal.List; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - private: - Class Type = Class::Null; -}; - -JSON Array() { return std::move(JSON::Make(JSON::Class::Array)); } - -template -JSON Array(T... 
args) { - JSON arr = JSON::Make(JSON::Class::Array); - arr.append(args...); - return std::move(arr); -} - -JSON Object() { return std::move(JSON::Make(JSON::Class::Object)); } - -std::ostream& operator<<(std::ostream& os, const JSON& json) { - os << json.dump(); - return os; -} - -namespace { // NOLINT -JSON parse_next(const string&, size_t&); - -void consume_ws(const string& str, size_t& offset) { // NOLINT - while (isspace(str[offset])) ++offset; -} - -JSON parse_object(const string& str, size_t& offset) { // NOLINT - JSON Object = JSON::Make(JSON::Class::Object); - - ++offset; - consume_ws(str, offset); - if (str[offset] == '}') { - ++offset; - return std::move(Object); - } - - while (true) { - JSON Key = parse_next(str, offset); - consume_ws(str, offset); - if (str[offset] != ':') { - std::cerr << "Error: Object: Expected colon, found '" << str[offset] - << "'\n"; - break; - } - consume_ws(str, ++offset); - JSON Value = parse_next(str, offset); - Object[Key.ToString()] = Value; - - consume_ws(str, offset); - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == '}') { - ++offset; - break; - } else { - std::cerr << "ERROR: Object: Expected comma, found '" << str[offset] - << "'\n"; - break; - } - } - - return std::move(Object); -} - -JSON parse_array(const string& str, size_t& offset) { // NOLINT - JSON Array = JSON::Make(JSON::Class::Array); - unsigned index = 0; - - ++offset; - consume_ws(str, offset); - if (str[offset] == ']') { - ++offset; - return std::move(Array); - } - - while (true) { - Array[index++] = parse_next(str, offset); - consume_ws(str, offset); - - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == ']') { - ++offset; - break; - } else { - std::cerr << "ERROR: Array: Expected ',' or ']', found '" << str[offset] - << "'\n"; - return std::move(JSON::Make(JSON::Class::Array)); - } - } - - return std::move(Array); -} - -JSON parse_string(const string& str, size_t& offset) { // NOLINT - JSON String; - string val; - for (char c = str[++offset]; c != '\"'; c = str[++offset]) { - if (c == '\\') { - switch (str[++offset]) { - case '\"': - val += '\"'; - break; - case '\\': - val += '\\'; - break; - case '/': - val += '/'; - break; - case 'b': - val += '\b'; - break; - case 'f': - val += '\f'; - break; - case 'n': - val += '\n'; - break; - case 'r': - val += '\r'; - break; - case 't': - val += '\t'; - break; - case 'u': { - val += "\\u"; - for (unsigned i = 1; i <= 4; ++i) { - c = str[offset + i]; - if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || - (c >= 'A' && c <= 'F')) { - val += c; - } else { - std::cerr << "ERROR: String: Expected hex character in unicode " - "escape, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::String)); - } - } - offset += 4; - } break; - default: - val += '\\'; - break; - } - } else { - val += c; - } - } - ++offset; - String = val; - return std::move(String); -} - -JSON parse_number(const string& str, size_t& offset) { // NOLINT - JSON Number; - string val, exp_str; - char c; - bool isDouble = false; - int exp = 0; - while (true) { - c = str[offset++]; - if ((c == '-') || (c >= '0' && c <= '9')) { - val += c; - } else if (c == '.') { - val += c; - isDouble = true; - } else { - break; - } - } - if (c == 'E' || c == 'e') { - c = str[offset++]; - if (c == '-') { - ++offset; - exp_str += '-'; - } - while (true) { - c = str[offset++]; - if (c >= '0' && c <= '9') { - exp_str += c; - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: 
Expected a number for exponent, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } else { - break; - } - } - exp = std::stol(exp_str); - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: unexpected character '" << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - --offset; - - if (isDouble) { - Number = std::stod(val) * std::pow(10, exp); - } else { - if (!exp_str.empty()) - Number = std::stol(val) * std::pow(10, exp); - else - Number = std::stol(val); - } - return std::move(Number); -} - -JSON parse_bool(const string& str, size_t& offset) { // NOLINT - JSON Bool; - if (str.substr(offset, 4) == "true") { - Bool = true; - } else if (str.substr(offset, 5) == "false") { - Bool = false; - } else { - std::cerr << "ERROR: Bool: Expected 'true' or 'false', found '" - << str.substr(offset, 5) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += (Bool.ToBool() ? 4 : 5); - return std::move(Bool); -} - -JSON parse_null(const string& str, size_t& offset) { // NOLINT - JSON Null; - if (str.substr(offset, 4) != "null") { - std::cerr << "ERROR: Null: Expected 'null', found '" - << str.substr(offset, 4) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += 4; - return std::move(Null); -} - -JSON parse_next(const string& str, size_t& offset) { // NOLINT - char value; - consume_ws(str, offset); - value = str[offset]; - switch (value) { - case '[': - return std::move(parse_array(str, offset)); - case '{': - return std::move(parse_object(str, offset)); - case '\"': - return std::move(parse_string(str, offset)); - case 't': - case 'f': - return std::move(parse_bool(str, offset)); - case 'n': - return std::move(parse_null(str, offset)); - default: - if ((value <= '9' && value >= '0') || value == '-') - return std::move(parse_number(str, offset)); - } - std::cerr << "ERROR: Parse: Unknown starting character '" << value << "'\n"; - return JSON(); -} -} // namespace - -JSON JSON::Load(const string& str) { - size_t offset = 0; - return std::move(parse_next(str, offset)); -} - -} // namespace json - -#endif // UTILS_JSON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/log.h deleted file mode 100644 index c2bf03f261a8711f74da819d80d68e8eb9fb124a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/log.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_LOG_H_ -#define UTILS_LOG_H_ - -// Because openfst is a dynamic library compiled with gflags/glog, we must use -// the gflags/glog from openfst to avoid them linked both statically and -// dynamically into the executable. 
-#include "fst/log.h" - -#endif // UTILS_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/string.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/string.cc deleted file mode 100644 index 1ab93adf3cac1bc5a42c0b8c6cadbde399678fef..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/string.cc +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "utils/string.h" - -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -void SplitString(const std::string& str, std::vector* strs) { - SplitStringToVector(Trim(str), " \t", true, strs); -} - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars) { - chars->clear(); - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - assert((str[i] & 0xF8) <= 0xF0); - if ((str[i] & 0x80) == 0x00) { - // The first 128 characters (US-ASCII) in UTF-8 format only need one byte. - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - // The next 1,920 characters need two bytes to encode, - // which covers the remainder of almost all Latin-script alphabets. - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - // Three bytes are needed for characters in the rest of - // the Basic Multilingual Plane, which contains virtually all characters - // in common use, including most Chinese, Japanese and Korean characters. - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - // Four bytes are needed for characters in the other planes of Unicode, - // which include less common CJK characters, various historic scripts, - // mathematical symbols, and emoji (pictographic symbols). 
- bytes = 4; - } - chars->push_back(str.substr(i, bytes)); - } -} - -int UTF8StringLength(const std::string& str) { - int len = 0; - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - if ((str[i] & 0x80) == 0x00) { - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - bytes = 4; - } - ++len; - } - return len; -} - -bool CheckEnglishChar(const std::string& ch) { - // all english characters should be encoded in one byte - if (ch.size() != 1) return false; - // english words may contain apostrophe, i.e., "He's" - return isalpha(ch[0]) || ch[0] == '\''; -} - -bool CheckEnglishWord(const std::string& word) { - std::vector chars; - SplitUTF8StringToChars(word, &chars); - for (size_t k = 0; k < chars.size(); k++) { - if (!CheckEnglishChar(chars[k])) { - return false; - } - } - return true; -} - -std::string JoinString(const std::string& c, - const std::vector& strs) { - std::string result; - if (strs.size() > 0) { - for (int i = 0; i < strs.size() - 1; i++) { - result += (strs[i] + c); - } - result += strs.back(); - } - return result; -} - -bool IsAlpha(const std::string& str) { - for (size_t i = 0; i < str.size(); i++) { - if (!isalpha(str[i])) { - return false; - } - } - return true; -} - -std::string ProcessBlank(const std::string& str, bool lowercase) { - std::string result; - if (!str.empty()) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - for (std::string& ch : chars) { - if (ch != kSpaceSymbol) { - result.append(ch); - } else { - // Ignore consecutive space or located in head - if (!result.empty() && result.back() != ' ') { - result.push_back(' '); - } - } - } - // Ignore tailing space - if (!result.empty() && result.back() == ' ') { - result.pop_back(); - } - // NOTE: convert string to wstring - // see issue 745: https://github.com/wenet-e2e/wenet/issues/745 - std::locale loc(""); - std::wstring_convert, wchar_t> converter; - std::wstring wsresult = converter.from_bytes(result); - for (auto& c : wsresult) { - c = lowercase ? tolower(c, loc) : toupper(c, loc); - } - result = converter.to_bytes(wsresult); - } - return result; -} - -std::string Ltrim(const std::string& str) { - size_t start = str.find_first_not_of(WHITESPACE); - return (start == std::string::npos) ? "" : str.substr(start); -} - -std::string Rtrim(const std::string& str) { - size_t end = str.find_last_not_of(WHITESPACE); - return (end == std::string::npos) ? 
"" : str.substr(0, end + 1); -} - -std::string Trim(const std::string& str) { return Rtrim(Ltrim(str)); } - -std::string JoinPath(const std::string& left, const std::string& right) { - std::string path(left); - if (path.size() && path.back() != '/') { - path.push_back('/'); - } - path.append(right); - return path; -} - -#ifdef _MSC_VER -std::wstring ToWString(const std::string& str) { - unsigned len = str.size() * 2; - setlocale(LC_CTYPE, ""); - wchar_t* p = new wchar_t[len]; - mbstowcs(p, str.c_str(), len); - std::wstring wstr(p); - delete[] p; - return wstr; -} -#endif - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/string.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/string.h deleted file mode 100644 index bf7a52ae09bce45ab7e34a5277652d7ae91bae1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/string.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_STRING_H_ -#define UTILS_STRING_H_ - -#include -#include -#include -#include -#include - -#include "fst/symbol-table.h" - -namespace wenet { - -const char WHITESPACE[] = " \n\r\t\f\v"; - -// Split the string with space or tab. -void SplitString(const std::string& str, std::vector* strs); - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out); - -// NOTE(Xingchen Song): we add this function to make it possible to -// support multilingual recipe in the future, in which characters of -// different languages are all encoded in UTF-8 format. -// UTF-8 REF: https://en.wikipedia.org/wiki/UTF-8#Encoding -// Split the UTF-8 string into chars. -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars); - -int UTF8StringLength(const std::string& str); - -// Check whether the UTF-8 char is alphabet or '. -bool CheckEnglishChar(const std::string& ch); - -// Check whether the UTF-8 word is only contains alphabet or '. -bool CheckEnglishWord(const std::string& word); - -std::string JoinString(const std::string& c, - const std::vector& strs); - -bool IsAlpha(const std::string& str); - -// Split the UTF-8 string into words by symbol table. -// Return whether not contains oov. -bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - -// Replace ▁ with space, then remove head, tail and consecutive space. 
-std::string ProcessBlank(const std::string& str, bool lowercase); - -std::string Ltrim(const std::string& str); - -std::string Rtrim(const std::string& str); - -std::string Trim(const std::string& str); - -std::string JoinPath(const std::string& left, const std::string& right); - -#ifdef _MSC_VER -std::wstring ToWString(const std::string& str); -#endif - -} // namespace wenet - -#endif // UTILS_STRING_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/thread_pool.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/thread_pool.h deleted file mode 100644 index a78162995d90bf079ad091cf14cb9f2cd4476d05..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/thread_pool.h +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2012 Jakob Progsch, Václav Zeman - -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. - -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: - -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. - -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. - -// 3. This notice may not be removed or altered from any source -// distribution. - -#ifndef UTILS_THREAD_POOL_H_ -#define UTILS_THREAD_POOL_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -class ThreadPool { - public: - explicit ThreadPool(size_t); - template - auto enqueue(F&& f, Args&&... args) - -> std::future::type>; - ~ThreadPool(); - - private: - // need to keep track of threads so we can join them - std::vector workers; - // the task queue - std::queue > tasks; - - // synchronization - std::mutex queue_mutex; - std::condition_variable condition; - bool stop; -}; - -// the constructor just launches some amount of workers -inline ThreadPool::ThreadPool(size_t threads) : stop(false) { - for (size_t i = 0; i < threads; ++i) - workers.emplace_back([this] { - for (;;) { - std::function task; - - { - std::unique_lock lock(this->queue_mutex); - this->condition.wait( - lock, [this] { return this->stop || !this->tasks.empty(); }); - if (this->stop && this->tasks.empty()) return; - task = std::move(this->tasks.front()); - this->tasks.pop(); - } - - task(); - } - }); -} - -// add new work item to the pool -template -auto ThreadPool::enqueue(F&& f, Args&&... 
args) - -> std::future::type> { - using return_type = typename std::result_of::type; - - auto task = std::make_shared >( - std::bind(std::forward(f), std::forward(args)...)); - - std::future res = task->get_future(); - { - std::unique_lock lock(queue_mutex); - - // don't allow enqueueing after stopping the pool - if (stop) { - throw std::runtime_error("enqueue on stopped ThreadPool"); - } - - tasks.emplace([task]() { (*task)(); }); - } - condition.notify_one(); - return res; -} - -// the destructor joins all threads -inline ThreadPool::~ThreadPool() { - { - std::unique_lock lock(queue_mutex); - stop = true; - } - condition.notify_all(); - for (std::thread& worker : workers) { - worker.join(); - } -} - -#endif // UTILS_THREAD_POOL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/timer.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/timer.h deleted file mode 100644 index 068519f98d140ba0eef68babe2ad2fdcb798c074..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/timer.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_TIMER_H_ -#define UTILS_TIMER_H_ - -#include - -namespace wenet { - -class Timer { - public: - Timer() : time_start_(std::chrono::steady_clock::now()) {} - void Reset() { time_start_ = std::chrono::steady_clock::now(); } - // return int in milliseconds - int Elapsed() const { - auto time_now = std::chrono::steady_clock::now(); - return std::chrono::duration_cast(time_now - - time_start_) - .count(); - } - - private: - std::chrono::time_point time_start_; -}; -} // namespace wenet - -#endif // UTILS_TIMER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/utils.cc deleted file mode 100644 index c37e36c6e9f629e0a4b11cf21a791aefd58b659f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/utils.cc +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "utils/utils.h" - -#include -#include -#include -#include -#include -#include - -#include "utils/log.h" - -namespace wenet { - -float LogAdd(float x, float y) { - static float num_min = -std::numeric_limits::max(); - if (x <= num_min) return y; - if (y <= num_min) return x; - float xmax = std::max(x, y); - return std::log(std::exp(x - xmax) + std::exp(y - xmax)) + xmax; -} - -template -struct ValueComp { - bool operator()(const std::pair& lhs, - const std::pair& rhs) const { - return lhs.first > rhs.first || - (lhs.first == rhs.first && lhs.second < rhs.second); - } -}; - -// We refer the pytorch topk implementation -// https://github.com/pytorch/pytorch/blob/master/caffe2/operators/top_k.cc -template -void TopK(const std::vector& data, int32_t k, std::vector* values, - std::vector* indices) { - std::vector> heap_data; - int n = data.size(); - for (int32_t i = 0; i < k && i < n; ++i) { - heap_data.emplace_back(data[i], i); - } - std::priority_queue, std::vector>, - ValueComp> - pq(ValueComp(), std::move(heap_data)); - for (int32_t i = k; i < n; ++i) { - if (pq.top().first < data[i]) { - pq.pop(); - pq.emplace(data[i], i); - } - } - - values->resize(std::min(k, n)); - indices->resize(std::min(k, n)); - int32_t cur = values->size() - 1; - while (!pq.empty()) { - const auto& item = pq.top(); - (*values)[cur] = item.first; - (*indices)[cur] = item.second; - pq.pop(); - cur -= 1; - } -} - -template void TopK(const std::vector& data, int32_t k, - std::vector* values, - std::vector* indices); - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/utils.h deleted file mode 100644 index f9957c0b6e8ae27d9260e75cf55e786055827801..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/utils/utils.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef UTILS_UTILS_H_ -#define UTILS_UTILS_H_ - -#include -#include -#include - -namespace wenet { - -#define WENET_DISALLOW_COPY_AND_ASSIGN(Type) \ - Type(const Type&) = delete; \ - Type& operator=(const Type&) = delete; - -const float kFloatMax = std::numeric_limits::max(); -// kSpaceSymbol in UTF-8 is: ▁ -const char kSpaceSymbol[] = "\xe2\x96\x81"; - -// Return the sum of two probabilities in log scale -float LogAdd(float x, float y); - -template -void TopK(const std::vector& data, int32_t k, std::vector* values, - std::vector* indices); - -} // namespace wenet - -#endif // UTILS_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/app.py b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/app.py deleted file mode 100644 index b880cf7ff41509bfd618cdcc26bd402123af2236..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/app.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: zhendong.peng@mobvoi.com (Zhendong Peng) - -import argparse - -from flask import Flask, render_template - -parser = argparse.ArgumentParser(description='training your network') -parser.add_argument('--port', default=19999, type=int, help='port id') -args = parser.parse_args() - -app = Flask(__name__) - -@app.route('/') -def index(): - return render_template('index.html') - -if __name__ == '__main__': - app.run(host='0.0.0.0', port=args.port, debug=True) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/css/font-awesome.min.css b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/css/font-awesome.min.css deleted file mode 100644 index 540440ce89f2a408aa699b65100e18f15e0f09ca..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/css/font-awesome.min.css +++ /dev/null @@ -1,4 +0,0 @@ -/*! 
- * Font Awesome 4.7.0 by @davegandy - http://fontawesome.io - @fontawesome - * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) - */@font-face{font-family:'FontAwesome';src:url('../fonts/fontawesome-webfont.eot?v=4.7.0');src:url('../fonts/fontawesome-webfont.eot?#iefix&v=4.7.0') format('embedded-opentype'),url('../fonts/fontawesome-webfont.woff2?v=4.7.0') format('woff2'),url('../fonts/fontawesome-webfont.woff?v=4.7.0') format('woff'),url('../fonts/fontawesome-webfont.ttf?v=4.7.0') format('truetype'),url('../fonts/fontawesome-webfont.svg?v=4.7.0#fontawesomeregular') format('svg');font-weight:normal;font-style:normal}.fa{display:inline-block;font:normal normal normal 14px/1 FontAwesome;font-size:inherit;text-rendering:auto;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.fa-lg{font-size:1.33333333em;line-height:.75em;vertical-align:-15%}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-fw{width:1.28571429em;text-align:center}.fa-ul{padding-left:0;margin-left:2.14285714em;list-style-type:none}.fa-ul>li{position:relative}.fa-li{position:absolute;left:-2.14285714em;width:2.14285714em;top:.14285714em;text-align:center}.fa-li.fa-lg{left:-1.85714286em}.fa-border{padding:.2em .25em .15em;border:solid .08em #eee;border-radius:.1em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa.fa-pull-left{margin-right:.3em}.fa.fa-pull-right{margin-left:.3em}.pull-right{float:right}.pull-left{float:left}.fa.pull-left{margin-right:.3em}.fa.pull-right{margin-left:.3em}.fa-spin{-webkit-animation:fa-spin 2s infinite linear;animation:fa-spin 2s infinite linear}.fa-pulse{-webkit-animation:fa-spin 1s infinite steps(8);animation:fa-spin 1s infinite steps(8)}@-webkit-keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.fa-rotate-90{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";-webkit-transform:rotate(90deg);-ms-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";-webkit-transform:rotate(180deg);-ms-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";-webkit-transform:rotate(270deg);-ms-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";-webkit-transform:scale(-1, 1);-ms-transform:scale(-1, 1);transform:scale(-1, 1)}.fa-flip-vertical{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)";-webkit-transform:scale(1, -1);-ms-transform:scale(1, -1);transform:scale(1, -1)}:root .fa-rotate-90,:root .fa-rotate-180,:root .fa-rotate-270,:root .fa-flip-horizontal,:root 
.fa-flip-vertical{filter:none}.fa-stack{position:relative;display:inline-block;width:2em;height:2em;line-height:2em;vertical-align:middle}.fa-stack-1x,.fa-stack-2x{position:absolute;left:0;width:100%;text-align:center}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-glass:before{content:"\f000"}.fa-music:before{content:"\f001"}.fa-search:before{content:"\f002"}.fa-envelope-o:before{content:"\f003"}.fa-heart:before{content:"\f004"}.fa-star:before{content:"\f005"}.fa-star-o:before{content:"\f006"}.fa-user:before{content:"\f007"}.fa-film:before{content:"\f008"}.fa-th-large:before{content:"\f009"}.fa-th:before{content:"\f00a"}.fa-th-list:before{content:"\f00b"}.fa-check:before{content:"\f00c"}.fa-remove:before,.fa-close:before,.fa-times:before{content:"\f00d"}.fa-search-plus:before{content:"\f00e"}.fa-search-minus:before{content:"\f010"}.fa-power-off:before{content:"\f011"}.fa-signal:before{content:"\f012"}.fa-gear:before,.fa-cog:before{content:"\f013"}.fa-trash-o:before{content:"\f014"}.fa-home:before{content:"\f015"}.fa-file-o:before{content:"\f016"}.fa-clock-o:before{content:"\f017"}.fa-road:before{content:"\f018"}.fa-download:before{content:"\f019"}.fa-arrow-circle-o-down:before{content:"\f01a"}.fa-arrow-circle-o-up:before{content:"\f01b"}.fa-inbox:before{content:"\f01c"}.fa-play-circle-o:before{content:"\f01d"}.fa-rotate-right:before,.fa-repeat:before{content:"\f01e"}.fa-refresh:before{content:"\f021"}.fa-list-alt:before{content:"\f022"}.fa-lock:before{content:"\f023"}.fa-flag:before{content:"\f024"}.fa-headphones:before{content:"\f025"}.fa-volume-off:before{content:"\f026"}.fa-volume-down:before{content:"\f027"}.fa-volume-up:before{content:"\f028"}.fa-qrcode:before{content:"\f029"}.fa-barcode:before{content:"\f02a"}.fa-tag:before{content:"\f02b"}.fa-tags:before{content:"\f02c"}.fa-book:before{content:"\f02d"}.fa-bookmark:before{content:"\f02e"}.fa-print:before{content:"\f02f"}.fa-camera:before{content:"\f030"}.fa-font:before{content:"\f031"}.fa-bold:before{content:"\f032"}.fa-italic:before{content:"\f033"}.fa-text-height:before{content:"\f034"}.fa-text-width:before{content:"\f035"}.fa-align-left:before{content:"\f036"}.fa-align-center:before{content:"\f037"}.fa-align-right:before{content:"\f038"}.fa-align-justify:before{content:"\f039"}.fa-list:before{content:"\f03a"}.fa-dedent:before,.fa-outdent:before{content:"\f03b"}.fa-indent:before{content:"\f03c"}.fa-video-camera:before{content:"\f03d"}.fa-photo:before,.fa-image:before,.fa-picture-o:before{content:"\f03e"}.fa-pencil:before{content:"\f040"}.fa-map-marker:before{content:"\f041"}.fa-adjust:before{content:"\f042"}.fa-tint:before{content:"\f043"}.fa-edit:before,.fa-pencil-square-o:before{content:"\f044"}.fa-share-square-o:before{content:"\f045"}.fa-check-square-o:before{content:"\f046"}.fa-arrows:before{content:"\f047"}.fa-step-backward:before{content:"\f048"}.fa-fast-backward:before{content:"\f049"}.fa-backward:before{content:"\f04a"}.fa-play:before{content:"\f04b"}.fa-pause:before{content:"\f04c"}.fa-stop:before{content:"\f04d"}.fa-forward:before{content:"\f04e"}.fa-fast-forward:before{content:"\f050"}.fa-step-forward:before{content:"\f051"}.fa-eject:before{content:"\f052"}.fa-chevron-left:before{content:"\f053"}.fa-chevron-right:before{content:"\f054"}.fa-plus-circle:before{content:"\f055"}.fa-minus-circle:before{content:"\f056"}.fa-times-circle:before{content:"\f057"}.fa-check-circle:before{content:"\f058"}.fa-question-circle:before{content:"\f059"}.fa-info-circle:before{content:"\f05a"}.fa-cross
hairs:before{content:"\f05b"}.fa-times-circle-o:before{content:"\f05c"}.fa-check-circle-o:before{content:"\f05d"}.fa-ban:before{content:"\f05e"}.fa-arrow-left:before{content:"\f060"}.fa-arrow-right:before{content:"\f061"}.fa-arrow-up:before{content:"\f062"}.fa-arrow-down:before{content:"\f063"}.fa-mail-forward:before,.fa-share:before{content:"\f064"}.fa-expand:before{content:"\f065"}.fa-compress:before{content:"\f066"}.fa-plus:before{content:"\f067"}.fa-minus:before{content:"\f068"}.fa-asterisk:before{content:"\f069"}.fa-exclamation-circle:before{content:"\f06a"}.fa-gift:before{content:"\f06b"}.fa-leaf:before{content:"\f06c"}.fa-fire:before{content:"\f06d"}.fa-eye:before{content:"\f06e"}.fa-eye-slash:before{content:"\f070"}.fa-warning:before,.fa-exclamation-triangle:before{content:"\f071"}.fa-plane:before{content:"\f072"}.fa-calendar:before{content:"\f073"}.fa-random:before{content:"\f074"}.fa-comment:before{content:"\f075"}.fa-magnet:before{content:"\f076"}.fa-chevron-up:before{content:"\f077"}.fa-chevron-down:before{content:"\f078"}.fa-retweet:before{content:"\f079"}.fa-shopping-cart:before{content:"\f07a"}.fa-folder:before{content:"\f07b"}.fa-folder-open:before{content:"\f07c"}.fa-arrows-v:before{content:"\f07d"}.fa-arrows-h:before{content:"\f07e"}.fa-bar-chart-o:before,.fa-bar-chart:before{content:"\f080"}.fa-twitter-square:before{content:"\f081"}.fa-facebook-square:before{content:"\f082"}.fa-camera-retro:before{content:"\f083"}.fa-key:before{content:"\f084"}.fa-gears:before,.fa-cogs:before{content:"\f085"}.fa-comments:before{content:"\f086"}.fa-thumbs-o-up:before{content:"\f087"}.fa-thumbs-o-down:before{content:"\f088"}.fa-star-half:before{content:"\f089"}.fa-heart-o:before{content:"\f08a"}.fa-sign-out:before{content:"\f08b"}.fa-linkedin-square:before{content:"\f08c"}.fa-thumb-tack:before{content:"\f08d"}.fa-external-link:before{content:"\f08e"}.fa-sign-in:before{content:"\f090"}.fa-trophy:before{content:"\f091"}.fa-github-square:before{content:"\f092"}.fa-upload:before{content:"\f093"}.fa-lemon-o:before{content:"\f094"}.fa-phone:before{content:"\f095"}.fa-square-o:before{content:"\f096"}.fa-bookmark-o:before{content:"\f097"}.fa-phone-square:before{content:"\f098"}.fa-twitter:before{content:"\f099"}.fa-facebook-f:before,.fa-facebook:before{content:"\f09a"}.fa-github:before{content:"\f09b"}.fa-unlock:before{content:"\f09c"}.fa-credit-card:before{content:"\f09d"}.fa-feed:before,.fa-rss:before{content:"\f09e"}.fa-hdd-o:before{content:"\f0a0"}.fa-bullhorn:before{content:"\f0a1"}.fa-bell:before{content:"\f0f3"}.fa-certificate:before{content:"\f0a3"}.fa-hand-o-right:before{content:"\f0a4"}.fa-hand-o-left:before{content:"\f0a5"}.fa-hand-o-up:before{content:"\f0a6"}.fa-hand-o-down:before{content:"\f0a7"}.fa-arrow-circle-left:before{content:"\f0a8"}.fa-arrow-circle-right:before{content:"\f0a9"}.fa-arrow-circle-up:before{content:"\f0aa"}.fa-arrow-circle-down:before{content:"\f0ab"}.fa-globe:before{content:"\f0ac"}.fa-wrench:before{content:"\f0ad"}.fa-tasks:before{content:"\f0ae"}.fa-filter:before{content:"\f0b0"}.fa-briefcase:before{content:"\f0b1"}.fa-arrows-alt:before{content:"\f0b2"}.fa-group:before,.fa-users:before{content:"\f0c0"}.fa-chain:before,.fa-link:before{content:"\f0c1"}.fa-cloud:before{content:"\f0c2"}.fa-flask:before{content:"\f0c3"}.fa-cut:before,.fa-scissors:before{content:"\f0c4"}.fa-copy:before,.fa-files-o:before{content:"\f0c5"}.fa-paperclip:before{content:"\f0c6"}.fa-save:before,.fa-floppy-o:before{content:"\f0c7"}.fa-square:before{content:"\f0c8"}.fa-navicon:before,.fa-reor
der:before,.fa-bars:before{content:"\f0c9"}.fa-list-ul:before{content:"\f0ca"}.fa-list-ol:before{content:"\f0cb"}.fa-strikethrough:before{content:"\f0cc"}.fa-underline:before{content:"\f0cd"}.fa-table:before{content:"\f0ce"}.fa-magic:before{content:"\f0d0"}.fa-truck:before{content:"\f0d1"}.fa-pinterest:before{content:"\f0d2"}.fa-pinterest-square:before{content:"\f0d3"}.fa-google-plus-square:before{content:"\f0d4"}.fa-google-plus:before{content:"\f0d5"}.fa-money:before{content:"\f0d6"}.fa-caret-down:before{content:"\f0d7"}.fa-caret-up:before{content:"\f0d8"}.fa-caret-left:before{content:"\f0d9"}.fa-caret-right:before{content:"\f0da"}.fa-columns:before{content:"\f0db"}.fa-unsorted:before,.fa-sort:before{content:"\f0dc"}.fa-sort-down:before,.fa-sort-desc:before{content:"\f0dd"}.fa-sort-up:before,.fa-sort-asc:before{content:"\f0de"}.fa-envelope:before{content:"\f0e0"}.fa-linkedin:before{content:"\f0e1"}.fa-rotate-left:before,.fa-undo:before{content:"\f0e2"}.fa-legal:before,.fa-gavel:before{content:"\f0e3"}.fa-dashboard:before,.fa-tachometer:before{content:"\f0e4"}.fa-comment-o:before{content:"\f0e5"}.fa-comments-o:before{content:"\f0e6"}.fa-flash:before,.fa-bolt:before{content:"\f0e7"}.fa-sitemap:before{content:"\f0e8"}.fa-umbrella:before{content:"\f0e9"}.fa-paste:before,.fa-clipboard:before{content:"\f0ea"}.fa-lightbulb-o:before{content:"\f0eb"}.fa-exchange:before{content:"\f0ec"}.fa-cloud-download:before{content:"\f0ed"}.fa-cloud-upload:before{content:"\f0ee"}.fa-user-md:before{content:"\f0f0"}.fa-stethoscope:before{content:"\f0f1"}.fa-suitcase:before{content:"\f0f2"}.fa-bell-o:before{content:"\f0a2"}.fa-coffee:before{content:"\f0f4"}.fa-cutlery:before{content:"\f0f5"}.fa-file-text-o:before{content:"\f0f6"}.fa-building-o:before{content:"\f0f7"}.fa-hospital-o:before{content:"\f0f8"}.fa-ambulance:before{content:"\f0f9"}.fa-medkit:before{content:"\f0fa"}.fa-fighter-jet:before{content:"\f0fb"}.fa-beer:before{content:"\f0fc"}.fa-h-square:before{content:"\f0fd"}.fa-plus-square:before{content:"\f0fe"}.fa-angle-double-left:before{content:"\f100"}.fa-angle-double-right:before{content:"\f101"}.fa-angle-double-up:before{content:"\f102"}.fa-angle-double-down:before{content:"\f103"}.fa-angle-left:before{content:"\f104"}.fa-angle-right:before{content:"\f105"}.fa-angle-up:before{content:"\f106"}.fa-angle-down:before{content:"\f107"}.fa-desktop:before{content:"\f108"}.fa-laptop:before{content:"\f109"}.fa-tablet:before{content:"\f10a"}.fa-mobile-phone:before,.fa-mobile:before{content:"\f10b"}.fa-circle-o:before{content:"\f10c"}.fa-quote-left:before{content:"\f10d"}.fa-quote-right:before{content:"\f10e"}.fa-spinner:before{content:"\f110"}.fa-circle:before{content:"\f111"}.fa-mail-reply:before,.fa-reply:before{content:"\f112"}.fa-github-alt:before{content:"\f113"}.fa-folder-o:before{content:"\f114"}.fa-folder-open-o:before{content:"\f115"}.fa-smile-o:before{content:"\f118"}.fa-frown-o:before{content:"\f119"}.fa-meh-o:before{content:"\f11a"}.fa-gamepad:before{content:"\f11b"}.fa-keyboard-o:before{content:"\f11c"}.fa-flag-o:before{content:"\f11d"}.fa-flag-checkered:before{content:"\f11e"}.fa-terminal:before{content:"\f120"}.fa-code:before{content:"\f121"}.fa-mail-reply-all:before,.fa-reply-all:before{content:"\f122"}.fa-star-half-empty:before,.fa-star-half-full:before,.fa-star-half-o:before{content:"\f123"}.fa-location-arrow:before{content:"\f124"}.fa-crop:before{content:"\f125"}.fa-code-fork:before{content:"\f126"}.fa-unlink:before,.fa-chain-broken:before{content:"\f127"}.fa-question:before{content:"\f128"}.fa-i
nfo:before{content:"\f129"}.fa-exclamation:before{content:"\f12a"}.fa-superscript:before{content:"\f12b"}.fa-subscript:before{content:"\f12c"}.fa-eraser:before{content:"\f12d"}.fa-puzzle-piece:before{content:"\f12e"}.fa-microphone:before{content:"\f130"}.fa-microphone-slash:before{content:"\f131"}.fa-shield:before{content:"\f132"}.fa-calendar-o:before{content:"\f133"}.fa-fire-extinguisher:before{content:"\f134"}.fa-rocket:before{content:"\f135"}.fa-maxcdn:before{content:"\f136"}.fa-chevron-circle-left:before{content:"\f137"}.fa-chevron-circle-right:before{content:"\f138"}.fa-chevron-circle-up:before{content:"\f139"}.fa-chevron-circle-down:before{content:"\f13a"}.fa-html5:before{content:"\f13b"}.fa-css3:before{content:"\f13c"}.fa-anchor:before{content:"\f13d"}.fa-unlock-alt:before{content:"\f13e"}.fa-bullseye:before{content:"\f140"}.fa-ellipsis-h:before{content:"\f141"}.fa-ellipsis-v:before{content:"\f142"}.fa-rss-square:before{content:"\f143"}.fa-play-circle:before{content:"\f144"}.fa-ticket:before{content:"\f145"}.fa-minus-square:before{content:"\f146"}.fa-minus-square-o:before{content:"\f147"}.fa-level-up:before{content:"\f148"}.fa-level-down:before{content:"\f149"}.fa-check-square:before{content:"\f14a"}.fa-pencil-square:before{content:"\f14b"}.fa-external-link-square:before{content:"\f14c"}.fa-share-square:before{content:"\f14d"}.fa-compass:before{content:"\f14e"}.fa-toggle-down:before,.fa-caret-square-o-down:before{content:"\f150"}.fa-toggle-up:before,.fa-caret-square-o-up:before{content:"\f151"}.fa-toggle-right:before,.fa-caret-square-o-right:before{content:"\f152"}.fa-euro:before,.fa-eur:before{content:"\f153"}.fa-gbp:before{content:"\f154"}.fa-dollar:before,.fa-usd:before{content:"\f155"}.fa-rupee:before,.fa-inr:before{content:"\f156"}.fa-cny:before,.fa-rmb:before,.fa-yen:before,.fa-jpy:before{content:"\f157"}.fa-ruble:before,.fa-rouble:before,.fa-rub:before{content:"\f158"}.fa-won:before,.fa-krw:before{content:"\f159"}.fa-bitcoin:before,.fa-btc:before{content:"\f15a"}.fa-file:before{content:"\f15b"}.fa-file-text:before{content:"\f15c"}.fa-sort-alpha-asc:before{content:"\f15d"}.fa-sort-alpha-desc:before{content:"\f15e"}.fa-sort-amount-asc:before{content:"\f160"}.fa-sort-amount-desc:before{content:"\f161"}.fa-sort-numeric-asc:before{content:"\f162"}.fa-sort-numeric-desc:before{content:"\f163"}.fa-thumbs-up:before{content:"\f164"}.fa-thumbs-down:before{content:"\f165"}.fa-youtube-square:before{content:"\f166"}.fa-youtube:before{content:"\f167"}.fa-xing:before{content:"\f168"}.fa-xing-square:before{content:"\f169"}.fa-youtube-play:before{content:"\f16a"}.fa-dropbox:before{content:"\f16b"}.fa-stack-overflow:before{content:"\f16c"}.fa-instagram:before{content:"\f16d"}.fa-flickr:before{content:"\f16e"}.fa-adn:before{content:"\f170"}.fa-bitbucket:before{content:"\f171"}.fa-bitbucket-square:before{content:"\f172"}.fa-tumblr:before{content:"\f173"}.fa-tumblr-square:before{content:"\f174"}.fa-long-arrow-down:before{content:"\f175"}.fa-long-arrow-up:before{content:"\f176"}.fa-long-arrow-left:before{content:"\f177"}.fa-long-arrow-right:before{content:"\f178"}.fa-apple:before{content:"\f179"}.fa-windows:before{content:"\f17a"}.fa-android:before{content:"\f17b"}.fa-linux:before{content:"\f17c"}.fa-dribbble:before{content:"\f17d"}.fa-skype:before{content:"\f17e"}.fa-foursquare:before{content:"\f180"}.fa-trello:before{content:"\f181"}.fa-female:before{content:"\f182"}.fa-male:before{content:"\f183"}.fa-gittip:before,.fa-gratipay:before{content:"\f184"}.fa-sun-o:before{content:"\f185"}.fa-moon-o:bef
ore{content:"\f186"}.fa-archive:before{content:"\f187"}.fa-bug:before{content:"\f188"}.fa-vk:before{content:"\f189"}.fa-weibo:before{content:"\f18a"}.fa-renren:before{content:"\f18b"}.fa-pagelines:before{content:"\f18c"}.fa-stack-exchange:before{content:"\f18d"}.fa-arrow-circle-o-right:before{content:"\f18e"}.fa-arrow-circle-o-left:before{content:"\f190"}.fa-toggle-left:before,.fa-caret-square-o-left:before{content:"\f191"}.fa-dot-circle-o:before{content:"\f192"}.fa-wheelchair:before{content:"\f193"}.fa-vimeo-square:before{content:"\f194"}.fa-turkish-lira:before,.fa-try:before{content:"\f195"}.fa-plus-square-o:before{content:"\f196"}.fa-space-shuttle:before{content:"\f197"}.fa-slack:before{content:"\f198"}.fa-envelope-square:before{content:"\f199"}.fa-wordpress:before{content:"\f19a"}.fa-openid:before{content:"\f19b"}.fa-institution:before,.fa-bank:before,.fa-university:before{content:"\f19c"}.fa-mortar-board:before,.fa-graduation-cap:before{content:"\f19d"}.fa-yahoo:before{content:"\f19e"}.fa-google:before{content:"\f1a0"}.fa-reddit:before{content:"\f1a1"}.fa-reddit-square:before{content:"\f1a2"}.fa-stumbleupon-circle:before{content:"\f1a3"}.fa-stumbleupon:before{content:"\f1a4"}.fa-delicious:before{content:"\f1a5"}.fa-digg:before{content:"\f1a6"}.fa-pied-piper-pp:before{content:"\f1a7"}.fa-pied-piper-alt:before{content:"\f1a8"}.fa-drupal:before{content:"\f1a9"}.fa-joomla:before{content:"\f1aa"}.fa-language:before{content:"\f1ab"}.fa-fax:before{content:"\f1ac"}.fa-building:before{content:"\f1ad"}.fa-child:before{content:"\f1ae"}.fa-paw:before{content:"\f1b0"}.fa-spoon:before{content:"\f1b1"}.fa-cube:before{content:"\f1b2"}.fa-cubes:before{content:"\f1b3"}.fa-behance:before{content:"\f1b4"}.fa-behance-square:before{content:"\f1b5"}.fa-steam:before{content:"\f1b6"}.fa-steam-square:before{content:"\f1b7"}.fa-recycle:before{content:"\f1b8"}.fa-automobile:before,.fa-car:before{content:"\f1b9"}.fa-cab:before,.fa-taxi:before{content:"\f1ba"}.fa-tree:before{content:"\f1bb"}.fa-spotify:before{content:"\f1bc"}.fa-deviantart:before{content:"\f1bd"}.fa-soundcloud:before{content:"\f1be"}.fa-database:before{content:"\f1c0"}.fa-file-pdf-o:before{content:"\f1c1"}.fa-file-word-o:before{content:"\f1c2"}.fa-file-excel-o:before{content:"\f1c3"}.fa-file-powerpoint-o:before{content:"\f1c4"}.fa-file-photo-o:before,.fa-file-picture-o:before,.fa-file-image-o:before{content:"\f1c5"}.fa-file-zip-o:before,.fa-file-archive-o:before{content:"\f1c6"}.fa-file-sound-o:before,.fa-file-audio-o:before{content:"\f1c7"}.fa-file-movie-o:before,.fa-file-video-o:before{content:"\f1c8"}.fa-file-code-o:before{content:"\f1c9"}.fa-vine:before{content:"\f1ca"}.fa-codepen:before{content:"\f1cb"}.fa-jsfiddle:before{content:"\f1cc"}.fa-life-bouy:before,.fa-life-buoy:before,.fa-life-saver:before,.fa-support:before,.fa-life-ring:before{content:"\f1cd"}.fa-circle-o-notch:before{content:"\f1ce"}.fa-ra:before,.fa-resistance:before,.fa-rebel:before{content:"\f1d0"}.fa-ge:before,.fa-empire:before{content:"\f1d1"}.fa-git-square:before{content:"\f1d2"}.fa-git:before{content:"\f1d3"}.fa-y-combinator-square:before,.fa-yc-square:before,.fa-hacker-news:before{content:"\f1d4"}.fa-tencent-weibo:before{content:"\f1d5"}.fa-qq:before{content:"\f1d6"}.fa-wechat:before,.fa-weixin:before{content:"\f1d7"}.fa-send:before,.fa-paper-plane:before{content:"\f1d8"}.fa-send-o:before,.fa-paper-plane-o:before{content:"\f1d9"}.fa-history:before{content:"\f1da"}.fa-circle-thin:before{content:"\f1db"}.fa-header:before{content:"\f1dc"}.fa-paragraph:before{content:"\f1dd"}
.fa-sliders:before{content:"\f1de"}.fa-share-alt:before{content:"\f1e0"}.fa-share-alt-square:before{content:"\f1e1"}.fa-bomb:before{content:"\f1e2"}.fa-soccer-ball-o:before,.fa-futbol-o:before{content:"\f1e3"}.fa-tty:before{content:"\f1e4"}.fa-binoculars:before{content:"\f1e5"}.fa-plug:before{content:"\f1e6"}.fa-slideshare:before{content:"\f1e7"}.fa-twitch:before{content:"\f1e8"}.fa-yelp:before{content:"\f1e9"}.fa-newspaper-o:before{content:"\f1ea"}.fa-wifi:before{content:"\f1eb"}.fa-calculator:before{content:"\f1ec"}.fa-paypal:before{content:"\f1ed"}.fa-google-wallet:before{content:"\f1ee"}.fa-cc-visa:before{content:"\f1f0"}.fa-cc-mastercard:before{content:"\f1f1"}.fa-cc-discover:before{content:"\f1f2"}.fa-cc-amex:before{content:"\f1f3"}.fa-cc-paypal:before{content:"\f1f4"}.fa-cc-stripe:before{content:"\f1f5"}.fa-bell-slash:before{content:"\f1f6"}.fa-bell-slash-o:before{content:"\f1f7"}.fa-trash:before{content:"\f1f8"}.fa-copyright:before{content:"\f1f9"}.fa-at:before{content:"\f1fa"}.fa-eyedropper:before{content:"\f1fb"}.fa-paint-brush:before{content:"\f1fc"}.fa-birthday-cake:before{content:"\f1fd"}.fa-area-chart:before{content:"\f1fe"}.fa-pie-chart:before{content:"\f200"}.fa-line-chart:before{content:"\f201"}.fa-lastfm:before{content:"\f202"}.fa-lastfm-square:before{content:"\f203"}.fa-toggle-off:before{content:"\f204"}.fa-toggle-on:before{content:"\f205"}.fa-bicycle:before{content:"\f206"}.fa-bus:before{content:"\f207"}.fa-ioxhost:before{content:"\f208"}.fa-angellist:before{content:"\f209"}.fa-cc:before{content:"\f20a"}.fa-shekel:before,.fa-sheqel:before,.fa-ils:before{content:"\f20b"}.fa-meanpath:before{content:"\f20c"}.fa-buysellads:before{content:"\f20d"}.fa-connectdevelop:before{content:"\f20e"}.fa-dashcube:before{content:"\f210"}.fa-forumbee:before{content:"\f211"}.fa-leanpub:before{content:"\f212"}.fa-sellsy:before{content:"\f213"}.fa-shirtsinbulk:before{content:"\f214"}.fa-simplybuilt:before{content:"\f215"}.fa-skyatlas:before{content:"\f216"}.fa-cart-plus:before{content:"\f217"}.fa-cart-arrow-down:before{content:"\f218"}.fa-diamond:before{content:"\f219"}.fa-ship:before{content:"\f21a"}.fa-user-secret:before{content:"\f21b"}.fa-motorcycle:before{content:"\f21c"}.fa-street-view:before{content:"\f21d"}.fa-heartbeat:before{content:"\f21e"}.fa-venus:before{content:"\f221"}.fa-mars:before{content:"\f222"}.fa-mercury:before{content:"\f223"}.fa-intersex:before,.fa-transgender:before{content:"\f224"}.fa-transgender-alt:before{content:"\f225"}.fa-venus-double:before{content:"\f226"}.fa-mars-double:before{content:"\f227"}.fa-venus-mars:before{content:"\f228"}.fa-mars-stroke:before{content:"\f229"}.fa-mars-stroke-v:before{content:"\f22a"}.fa-mars-stroke-h:before{content:"\f22b"}.fa-neuter:before{content:"\f22c"}.fa-genderless:before{content:"\f22d"}.fa-facebook-official:before{content:"\f230"}.fa-pinterest-p:before{content:"\f231"}.fa-whatsapp:before{content:"\f232"}.fa-server:before{content:"\f233"}.fa-user-plus:before{content:"\f234"}.fa-user-times:before{content:"\f235"}.fa-hotel:before,.fa-bed:before{content:"\f236"}.fa-viacoin:before{content:"\f237"}.fa-train:before{content:"\f238"}.fa-subway:before{content:"\f239"}.fa-medium:before{content:"\f23a"}.fa-yc:before,.fa-y-combinator:before{content:"\f23b"}.fa-optin-monster:before{content:"\f23c"}.fa-opencart:before{content:"\f23d"}.fa-expeditedssl:before{content:"\f23e"}.fa-battery-4:before,.fa-battery:before,.fa-battery-full:before{content:"\f240"}.fa-battery-3:before,.fa-battery-three-quarters:before{content:"\f241"}.fa-battery-2:before
,.fa-battery-half:before{content:"\f242"}.fa-battery-1:before,.fa-battery-quarter:before{content:"\f243"}.fa-battery-0:before,.fa-battery-empty:before{content:"\f244"}.fa-mouse-pointer:before{content:"\f245"}.fa-i-cursor:before{content:"\f246"}.fa-object-group:before{content:"\f247"}.fa-object-ungroup:before{content:"\f248"}.fa-sticky-note:before{content:"\f249"}.fa-sticky-note-o:before{content:"\f24a"}.fa-cc-jcb:before{content:"\f24b"}.fa-cc-diners-club:before{content:"\f24c"}.fa-clone:before{content:"\f24d"}.fa-balance-scale:before{content:"\f24e"}.fa-hourglass-o:before{content:"\f250"}.fa-hourglass-1:before,.fa-hourglass-start:before{content:"\f251"}.fa-hourglass-2:before,.fa-hourglass-half:before{content:"\f252"}.fa-hourglass-3:before,.fa-hourglass-end:before{content:"\f253"}.fa-hourglass:before{content:"\f254"}.fa-hand-grab-o:before,.fa-hand-rock-o:before{content:"\f255"}.fa-hand-stop-o:before,.fa-hand-paper-o:before{content:"\f256"}.fa-hand-scissors-o:before{content:"\f257"}.fa-hand-lizard-o:before{content:"\f258"}.fa-hand-spock-o:before{content:"\f259"}.fa-hand-pointer-o:before{content:"\f25a"}.fa-hand-peace-o:before{content:"\f25b"}.fa-trademark:before{content:"\f25c"}.fa-registered:before{content:"\f25d"}.fa-creative-commons:before{content:"\f25e"}.fa-gg:before{content:"\f260"}.fa-gg-circle:before{content:"\f261"}.fa-tripadvisor:before{content:"\f262"}.fa-odnoklassniki:before{content:"\f263"}.fa-odnoklassniki-square:before{content:"\f264"}.fa-get-pocket:before{content:"\f265"}.fa-wikipedia-w:before{content:"\f266"}.fa-safari:before{content:"\f267"}.fa-chrome:before{content:"\f268"}.fa-firefox:before{content:"\f269"}.fa-opera:before{content:"\f26a"}.fa-internet-explorer:before{content:"\f26b"}.fa-tv:before,.fa-television:before{content:"\f26c"}.fa-contao:before{content:"\f26d"}.fa-500px:before{content:"\f26e"}.fa-amazon:before{content:"\f270"}.fa-calendar-plus-o:before{content:"\f271"}.fa-calendar-minus-o:before{content:"\f272"}.fa-calendar-times-o:before{content:"\f273"}.fa-calendar-check-o:before{content:"\f274"}.fa-industry:before{content:"\f275"}.fa-map-pin:before{content:"\f276"}.fa-map-signs:before{content:"\f277"}.fa-map-o:before{content:"\f278"}.fa-map:before{content:"\f279"}.fa-commenting:before{content:"\f27a"}.fa-commenting-o:before{content:"\f27b"}.fa-houzz:before{content:"\f27c"}.fa-vimeo:before{content:"\f27d"}.fa-black-tie:before{content:"\f27e"}.fa-fonticons:before{content:"\f280"}.fa-reddit-alien:before{content:"\f281"}.fa-edge:before{content:"\f282"}.fa-credit-card-alt:before{content:"\f283"}.fa-codiepie:before{content:"\f284"}.fa-modx:before{content:"\f285"}.fa-fort-awesome:before{content:"\f286"}.fa-usb:before{content:"\f287"}.fa-product-hunt:before{content:"\f288"}.fa-mixcloud:before{content:"\f289"}.fa-scribd:before{content:"\f28a"}.fa-pause-circle:before{content:"\f28b"}.fa-pause-circle-o:before{content:"\f28c"}.fa-stop-circle:before{content:"\f28d"}.fa-stop-circle-o:before{content:"\f28e"}.fa-shopping-bag:before{content:"\f290"}.fa-shopping-basket:before{content:"\f291"}.fa-hashtag:before{content:"\f292"}.fa-bluetooth:before{content:"\f293"}.fa-bluetooth-b:before{content:"\f294"}.fa-percent:before{content:"\f295"}.fa-gitlab:before{content:"\f296"}.fa-wpbeginner:before{content:"\f297"}.fa-wpforms:before{content:"\f298"}.fa-envira:before{content:"\f299"}.fa-universal-access:before{content:"\f29a"}.fa-wheelchair-alt:before{content:"\f29b"}.fa-question-circle-o:before{content:"\f29c"}.fa-blind:before{content:"\f29d"}.fa-audio-description:before{content:"\f29e"}.f
a-volume-control-phone:before{content:"\f2a0"}.fa-braille:before{content:"\f2a1"}.fa-assistive-listening-systems:before{content:"\f2a2"}.fa-asl-interpreting:before,.fa-american-sign-language-interpreting:before{content:"\f2a3"}.fa-deafness:before,.fa-hard-of-hearing:before,.fa-deaf:before{content:"\f2a4"}.fa-glide:before{content:"\f2a5"}.fa-glide-g:before{content:"\f2a6"}.fa-signing:before,.fa-sign-language:before{content:"\f2a7"}.fa-low-vision:before{content:"\f2a8"}.fa-viadeo:before{content:"\f2a9"}.fa-viadeo-square:before{content:"\f2aa"}.fa-snapchat:before{content:"\f2ab"}.fa-snapchat-ghost:before{content:"\f2ac"}.fa-snapchat-square:before{content:"\f2ad"}.fa-pied-piper:before{content:"\f2ae"}.fa-first-order:before{content:"\f2b0"}.fa-yoast:before{content:"\f2b1"}.fa-themeisle:before{content:"\f2b2"}.fa-google-plus-circle:before,.fa-google-plus-official:before{content:"\f2b3"}.fa-fa:before,.fa-font-awesome:before{content:"\f2b4"}.fa-handshake-o:before{content:"\f2b5"}.fa-envelope-open:before{content:"\f2b6"}.fa-envelope-open-o:before{content:"\f2b7"}.fa-linode:before{content:"\f2b8"}.fa-address-book:before{content:"\f2b9"}.fa-address-book-o:before{content:"\f2ba"}.fa-vcard:before,.fa-address-card:before{content:"\f2bb"}.fa-vcard-o:before,.fa-address-card-o:before{content:"\f2bc"}.fa-user-circle:before{content:"\f2bd"}.fa-user-circle-o:before{content:"\f2be"}.fa-user-o:before{content:"\f2c0"}.fa-id-badge:before{content:"\f2c1"}.fa-drivers-license:before,.fa-id-card:before{content:"\f2c2"}.fa-drivers-license-o:before,.fa-id-card-o:before{content:"\f2c3"}.fa-quora:before{content:"\f2c4"}.fa-free-code-camp:before{content:"\f2c5"}.fa-telegram:before{content:"\f2c6"}.fa-thermometer-4:before,.fa-thermometer:before,.fa-thermometer-full:before{content:"\f2c7"}.fa-thermometer-3:before,.fa-thermometer-three-quarters:before{content:"\f2c8"}.fa-thermometer-2:before,.fa-thermometer-half:before{content:"\f2c9"}.fa-thermometer-1:before,.fa-thermometer-quarter:before{content:"\f2ca"}.fa-thermometer-0:before,.fa-thermometer-empty:before{content:"\f2cb"}.fa-shower:before{content:"\f2cc"}.fa-bathtub:before,.fa-s15:before,.fa-bath:before{content:"\f2cd"}.fa-podcast:before{content:"\f2ce"}.fa-window-maximize:before{content:"\f2d0"}.fa-window-minimize:before{content:"\f2d1"}.fa-window-restore:before{content:"\f2d2"}.fa-times-rectangle:before,.fa-window-close:before{content:"\f2d3"}.fa-times-rectangle-o:before,.fa-window-close-o:before{content:"\f2d4"}.fa-bandcamp:before{content:"\f2d5"}.fa-grav:before{content:"\f2d6"}.fa-etsy:before{content:"\f2d7"}.fa-imdb:before{content:"\f2d8"}.fa-ravelry:before{content:"\f2d9"}.fa-eercast:before{content:"\f2da"}.fa-microchip:before{content:"\f2db"}.fa-snowflake-o:before{content:"\f2dc"}.fa-superpowers:before{content:"\f2dd"}.fa-wpexplorer:before{content:"\f2de"}.fa-meetup:before{content:"\f2e0"}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0, 0, 0, 0);border:0}.sr-only-focusable:active,.sr-only-focusable:focus{position:static;width:auto;height:auto;margin:0;overflow:visible;clip:auto} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/css/style.css b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/css/style.css deleted file mode 100644 index a3040718b8f1caa8fed98832b8c82778b0003a9f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/css/style.css +++ /dev/null @@ -1,453 
+0,0 @@ -/* -* @Author: baipengxia -* @Date: 2021-03-12 11:44:28 -* @Last Modified by: baipengxia -* @Last Modified time: 2021-03-12 15:14:24 -*/ - -/** COMMON RESET **/ -* { - -webkit-tap-highlight-color: rgba(0, 0, 0, 0); -} - -body, -h1, -h2, -h3, -h4, -h5, -h6, -hr, -p, -dl, -dt, -dd, -ul, -ol, -li, -fieldset, -lengend, -button, -input, -textarea, -th, -td { - margin: 0; - padding: 0; - color: #000; -} - -body { - font-size: 14px; -} -html, body { - min-width: 1200px; -} - -button, -input, -select, -textarea { - font-size: 14px; -} - -h1 { - font-size: 18px; -} - -h2 { - font-size: 14px; -} - -h3 { - font-size: 14px; -} - -ul, -ol, -li { - list-style: none; -} - -a { - text-decoration: none; -} - -a:hover { - text-decoration: none; -} - -fieldset, -img { - border: none; -} - -table { - border-collapse: collapse; - border-spacing: 0; -} - -i { - font-style: normal; -} - -label { - position: inherit; -} - -.clearfix:after { - content: "."; - display: block; - height: 0; - clear: both; - visibility: hidden; -} - -.clearfix { - zoom: 1; - display: block; -} - -html, -body { - font-family: Tahoma, Arial, 'microsoft yahei', 'Roboto', 'Droid Sans', 'Helvetica Neue', 'Droid Sans Fallback', 'Heiti SC', 'Hiragino Sans GB', 'Simsun', 'sans-self'; -} - - - -.audio-banner { - width: 100%; - overflow: auto; - padding: 0; - background: url('../image/voice-dictation.svg'); - background-size: cover; -} -.weaper { - width: 1200px; - height: 155px; - margin: 72px auto; -} -.text-content { - width: 670px; - height: 100%; - float: left; -} -.text-content .title { - font-size: 34px; - font-family: 'PingFangSC-Medium'; - font-weight: 500; - color: rgba(255, 255, 255, 1); - line-height: 48px; -} -.text-content .con { - font-size: 16px; - font-family: PingFangSC-Light; - font-weight: 300; - color: rgba(255, 255, 255, 1); - line-height: 30px; -} -.img-con { - width: 416px; - height: 100%; - float: right; -} -.img-con img { - width: 100%; - height: 100%; -} -.con-container { - margin-top: 34px; -} - -.audio-advantage { - background: #f8f9fa; -} -.asr-advantage { - width: 1200px; - margin: 0 auto; -} -.asr-advantage h2 { - text-align: center; - font-size: 22px; - padding: 30px 0 0 0; -} -.asr-advantage > ul > li { - box-sizing: border-box; - padding: 0 16px; - width: 33%; - text-align: center; - margin-bottom: 35px; -} -.asr-advantage > ul > li .icons{ - margin-top: 10px; - margin-bottom: 20px; - width: 42px; - height: 42px; -} -.service-item-content { - margin-top: 35px; - display: flex; - justify-content: center; - flex-wrap: wrap; -} -.service-item-content img { - width: 160px; - vertical-align: bottom; -} -.service-item-content > li { - box-sizing: border-box; - padding: 0 16px; - width: 33%; - text-align: center; - margin-bottom: 35px; -} -.service-item-content > li .service-item-content-title { - line-height: 1.5; - font-weight: 700; - margin-top: 10px; -} -.service-item-content > li .service-item-content-desc { - margin-top: 5px; - line-height: 1.8; - color: #657384; -} - - -.audio-scene-con { - width: 100%; - padding-bottom: 84px; - background: #fff; -} -.audio-scene { - overflow: auto; - width: 1200px; - background: #fff; - text-align: center; - padding: 0; - margin: 0 auto; -} -.audio-scene h2 { - padding: 30px 0 0 0; - font-size: 22px; - text-align: center; -} - -.audio-experience { - width: 100%; - height: 538px; - background: #fff; - padding: 0; - margin: 0; - overflow: auto; -} -.asr-box { - width: 1200px; - height: 394px; - margin: 64px auto; -} -.asr-box h2 { - font-size: 22px; - text-align: 
center; - margin-bottom: 64px; -} -.voice-container { - position: relative; - width: 1200px; - height: 308px; - background: rgba(255, 255, 255, 1); - border-radius: 8px; - border: 1px solid rgba(225, 225, 225, 1); -} -.voice-container .voice { - height: 236px; - width: 100%; - border-radius: 8px; -} -.voice-container .voice textarea { - height: 100%; - width: 100%; - border: none; - outline: none; - border-radius: 8px; - padding: 25px; - font-size: 14px; - box-sizing: border-box; - resize: none; -} -.voice-input { - width: 100%; - height: 72px; - box-sizing: border-box; - padding-left: 35px; - background: rgba(242, 244, 245, 1); - border-radius: 8px; - line-height: 72px; -} -.voice-input .el-select { - width: 492px; -} -.start-voice { - display: inline-block; - margin-left: 10px; -} -.start-voice .time { - margin-right: 25px; -} -.asr-advantage > ul > li { - margin-bottom: 77px; -} -#msg { - width: 100%; - line-height: 40px; - font-size: 14px; - margin-left: 330px; -} -#captcha { - margin-left: 350px !important; - display: inline-block; - position: relative; -} -.black { - position: fixed; - width: 100%; - height: 100%; - z-index: 5; - background: rgba(0, 0, 0, 0.5); - top: 0; - left: 0; -} -.container { - position: fixed; - z-index: 6; - top: 25%; - left: 10%; -} -.audio-scene-con { - width: 100%; - padding-bottom: 84px; - background: #fff; -} -#sound { - color: #fff; - cursor: pointer; - background: #147ede; - padding: 10px; - margin-top: 30px; - margin-left: 135px; - width: 176px; - height: 30px !important; - text-align: center; - line-height: 30px !important; - border-radius: 10px; -} -.con-ten { - position: absolute; - width: 100%; - height: 100%; - z-index: 5; - background: #fff; - opacity: 0.5; - top: 0; - left: 0; -} -.websocket-url { - width: 320px; - height: 20px; - border: 1px solid #dcdfe6; - line-height: 20px; - padding: 10px; - border-radius: 4px; -} -.voice-btn { - color: #fff; - background-color: #409eff; - font-weight: 500; - padding: 12px 20px; - font-size: 14px; - border-radius: 4px; - border: 0; - cursor: pointer; -} -.voice-btn.end { - display: none; -} -.result-text { - background: #fff; - padding: 20px; -} -.voice-footer { - border-top: 1px solid #dddede; - background: #f7f9fa; - text-align: center; - margin-bottom: 8px; - color: #333; - font-size: 12px; - padding: 20px 0; -} - -/** line animate **/ -.time-box { - display: none; - margin-left: 10px; - width: 300px; -} -.total-time { - font-size: 14px; - color: #545454; -} -.voice-btn.end.show, -.time-box.show { - display: inline; -} -.start-taste-line { - margin-right: 20px; - display: inline-block; -} -.start-taste-line hr { - background-color: #187cff; - width: 3px; - height: 8px; - margin: 0 3px; - display: inline-block; - border: none; -} -.hr { - animation: note 0.2s ease-in-out; - animation-iteration-count: infinite; - animation-direction: alternate; -} -.hr-one { - animation-delay: -0.9s; -} -.hr-two { - animation-delay: -0.8s; -} -.hr-three { - animation-delay: -0.7s; -} -.hr-four { - animation-delay: -0.6s; -} -.hr-five { - animation-delay: -0.5s; -} -.hr-six { - animation-delay: -0.4s; -} -.hr-seven { - animation-delay: -0.3s; -} -.hr-eight { - animation-delay: -0.2s; -} -.hr-nine { - animation-delay: -0.1s; -} -@keyframes note { - from { - transform: scaleY(1); - } - to { - transform: scaleY(4); - } -} \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/favicon.ico 
deleted file mode 100644
index da219e6d6e693bda3193ad25d24e0f54fbe7b989..0000000000000000000000000000000000000000
Binary files a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/favicon.ico and /dev/null differ
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/FontAwesome.otf b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/FontAwesome.otf
deleted file mode 100644
index 401ec0f36e4f73b8efa40bd6f604fe80d286db70..0000000000000000000000000000000000000000
Binary files a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/FontAwesome.otf and /dev/null differ
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.eot b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.eot
deleted file mode 100644
index e9f60ca953f93e35eab4108bd414bc02ddcf3928..0000000000000000000000000000000000000000
Binary files a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.eot and /dev/null differ
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.svg b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.svg
deleted file mode 100644
index 6cd0326be380a32c3193c42e1879b7a6c6cf527e..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.svg
+++ /dev/null
@@ -1,1951 +0,0 @@
-Created by FontForge 20120731 at Mon Oct 24 17:37:40 2016
- By ,,,
-Copyright Dave Gandy 2016. All rights reserved.
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.ttf b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.ttf
deleted file mode 100644
index 35acda2fa1196aad98c2adf4378a7611dd713aa3..0000000000000000000000000000000000000000
Binary files a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.ttf and /dev/null differ
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff
deleted file mode 100644
index 400014a4b06eee3d0c0d54402a47ab2601b2862b..0000000000000000000000000000000000000000
Binary files a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff and /dev/null differ
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff2 b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff2
deleted file mode 100644
index 4d13fc60404b91e398a37200c4a77b645cfd9586..0000000000000000000000000000000000000000
Binary files a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff2 and /dev/null differ
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/qrcode-enterprise.png b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/qrcode-enterprise.png
deleted file mode 100644
index 1f7157a61874b2bcd3517ee2cf6479989e2b0e6f..0000000000000000000000000000000000000000
Binary files a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/qrcode-enterprise.png and /dev/null differ
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/qrcode-official-account.png b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/qrcode-official-account.png
deleted file mode 100644
index 9afe08d93e907606271014602420096a9c523af8..0000000000000000000000000000000000000000
Binary files a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/qrcode-official-account.png and /dev/null differ
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/voice-dictation.svg b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/voice-dictation.svg
deleted file mode 100644
index d35971499ddfed4ab0016419fb87e8d6a0d695cc..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/voice-dictation.svg
+++ /dev/null
@@ -1,94 +0,0 @@
-    背景
-    Created with Sketch.
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/voice-pic.png b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/voice-pic.png
deleted file mode 100644
index 7b8f043a11d2b114fcfd614413844fac41d7e240..0000000000000000000000000000000000000000
Binary files a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/image/voice-pic.png and /dev/null differ
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/js/SoundRecognizer.js b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/js/SoundRecognizer.js
deleted file mode 100644
index 5ef3d2e89dc27945d9e356b3c9eb5519f9cea69a..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/js/SoundRecognizer.js
+++ /dev/null
@@ -1,133 +0,0 @@
-SoundRecognizer = {
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/js/jquery-3.2.1.min.js b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/js/jquery-3.2.1.min.js
deleted file mode 100644
index 644d35e274fd64ddaf6d12af813e820c424176a9..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/web/static/js/jquery-3.2.1.min.js
+++ /dev/null
@@ -1,4 +0,0 @@
-/*! jQuery v3.2.1 | (c) JS Foundation and other contributors | jquery.org/license */
jQuery v3.2.1 | (c) JS Foundation and other contributors | jquery.org/license */ -!function(a,b){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=a.document?b(a,!0):function(a){if(!a.document)throw new Error("jQuery requires a window with a document");return b(a)}:b(a)}("undefined"!=typeof window?window:this,function(a,b){"use strict";var c=[],d=a.document,e=Object.getPrototypeOf,f=c.slice,g=c.concat,h=c.push,i=c.indexOf,j={},k=j.toString,l=j.hasOwnProperty,m=l.toString,n=m.call(Object),o={};function p(a,b){b=b||d;var c=b.createElement("script");c.text=a,b.head.appendChild(c).parentNode.removeChild(c)}var q="3.2.1",r=function(a,b){return new r.fn.init(a,b)},s=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,t=/^-ms-/,u=/-([a-z])/g,v=function(a,b){return b.toUpperCase()};r.fn=r.prototype={jquery:q,constructor:r,length:0,toArray:function(){return f.call(this)},get:function(a){return null==a?f.call(this):a<0?this[a+this.length]:this[a]},pushStack:function(a){var b=r.merge(this.constructor(),a);return b.prevObject=this,b},each:function(a){return r.each(this,a)},map:function(a){return this.pushStack(r.map(this,function(b,c){return a.call(b,c,b)}))},slice:function(){return this.pushStack(f.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(a){var b=this.length,c=+a+(a<0?b:0);return this.pushStack(c>=0&&c0&&b-1 in a)}var x=function(a){var b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u="sizzle"+1*new Date,v=a.document,w=0,x=0,y=ha(),z=ha(),A=ha(),B=function(a,b){return a===b&&(l=!0),0},C={}.hasOwnProperty,D=[],E=D.pop,F=D.push,G=D.push,H=D.slice,I=function(a,b){for(var c=0,d=a.length;c+~]|"+K+")"+K+"*"),S=new RegExp("="+K+"*([^\\]'\"]*?)"+K+"*\\]","g"),T=new RegExp(N),U=new RegExp("^"+L+"$"),V={ID:new RegExp("^#("+L+")"),CLASS:new RegExp("^\\.("+L+")"),TAG:new RegExp("^("+L+"|[*])"),ATTR:new RegExp("^"+M),PSEUDO:new RegExp("^"+N),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+K+"*(even|odd|(([+-]|)(\\d*)n|)"+K+"*(?:([+-]|)"+K+"*(\\d+)|))"+K+"*\\)|)","i"),bool:new RegExp("^(?:"+J+")$","i"),needsContext:new RegExp("^"+K+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+K+"*((?:-\\d)?\\d*)"+K+"*\\)|)(?=[^-]|$)","i")},W=/^(?:input|select|textarea|button)$/i,X=/^h\d$/i,Y=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,$=/[+~]/,_=new RegExp("\\\\([\\da-f]{1,6}"+K+"?|("+K+")|.)","ig"),aa=function(a,b,c){var d="0x"+b-65536;return d!==d||c?b:d<0?String.fromCharCode(d+65536):String.fromCharCode(d>>10|55296,1023&d|56320)},ba=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ca=function(a,b){return b?"\0"===a?"\ufffd":a.slice(0,-1)+"\\"+a.charCodeAt(a.length-1).toString(16)+" ":"\\"+a},da=function(){m()},ea=ta(function(a){return a.disabled===!0&&("form"in a||"label"in a)},{dir:"parentNode",next:"legend"});try{G.apply(D=H.call(v.childNodes),v.childNodes),D[v.childNodes.length].nodeType}catch(fa){G={apply:D.length?function(a,b){F.apply(a,H.call(b))}:function(a,b){var c=a.length,d=0;while(a[c++]=b[d++]);a.length=c-1}}}function ga(a,b,d,e){var f,h,j,k,l,o,r,s=b&&b.ownerDocument,w=b?b.nodeType:9;if(d=d||[],"string"!=typeof a||!a||1!==w&&9!==w&&11!==w)return d;if(!e&&((b?b.ownerDocument||b:v)!==n&&m(b),b=b||n,p)){if(11!==w&&(l=Z.exec(a)))if(f=l[1]){if(9===w){if(!(j=b.getElementById(f)))return d;if(j.id===f)return d.push(j),d}else if(s&&(j=s.getElementById(f))&&t(b,j)&&j.id===f)return d.push(j),d}else{if(l[2])return 
G.apply(d,b.getElementsByTagName(a)),d;if((f=l[3])&&c.getElementsByClassName&&b.getElementsByClassName)return G.apply(d,b.getElementsByClassName(f)),d}if(c.qsa&&!A[a+" "]&&(!q||!q.test(a))){if(1!==w)s=b,r=a;else if("object"!==b.nodeName.toLowerCase()){(k=b.getAttribute("id"))?k=k.replace(ba,ca):b.setAttribute("id",k=u),o=g(a),h=o.length;while(h--)o[h]="#"+k+" "+sa(o[h]);r=o.join(","),s=$.test(a)&&qa(b.parentNode)||b}if(r)try{return G.apply(d,s.querySelectorAll(r)),d}catch(x){}finally{k===u&&b.removeAttribute("id")}}}return i(a.replace(P,"$1"),b,d,e)}function ha(){var a=[];function b(c,e){return a.push(c+" ")>d.cacheLength&&delete b[a.shift()],b[c+" "]=e}return b}function ia(a){return a[u]=!0,a}function ja(a){var b=n.createElement("fieldset");try{return!!a(b)}catch(c){return!1}finally{b.parentNode&&b.parentNode.removeChild(b),b=null}}function ka(a,b){var c=a.split("|"),e=c.length;while(e--)d.attrHandle[c[e]]=b}function la(a,b){var c=b&&a,d=c&&1===a.nodeType&&1===b.nodeType&&a.sourceIndex-b.sourceIndex;if(d)return d;if(c)while(c=c.nextSibling)if(c===b)return-1;return a?1:-1}function ma(a){return function(b){var c=b.nodeName.toLowerCase();return"input"===c&&b.type===a}}function na(a){return function(b){var c=b.nodeName.toLowerCase();return("input"===c||"button"===c)&&b.type===a}}function oa(a){return function(b){return"form"in b?b.parentNode&&b.disabled===!1?"label"in b?"label"in b.parentNode?b.parentNode.disabled===a:b.disabled===a:b.isDisabled===a||b.isDisabled!==!a&&ea(b)===a:b.disabled===a:"label"in b&&b.disabled===a}}function pa(a){return ia(function(b){return b=+b,ia(function(c,d){var e,f=a([],c.length,b),g=f.length;while(g--)c[e=f[g]]&&(c[e]=!(d[e]=c[e]))})})}function qa(a){return a&&"undefined"!=typeof a.getElementsByTagName&&a}c=ga.support={},f=ga.isXML=function(a){var b=a&&(a.ownerDocument||a).documentElement;return!!b&&"HTML"!==b.nodeName},m=ga.setDocument=function(a){var b,e,g=a?a.ownerDocument||a:v;return g!==n&&9===g.nodeType&&g.documentElement?(n=g,o=n.documentElement,p=!f(n),v!==n&&(e=n.defaultView)&&e.top!==e&&(e.addEventListener?e.addEventListener("unload",da,!1):e.attachEvent&&e.attachEvent("onunload",da)),c.attributes=ja(function(a){return a.className="i",!a.getAttribute("className")}),c.getElementsByTagName=ja(function(a){return a.appendChild(n.createComment("")),!a.getElementsByTagName("*").length}),c.getElementsByClassName=Y.test(n.getElementsByClassName),c.getById=ja(function(a){return o.appendChild(a).id=u,!n.getElementsByName||!n.getElementsByName(u).length}),c.getById?(d.filter.ID=function(a){var b=a.replace(_,aa);return function(a){return a.getAttribute("id")===b}},d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c=b.getElementById(a);return c?[c]:[]}}):(d.filter.ID=function(a){var b=a.replace(_,aa);return function(a){var c="undefined"!=typeof a.getAttributeNode&&a.getAttributeNode("id");return c&&c.value===b}},d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c,d,e,f=b.getElementById(a);if(f){if(c=f.getAttributeNode("id"),c&&c.value===a)return[f];e=b.getElementsByName(a),d=0;while(f=e[d++])if(c=f.getAttributeNode("id"),c&&c.value===a)return[f]}return[]}}),d.find.TAG=c.getElementsByTagName?function(a,b){return"undefined"!=typeof b.getElementsByTagName?b.getElementsByTagName(a):c.qsa?b.querySelectorAll(a):void 0}:function(a,b){var c,d=[],e=0,f=b.getElementsByTagName(a);if("*"===a){while(c=f[e++])1===c.nodeType&&d.push(c);return d}return f},d.find.CLASS=c.getElementsByClassName&&function(a,b){if("undefined"!=typeof 
b.getElementsByClassName&&p)return b.getElementsByClassName(a)},r=[],q=[],(c.qsa=Y.test(n.querySelectorAll))&&(ja(function(a){o.appendChild(a).innerHTML="",a.querySelectorAll("[msallowcapture^='']").length&&q.push("[*^$]="+K+"*(?:''|\"\")"),a.querySelectorAll("[selected]").length||q.push("\\["+K+"*(?:value|"+J+")"),a.querySelectorAll("[id~="+u+"-]").length||q.push("~="),a.querySelectorAll(":checked").length||q.push(":checked"),a.querySelectorAll("a#"+u+"+*").length||q.push(".#.+[+~]")}),ja(function(a){a.innerHTML="";var b=n.createElement("input");b.setAttribute("type","hidden"),a.appendChild(b).setAttribute("name","D"),a.querySelectorAll("[name=d]").length&&q.push("name"+K+"*[*^$|!~]?="),2!==a.querySelectorAll(":enabled").length&&q.push(":enabled",":disabled"),o.appendChild(a).disabled=!0,2!==a.querySelectorAll(":disabled").length&&q.push(":enabled",":disabled"),a.querySelectorAll("*,:x"),q.push(",.*:")})),(c.matchesSelector=Y.test(s=o.matches||o.webkitMatchesSelector||o.mozMatchesSelector||o.oMatchesSelector||o.msMatchesSelector))&&ja(function(a){c.disconnectedMatch=s.call(a,"*"),s.call(a,"[s!='']:x"),r.push("!=",N)}),q=q.length&&new RegExp(q.join("|")),r=r.length&&new RegExp(r.join("|")),b=Y.test(o.compareDocumentPosition),t=b||Y.test(o.contains)?function(a,b){var c=9===a.nodeType?a.documentElement:a,d=b&&b.parentNode;return a===d||!(!d||1!==d.nodeType||!(c.contains?c.contains(d):a.compareDocumentPosition&&16&a.compareDocumentPosition(d)))}:function(a,b){if(b)while(b=b.parentNode)if(b===a)return!0;return!1},B=b?function(a,b){if(a===b)return l=!0,0;var d=!a.compareDocumentPosition-!b.compareDocumentPosition;return d?d:(d=(a.ownerDocument||a)===(b.ownerDocument||b)?a.compareDocumentPosition(b):1,1&d||!c.sortDetached&&b.compareDocumentPosition(a)===d?a===n||a.ownerDocument===v&&t(v,a)?-1:b===n||b.ownerDocument===v&&t(v,b)?1:k?I(k,a)-I(k,b):0:4&d?-1:1)}:function(a,b){if(a===b)return l=!0,0;var c,d=0,e=a.parentNode,f=b.parentNode,g=[a],h=[b];if(!e||!f)return a===n?-1:b===n?1:e?-1:f?1:k?I(k,a)-I(k,b):0;if(e===f)return la(a,b);c=a;while(c=c.parentNode)g.unshift(c);c=b;while(c=c.parentNode)h.unshift(c);while(g[d]===h[d])d++;return d?la(g[d],h[d]):g[d]===v?-1:h[d]===v?1:0},n):n},ga.matches=function(a,b){return ga(a,null,null,b)},ga.matchesSelector=function(a,b){if((a.ownerDocument||a)!==n&&m(a),b=b.replace(S,"='$1']"),c.matchesSelector&&p&&!A[b+" "]&&(!r||!r.test(b))&&(!q||!q.test(b)))try{var d=s.call(a,b);if(d||c.disconnectedMatch||a.document&&11!==a.document.nodeType)return d}catch(e){}return ga(b,n,null,[a]).length>0},ga.contains=function(a,b){return(a.ownerDocument||a)!==n&&m(a),t(a,b)},ga.attr=function(a,b){(a.ownerDocument||a)!==n&&m(a);var e=d.attrHandle[b.toLowerCase()],f=e&&C.call(d.attrHandle,b.toLowerCase())?e(a,b,!p):void 0;return void 0!==f?f:c.attributes||!p?a.getAttribute(b):(f=a.getAttributeNode(b))&&f.specified?f.value:null},ga.escape=function(a){return(a+"").replace(ba,ca)},ga.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)},ga.uniqueSort=function(a){var b,d=[],e=0,f=0;if(l=!c.detectDuplicates,k=!c.sortStable&&a.slice(0),a.sort(B),l){while(b=a[f++])b===a[f]&&(e=d.push(f));while(e--)a.splice(d[e],1)}return k=null,a},e=ga.getText=function(a){var b,c="",d=0,f=a.nodeType;if(f){if(1===f||9===f||11===f){if("string"==typeof a.textContent)return a.textContent;for(a=a.firstChild;a;a=a.nextSibling)c+=e(a)}else if(3===f||4===f)return a.nodeValue}else while(b=a[d++])c+=e(b);return 
c},d=ga.selectors={cacheLength:50,createPseudo:ia,match:V,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(a){return a[1]=a[1].replace(_,aa),a[3]=(a[3]||a[4]||a[5]||"").replace(_,aa),"~="===a[2]&&(a[3]=" "+a[3]+" "),a.slice(0,4)},CHILD:function(a){return a[1]=a[1].toLowerCase(),"nth"===a[1].slice(0,3)?(a[3]||ga.error(a[0]),a[4]=+(a[4]?a[5]+(a[6]||1):2*("even"===a[3]||"odd"===a[3])),a[5]=+(a[7]+a[8]||"odd"===a[3])):a[3]&&ga.error(a[0]),a},PSEUDO:function(a){var b,c=!a[6]&&a[2];return V.CHILD.test(a[0])?null:(a[3]?a[2]=a[4]||a[5]||"":c&&T.test(c)&&(b=g(c,!0))&&(b=c.indexOf(")",c.length-b)-c.length)&&(a[0]=a[0].slice(0,b),a[2]=c.slice(0,b)),a.slice(0,3))}},filter:{TAG:function(a){var b=a.replace(_,aa).toLowerCase();return"*"===a?function(){return!0}:function(a){return a.nodeName&&a.nodeName.toLowerCase()===b}},CLASS:function(a){var b=y[a+" "];return b||(b=new RegExp("(^|"+K+")"+a+"("+K+"|$)"))&&y(a,function(a){return b.test("string"==typeof a.className&&a.className||"undefined"!=typeof a.getAttribute&&a.getAttribute("class")||"")})},ATTR:function(a,b,c){return function(d){var e=ga.attr(d,a);return null==e?"!="===b:!b||(e+="","="===b?e===c:"!="===b?e!==c:"^="===b?c&&0===e.indexOf(c):"*="===b?c&&e.indexOf(c)>-1:"$="===b?c&&e.slice(-c.length)===c:"~="===b?(" "+e.replace(O," ")+" ").indexOf(c)>-1:"|="===b&&(e===c||e.slice(0,c.length+1)===c+"-"))}},CHILD:function(a,b,c,d,e){var f="nth"!==a.slice(0,3),g="last"!==a.slice(-4),h="of-type"===b;return 1===d&&0===e?function(a){return!!a.parentNode}:function(b,c,i){var j,k,l,m,n,o,p=f!==g?"nextSibling":"previousSibling",q=b.parentNode,r=h&&b.nodeName.toLowerCase(),s=!i&&!h,t=!1;if(q){if(f){while(p){m=b;while(m=m[p])if(h?m.nodeName.toLowerCase()===r:1===m.nodeType)return!1;o=p="only"===a&&!o&&"nextSibling"}return!0}if(o=[g?q.firstChild:q.lastChild],g&&s){m=q,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n&&j[2],m=n&&q.childNodes[n];while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if(1===m.nodeType&&++t&&m===b){k[a]=[w,n,t];break}}else if(s&&(m=b,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n),t===!1)while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if((h?m.nodeName.toLowerCase()===r:1===m.nodeType)&&++t&&(s&&(l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),k[a]=[w,t]),m===b))break;return t-=e,t===d||t%d===0&&t/d>=0}}},PSEUDO:function(a,b){var c,e=d.pseudos[a]||d.setFilters[a.toLowerCase()]||ga.error("unsupported pseudo: "+a);return e[u]?e(b):e.length>1?(c=[a,a,"",b],d.setFilters.hasOwnProperty(a.toLowerCase())?ia(function(a,c){var d,f=e(a,b),g=f.length;while(g--)d=I(a,f[g]),a[d]=!(c[d]=f[g])}):function(a){return e(a,0,c)}):e}},pseudos:{not:ia(function(a){var b=[],c=[],d=h(a.replace(P,"$1"));return d[u]?ia(function(a,b,c,e){var f,g=d(a,null,e,[]),h=a.length;while(h--)(f=g[h])&&(a[h]=!(b[h]=f))}):function(a,e,f){return b[0]=a,d(b,null,f,c),b[0]=null,!c.pop()}}),has:ia(function(a){return function(b){return ga(a,b).length>0}}),contains:ia(function(a){return a=a.replace(_,aa),function(b){return(b.textContent||b.innerText||e(b)).indexOf(a)>-1}}),lang:ia(function(a){return U.test(a||"")||ga.error("unsupported lang: "+a),a=a.replace(_,aa).toLowerCase(),function(b){var c;do if(c=p?b.lang:b.getAttribute("xml:lang")||b.getAttribute("lang"))return c=c.toLowerCase(),c===a||0===c.indexOf(a+"-");while((b=b.parentNode)&&1===b.nodeType);return!1}}),target:function(b){var 
c=a.location&&a.location.hash;return c&&c.slice(1)===b.id},root:function(a){return a===o},focus:function(a){return a===n.activeElement&&(!n.hasFocus||n.hasFocus())&&!!(a.type||a.href||~a.tabIndex)},enabled:oa(!1),disabled:oa(!0),checked:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&!!a.checked||"option"===b&&!!a.selected},selected:function(a){return a.parentNode&&a.parentNode.selectedIndex,a.selected===!0},empty:function(a){for(a=a.firstChild;a;a=a.nextSibling)if(a.nodeType<6)return!1;return!0},parent:function(a){return!d.pseudos.empty(a)},header:function(a){return X.test(a.nodeName)},input:function(a){return W.test(a.nodeName)},button:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&"button"===a.type||"button"===b},text:function(a){var b;return"input"===a.nodeName.toLowerCase()&&"text"===a.type&&(null==(b=a.getAttribute("type"))||"text"===b.toLowerCase())},first:pa(function(){return[0]}),last:pa(function(a,b){return[b-1]}),eq:pa(function(a,b,c){return[c<0?c+b:c]}),even:pa(function(a,b){for(var c=0;c=0;)a.push(d);return a}),gt:pa(function(a,b,c){for(var d=c<0?c+b:c;++d1?function(b,c,d){var e=a.length;while(e--)if(!a[e](b,c,d))return!1;return!0}:a[0]}function va(a,b,c){for(var d=0,e=b.length;d-1&&(f[j]=!(g[j]=l))}}else r=wa(r===g?r.splice(o,r.length):r),e?e(null,g,r,i):G.apply(g,r)})}function ya(a){for(var b,c,e,f=a.length,g=d.relative[a[0].type],h=g||d.relative[" "],i=g?1:0,k=ta(function(a){return a===b},h,!0),l=ta(function(a){return I(b,a)>-1},h,!0),m=[function(a,c,d){var e=!g&&(d||c!==j)||((b=c).nodeType?k(a,c,d):l(a,c,d));return b=null,e}];i1&&ua(m),i>1&&sa(a.slice(0,i-1).concat({value:" "===a[i-2].type?"*":""})).replace(P,"$1"),c,i0,e=a.length>0,f=function(f,g,h,i,k){var l,o,q,r=0,s="0",t=f&&[],u=[],v=j,x=f||e&&d.find.TAG("*",k),y=w+=null==v?1:Math.random()||.1,z=x.length;for(k&&(j=g===n||g||k);s!==z&&null!=(l=x[s]);s++){if(e&&l){o=0,g||l.ownerDocument===n||(m(l),h=!p);while(q=a[o++])if(q(l,g||n,h)){i.push(l);break}k&&(w=y)}c&&((l=!q&&l)&&r--,f&&t.push(l))}if(r+=s,c&&s!==r){o=0;while(q=b[o++])q(t,u,g,h);if(f){if(r>0)while(s--)t[s]||u[s]||(u[s]=E.call(i));u=wa(u)}G.apply(i,u),k&&!f&&u.length>0&&r+b.length>1&&ga.uniqueSort(i)}return k&&(w=y,j=v),t};return c?ia(f):f}return h=ga.compile=function(a,b){var c,d=[],e=[],f=A[a+" "];if(!f){b||(b=g(a)),c=b.length;while(c--)f=ya(b[c]),f[u]?d.push(f):e.push(f);f=A(a,za(e,d)),f.selector=a}return f},i=ga.select=function(a,b,c,e){var f,i,j,k,l,m="function"==typeof a&&a,n=!e&&g(a=m.selector||a);if(c=c||[],1===n.length){if(i=n[0]=n[0].slice(0),i.length>2&&"ID"===(j=i[0]).type&&9===b.nodeType&&p&&d.relative[i[1].type]){if(b=(d.find.ID(j.matches[0].replace(_,aa),b)||[])[0],!b)return c;m&&(b=b.parentNode),a=a.slice(i.shift().value.length)}f=V.needsContext.test(a)?0:i.length;while(f--){if(j=i[f],d.relative[k=j.type])break;if((l=d.find[k])&&(e=l(j.matches[0].replace(_,aa),$.test(i[0].type)&&qa(b.parentNode)||b))){if(i.splice(f,1),a=e.length&&sa(i),!a)return G.apply(c,e),c;break}}}return(m||h(a,n))(e,b,!p,c,!b||$.test(a)&&qa(b.parentNode)||b),c},c.sortStable=u.split("").sort(B).join("")===u,c.detectDuplicates=!!l,m(),c.sortDetached=ja(function(a){return 1&a.compareDocumentPosition(n.createElement("fieldset"))}),ja(function(a){return a.innerHTML="","#"===a.firstChild.getAttribute("href")})||ka("type|href|height|width",function(a,b,c){if(!c)return a.getAttribute(b,"type"===b.toLowerCase()?1:2)}),c.attributes&&ja(function(a){return 
a.innerHTML="",a.firstChild.setAttribute("value",""),""===a.firstChild.getAttribute("value")})||ka("value",function(a,b,c){if(!c&&"input"===a.nodeName.toLowerCase())return a.defaultValue}),ja(function(a){return null==a.getAttribute("disabled")})||ka(J,function(a,b,c){var d;if(!c)return a[b]===!0?b.toLowerCase():(d=a.getAttributeNode(b))&&d.specified?d.value:null}),ga}(a);r.find=x,r.expr=x.selectors,r.expr[":"]=r.expr.pseudos,r.uniqueSort=r.unique=x.uniqueSort,r.text=x.getText,r.isXMLDoc=x.isXML,r.contains=x.contains,r.escapeSelector=x.escape;var y=function(a,b,c){var d=[],e=void 0!==c;while((a=a[b])&&9!==a.nodeType)if(1===a.nodeType){if(e&&r(a).is(c))break;d.push(a)}return d},z=function(a,b){for(var c=[];a;a=a.nextSibling)1===a.nodeType&&a!==b&&c.push(a);return c},A=r.expr.match.needsContext;function B(a,b){return a.nodeName&&a.nodeName.toLowerCase()===b.toLowerCase()}var C=/^<([a-z][^\/\0>:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i,D=/^.[^:#\[\.,]*$/;function E(a,b,c){return r.isFunction(b)?r.grep(a,function(a,d){return!!b.call(a,d,a)!==c}):b.nodeType?r.grep(a,function(a){return a===b!==c}):"string"!=typeof b?r.grep(a,function(a){return i.call(b,a)>-1!==c}):D.test(b)?r.filter(b,a,c):(b=r.filter(b,a),r.grep(a,function(a){return i.call(b,a)>-1!==c&&1===a.nodeType}))}r.filter=function(a,b,c){var d=b[0];return c&&(a=":not("+a+")"),1===b.length&&1===d.nodeType?r.find.matchesSelector(d,a)?[d]:[]:r.find.matches(a,r.grep(b,function(a){return 1===a.nodeType}))},r.fn.extend({find:function(a){var b,c,d=this.length,e=this;if("string"!=typeof a)return this.pushStack(r(a).filter(function(){for(b=0;b1?r.uniqueSort(c):c},filter:function(a){return this.pushStack(E(this,a||[],!1))},not:function(a){return this.pushStack(E(this,a||[],!0))},is:function(a){return!!E(this,"string"==typeof a&&A.test(a)?r(a):a||[],!1).length}});var F,G=/^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]+))$/,H=r.fn.init=function(a,b,c){var e,f;if(!a)return this;if(c=c||F,"string"==typeof a){if(e="<"===a[0]&&">"===a[a.length-1]&&a.length>=3?[null,a,null]:G.exec(a),!e||!e[1]&&b)return!b||b.jquery?(b||c).find(a):this.constructor(b).find(a);if(e[1]){if(b=b instanceof r?b[0]:b,r.merge(this,r.parseHTML(e[1],b&&b.nodeType?b.ownerDocument||b:d,!0)),C.test(e[1])&&r.isPlainObject(b))for(e in b)r.isFunction(this[e])?this[e](b[e]):this.attr(e,b[e]);return this}return f=d.getElementById(e[2]),f&&(this[0]=f,this.length=1),this}return a.nodeType?(this[0]=a,this.length=1,this):r.isFunction(a)?void 0!==c.ready?c.ready(a):a(r):r.makeArray(a,this)};H.prototype=r.fn,F=r(d);var I=/^(?:parents|prev(?:Until|All))/,J={children:!0,contents:!0,next:!0,prev:!0};r.fn.extend({has:function(a){var b=r(a,this),c=b.length;return this.filter(function(){for(var a=0;a-1:1===c.nodeType&&r.find.matchesSelector(c,a))){f.push(c);break}return this.pushStack(f.length>1?r.uniqueSort(f):f)},index:function(a){return a?"string"==typeof a?i.call(r(a),this[0]):i.call(this,a.jquery?a[0]:a):this[0]&&this[0].parentNode?this.first().prevAll().length:-1},add:function(a,b){return this.pushStack(r.uniqueSort(r.merge(this.get(),r(a,b))))},addBack:function(a){return this.add(null==a?this.prevObject:this.prevObject.filter(a))}});function K(a,b){while((a=a[b])&&1!==a.nodeType);return a}r.each({parent:function(a){var b=a.parentNode;return b&&11!==b.nodeType?b:null},parents:function(a){return y(a,"parentNode")},parentsUntil:function(a,b,c){return y(a,"parentNode",c)},next:function(a){return K(a,"nextSibling")},prev:function(a){return K(a,"previousSibling")},nextAll:function(a){return 
y(a,"nextSibling")},prevAll:function(a){return y(a,"previousSibling")},nextUntil:function(a,b,c){return y(a,"nextSibling",c)},prevUntil:function(a,b,c){return y(a,"previousSibling",c)},siblings:function(a){return z((a.parentNode||{}).firstChild,a)},children:function(a){return z(a.firstChild)},contents:function(a){return B(a,"iframe")?a.contentDocument:(B(a,"template")&&(a=a.content||a),r.merge([],a.childNodes))}},function(a,b){r.fn[a]=function(c,d){var e=r.map(this,b,c);return"Until"!==a.slice(-5)&&(d=c),d&&"string"==typeof d&&(e=r.filter(d,e)),this.length>1&&(J[a]||r.uniqueSort(e),I.test(a)&&e.reverse()),this.pushStack(e)}});var L=/[^\x20\t\r\n\f]+/g;function M(a){var b={};return r.each(a.match(L)||[],function(a,c){b[c]=!0}),b}r.Callbacks=function(a){a="string"==typeof a?M(a):r.extend({},a);var b,c,d,e,f=[],g=[],h=-1,i=function(){for(e=e||a.once,d=b=!0;g.length;h=-1){c=g.shift();while(++h-1)f.splice(c,1),c<=h&&h--}),this},has:function(a){return a?r.inArray(a,f)>-1:f.length>0},empty:function(){return f&&(f=[]),this},disable:function(){return e=g=[],f=c="",this},disabled:function(){return!f},lock:function(){return e=g=[],c||b||(f=c=""),this},locked:function(){return!!e},fireWith:function(a,c){return e||(c=c||[],c=[a,c.slice?c.slice():c],g.push(c),b||i()),this},fire:function(){return j.fireWith(this,arguments),this},fired:function(){return!!d}};return j};function N(a){return a}function O(a){throw a}function P(a,b,c,d){var e;try{a&&r.isFunction(e=a.promise)?e.call(a).done(b).fail(c):a&&r.isFunction(e=a.then)?e.call(a,b,c):b.apply(void 0,[a].slice(d))}catch(a){c.apply(void 0,[a])}}r.extend({Deferred:function(b){var c=[["notify","progress",r.Callbacks("memory"),r.Callbacks("memory"),2],["resolve","done",r.Callbacks("once memory"),r.Callbacks("once memory"),0,"resolved"],["reject","fail",r.Callbacks("once memory"),r.Callbacks("once memory"),1,"rejected"]],d="pending",e={state:function(){return d},always:function(){return f.done(arguments).fail(arguments),this},"catch":function(a){return e.then(null,a)},pipe:function(){var a=arguments;return r.Deferred(function(b){r.each(c,function(c,d){var e=r.isFunction(a[d[4]])&&a[d[4]];f[d[1]](function(){var a=e&&e.apply(this,arguments);a&&r.isFunction(a.promise)?a.promise().progress(b.notify).done(b.resolve).fail(b.reject):b[d[0]+"With"](this,e?[a]:arguments)})}),a=null}).promise()},then:function(b,d,e){var f=0;function g(b,c,d,e){return function(){var h=this,i=arguments,j=function(){var a,j;if(!(b=f&&(d!==O&&(h=void 0,i=[a]),c.rejectWith(h,i))}};b?k():(r.Deferred.getStackHook&&(k.stackTrace=r.Deferred.getStackHook()),a.setTimeout(k))}}return r.Deferred(function(a){c[0][3].add(g(0,a,r.isFunction(e)?e:N,a.notifyWith)),c[1][3].add(g(0,a,r.isFunction(b)?b:N)),c[2][3].add(g(0,a,r.isFunction(d)?d:O))}).promise()},promise:function(a){return null!=a?r.extend(a,e):e}},f={};return r.each(c,function(a,b){var g=b[2],h=b[5];e[b[1]]=g.add,h&&g.add(function(){d=h},c[3-a][2].disable,c[0][2].lock),g.add(b[3].fire),f[b[0]]=function(){return f[b[0]+"With"](this===f?void 0:this,arguments),this},f[b[0]+"With"]=g.fireWith}),e.promise(f),b&&b.call(f,f),f},when:function(a){var b=arguments.length,c=b,d=Array(c),e=f.call(arguments),g=r.Deferred(),h=function(a){return function(c){d[a]=this,e[a]=arguments.length>1?f.call(arguments):c,--b||g.resolveWith(d,e)}};if(b<=1&&(P(a,g.done(h(c)).resolve,g.reject,!b),"pending"===g.state()||r.isFunction(e[c]&&e[c].then)))return g.then();while(c--)P(e[c],h(c),g.reject);return g.promise()}});var 
Q=/^(Eval|Internal|Range|Reference|Syntax|Type|URI)Error$/;r.Deferred.exceptionHook=function(b,c){a.console&&a.console.warn&&b&&Q.test(b.name)&&a.console.warn("jQuery.Deferred exception: "+b.message,b.stack,c)},r.readyException=function(b){a.setTimeout(function(){throw b})};var R=r.Deferred();r.fn.ready=function(a){return R.then(a)["catch"](function(a){r.readyException(a)}),this},r.extend({isReady:!1,readyWait:1,ready:function(a){(a===!0?--r.readyWait:r.isReady)||(r.isReady=!0,a!==!0&&--r.readyWait>0||R.resolveWith(d,[r]))}}),r.ready.then=R.then;function S(){d.removeEventListener("DOMContentLoaded",S), -a.removeEventListener("load",S),r.ready()}"complete"===d.readyState||"loading"!==d.readyState&&!d.documentElement.doScroll?a.setTimeout(r.ready):(d.addEventListener("DOMContentLoaded",S),a.addEventListener("load",S));var T=function(a,b,c,d,e,f,g){var h=0,i=a.length,j=null==c;if("object"===r.type(c)){e=!0;for(h in c)T(a,b,h,c[h],!0,f,g)}else if(void 0!==d&&(e=!0,r.isFunction(d)||(g=!0),j&&(g?(b.call(a,d),b=null):(j=b,b=function(a,b,c){return j.call(r(a),c)})),b))for(;h1,null,!0)},removeData:function(a){return this.each(function(){X.remove(this,a)})}}),r.extend({queue:function(a,b,c){var d;if(a)return b=(b||"fx")+"queue",d=W.get(a,b),c&&(!d||Array.isArray(c)?d=W.access(a,b,r.makeArray(c)):d.push(c)),d||[]},dequeue:function(a,b){b=b||"fx";var c=r.queue(a,b),d=c.length,e=c.shift(),f=r._queueHooks(a,b),g=function(){r.dequeue(a,b)};"inprogress"===e&&(e=c.shift(),d--),e&&("fx"===b&&c.unshift("inprogress"),delete f.stop,e.call(a,g,f)),!d&&f&&f.empty.fire()},_queueHooks:function(a,b){var c=b+"queueHooks";return W.get(a,c)||W.access(a,c,{empty:r.Callbacks("once memory").add(function(){W.remove(a,[b+"queue",c])})})}}),r.fn.extend({queue:function(a,b){var c=2;return"string"!=typeof a&&(b=a,a="fx",c--),arguments.length\x20\t\r\n\f]+)/i,la=/^$|\/(?:java|ecma)script/i,ma={option:[1,""],thead:[1,"","
"],col:[2,"","
"],tr:[2,"","
"],td:[3,"","
"],_default:[0,"",""]};ma.optgroup=ma.option,ma.tbody=ma.tfoot=ma.colgroup=ma.caption=ma.thead,ma.th=ma.td;function na(a,b){var c;return c="undefined"!=typeof a.getElementsByTagName?a.getElementsByTagName(b||"*"):"undefined"!=typeof a.querySelectorAll?a.querySelectorAll(b||"*"):[],void 0===b||b&&B(a,b)?r.merge([a],c):c}function oa(a,b){for(var c=0,d=a.length;c-1)e&&e.push(f);else if(j=r.contains(f.ownerDocument,f),g=na(l.appendChild(f),"script"),j&&oa(g),c){k=0;while(f=g[k++])la.test(f.type||"")&&c.push(f)}return l}!function(){var a=d.createDocumentFragment(),b=a.appendChild(d.createElement("div")),c=d.createElement("input");c.setAttribute("type","radio"),c.setAttribute("checked","checked"),c.setAttribute("name","t"),b.appendChild(c),o.checkClone=b.cloneNode(!0).cloneNode(!0).lastChild.checked,b.innerHTML="",o.noCloneChecked=!!b.cloneNode(!0).lastChild.defaultValue}();var ra=d.documentElement,sa=/^key/,ta=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,ua=/^([^.]*)(?:\.(.+)|)/;function va(){return!0}function wa(){return!1}function xa(){try{return d.activeElement}catch(a){}}function ya(a,b,c,d,e,f){var g,h;if("object"==typeof b){"string"!=typeof c&&(d=d||c,c=void 0);for(h in b)ya(a,h,c,d,b[h],f);return a}if(null==d&&null==e?(e=c,d=c=void 0):null==e&&("string"==typeof c?(e=d,d=void 0):(e=d,d=c,c=void 0)),e===!1)e=wa;else if(!e)return a;return 1===f&&(g=e,e=function(a){return r().off(a),g.apply(this,arguments)},e.guid=g.guid||(g.guid=r.guid++)),a.each(function(){r.event.add(this,b,e,d,c)})}r.event={global:{},add:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,n,o,p,q=W.get(a);if(q){c.handler&&(f=c,c=f.handler,e=f.selector),e&&r.find.matchesSelector(ra,e),c.guid||(c.guid=r.guid++),(i=q.events)||(i=q.events={}),(g=q.handle)||(g=q.handle=function(b){return"undefined"!=typeof r&&r.event.triggered!==b.type?r.event.dispatch.apply(a,arguments):void 0}),b=(b||"").match(L)||[""],j=b.length;while(j--)h=ua.exec(b[j])||[],n=p=h[1],o=(h[2]||"").split(".").sort(),n&&(l=r.event.special[n]||{},n=(e?l.delegateType:l.bindType)||n,l=r.event.special[n]||{},k=r.extend({type:n,origType:p,data:d,handler:c,guid:c.guid,selector:e,needsContext:e&&r.expr.match.needsContext.test(e),namespace:o.join(".")},f),(m=i[n])||(m=i[n]=[],m.delegateCount=0,l.setup&&l.setup.call(a,d,o,g)!==!1||a.addEventListener&&a.addEventListener(n,g)),l.add&&(l.add.call(a,k),k.handler.guid||(k.handler.guid=c.guid)),e?m.splice(m.delegateCount++,0,k):m.push(k),r.event.global[n]=!0)}},remove:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,n,o,p,q=W.hasData(a)&&W.get(a);if(q&&(i=q.events)){b=(b||"").match(L)||[""],j=b.length;while(j--)if(h=ua.exec(b[j])||[],n=p=h[1],o=(h[2]||"").split(".").sort(),n){l=r.event.special[n]||{},n=(d?l.delegateType:l.bindType)||n,m=i[n]||[],h=h[2]&&new RegExp("(^|\\.)"+o.join("\\.(?:.*\\.|)")+"(\\.|$)"),g=f=m.length;while(f--)k=m[f],!e&&p!==k.origType||c&&c.guid!==k.guid||h&&!h.test(k.namespace)||d&&d!==k.selector&&("**"!==d||!k.selector)||(m.splice(f,1),k.selector&&m.delegateCount--,l.remove&&l.remove.call(a,k));g&&!m.length&&(l.teardown&&l.teardown.call(a,o,q.handle)!==!1||r.removeEvent(a,n,q.handle),delete i[n])}else for(n in i)r.event.remove(a,n+b[j],c,d,!0);r.isEmptyObject(i)&&W.remove(a,"handle events")}},dispatch:function(a){var b=r.event.fix(a),c,d,e,f,g,h,i=new 
Array(arguments.length),j=(W.get(this,"events")||{})[b.type]||[],k=r.event.special[b.type]||{};for(i[0]=b,c=1;c=1))for(;j!==this;j=j.parentNode||this)if(1===j.nodeType&&("click"!==a.type||j.disabled!==!0)){for(f=[],g={},c=0;c-1:r.find(e,this,null,[j]).length),g[e]&&f.push(d);f.length&&h.push({elem:j,handlers:f})}return j=this,i\x20\t\r\n\f]*)[^>]*)\/>/gi,Aa=/\s*$/g;function Ea(a,b){return B(a,"table")&&B(11!==b.nodeType?b:b.firstChild,"tr")?r(">tbody",a)[0]||a:a}function Fa(a){return a.type=(null!==a.getAttribute("type"))+"/"+a.type,a}function Ga(a){var b=Ca.exec(a.type);return b?a.type=b[1]:a.removeAttribute("type"),a}function Ha(a,b){var c,d,e,f,g,h,i,j;if(1===b.nodeType){if(W.hasData(a)&&(f=W.access(a),g=W.set(b,f),j=f.events)){delete g.handle,g.events={};for(e in j)for(c=0,d=j[e].length;c1&&"string"==typeof q&&!o.checkClone&&Ba.test(q))return a.each(function(e){var f=a.eq(e);s&&(b[0]=q.call(this,e,f.html())),Ja(f,b,c,d)});if(m&&(e=qa(b,a[0].ownerDocument,!1,a,d),f=e.firstChild,1===e.childNodes.length&&(e=f),f||d)){for(h=r.map(na(e,"script"),Fa),i=h.length;l")},clone:function(a,b,c){var d,e,f,g,h=a.cloneNode(!0),i=r.contains(a.ownerDocument,a);if(!(o.noCloneChecked||1!==a.nodeType&&11!==a.nodeType||r.isXMLDoc(a)))for(g=na(h),f=na(a),d=0,e=f.length;d0&&oa(g,!i&&na(a,"script")),h},cleanData:function(a){for(var b,c,d,e=r.event.special,f=0;void 0!==(c=a[f]);f++)if(U(c)){if(b=c[W.expando]){if(b.events)for(d in b.events)e[d]?r.event.remove(c,d):r.removeEvent(c,d,b.handle);c[W.expando]=void 0}c[X.expando]&&(c[X.expando]=void 0)}}}),r.fn.extend({detach:function(a){return Ka(this,a,!0)},remove:function(a){return Ka(this,a)},text:function(a){return T(this,function(a){return void 0===a?r.text(this):this.empty().each(function(){1!==this.nodeType&&11!==this.nodeType&&9!==this.nodeType||(this.textContent=a)})},null,a,arguments.length)},append:function(){return Ja(this,arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=Ea(this,a);b.appendChild(a)}})},prepend:function(){return Ja(this,arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=Ea(this,a);b.insertBefore(a,b.firstChild)}})},before:function(){return Ja(this,arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this)})},after:function(){return Ja(this,arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this.nextSibling)})},empty:function(){for(var a,b=0;null!=(a=this[b]);b++)1===a.nodeType&&(r.cleanData(na(a,!1)),a.textContent="");return this},clone:function(a,b){return a=null!=a&&a,b=null==b?a:b,this.map(function(){return r.clone(this,a,b)})},html:function(a){return T(this,function(a){var b=this[0]||{},c=0,d=this.length;if(void 0===a&&1===b.nodeType)return b.innerHTML;if("string"==typeof a&&!Aa.test(a)&&!ma[(ka.exec(a)||["",""])[1].toLowerCase()]){a=r.htmlPrefilter(a);try{for(;c1)}});function _a(a,b,c,d,e){return new _a.prototype.init(a,b,c,d,e)}r.Tween=_a,_a.prototype={constructor:_a,init:function(a,b,c,d,e,f){this.elem=a,this.prop=c,this.easing=e||r.easing._default,this.options=b,this.start=this.now=this.cur(),this.end=d,this.unit=f||(r.cssNumber[c]?"":"px")},cur:function(){var a=_a.propHooks[this.prop];return a&&a.get?a.get(this):_a.propHooks._default.get(this)},run:function(a){var b,c=_a.propHooks[this.prop];return 
this.options.duration?this.pos=b=r.easing[this.easing](a,this.options.duration*a,0,1,this.options.duration):this.pos=b=a,this.now=(this.end-this.start)*b+this.start,this.options.step&&this.options.step.call(this.elem,this.now,this),c&&c.set?c.set(this):_a.propHooks._default.set(this),this}},_a.prototype.init.prototype=_a.prototype,_a.propHooks={_default:{get:function(a){var b;return 1!==a.elem.nodeType||null!=a.elem[a.prop]&&null==a.elem.style[a.prop]?a.elem[a.prop]:(b=r.css(a.elem,a.prop,""),b&&"auto"!==b?b:0)},set:function(a){r.fx.step[a.prop]?r.fx.step[a.prop](a):1!==a.elem.nodeType||null==a.elem.style[r.cssProps[a.prop]]&&!r.cssHooks[a.prop]?a.elem[a.prop]=a.now:r.style(a.elem,a.prop,a.now+a.unit)}}},_a.propHooks.scrollTop=_a.propHooks.scrollLeft={set:function(a){a.elem.nodeType&&a.elem.parentNode&&(a.elem[a.prop]=a.now)}},r.easing={linear:function(a){return a},swing:function(a){return.5-Math.cos(a*Math.PI)/2},_default:"swing"},r.fx=_a.prototype.init,r.fx.step={};var ab,bb,cb=/^(?:toggle|show|hide)$/,db=/queueHooks$/;function eb(){bb&&(d.hidden===!1&&a.requestAnimationFrame?a.requestAnimationFrame(eb):a.setTimeout(eb,r.fx.interval),r.fx.tick())}function fb(){return a.setTimeout(function(){ab=void 0}),ab=r.now()}function gb(a,b){var c,d=0,e={height:a};for(b=b?1:0;d<4;d+=2-b)c=ca[d],e["margin"+c]=e["padding"+c]=a;return b&&(e.opacity=e.width=a),e}function hb(a,b,c){for(var d,e=(kb.tweeners[b]||[]).concat(kb.tweeners["*"]),f=0,g=e.length;f1)},removeAttr:function(a){return this.each(function(){r.removeAttr(this,a)})}}),r.extend({attr:function(a,b,c){var d,e,f=a.nodeType;if(3!==f&&8!==f&&2!==f)return"undefined"==typeof a.getAttribute?r.prop(a,b,c):(1===f&&r.isXMLDoc(a)||(e=r.attrHooks[b.toLowerCase()]||(r.expr.match.bool.test(b)?lb:void 0)),void 0!==c?null===c?void r.removeAttr(a,b):e&&"set"in e&&void 0!==(d=e.set(a,c,b))?d:(a.setAttribute(b,c+""),c):e&&"get"in e&&null!==(d=e.get(a,b))?d:(d=r.find.attr(a,b), -null==d?void 0:d))},attrHooks:{type:{set:function(a,b){if(!o.radioValue&&"radio"===b&&B(a,"input")){var c=a.value;return a.setAttribute("type",b),c&&(a.value=c),b}}}},removeAttr:function(a,b){var c,d=0,e=b&&b.match(L);if(e&&1===a.nodeType)while(c=e[d++])a.removeAttribute(c)}}),lb={set:function(a,b,c){return b===!1?r.removeAttr(a,c):a.setAttribute(c,c),c}},r.each(r.expr.match.bool.source.match(/\w+/g),function(a,b){var c=mb[b]||r.find.attr;mb[b]=function(a,b,d){var e,f,g=b.toLowerCase();return d||(f=mb[g],mb[g]=e,e=null!=c(a,b,d)?g:null,mb[g]=f),e}});var nb=/^(?:input|select|textarea|button)$/i,ob=/^(?:a|area)$/i;r.fn.extend({prop:function(a,b){return T(this,r.prop,a,b,arguments.length>1)},removeProp:function(a){return this.each(function(){delete this[r.propFix[a]||a]})}}),r.extend({prop:function(a,b,c){var d,e,f=a.nodeType;if(3!==f&&8!==f&&2!==f)return 1===f&&r.isXMLDoc(a)||(b=r.propFix[b]||b,e=r.propHooks[b]),void 0!==c?e&&"set"in e&&void 0!==(d=e.set(a,c,b))?d:a[b]=c:e&&"get"in e&&null!==(d=e.get(a,b))?d:a[b]},propHooks:{tabIndex:{get:function(a){var b=r.find.attr(a,"tabindex");return b?parseInt(b,10):nb.test(a.nodeName)||ob.test(a.nodeName)&&a.href?0:-1}}},propFix:{"for":"htmlFor","class":"className"}}),o.optSelected||(r.propHooks.selected={get:function(a){var b=a.parentNode;return b&&b.parentNode&&b.parentNode.selectedIndex,null},set:function(a){var 
b=a.parentNode;b&&(b.selectedIndex,b.parentNode&&b.parentNode.selectedIndex)}}),r.each(["tabIndex","readOnly","maxLength","cellSpacing","cellPadding","rowSpan","colSpan","useMap","frameBorder","contentEditable"],function(){r.propFix[this.toLowerCase()]=this});function pb(a){var b=a.match(L)||[];return b.join(" ")}function qb(a){return a.getAttribute&&a.getAttribute("class")||""}r.fn.extend({addClass:function(a){var b,c,d,e,f,g,h,i=0;if(r.isFunction(a))return this.each(function(b){r(this).addClass(a.call(this,b,qb(this)))});if("string"==typeof a&&a){b=a.match(L)||[];while(c=this[i++])if(e=qb(c),d=1===c.nodeType&&" "+pb(e)+" "){g=0;while(f=b[g++])d.indexOf(" "+f+" ")<0&&(d+=f+" ");h=pb(d),e!==h&&c.setAttribute("class",h)}}return this},removeClass:function(a){var b,c,d,e,f,g,h,i=0;if(r.isFunction(a))return this.each(function(b){r(this).removeClass(a.call(this,b,qb(this)))});if(!arguments.length)return this.attr("class","");if("string"==typeof a&&a){b=a.match(L)||[];while(c=this[i++])if(e=qb(c),d=1===c.nodeType&&" "+pb(e)+" "){g=0;while(f=b[g++])while(d.indexOf(" "+f+" ")>-1)d=d.replace(" "+f+" "," ");h=pb(d),e!==h&&c.setAttribute("class",h)}}return this},toggleClass:function(a,b){var c=typeof a;return"boolean"==typeof b&&"string"===c?b?this.addClass(a):this.removeClass(a):r.isFunction(a)?this.each(function(c){r(this).toggleClass(a.call(this,c,qb(this),b),b)}):this.each(function(){var b,d,e,f;if("string"===c){d=0,e=r(this),f=a.match(L)||[];while(b=f[d++])e.hasClass(b)?e.removeClass(b):e.addClass(b)}else void 0!==a&&"boolean"!==c||(b=qb(this),b&&W.set(this,"__className__",b),this.setAttribute&&this.setAttribute("class",b||a===!1?"":W.get(this,"__className__")||""))})},hasClass:function(a){var b,c,d=0;b=" "+a+" ";while(c=this[d++])if(1===c.nodeType&&(" "+pb(qb(c))+" ").indexOf(b)>-1)return!0;return!1}});var rb=/\r/g;r.fn.extend({val:function(a){var b,c,d,e=this[0];{if(arguments.length)return d=r.isFunction(a),this.each(function(c){var e;1===this.nodeType&&(e=d?a.call(this,c,r(this).val()):a,null==e?e="":"number"==typeof e?e+="":Array.isArray(e)&&(e=r.map(e,function(a){return null==a?"":a+""})),b=r.valHooks[this.type]||r.valHooks[this.nodeName.toLowerCase()],b&&"set"in b&&void 0!==b.set(this,e,"value")||(this.value=e))});if(e)return b=r.valHooks[e.type]||r.valHooks[e.nodeName.toLowerCase()],b&&"get"in b&&void 0!==(c=b.get(e,"value"))?c:(c=e.value,"string"==typeof c?c.replace(rb,""):null==c?"":c)}}}),r.extend({valHooks:{option:{get:function(a){var b=r.find.attr(a,"value");return null!=b?b:pb(r.text(a))}},select:{get:function(a){var b,c,d,e=a.options,f=a.selectedIndex,g="select-one"===a.type,h=g?null:[],i=g?f+1:e.length;for(d=f<0?i:g?f:0;d-1)&&(c=!0);return c||(a.selectedIndex=-1),f}}}}),r.each(["radio","checkbox"],function(){r.valHooks[this]={set:function(a,b){if(Array.isArray(b))return a.checked=r.inArray(r(a).val(),b)>-1}},o.checkOn||(r.valHooks[this].get=function(a){return null===a.getAttribute("value")?"on":a.value})});var sb=/^(?:focusinfocus|focusoutblur)$/;r.extend(r.event,{trigger:function(b,c,e,f){var g,h,i,j,k,m,n,o=[e||d],p=l.call(b,"type")?b.type:b,q=l.call(b,"namespace")?b.namespace.split("."):[];if(h=i=e=e||d,3!==e.nodeType&&8!==e.nodeType&&!sb.test(p+r.event.triggered)&&(p.indexOf(".")>-1&&(q=p.split("."),p=q.shift(),q.sort()),k=p.indexOf(":")<0&&"on"+p,b=b[r.expando]?b:new r.Event(p,"object"==typeof b&&b),b.isTrigger=f?2:3,b.namespace=q.join("."),b.rnamespace=b.namespace?new RegExp("(^|\\.)"+q.join("\\.(?:.*\\.|)")+"(\\.|$)"):null,b.result=void 
-[WeNet web demo page (deleted): bundled minified jQuery and page markup; only the page text is recoverable and is summarized below.]
-WeNet Introduction: WeNet is the first production-oriented, full-stack, open-source end-to-end speech recognition solution, providing one-stop model training, model inference, and cloud-side and device-side model deployment.
-Product Demo: enter a WebSocket URL and start recognition; recognition stops automatically after a countdown and the recognized text is displayed on the page.
-Core Features: a product-first, production-ready design (models trained with WeNet deploy seamlessly to production, with product-grade support for long-form audio, endpoint detection, timestamps, and language models); a unified model for both low-latency streaming and high-accuracy non-streaming recognition; integrated cloud-side and device-side solutions.
-Contact Us: follow the WeChat official account by scanning the QR code.
- - - - \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/CMakeLists.txt deleted file mode 100644 index 67447c42d977f120fc39cdab0d052b011edd3efe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library(websocket STATIC - websocket_client.cc - websocket_server.cc -) -target_link_libraries(websocket PUBLIC decoder) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_client.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_client.cc deleted file mode 100644 index c0394e6250153e2d59636c9eab62badc4a737d16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_client.cc +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "websocket/websocket_client.h" - -#include "boost/json/src.hpp" - -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from -namespace json = boost::json; - -WebSocketClient::WebSocketClient(const std::string& hostname, int port) - : hostname_(hostname), port_(port) { - Connect(); - t_.reset(new std::thread(&WebSocketClient::ReadLoopFunc, this)); -} - -void WebSocketClient::Connect() { - tcp::resolver resolver{ioc_}; - // Look up the domain name - auto const results = resolver.resolve(hostname_, std::to_string(port_)); - // Make the connection on the IP address we get from a lookup - auto ep = asio::connect(ws_.next_layer(), results); - // Provide the value of the Host HTTP header during the WebSocket handshake. 
- // See https://tools.ietf.org/html/rfc7230#section-5.4 - std::string host = hostname_ + ":" + std::to_string(ep.port()); - // Perform the websocket handshake - ws_.handshake(host, "/"); -} - -void WebSocketClient::SendTextData(const std::string& data) { - ws_.text(true); - ws_.write(asio::buffer(data)); -} - -void WebSocketClient::SendBinaryData(const void* data, size_t size) { - ws_.binary(true); - ws_.write(asio::buffer(data, size)); -} - -void WebSocketClient::Close() { ws_.close(websocket::close_code::normal); } - -void WebSocketClient::ReadLoopFunc() { - try { - while (true) { - beast::flat_buffer buffer; - ws_.read(buffer); - std::string message = beast::buffers_to_string(buffer.data()); - LOG(INFO) << message; - CHECK(ws_.got_text()); - json::object obj = json::parse(message).as_object(); - if (obj["status"] != "ok") { - break; - } - if (obj["type"] == "speech_end") { - done_ = true; - break; - } - } - } catch (beast::system_error const& se) { - // This indicates that the session was closed - if (se.code() != websocket::error::closed) { - LOG(ERROR) << se.code().message(); - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void WebSocketClient::Join() { t_->join(); } - -void WebSocketClient::SendStartSignal() { - // TODO(Binbin Zhang): Add sample rate and other setting support - json::value start_tag = {{"signal", "start"}, - {"nbest", nbest_}, - {"continuous_decoding", continuous_decoding_}}; - std::string start_message = json::serialize(start_tag); - this->SendTextData(start_message); -} - -void WebSocketClient::SendEndSignal() { - json::value end_tag = {{"signal", "end"}}; - std::string end_message = json::serialize(end_tag); - this->SendTextData(end_message); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_client.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_client.h deleted file mode 100644 index 76ec3aa451d31c7ee6b158ce21c8acdc10575eb3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_client.h +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef WEBSOCKET_WEBSOCKET_CLIENT_H_ -#define WEBSOCKET_WEBSOCKET_CLIENT_H_ - -#include -#include -#include -#include - -#include "boost/asio/connect.hpp" -#include "boost/asio/ip/tcp.hpp" -#include "boost/beast/core.hpp" -#include "boost/beast/websocket.hpp" - -#include "utils/utils.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from - -class WebSocketClient { - public: - WebSocketClient(const std::string& host, int port); - - void SendTextData(const std::string& data); - void SendBinaryData(const void* data, size_t size); - void ReadLoopFunc(); - void Close(); - void Join(); - void SendStartSignal(); - void SendEndSignal(); - void set_nbest(int nbest) { nbest_ = nbest; } - void set_continuous_decoding(bool continuous_decoding) { - continuous_decoding_ = continuous_decoding; - } - bool done() const { return done_; } - - private: - void Connect(); - std::string hostname_; - int port_; - int nbest_ = 1; - bool continuous_decoding_ = false; - bool done_ = false; - asio::io_context ioc_; - websocket::stream ws_{ioc_}; - std::unique_ptr t_{nullptr}; - - WENET_DISALLOW_COPY_AND_ASSIGN(WebSocketClient); -}; - -} // namespace wenet - -#endif // WEBSOCKET_WEBSOCKET_CLIENT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_server.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_server.cc deleted file mode 100644 index 52ab088f46d59b9f3f1add1e34d3aceae290f5da..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_server.cc +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "websocket/websocket_server.h" - -#include -#include -#include - -#include "boost/json/src.hpp" -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from -namespace json = boost::json; - -ConnectionHandler::ConnectionHandler( - tcp::socket&& socket, std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : ws_(std::move(socket)), - feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - -void ConnectionHandler::OnSpeechStart() { - LOG(INFO) << "Received speech start signal, start reading speech"; - got_start_tag_ = true; - json::value rv = {{"status", "ok"}, {"type", "server_ready"}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); - feature_pipeline_ = std::make_shared(*feature_config_); - decoder_ = std::make_shared(feature_pipeline_, decode_resource_, - *decode_config_); - // Start decoder thread - decode_thread_ = - std::make_shared(&ConnectionHandler::DecodeThreadFunc, this); -} - -void ConnectionHandler::OnSpeechEnd() { - LOG(INFO) << "Received speech end signal"; - if (feature_pipeline_ != nullptr) { - feature_pipeline_->set_input_finished(); - } - got_end_tag_ = true; -} - -void ConnectionHandler::OnPartialResult(const std::string& result) { - LOG(INFO) << "Partial result: " << result; - json::value rv = { - {"status", "ok"}, {"type", "partial_result"}, {"nbest", result}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); -} - -void ConnectionHandler::OnFinalResult(const std::string& result) { - LOG(INFO) << "Final result: " << result; - json::value rv = { - {"status", "ok"}, {"type", "final_result"}, {"nbest", result}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); -} - -void ConnectionHandler::OnFinish() { - // Send finish tag - json::value rv = {{"status", "ok"}, {"type", "speech_end"}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); -} - -void ConnectionHandler::OnSpeechData(const beast::flat_buffer& buffer) { - // Read binary PCM data - int num_samples = buffer.size() / sizeof(int16_t); - VLOG(2) << "Received " << num_samples << " samples"; - CHECK(feature_pipeline_ != nullptr); - CHECK(decoder_ != nullptr); - const auto* pcm_data = static_cast(buffer.data().data()); - feature_pipeline_->AcceptWaveform(pcm_data, num_samples); -} - -std::string ConnectionHandler::SerializeResult(bool finish) { - json::array nbest; - for (const DecodeResult& path : decoder_->result()) { - json::object jpath({{"sentence", path.sentence}}); - if (finish) { - json::array word_pieces; - for (const WordPiece& word_piece : path.word_pieces) { - json::object jword_piece({{"word", word_piece.word}, - {"start", word_piece.start}, - {"end", word_piece.end}}); - word_pieces.emplace_back(jword_piece); - } - jpath.emplace("word_pieces", word_pieces); - } - nbest.emplace_back(jpath); - - if (nbest.size() == nbest_) { - break; - } - } - return json::serialize(nbest); -} - -void ConnectionHandler::DecodeThreadFunc() { - try { - while (true) { - DecodeState state = decoder_->Decode(); - if (state == DecodeState::kEndFeats) { - decoder_->Rescoring(); - std::string result = SerializeResult(true); - OnFinalResult(result); - OnFinish(); - stop_recognition_ = true; - break; - } else if (state == 
DecodeState::kEndpoint) { - decoder_->Rescoring(); - std::string result = SerializeResult(true); - OnFinalResult(result); - // If it's not continuous decoding, continue to do next recognition - // otherwise stop the recognition - if (continuous_decoding_) { - decoder_->ResetContinuousDecoding(); - } else { - OnFinish(); - stop_recognition_ = true; - break; - } - } else { - if (decoder_->DecodedSomething()) { - std::string result = SerializeResult(false); - OnPartialResult(result); - } - } - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void ConnectionHandler::OnError(const std::string& message) { - json::value rv = {{"status", "failed"}, {"message", message}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); - // Close websocket - ws_.close(websocket::close_code::normal); -} - -void ConnectionHandler::OnText(const std::string& message) { - json::value v = json::parse(message); - if (v.is_object()) { - json::object obj = v.get_object(); - if (obj.find("signal") != obj.end()) { - json::string signal = obj["signal"].as_string(); - if (signal == "start") { - if (obj.find("nbest") != obj.end()) { - if (obj["nbest"].is_int64()) { - nbest_ = obj["nbest"].as_int64(); - } else { - OnError("integer is expected for nbest option"); - } - } - if (obj.find("continuous_decoding") != obj.end()) { - if (obj["continuous_decoding"].is_bool()) { - continuous_decoding_ = obj["continuous_decoding"].as_bool(); - } else { - OnError( - "boolean true or false is expected for " - "continuous_decoding option"); - } - } - OnSpeechStart(); - } else if (signal == "end") { - OnSpeechEnd(); - } else { - OnError("Unexpected signal type"); - } - } else { - OnError("Wrong message header"); - } - } else { - OnError("Wrong protocol"); - } -} - -void ConnectionHandler::operator()() { - try { - // Accept the websocket handshake - ws_.accept(); - for (;;) { - // This buffer will hold the incoming message - beast::flat_buffer buffer; - // Read a message - ws_.read(buffer); - if (ws_.got_text()) { - std::string message = beast::buffers_to_string(buffer.data()); - LOG(INFO) << message; - OnText(message); - if (got_end_tag_) { - break; - } - } else { - if (!got_start_tag_) { - OnError("Start signal is expected before binary data"); - } else { - if (stop_recognition_) { - break; - } - OnSpeechData(buffer); - } - } - } - - LOG(INFO) << "Read all pcm data, wait for decoding thread"; - if (decode_thread_ != nullptr) { - decode_thread_->join(); - } - } catch (beast::system_error const& se) { - LOG(INFO) << se.code().message(); - // This indicates that the session was closed - if (se.code() == websocket::error::closed) { - OnSpeechEnd(); - } - if (decode_thread_ != nullptr) { - decode_thread_->join(); - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void WebSocketServer::Start() { - try { - auto const address = asio::ip::make_address("0.0.0.0"); - tcp::acceptor acceptor{ioc_, {address, static_cast(port_)}}; - for (;;) { - // This will receive the new connection - tcp::socket socket{ioc_}; - // Block until we get a connection - acceptor.accept(socket); - // Launch the session, transferring ownership of the socket - ConnectionHandler handler(std::move(socket), feature_config_, - decode_config_, decode_resource_); - std::thread t(std::move(handler)); - t.detach(); - } - } catch (const std::exception& e) { - LOG(FATAL) << e.what(); - } -} - -} // namespace wenet diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_server.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_server.h deleted file mode 100644 index a1241834221dcf93c34d6414bd9b5ae40ef1cf38..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/libtorch/websocket/websocket_server.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef WEBSOCKET_WEBSOCKET_SERVER_H_ -#define WEBSOCKET_WEBSOCKET_SERVER_H_ - -#include -#include -#include -#include -#include - -#include "boost/asio/connect.hpp" -#include "boost/asio/ip/tcp.hpp" -#include "boost/beast/core.hpp" -#include "boost/beast/websocket.hpp" - -#include "decoder/asr_decoder.h" -#include "frontend/feature_pipeline.h" -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from - -class ConnectionHandler { - public: - ConnectionHandler(tcp::socket&& socket, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource_); - void operator()(); - - private: - void OnSpeechStart(); - void OnSpeechEnd(); - void OnText(const std::string& message); - void OnFinish(); - void OnSpeechData(const beast::flat_buffer& buffer); - void OnError(const std::string& message); - void OnPartialResult(const std::string& result); - void OnFinalResult(const std::string& result); - void DecodeThreadFunc(); - std::string SerializeResult(bool finish); - - bool continuous_decoding_ = false; - int nbest_ = 1; - websocket::stream ws_; - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - - bool got_start_tag_ = false; - bool got_end_tag_ = false; - // When endpoint is detected, stop recognition, and stop receiving data. 
- bool stop_recognition_ = false; - std::shared_ptr feature_pipeline_ = nullptr; - std::shared_ptr decoder_ = nullptr; - std::shared_ptr decode_thread_ = nullptr; -}; - -class WebSocketServer { - public: - WebSocketServer(int port, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : port_(port), - feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - - void Start(); - - private: - int port_; - // The io_context is required for all I/O - asio::io_context ioc_{1}; - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - WENET_DISALLOW_COPY_AND_ASSIGN(WebSocketServer); -}; - -} // namespace wenet - -#endif // WEBSOCKET_WEBSOCKET_SERVER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/CMakeLists.txt deleted file mode 100644 index 6223e1481e7e98846d9de3535ec510b41c237d48..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/CMakeLists.txt +++ /dev/null @@ -1,81 +0,0 @@ -cmake_minimum_required(VERSION 3.14 FATAL_ERROR) - -project(wenet VERSION 0.1) - -option(CXX11_ABI "whether to use CXX11_ABI libtorch" OFF) -option(GRAPH_TOOLS "whether to build TLG graph tools" OFF) -option(BUILD_TESTING "whether to build unit test" ON) - -option(GRPC "whether to build with gRPC" OFF) -# TODO(Binbin Zhang): Change websocket to OFF since it depends on boost -# which is a very big library -option(WEBSOCKET "whether to build with websocket" ON) -option(HTTP "whether to build with http" OFF) -option(TORCH "whether to build with Torch" ON) -option(ONNX "whether to build with ONNX" OFF) -option(GPU "whether to build with GPU" OFF) - -set(CMAKE_VERBOSE_MAKEFILE OFF) - -include(FetchContent) -set(FETCHCONTENT_QUIET OFF) -get_filename_component(fc_base "fc_base" REALPATH BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -set(FETCHCONTENT_BASE_DIR ${fc_base}) - -list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) - -if(NOT MSVC) - # Keep the same with openfst, -fPIC or -fpic - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -pthread -fPIC") -else() - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) - add_compile_options("$<$:/utf-8>") -endif() - -# Include all dependency -if(TORCH) - include(libtorch) -endif() -if(ONNX) - include(onnx) -endif() -include(openfst) -include_directories( - ${CMAKE_CURRENT_SOURCE_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}/kaldi -) - -# Build all libraries -add_subdirectory(utils) -add_subdirectory(frontend) -add_subdirectory(post_processor) -add_subdirectory(kaldi) # kaldi: wfst based decoder -add_subdirectory(decoder) -add_subdirectory(api) - -# Optionally, you can build with websocket -if(WEBSOCKET) - include(boost) - add_subdirectory(websocket) -endif() - -# Optionally, you can build with gRPC -if(GRPC) - include(grpc) - add_subdirectory(grpc) -endif() - -# Optionally, you can build with http -if(HTTP) - include(boost) - add_subdirectory(http) -endif() - -# Build all bins -add_subdirectory(bin) - -# Unit Test -if(BUILD_TESTING) - include(gtest) - add_subdirectory(test) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/README.md deleted file mode 100644 index 
f9aa1a3d3267a5de21ba255be7c7658070c4e67b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# ONNX backend on WeNet - -* Step 1. Export your experiment model to ONNX by https://github.com/wenet-e2e/wenet/blob/main/wenet/bin/export_onnx_cpu.py - -``` sh -exp=exp # Change it to your experiment dir -onnx_dir=onnx -python -m wenet.bin.export_onnx_cpu \ - --config $exp/train.yaml \ - --checkpoint $exp/final.pt \ - --chunk_size 16 \ - --output_dir $onnx_dir \ - --num_decoding_left_chunks -1 - -# When it finishes, you can find `encoder.onnx`, `ctc.onnx`, and `decoder.onnx` in the $onnx_dir respectively. -``` - -* Step 2. Build. The build requires cmake 3.14 or above. - -``` sh -mkdir build && cd build -cmake -DONNX=ON -DTORCH=OFF -DWEBSOCKET=OFF -DGRPC=OFF .. -cmake --build . -``` - -* Step 3. Testing, the RTF(real time factor) is shown in the console. - -``` sh -export GLOG_logtostderr=1 -export GLOG_v=2 -wav_path=your_test_wav_path -onnx_dir=your_model_dir -units=units.txt # Change it to your model units path -./build/bin/decoder_main \ - --chunk_size 16 \ - --wav_path $wav_path \ - --onnx_dir $onnx_dir \ - --unit_path $units 2>&1 | tee log.txt -``` diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/CMakeLists.txt deleted file mode 100644 index 8d61ca8477f0f0b6128f1effe0a2738494b2620f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -if(TORCH) - add_library(wenet_api SHARED wenet_api.cc) - target_link_libraries(wenet_api PUBLIC decoder) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/README.md deleted file mode 100644 index 5eaa13b977eb4836eb930452f4434dc9f2ea4139..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# WeNet API - -We refer [vosk](https://github.com/alphacep/vosk-api/blob/master/src/vosk_api.h) -for the interface design. - - -We are going to implement the following interfaces: - -- [x] non-streaming recognition -- [] streaming recognition -- [] nbest -- [] contextual biasing word -- [] alignment -- [] language support(post processor) -- [] label check diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/wenet_api.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/wenet_api.cc deleted file mode 100644 index cb1e0c8552e0126e2db274a29075578fe351a25f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/wenet_api.cc +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "api/wenet_api.h" - -#include -#include -#include - -#include "decoder/asr_decoder.h" -#include "decoder/torch_asr_model.h" -#include "post_processor/post_processor.h" -#include "utils/file.h" -#include "utils/json.h" -#include "utils/string.h" - -class Recognizer { - public: - explicit Recognizer(const std::string& model_dir) { - // FeaturePipeline init - feature_config_ = std::make_shared(80, 16000); - feature_pipeline_ = - std::make_shared(*feature_config_); - // Resource init - resource_ = std::make_shared(); - wenet::TorchAsrModel::InitEngineThreads(); - std::string model_path = wenet::JoinPath(model_dir, "final.zip"); - CHECK(wenet::FileExists(model_path)); - - auto model = std::make_shared(); - model->Read(model_path); - resource_->model = model; - - // units.txt: E2E model unit - std::string unit_path = wenet::JoinPath(model_dir, "units.txt"); - CHECK(wenet::FileExists(unit_path)); - resource_->unit_table = std::shared_ptr( - fst::SymbolTable::ReadText(unit_path)); - - std::string fst_path = wenet::JoinPath(model_dir, "TLG.fst"); - if (wenet::FileExists(fst_path)) { // With LM - resource_->fst = std::shared_ptr>( - fst::Fst::Read(fst_path)); - - std::string symbol_path = wenet::JoinPath(model_dir, "words.txt"); - CHECK(wenet::FileExists(symbol_path)); - resource_->symbol_table = std::shared_ptr( - fst::SymbolTable::ReadText(symbol_path)); - } else { // Without LM, symbol_table is the same as unit_table - resource_->symbol_table = resource_->unit_table; - } - - // Context config init - context_config_ = std::make_shared(); - decode_options_ = std::make_shared(); - post_process_opts_ = std::make_shared(); - } - - void Reset() { - if (feature_pipeline_ != nullptr) { - feature_pipeline_->Reset(); - } - if (decoder_ != nullptr) { - decoder_->Reset(); - } - result_.clear(); - } - - void InitDecoder() { - CHECK(decoder_ == nullptr); - // Optional init context graph - if (context_.size() > 0) { - context_config_->context_score = context_score_; - auto context_graph = - std::make_shared(*context_config_); - context_graph->BuildContextGraph(context_, resource_->symbol_table); - resource_->context_graph = context_graph; - } - // PostProcessor - if (language_ == "chs") { // TODO(Binbin Zhang): CJK(chs, jp, kr) - post_process_opts_->language_type = wenet::kMandarinEnglish; - } else { - post_process_opts_->language_type = wenet::kIndoEuropean; - } - resource_->post_processor = - std::make_shared(*post_process_opts_); - // Init decoder - decoder_ = std::make_shared(feature_pipeline_, resource_, - *decode_options_); - } - - void Decode(const char* data, int len, int last) { - using wenet::DecodeState; - // Init decoder when it is called first time - if (decoder_ == nullptr) { - InitDecoder(); - } - // Convert to 16 bits PCM data to float - CHECK_EQ(len % 2, 0); - feature_pipeline_->AcceptWaveform(reinterpret_cast(data), - len / 2); - if (last > 0) { - feature_pipeline_->set_input_finished(); - } - - while (true) { - DecodeState state = decoder_->Decode(false); - if (state == DecodeState::kWaitFeats) { - break; - } else if (state == DecodeState::kEndFeats) { - 
decoder_->Rescoring(); - UpdateResult(true); - break; - } else if (state == DecodeState::kEndpoint && continuous_decoding_) { - decoder_->Rescoring(); - UpdateResult(true); - decoder_->ResetContinuousDecoding(); - } else { // kEndBatch - UpdateResult(false); - } - } - } - - void UpdateResult(bool final_result) { - json::JSON obj; - obj["type"] = final_result ? "final_result" : "partial_result"; - int nbest = final_result ? nbest_ : 1; - obj["nbest"] = json::Array(); - for (int i = 0; i < nbest && i < decoder_->result().size(); i++) { - json::JSON one; - one["sentence"] = decoder_->result()[i].sentence; - if (final_result && enable_timestamp_) { - one["word_pieces"] = json::Array(); - for (const auto& word_piece : decoder_->result()[i].word_pieces) { - json::JSON piece; - piece["word"] = word_piece.word; - piece["start"] = word_piece.start; - piece["end"] = word_piece.end; - one["word_pieces"].append(piece); - } - } - one["sentence"] = decoder_->result()[i].sentence; - obj["nbest"].append(one); - } - result_ = obj.dump(); - } - - const char* GetResult() { return result_.c_str(); } - - void set_nbest(int n) { nbest_ = n; } - void set_enable_timestamp(bool flag) { enable_timestamp_ = flag; } - void AddContext(const char* word) { context_.emplace_back(word); } - void set_context_score(float score) { context_score_ = score; } - void set_language(const char* lang) { language_ = lang; } - void set_continuous_decoding(bool flag) { continuous_decoding_ = flag; } - - private: - // NOTE(Binbin Zhang): All use shared_ptr for clone in the future - std::shared_ptr feature_config_ = nullptr; - std::shared_ptr feature_pipeline_ = nullptr; - std::shared_ptr resource_ = nullptr; - std::shared_ptr decode_options_ = nullptr; - std::shared_ptr decoder_ = nullptr; - std::shared_ptr context_config_ = nullptr; - std::shared_ptr post_process_opts_ = nullptr; - - int nbest_ = 1; - std::string result_; - bool enable_timestamp_ = false; - std::vector context_; - float context_score_; - std::string language_ = "chs"; - bool continuous_decoding_ = false; -}; - -void* wenet_init(const char* model_dir) { - Recognizer* decoder = new Recognizer(model_dir); - return reinterpret_cast(decoder); -} - -void wenet_free(void* decoder) { - delete reinterpret_cast(decoder); -} - -void wenet_reset(void* decoder) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->Reset(); -} - -void wenet_decode(void* decoder, const char* data, int len, int last) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->Decode(data, len, last); -} - -const char* wenet_get_result(void* decoder) { - Recognizer* recognizer = reinterpret_cast(decoder); - return recognizer->GetResult(); -} - -void wenet_set_log_level(int level) { - FLAGS_logtostderr = true; - FLAGS_v = level; -} - -void wenet_set_nbest(void* decoder, int n) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_nbest(n); -} - -void wenet_set_timestamp(void* decoder, int flag) { - Recognizer* recognizer = reinterpret_cast(decoder); - bool enable = flag > 0 ? 
true : false; - recognizer->set_enable_timestamp(enable); -} - -void wenet_add_context(void* decoder, const char* word) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->AddContext(word); -} - -void wenet_set_context_score(void* decoder, float score) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_context_score(score); -} - -void wenet_set_language(void* decoder, const char* lang) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_language(lang); -} - -void wenet_set_continuous_decoding(void* decoder, int flag) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_continuous_decoding(flag > 0); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/wenet_api.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/wenet_api.h deleted file mode 100644 index e839aaa40166a6e50d9aa2ac0e697356bd25b941..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/api/wenet_api.h +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef API_WENET_API_H_ -#define API_WENET_API_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Init decoder from the file and returns the object - * - * @param model_dir: the model dir - * @returns model object or NULL if problem occured - */ -void* wenet_init(const char* model_dir); - -/** Free wenet decoder and corresponding resource - */ -void wenet_free(void* decoder); - -/** Reset decoder for next decoding - */ -void wenet_reset(void* decoder); - -/** Decode the input wav data - * @param data: pcm data, encoded as int16_t(16 bits) - * @param len: data length - * @param last: if it is the last package - */ -void wenet_decode(void* decoder, const char* data, int len, int last); - -/** Get decode result in json format - * It returns partial result when last is 0 - * It returns final result when last is 1 - - { - "nbest" : [{ - "sentence" : "are you okay" - "word_pieces" : [{ - "end" : 960, - "start" : 0, - "word" : "are" - }, { - "end" : 1200, - "start" : 960, - "word" : "you" - }, { - ...}] - }, { - "sentence" : "are you ok" - }], - "type" : "final_result" - } - - "type": final_result/partial_result - "nbest": nbest is enabled when n > 1 in final_result - "sentence": the ASR result - "word_pieces": optional, output timestamp when enabled - */ -const char* wenet_get_result(void* decoder); - -/** Set n-best, range 1~10 - * wenet_get_result will return top-n best results - */ -void wenet_set_nbest(void* decoder, int n); - -/** Whether to enable word level timestamp in results - disable it when flag = 0, otherwise enable - */ -void wenet_set_timestamp(void* decoder, int flag); - -/** Add one contextual biasing - */ -void wenet_add_context(void* decoder, const char* word); - -/** Set contextual biasing bonus score - */ -void wenet_set_context_score(void* decoder, float 
score); - -/** Set language, has effect on the postpocessing - * @param: lang, could be chs/en now - */ -void wenet_set_language(void* decoder, const char* lang); - -/** Set log level - * We use glog in wenet, so the level is the glog level - */ -void wenet_set_log_level(int level); - -/** Enable continous decoding or not - * flag > 0: enable, otherwise disable - */ -void wenet_set_continuous_decoding(void* decoder, int flag); - -#ifdef __cplusplus -} -#endif - -#endif // API_WENET_API_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/CMakeLists.txt deleted file mode 100644 index a117b8bcb580c8738a7ce72f88bc10ff0a450e98..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -add_executable(decoder_main decoder_main.cc) -target_link_libraries(decoder_main PUBLIC decoder) - -add_executable(label_checker_main label_checker_main.cc) -target_link_libraries(label_checker_main PUBLIC decoder) - -# if(TORCH) -# add_executable(api_main api_main.cc) -# target_link_libraries(api_main PUBLIC wenet_api) -# endif() - -if(WEBSOCKET) - add_executable(websocket_client_main websocket_client_main.cc) - target_link_libraries(websocket_client_main PUBLIC websocket) - add_executable(websocket_server_main websocket_server_main.cc) - target_link_libraries(websocket_server_main PUBLIC websocket) -endif() - -if(GRPC) - add_executable(grpc_server_main grpc_server_main.cc) - target_link_libraries(grpc_server_main PUBLIC wenet_grpc) - add_executable(grpc_client_main grpc_client_main.cc) - target_link_libraries(grpc_client_main PUBLIC wenet_grpc) -endif() - -if(HTTP) - add_executable(http_client_main http_client_main.cc) - target_link_libraries(http_client_main PUBLIC http) - add_executable(http_server_main http_server_main.cc) - target_link_libraries(http_server_main PUBLIC http) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/api_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/api_main.cc deleted file mode 100644 index 94b20d52a7b8eee5c39a12af4e1e25324d7d880f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/api_main.cc +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "api/wenet_api.h" -#include "frontend/wav.h" -#include "utils/flags.h" - -DEFINE_string(model_dir, "", "model dir path"); -DEFINE_string(wav_path, "", "single wave path"); -DEFINE_bool(enable_timestamp, false, "enable timestamps"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - wenet_set_log_level(2); - - void* decoder = wenet_init(FLAGS_model_dir.c_str()); - wenet_set_timestamp(decoder, FLAGS_enable_timestamp == true ? 1 : 0); - wenet::WavReader wav_reader(FLAGS_wav_path); - std::vector data(wav_reader.num_samples()); - for (int i = 0; i < wav_reader.num_samples(); i++) { - data[i] = static_cast(*(wav_reader.data() + i)); - } - - for (int i = 0; i < 10; i++) { - // Return the final result when last is 1 - wenet_decode(decoder, reinterpret_cast(data.data()), - data.size() * 2, 1); - const char* result = wenet_get_result(decoder); - LOG(INFO) << i << " " << result; - wenet_reset(decoder); - } - wenet_free(decoder); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/decoder_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/decoder_main.cc deleted file mode 100644 index b8f1dbae6b88390504cc9ce63f33dc9bd54a2d6a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/decoder_main.cc +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include - -#include "decoder/params.h" -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/string.h" -#include "utils/thread_pool.h" -#include "utils/timer.h" -#include "utils/utils.h" - -DEFINE_bool(simulate_streaming, false, "simulate streaming input"); -DEFINE_bool(output_nbest, false, "output n-best of decode result"); -DEFINE_string(wav_path, "", "single wave path"); -DEFINE_string(wav_scp, "", "input wav scp"); -DEFINE_string(result, "", "result output file"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); -DEFINE_int32(thread_num, 1, "num of decode thread"); -DEFINE_int32(warmup, 0, "num of warmup decode, 0 means no warmup"); - -std::shared_ptr g_decode_config; -std::shared_ptr g_feature_config; -std::shared_ptr g_decode_resource; - -std::ofstream g_result; -std::mutex g_mutex; -int g_total_waves_dur = 0; -int g_total_decode_time = 0; - -void decode(std::pair wav, bool warmup = false) { - wenet::WavReader wav_reader(wav.second); - int num_samples = wav_reader.num_samples(); - CHECK_EQ(wav_reader.sample_rate(), FLAGS_sample_rate); - - auto feature_pipeline = - std::make_shared(*g_feature_config); - feature_pipeline->AcceptWaveform(wav_reader.data(), num_samples); - feature_pipeline->set_input_finished(); - LOG(INFO) << "num frames " << feature_pipeline->num_frames(); - - wenet::AsrDecoder decoder(feature_pipeline, g_decode_resource, - *g_decode_config); - - int wave_dur = static_cast(static_cast(num_samples) / - wav_reader.sample_rate() * 1000); - int decode_time = 0; - std::string final_result; - while (true) { - wenet::Timer timer; - wenet::DecodeState state = decoder.Decode(); - if (state == wenet::DecodeState::kEndFeats) { - decoder.Rescoring(); - } - int chunk_decode_time = timer.Elapsed(); - decode_time += chunk_decode_time; - if (decoder.DecodedSomething()) { - LOG(INFO) << "Partial result: " << decoder.result()[0].sentence; - } - - if (FLAGS_continuous_decoding && state == wenet::DecodeState::kEndpoint) { - if (decoder.DecodedSomething()) { - decoder.Rescoring(); - LOG(INFO) << "Final result (continuous decoding): " - << decoder.result()[0].sentence; - final_result.append(decoder.result()[0].sentence); - } - decoder.ResetContinuousDecoding(); - } - - if (state == wenet::DecodeState::kEndFeats) { - break; - } else if (FLAGS_chunk_size > 0 && FLAGS_simulate_streaming) { - float frame_shift_in_ms = - static_cast(g_feature_config->frame_shift) / - wav_reader.sample_rate() * 1000; - auto wait_time = - decoder.num_frames_in_current_chunk() * frame_shift_in_ms - - chunk_decode_time; - if (wait_time > 0) { - LOG(INFO) << "Simulate streaming, waiting for " << wait_time << "ms"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(wait_time))); - } - } - } - if (decoder.DecodedSomething()) { - final_result.append(decoder.result()[0].sentence); - } - LOG(INFO) << wav.first << " Final result: " << final_result << std::endl; - LOG(INFO) << "Decoded " << wave_dur << "ms audio taken " << decode_time - << "ms."; - - if (!warmup) { - g_mutex.lock(); - std::ostream& buffer = FLAGS_result.empty() ? 
std::cout : g_result; - if (!FLAGS_output_nbest) { - buffer << wav.first << " " << final_result << std::endl; - } else { - buffer << "wav " << wav.first << std::endl; - auto& results = decoder.result(); - for (auto& r : results) { - if (r.sentence.empty()) continue; - buffer << "candidate " << r.score << " " << r.sentence << std::endl; - } - } - g_total_waves_dur += wave_dur; - g_total_decode_time += decode_time; - g_mutex.unlock(); - } -} - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - g_decode_config = wenet::InitDecodeOptionsFromFlags(); - g_feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - g_decode_resource = wenet::InitDecodeResourceFromFlags(); - - if (FLAGS_wav_path.empty() && FLAGS_wav_scp.empty()) { - LOG(FATAL) << "Please provide the wave path or the wav scp."; - } - std::vector> waves; - if (!FLAGS_wav_path.empty()) { - waves.emplace_back(make_pair("test", FLAGS_wav_path)); - } else { - std::ifstream wav_scp(FLAGS_wav_scp); - std::string line; - while (getline(wav_scp, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - CHECK_GE(strs.size(), 2); - waves.emplace_back(make_pair(strs[0], strs[1])); - } - - if (waves.empty()) { - LOG(FATAL) << "Please provide non-empty wav scp."; - } - } - - if (!FLAGS_result.empty()) { - g_result.open(FLAGS_result, std::ios::out); - } - - // Warmup - if (FLAGS_warmup > 0) { - LOG(INFO) << "Warming up..."; - { - ThreadPool pool(FLAGS_thread_num); - auto wav = waves[0]; - for (int i = 0; i < FLAGS_warmup; i++) { - pool.enqueue(decode, wav, true); - } - } - LOG(INFO) << "Warmup done."; - } - - { - ThreadPool pool(FLAGS_thread_num); - for (auto& wav : waves) { - pool.enqueue(decode, wav, false); - } - } - - LOG(INFO) << "Total: decoded " << g_total_waves_dur << "ms audio taken " - << g_total_decode_time << "ms."; - LOG(INFO) << "RTF: " << std::setprecision(4) - << static_cast(g_total_decode_time) / g_total_waves_dur; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/grpc_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/grpc_client_main.cc deleted file mode 100644 index f2d226d48d3757c5f095335eff3288f5d227282b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/grpc_client_main.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "frontend/wav.h" -#include "grpc/grpc_client.h" -#include "utils/flags.h" -#include "utils/timer.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of websocket server"); -DEFINE_int32(port, 10086, "port of websocket server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - wenet::GrpcClient client(FLAGS_hostname, FLAGS_port, FLAGS_nbest, - FLAGS_continuous_decoding); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - std::vector pcm_data(wav_reader.data(), - wav_reader.data() + num_samples); - // Send data every 0.5 second - const float interval = 0.5; - const int sample_interval = interval * sample_rate; - for (int start = 0; start < num_samples; start += sample_interval) { - if (client.done()) { - break; - } - int end = std::min(start + sample_interval, num_samples); - // Convert to short - std::vector data; - data.reserve(end - start); - for (int j = start; j < end; j++) { - data.push_back(static_cast(pcm_data[j])); - } - // Send PCM data - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Send " << data.size() << " samples"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(interval * 1000))); - } - wenet::Timer timer; - - client.Join(); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/grpc_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/grpc_server_main.cc deleted file mode 100644 index b00f3cbade1ee70dadfb49829e9ca73fd50c2be2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/grpc_server_main.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include - -#include "decoder/params.h" -#include "grpc/grpc_server.h" -#include "utils/log.h" - -DEFINE_int32(port, 10086, "grpc listening port"); -DEFINE_int32(workers, 4, "grpc num workers"); - -using grpc::Server; -using grpc::ServerBuilder; - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::GrpcServer service(feature_config, decode_config, decode_resource); - grpc::EnableDefaultHealthCheckService(true); - grpc::reflection::InitProtoReflectionServerBuilderPlugin(); - ServerBuilder builder; - std::string address("0.0.0.0:" + std::to_string(FLAGS_port)); - builder.AddListeningPort(address, grpc::InsecureServerCredentials()); - builder.RegisterService(&service); - builder.SetSyncServerOption(ServerBuilder::SyncServerOption::NUM_CQS, - FLAGS_workers); - std::unique_ptr server(builder.BuildAndStart()); - LOG(INFO) << "Listening at port " << FLAGS_port; - server->Wait(); - google::ShutdownGoogleLogging(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/http_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/http_client_main.cc deleted file mode 100644 index b59ee3f5f32bf08552416b183802029ac5d5afa5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/http_client_main.cc +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/timer.h" -#include "http/http_client.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of http server"); -DEFINE_int32(port, 10086, "port of http server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - // Convert to short - std::vector data; - data.reserve(num_samples); - for (int j = 0; j < num_samples; j++) { - data.push_back(static_cast(wav_reader.data()[j])); - } - // Send data - wenet::HttpClient client(FLAGS_hostname, FLAGS_port); - client.set_nbest(FLAGS_nbest); - wenet::Timer timer; - VLOG(2) << "Send " << data.size() << " samples"; - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/http_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/http_server_main.cc deleted file mode 100644 index e30cf2bcdf746c2072f023e90f470ccba5467c2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/http_server_main.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/params.h" -#include "utils/log.h" -#include "http/http_server.h" - -DEFINE_int32(port, 10086, "http listening port"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::HttpServer server(FLAGS_port, feature_config, decode_config, - decode_resource); - LOG(INFO) << "Listening at port " << FLAGS_port; - server.Start(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/label_checker_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/label_checker_main.cc deleted file mode 100644 index e36e3d5c29a38a7ebee80606ebd8e69ae8b1eb96..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/label_checker_main.cc +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include -#include -#include -#include - -#include "decoder/params.h" -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/string.h" - -DEFINE_string(text, "", "kaldi style text input file"); -DEFINE_string(wav_scp, "", "kaldi style wav scp"); -DEFINE_double(is_penalty, 1.0, - "insertion/substitution penalty for align insertion"); -DEFINE_double(del_penalty, 1.0, "deletion penalty for align insertion"); -DEFINE_string(result, "", "result output file"); -DEFINE_string(timestamp, "", "timestamp output file"); - -namespace wenet { - -const char* kDeletion = ""; -// Is: Insertion and substitution -const char* kIsStart = ""; -const char* kIsEnd = ""; - -bool MapToLabel(const std::string& text, - std::shared_ptr symbol_table, - std::vector* labels) { - labels->clear(); - // Split label to char sequence - std::vector chars; - SplitUTF8StringToChars(text, &chars); - for (size_t i = 0; i < chars.size(); i++) { - // ▁ is special symbol for white space - std::string label = chars[i] != " " ? chars[i] : "▁"; - int id = symbol_table->Find(label); - if (id != -1) { // fst::kNoSymbol - // LOG(INFO) << label << " " << id; - labels->push_back(id); - } - } - return true; -} - -std::shared_ptr MakeSymbolTableForFst( - std::shared_ptr isymbol_table) { - LOG(INFO) << isymbol_table; - CHECK(isymbol_table != nullptr); - auto osymbol_table = std::make_shared(); - osymbol_table->AddSymbol("", 0); - CHECK_EQ(isymbol_table->Find(""), 0); - osymbol_table->AddSymbol("", 1); - for (int i = 1; i < isymbol_table->NumSymbols(); i++) { - std::string symbol = isymbol_table->Find(i); - osymbol_table->AddSymbol(symbol, i + 1); - } - osymbol_table->AddSymbol(kDeletion, isymbol_table->NumSymbols() + 1); - osymbol_table->AddSymbol(kIsStart, isymbol_table->NumSymbols() + 2); - osymbol_table->AddSymbol(kIsEnd, isymbol_table->NumSymbols() + 3); - return osymbol_table; -} - -void CompileCtcFst(std::shared_ptr symbol_table, - fst::StdVectorFst* ofst) { - ofst->DeleteStates(); - int start = ofst->AddState(); - ofst->SetStart(start); - CHECK_EQ(symbol_table->Find(""), 0); - CHECK_EQ(symbol_table->Find(""), 1); - ofst->AddArc(start, fst::StdArc(1, 0, 0.0, start)); - // Exclude kDeletion and kInsertion - for (int i = 2; i < symbol_table->NumSymbols() - 3; i++) { - int s = ofst->AddState(); - ofst->AddArc(start, fst::StdArc(i, i, 0.0, s)); - ofst->AddArc(s, fst::StdArc(i, 0, 0.0, s)); - ofst->AddArc(s, fst::StdArc(0, 0, 0.0, start)); - } - ofst->SetFinal(start, fst::StdArc::Weight::One()); - fst::ArcSort(ofst, fst::StdOLabelCompare()); -} - -void CompileAlignFst(std::vector labels, - std::shared_ptr symbol_table, - fst::StdVectorFst* ofst) { - ofst->DeleteStates(); - int deletion = symbol_table->Find(kDeletion); - int insertion_start = symbol_table->Find(kIsStart); - int insertion_end = symbol_table->Find(kIsEnd); - - int start = ofst->AddState(); - ofst->SetStart(start); - // Filler State - int filler_start = ofst->AddState(); - int filler_end = ofst->AddState(); - for (int i = 2; i < symbol_table->NumSymbols() - 3; i++) { - ofst->AddArc(filler_start, fst::StdArc(i, i, FLAGS_is_penalty, 
filler_end)); - } - ofst->AddArc(filler_end, fst::StdArc(0, 0, 0.0, filler_start)); - - int prev = start; - // Alignment path and optional filler - for (size_t i = 0; i < labels.size(); i++) { - int cur = ofst->AddState(); - // 1. Insertion or Substitution - ofst->AddArc(prev, fst::StdArc(0, insertion_start, 0.0, filler_start)); - ofst->AddArc(filler_end, fst::StdArc(0, insertion_end, 0.0, prev)); - // 2. Correct - ofst->AddArc(prev, fst::StdArc(labels[i], labels[i], 0.0, cur)); - // 3. Deletion - ofst->AddArc(prev, fst::StdArc(0, deletion, FLAGS_del_penalty, cur)); - - prev = cur; - } - // Optional add endding filler - ofst->AddArc(prev, fst::StdArc(0, insertion_start, 0.0, filler_start)); - ofst->AddArc(filler_end, fst::StdArc(0, insertion_end, 0.0, prev)); - ofst->SetFinal(prev, fst::StdArc::Weight::One()); - fst::ArcSort(ofst, fst::StdILabelCompare()); -} - -} // namespace wenet - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - CHECK(decode_resource->unit_table != nullptr); - - auto wfst_symbol_table = - wenet::MakeSymbolTableForFst(decode_resource->unit_table); - // wfst_symbol_table->WriteText("fst.txt"); - // Reset symbol_table to on-the-fly generated wfst_symbol_table - decode_resource->symbol_table = wfst_symbol_table; - - // Compile ctc FST - fst::StdVectorFst ctc_fst; - wenet::CompileCtcFst(wfst_symbol_table, &ctc_fst); - // ctc_fst.Write("ctc.fst"); - - std::unordered_map wav_table; - std::ifstream wav_is(FLAGS_wav_scp); - std::string line; - while (std::getline(wav_is, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - CHECK_EQ(strs.size(), 2); - wav_table[strs[0]] = strs[1]; - } - - std::ifstream text_is(FLAGS_text); - std::ofstream result_os(FLAGS_result, std::ios::out); - std::ofstream timestamp_out; - if (!FLAGS_timestamp.empty()) { - timestamp_out.open(FLAGS_timestamp, std::ios::out); - } - std::ostream& timestamp_os = - FLAGS_timestamp.empty() ? 
std::cout : timestamp_out; - - while (std::getline(text_is, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - if (strs.size() < 2) continue; - std::string key = strs[0]; - LOG(INFO) << "Processing " << key; - if (wav_table.find(key) != wav_table.end()) { - strs.erase(strs.begin()); - std::string text = wenet::JoinString(" ", strs); - std::vector labels; - wenet::MapToLabel(text, wfst_symbol_table, &labels); - // Prepare FST for alignment decoding - fst::StdVectorFst align_fst; - wenet::CompileAlignFst(labels, wfst_symbol_table, &align_fst); - // align_fst.Write("align.fst"); - auto decoding_fst = std::make_shared(); - fst::Compose(ctc_fst, align_fst, decoding_fst.get()); - // decoding_fst->Write("decoding.fst"); - // Preapre feature pipeline - wenet::WavReader wav_reader; - if (!wav_reader.Open(wav_table[key])) { - LOG(WARNING) << "Error in reading " << wav_table[key]; - continue; - } - int num_samples = wav_reader.num_samples(); - CHECK_EQ(wav_reader.sample_rate(), FLAGS_sample_rate); - auto feature_pipeline = - std::make_shared(*feature_config); - feature_pipeline->AcceptWaveform(wav_reader.data(), num_samples); - feature_pipeline->set_input_finished(); - decode_resource->fst = decoding_fst; - LOG(INFO) << "num frames " << feature_pipeline->num_frames(); - wenet::AsrDecoder decoder(feature_pipeline, decode_resource, - *decode_config); - while (true) { - wenet::DecodeState state = decoder.Decode(); - if (state == wenet::DecodeState::kEndFeats) { - decoder.Rescoring(); - break; - } - } - std::string final_result; - std::string timestamp_str; - if (decoder.DecodedSomething()) { - const wenet::DecodeResult& result = decoder.result()[0]; - final_result = result.sentence; - std::stringstream ss; - for (const auto& w : result.word_pieces) { - ss << " " << w.word << " " << w.start << " " << w.end; - } - timestamp_str = ss.str(); - } - result_os << key << " " << final_result << std::endl; - timestamp_os << key << " " << timestamp_str << std::endl; - LOG(INFO) << key << " " << final_result; - } else { - LOG(WARNING) << "No wav file for " << key; - } - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/websocket_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/websocket_client_main.cc deleted file mode 100644 index 3eaa96069dc5f57673fbb2819bf7d4883e0d5ffa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/websocket_client_main.cc +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
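The label checker above composes a CTC topology FST with a per-utterance alignment FST in which matching a reference unit is free, deleting one costs FLAGS_del_penalty, and absorbing extra or substituted units through the filler states costs FLAGS_is_penalty each. A toy cost comparison under the default penalties (both 1.0), with invented unit counts, makes the trade-off concrete; in the tropical semiring the best path simply minimises the sum of arc weights.

```cpp
// Toy path-cost comparison for the alignment FST built by the deleted
// label_checker_main.cc. The unit counts are invented; only the penalty
// semantics mirror the deleted code.
#include <iostream>

int main() {
  const double is_penalty = 1.0;   // insertion/substitution filler arc weight
  const double del_penalty = 1.0;  // deletion arc weight

  const double exact_match_cost = 0.0;                           // all "correct" arcs
  const double one_del_one_ins_cost = del_penalty + is_penalty;  // 2.0

  std::cout << "exact match path cost: " << exact_match_cost << "\n"
            << "1 deletion + 1 insertion path cost: " << one_del_one_ins_cost
            << "\n";
  return 0;
}
```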
- -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/timer.h" -#include "websocket/websocket_client.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of websocket server"); -DEFINE_int32(port, 10086, "port of websocket server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - wenet::WebSocketClient client(FLAGS_hostname, FLAGS_port); - client.set_nbest(FLAGS_nbest); - client.set_continuous_decoding(FLAGS_continuous_decoding); - client.SendStartSignal(); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - // Send data every 0.5 second - const float interval = 0.5; - const int sample_interval = interval * sample_rate; - for (int start = 0; start < num_samples; start += sample_interval) { - if (client.done()) { - break; - } - int end = std::min(start + sample_interval, num_samples); - // Convert to short - std::vector data; - data.reserve(end - start); - for (int j = start; j < end; j++) { - data.push_back(static_cast(wav_reader.data()[j])); - } - // TODO(Binbin Zhang): Network order? - // Send PCM data - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Send " << data.size() << " samples"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(interval * 1000))); - } - wenet::Timer timer; - client.SendEndSignal(); - client.Join(); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/websocket_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/websocket_server_main.cc deleted file mode 100644 index 796d9d2e6d151f7c08b43d66b7245c58ee086cc2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/bin/websocket_server_main.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "decoder/params.h" -#include "utils/log.h" -#include "websocket/websocket_server.h" - -DEFINE_int32(port, 10086, "websocket listening port"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::WebSocketServer server(FLAGS_port, feature_config, decode_config, - decode_resource); - LOG(INFO) << "Listening at port " << FLAGS_port; - server.Start(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/boost.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/boost.cmake deleted file mode 100644 index 8684c0ec43960da213da923dc57416f04301ea2b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/boost.cmake +++ /dev/null @@ -1,10 +0,0 @@ -FetchContent_Declare(boost - URL https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.gz - URL_HASH SHA256=aeb26f80e80945e82ee93e5939baebdca47b9dee80a07d3144be1e1a6a66dd6a -) -FetchContent_MakeAvailable(boost) -include_directories(${boost_SOURCE_DIR}) - -if(MSVC) - add_definitions(-DBOOST_ALL_DYN_LINK -DBOOST_ALL_NO_LIB) -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/bpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/bpu.cmake deleted file mode 100644 index 350d76c19d6f656fb130de09877d649cf49972a4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/bpu.cmake +++ /dev/null @@ -1,30 +0,0 @@ -if(BPU) - if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(EASY_DNN_URL "https://github.com/xingchensong/toolchain_pkg/releases/download/easy_dnn/easy_dnn.0.4.11.tar.gz") - set(URL_HASH "SHA256=a1a6f77d1baae7181d75ec5d37a2ee529ac4e1c4400babd6ceb1c007392a4904") - else() - message(FATAL_ERROR "Unsupported CMake System Processor '${CMAKE_SYSTEM_PROCESSOR}' (expected 'aarch64')") - endif() - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Linux')") - endif() - - FetchContent_Declare(easy_dnn - URL ${EASY_DNN_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(easy_dnn) - include_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/include) - link_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/lib) - link_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/lib) - link_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/lib) - - add_definitions(-DUSE_BPU) - # NOTE(xcsong): Reasons for adding flag `-fuse-ld=gold`: - # https://stackoverflow.com/questions/59915966/unknown-gcc-linker-error-but-builds-sucessfully/59916438#59916438 - # https://github.com/tensorflow/tensorflow/issues/47849 - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-fuse-ld=gold") -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/gflags.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/gflags.cmake deleted file mode 100644 index 53ae5763b5a8c860b7e64d35b380eee5429f539d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/gflags.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(gflags - URL https://github.com/gflags/gflags/archive/v2.2.2.zip - URL_HASH SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5 -) -FetchContent_MakeAvailable(gflags) -include_directories(${gflags_BINARY_DIR}/include) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/glog.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/glog.cmake deleted file mode 100644 index 447ab4132f669ee2c3a52c37959dd684a39ff21b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/glog.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(glog - URL https://github.com/google/glog/archive/v0.4.0.zip - URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc -) -FetchContent_MakeAvailable(glog) -include_directories(${glog_SOURCE_DIR}/src ${glog_BINARY_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/grpc.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/grpc.cmake deleted file mode 100644 index 644093a4bf8191f3a45b0df0a72c000981c48f58..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/grpc.cmake +++ /dev/null @@ -1,9 +0,0 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/grpc) -# third_party: grpc -# On how to build grpc, you may refer to https://github.com/grpc/grpc -# We recommend manually recursive clone the repo to avoid internet connection problem -FetchContent_Declare(gRPC - GIT_REPOSITORY https://github.com/grpc/grpc - GIT_TAG v1.37.1 -) -FetchContent_MakeAvailable(gRPC) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/gtest.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/gtest.cmake deleted file mode 100644 index 30dc7c1a31d8b83991841a4dc33f61ed078b532a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/gtest.cmake +++ /dev/null @@ -1,8 +0,0 @@ -FetchContent_Declare(googletest - URL https://github.com/google/googletest/archive/release-1.11.0.zip - URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a -) -if(MSVC) - set(gtest_force_shared_crt ON CACHE BOOL "Always use msvcrt.dll" FORCE) -endif() -FetchContent_MakeAvailable(googletest) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/libtorch.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/libtorch.cmake deleted file mode 100644 index 3cd9245b2da52f8be206d27164de5f411bff171b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/libtorch.cmake +++ /dev/null @@ -1,79 +0,0 @@ 
-if(TORCH) - add_definitions(-DUSE_TORCH) - if(NOT ANDROID) - if(GPU) - if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - message(FATAL_ERROR "GPU is supported only Linux, you can use CPU version") - else() - add_definitions(-DUSE_GPU) - endif() - endif() - - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - if(${CMAKE_BUILD_TYPE} MATCHES "Release") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bece54d36377990257e9d028c687c5b6759c5cfec0a0153da83cf6f0f71f648f") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-debug-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=3cc7ba3c3865d86f03d78c2f0878fdbed8b764359476397a5c95cf3bba0d665a") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CXX11_ABI) - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=d52f63577a07adb0bfd6d77c90f7da21896e94f71eb7dcd55ed7835ccb3b2b59") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.12.0%2Bcu113.zip") - set(URL_HASH "SHA256=80f089939de20e68e3fcad4dfa72a26c8bf91b5e77b11042f671f39ebac35865") - endif() - else() - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bee1b7be308792aa60fc95a4f5274d9658cb7248002d0e333d49eb81ec88430c") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.11.0%2Bcu113.zip") - set(URL_HASH "SHA256=90159ecce3ff451f3ef3f657493b6c7c96759c3b74bbd70c1695f2ea2f81e1ad") - endif() - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-macos-1.13.0.zip") - set(URL_HASH "SHA256=a8f80050b95489b4e002547910410c2c230e9f590ffab2482e19e809afe4f7aa") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") - add_definitions(-DIOS) - else() - message(FATAL_ERROR "Unsupported System '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux', 'Darwin' or 'iOS')") - endif() - - # iOS use LibTorch from pod install - if(NOT IOS) - FetchContent_Declare(libtorch - URL ${LIBTORCH_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(libtorch) - find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS} -DC10_USE_GLOG") - endif() - - if(MSVC) - file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") - file(COPY ${TORCH_DLLS} DESTINATION ${CMAKE_BINARY_DIR}) - endif() - else() - # Change version in runtime/android/app/build.gradle. 
- file(GLOB PYTORCH_INCLUDE_DIRS "${build_DIR}/pytorch_android*.aar/headers") - file(GLOB PYTORCH_LINK_DIRS "${build_DIR}/pytorch_android*.aar/jni/${ANDROID_ABI}") - find_library(PYTORCH_LIBRARY pytorch_jni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - find_library(FBJNI_LIBRARY fbjni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - include_directories( - ${PYTORCH_INCLUDE_DIRS} - ${PYTORCH_INCLUDE_DIRS}/torch/csrc/api/include - ) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/onnx.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/onnx.cmake deleted file mode 100644 index bd55402cb2a6024620fa6ff8b5c413207041adfa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/onnx.cmake +++ /dev/null @@ -1,35 +0,0 @@ -if(ONNX) - set(ONNX_VERSION "1.12.0") - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-win-x64-${ONNX_VERSION}.zip") - set(URL_HASH "SHA256=8b5d61204989350b7904ac277f5fbccd3e6736ddbb6ec001e412723d71c9c176") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-aarch64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5820d9f343df73c63b6b2b174a1ff62575032e171c9564bcf92060f46827d0ac") - else() - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-x64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5d503ce8540358b59be26c675e42081be14a3e833a5301926f555451046929c5") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-x86_64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=09b17f712f8c6f19bb63da35d508815b443cbb473e16c6192abfaa297c02f600") - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux' or 'Darwin')") - endif() - - FetchContent_Declare(onnxruntime - URL ${ONNX_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(onnxruntime) - include_directories(${onnxruntime_SOURCE_DIR}/include) - link_directories(${onnxruntime_SOURCE_DIR}/lib) - - if(MSVC) - file(GLOB ONNX_DLLS "${onnxruntime_SOURCE_DIR}/lib/*.dll") - file(COPY ${ONNX_DLLS} DESTINATION ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}) - endif() - - add_definitions(-DUSE_ONNX) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/openfst.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/openfst.cmake deleted file mode 100644 index 490a3da6b571ec228114167fb9c0d9e9b4043bd2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/openfst.cmake +++ /dev/null @@ -1,45 +0,0 @@ -if(NOT ANDROID) - include(gflags) - # We can't build glog with gflags, unless gflags is pre-installed. - # If build glog with pre-installed gflags, there will be conflict. 
- set(WITH_GFLAGS OFF CACHE BOOL "whether build glog with gflags" FORCE) - include(glog) - - if(NOT GRAPH_TOOLS) - set(HAVE_BIN OFF CACHE BOOL "Build the fst binaries" FORCE) - set(HAVE_SCRIPT OFF CACHE BOOL "Build the fstscript" FORCE) - endif() - set(HAVE_COMPACT OFF CACHE BOOL "Build compact" FORCE) - set(HAVE_CONST OFF CACHE BOOL "Build const" FORCE) - set(HAVE_GRM OFF CACHE BOOL "Build grm" FORCE) - set(HAVE_FAR OFF CACHE BOOL "Build far" FORCE) - set(HAVE_PDT OFF CACHE BOOL "Build pdt" FORCE) - set(HAVE_MPDT OFF CACHE BOOL "Build mpdt" FORCE) - set(HAVE_LINEAR OFF CACHE BOOL "Build linear" FORCE) - set(HAVE_LOOKAHEAD OFF CACHE BOOL "Build lookahead" FORCE) - set(HAVE_NGRAM OFF CACHE BOOL "Build ngram" FORCE) - set(HAVE_SPECIAL OFF CACHE BOOL "Build special" FORCE) - - if(MSVC) - add_compile_options(/W0 /wd4244 /wd4267) - endif() - - # "OpenFST port for Windows" builds openfst with cmake for multiple platforms. - # Openfst is compiled with glog/gflags to avoid log and flag conflicts with log and flags in wenet/libtorch. - # To build openfst with gflags and glog, we comment out some vars of {flags, log}.h and flags.cc. - set(openfst_SOURCE_DIR ${fc_base}/openfst-src CACHE PATH "OpenFST source directory") - FetchContent_Declare(openfst - URL https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz - URL_HASH SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e - PATCH_COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/patch/openfst ${openfst_SOURCE_DIR} - ) - FetchContent_MakeAvailable(openfst) - add_dependencies(fst gflags glog) - target_link_libraries(fst PUBLIC gflags_nothreads_static glog) - include_directories(${openfst_SOURCE_DIR}/src/include) -else() - set(openfst_BINARY_DIR ${build_DIR}/wenet-openfst-android-1.0.2.aar/jni) - include_directories(${openfst_BINARY_DIR}/include) - link_directories(${openfst_BINARY_DIR}/${ANDROID_ABI}) - link_libraries(log gflags_nothreads glog fst) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/pybind11.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/pybind11.cmake deleted file mode 100644 index 6bdae202c1c4d94228e5f92dab051c118dba7d3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/pybind11.cmake +++ /dev/null @@ -1,7 +0,0 @@ -FetchContent_Declare(pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.zip - URL_HASH SHA256=d1646e6f70d8a3acb2ddd85ce1ed543b5dd579c68b8fb8e9638282af20edead8 -) -FetchContent_MakeAvailable(pybind11) - -add_subdirectory(${pybind11_SOURCE_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/xpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/xpu.cmake deleted file mode 100644 index 38418671b0237550cd01d4d95e8743067e113e56..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/cmake/xpu.cmake +++ /dev/null @@ -1,37 +0,0 @@ -if(NOT WIN32) - string(ASCII 27 Esc) - set(ColourReset "${Esc}[m") - set(ColourBold "${Esc}[1m") - set(Red "${Esc}[31m") - set(Green "${Esc}[32m") - set(Yellow "${Esc}[33m") - set(Blue "${Esc}[34m") - set(Magenta "${Esc}[35m") - set(Cyan "${Esc}[36m") - set(White "${Esc}[37m") - set(BoldRed "${Esc}[1;31m") - set(BoldGreen "${Esc}[1;32m") - set(BoldYellow "${Esc}[1;33m") - set(BoldBlue 
"${Esc}[1;34m") - set(BoldMagenta "${Esc}[1;35m") - set(BoldCyan "${Esc}[1;36m") - set(BoldWhite "${Esc}[1;37m") -endif() - -if(XPU) - set(RUNTIME_KUNLUN_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - message(STATUS "RUNTIME_KUNLUN_PATH is ${RUNTIME_KUNLUN_PATH} .\n") - set(KUNLUN_XPU_PATH ${RUNTIME_KUNLUN_PATH}/xpu) - if(NOT DEFINED ENV{XPU_API_PATH}) - message(FATAL_ERROR "${BoldRed}NO ENV{XPU_API_PATH} in your env. Please set XPU_API_PATH.${ColourReset}\n") - else() - set(XPU_API_PATH $ENV{XPU_API_PATH}) - message("set XPU_API_PATH from env_var. Val is $ENV{XPU_API_PATH}.") - endif() - - include_directories(${RUNTIME_KUNLUN_PATH} ${KUNLUN_XPU_PATH}/ - ${XPU_API_PATH}/output/include ${XPU_API_PATH}/../runtime/include) - link_directories(${XPU_API_PATH}/output/so/ ${XPU_API_PATH}/../runtime/output/so/) - - add_definitions(-DUSE_XPU) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/CMakeLists.txt deleted file mode 100644 index fe03efb288eb1c7ae3d05e896e95855e5865472f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -set(decoder_srcs - asr_decoder.cc - asr_model.cc - context_graph.cc - ctc_prefix_beam_search.cc - ctc_wfst_beam_search.cc - ctc_endpoint.cc -) - -if(NOT TORCH AND NOT ONNX AND NOT XPU AND NOT IOS AND NOT BPU) - message(FATAL_ERROR "Please build with TORCH or ONNX or XPU or IOS or BPU!!!") -endif() -if(TORCH OR IOS) - list(APPEND decoder_srcs torch_asr_model.cc) -endif() -if(ONNX) - list(APPEND decoder_srcs onnx_asr_model.cc) -endif() - -add_library(decoder STATIC ${decoder_srcs}) -target_link_libraries(decoder PUBLIC kaldi-decoder frontend - post_processor utils) - -if(ANDROID) - target_link_libraries(decoder PUBLIC ${PYTORCH_LIBRARY} ${FBJNI_LIBRARY}) -else() - if(TORCH) - target_link_libraries(decoder PUBLIC ${TORCH_LIBRARIES}) - endif() - if(ONNX) - target_link_libraries(decoder PUBLIC onnxruntime) - endif() - if(BPU) - target_link_libraries(decoder PUBLIC bpu_asr_model) - endif() - if(XPU) - target_link_libraries(decoder PUBLIC xpu_conformer) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_decoder.cc deleted file mode 100644 index 34de7550ea287b37d2cb707e148f5d6853b3d804..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_decoder.cc +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/asr_decoder.h" - -#include - -#include -#include -#include - -#include "utils/timer.h" - -namespace wenet { - -AsrDecoder::AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts) - : feature_pipeline_(std::move(feature_pipeline)), - // Make a copy of the model ASR model since we will change the inner - // status of the model - model_(resource->model->Copy()), - post_processor_(resource->post_processor), - symbol_table_(resource->symbol_table), - fst_(resource->fst), - unit_table_(resource->unit_table), - opts_(opts), - ctc_endpointer_(new CtcEndpoint(opts.ctc_endpoint_config)) { - if (opts_.reverse_weight > 0) { - // Check if model has a right to left decoder - CHECK(model_->is_bidirectional_decoder()); - } - if (nullptr == fst_) { - searcher_.reset(new CtcPrefixBeamSearch(opts.ctc_prefix_search_opts, - resource->context_graph)); - } else { - searcher_.reset(new CtcWfstBeamSearch(*fst_, opts.ctc_wfst_search_opts, - resource->context_graph)); - } - ctc_endpointer_->frame_shift_in_ms(frame_shift_in_ms()); -} - -void AsrDecoder::Reset() { - start_ = false; - result_.clear(); - num_frames_ = 0; - global_frame_offset_ = 0; - model_->Reset(); - searcher_->Reset(); - feature_pipeline_->Reset(); - ctc_endpointer_->Reset(); -} - -void AsrDecoder::ResetContinuousDecoding() { - global_frame_offset_ = num_frames_; - start_ = false; - result_.clear(); - model_->Reset(); - searcher_->Reset(); - ctc_endpointer_->Reset(); -} - -DecodeState AsrDecoder::Decode(bool block) { - return this->AdvanceDecoding(block); -} - -void AsrDecoder::Rescoring() { - // Do attention rescoring - Timer timer; - AttentionRescoring(); - VLOG(2) << "Rescoring cost latency: " << timer.Elapsed() << "ms."; -} - -DecodeState AsrDecoder::AdvanceDecoding(bool block) { - DecodeState state = DecodeState::kEndBatch; - model_->set_chunk_size(opts_.chunk_size); - model_->set_num_left_chunks(opts_.num_left_chunks); - int num_required_frames = model_->num_frames_for_chunk(start_); - std::vector> chunk_feats; - // Return immediately if we do not want to block - if (!block && !feature_pipeline_->input_finished() && - feature_pipeline_->NumQueuedFrames() < num_required_frames) { - return DecodeState::kWaitFeats; - } - // If not okay, that means we reach the end of the input - if (!feature_pipeline_->Read(num_required_frames, &chunk_feats)) { - state = DecodeState::kEndFeats; - } - - num_frames_ += chunk_feats.size(); - VLOG(2) << "Required " << num_required_frames << " get " - << chunk_feats.size(); - Timer timer; - std::vector> ctc_log_probs; - model_->ForwardEncoder(chunk_feats, &ctc_log_probs); - int forward_time = timer.Elapsed(); - if (opts_.ctc_wfst_search_opts.blank_scale != 1.0) { - for (int i = 0; i < ctc_log_probs.size(); i++) { - ctc_log_probs[i][0] = ctc_log_probs[i][0] - + std::log(opts_.ctc_wfst_search_opts.blank_scale); - } - } - timer.Reset(); - searcher_->Search(ctc_log_probs); - int search_time = timer.Elapsed(); - VLOG(3) << "forward takes " << forward_time << " ms, search takes " - << search_time << " ms"; - UpdateResult(); - - if (state != DecodeState::kEndFeats) { - if (ctc_endpointer_->IsEndpoint(ctc_log_probs, DecodedSomething())) { - VLOG(1) << "Endpoint is detected at " << num_frames_; - state = DecodeState::kEndpoint; - } - } - - start_ = true; - return state; -} - -void AsrDecoder::UpdateResult(bool finish) { - const auto& hypotheses = searcher_->Outputs(); - const auto& inputs = searcher_->Inputs(); - const auto& likelihood = 
searcher_->Likelihood(); - const auto& times = searcher_->Times(); - result_.clear(); - - CHECK_EQ(hypotheses.size(), likelihood.size()); - for (size_t i = 0; i < hypotheses.size(); i++) { - const std::vector& hypothesis = hypotheses[i]; - - DecodeResult path; - path.score = likelihood[i]; - int offset = global_frame_offset_ * feature_frame_shift_in_ms(); - for (size_t j = 0; j < hypothesis.size(); j++) { - std::string word = symbol_table_->Find(hypothesis[j]); - // A detailed explanation of this if-else branch can be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - if (searcher_->Type() == kWfstBeamSearch) { - path.sentence += (' ' + word); - } else { - path.sentence += (word); - } - } - - // TimeStamp is only supported in final result - // TimeStamp of the output of CtcWfstBeamSearch may be inaccurate due to - // various FST operations when building the decoding graph. So here we use - // time stamp of the input(e2e model unit), which is more accurate, and it - // requires the symbol table of the e2e model used in training. - if (unit_table_ != nullptr && finish) { - const std::vector& input = inputs[i]; - const std::vector& time_stamp = times[i]; - CHECK_EQ(input.size(), time_stamp.size()); - for (size_t j = 0; j < input.size(); j++) { - std::string word = unit_table_->Find(input[j]); - int start = time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ > 0 - ? time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ - : 0; - if (j > 0) { - start = (time_stamp[j] - time_stamp[j - 1]) * frame_shift_in_ms() < - time_stamp_gap_ - ? (time_stamp[j - 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : start; - } - int end = time_stamp[j] * frame_shift_in_ms(); - if (j < input.size() - 1) { - end = (time_stamp[j + 1] - time_stamp[j]) * frame_shift_in_ms() < - time_stamp_gap_ - ? 
(time_stamp[j + 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : end; - } - WordPiece word_piece(word, offset + start, offset + end); - path.word_pieces.emplace_back(word_piece); - } - } - - if (post_processor_ != nullptr) { - path.sentence = post_processor_->Process(path.sentence, finish); - } - result_.emplace_back(path); - } - - if (DecodedSomething()) { - VLOG(1) << "Partial CTC result " << result_[0].sentence; - } -} - -void AsrDecoder::AttentionRescoring() { - searcher_->FinalizeSearch(); - UpdateResult(true); - // No need to do rescoring - if (0.0 == opts_.rescoring_weight) { - return; - } - // Inputs() returns N-best input ids, which is the basic unit for rescoring - // In CtcPrefixBeamSearch, inputs are the same to outputs - const auto& hypotheses = searcher_->Inputs(); - int num_hyps = hypotheses.size(); - if (num_hyps <= 0) { - return; - } - - std::vector rescoring_score; - model_->AttentionRescoring(hypotheses, opts_.reverse_weight, - &rescoring_score); - - // Combine ctc score and rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - result_[i].score = opts_.rescoring_weight * rescoring_score[i] + - opts_.ctc_weight * result_[i].score; - } - std::sort(result_.begin(), result_.end(), DecodeResult::CompareFunc); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_decoder.h deleted file mode 100644 index df71f5b7bad7b2ffdc69bbd7ab11f576bed464d2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_decoder.h +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_ASR_DECODER_H_ -#define DECODER_ASR_DECODER_H_ - -#include -#include -#include -#include - -#include "fst/fstlib.h" -#include "fst/symbol-table.h" - -#include "decoder/asr_model.h" -#include "decoder/context_graph.h" -#include "decoder/ctc_endpoint.h" -#include "decoder/ctc_prefix_beam_search.h" -#include "decoder/ctc_wfst_beam_search.h" -#include "decoder/search_interface.h" -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/utils.h" - -namespace wenet { - -struct DecodeOptions { - // chunk_size is the frame number of one chunk after subsampling. - // e.g. if subsample rate is 4 and chunk_size = 16, the frames in - // one chunk are 64 = 16*4 - int chunk_size = 16; - int num_left_chunks = -1; - - // final_score = rescoring_weight * rescoring_score + ctc_weight * ctc_score; - // rescoring_score = left_to_right_score * (1 - reverse_weight) + - // right_to_left_score * reverse_weight - // Please note the concept of ctc_scores in the following two search - // methods are different. 
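These weights combine as final_score = rescoring_weight * rescoring_score + ctc_weight * ctc_score, with rescoring_score itself a mix of the left-to-right and right-to-left attention scores, as applied in AttentionRescoring above. A small numeric sketch with invented scores (only the formula and the default weights come from the deleted decoder code) shows the effect:

```cpp
// Illustrative recomputation of the final hypothesis score. All score values
// are invented for the example; only the formula matches the deleted
// asr_decoder code.
#include <iostream>

int main() {
  const float ctc_weight = 0.5f;        // defaults from DecodeOptions
  const float rescoring_weight = 1.0f;
  const float reverse_weight = 0.0f;    // > 0 only for bidirectional decoders

  const float ctc_score = -12.4f;            // e.g. CTC search score
  const float left_to_right_score = -8.1f;   // attention decoder, L2R
  const float right_to_left_score = -8.9f;   // attention decoder, R2L

  const float rescoring_score =
      left_to_right_score * (1.0f - reverse_weight) +
      right_to_left_score * reverse_weight;
  const float final_score =
      rescoring_weight * rescoring_score + ctc_weight * ctc_score;

  std::cout << "rescoring_score = " << rescoring_score
            << ", final_score = " << final_score << "\n";
  return 0;
}
```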
- // For CtcPrefixBeamSearch, it's a sum(prefix) score + context score - // For CtcWfstBeamSearch, it's a max(viterbi) path score + context score - // So we should carefully set ctc_weight according to the search methods. - float ctc_weight = 0.5; - float rescoring_weight = 1.0; - float reverse_weight = 0.0; - CtcEndpointConfig ctc_endpoint_config; - CtcPrefixBeamSearchOptions ctc_prefix_search_opts; - CtcWfstBeamSearchOptions ctc_wfst_search_opts; -}; - -struct WordPiece { - std::string word; - int start = -1; - int end = -1; - - WordPiece(std::string word, int start, int end) - : word(std::move(word)), start(start), end(end) {} -}; - -struct DecodeResult { - float score = -kFloatMax; - std::string sentence; - std::vector word_pieces; - - static bool CompareFunc(const DecodeResult& a, const DecodeResult& b) { - return a.score > b.score; - } -}; - -enum DecodeState { - kEndBatch = 0x00, // End of current decoding batch, normal case - kEndpoint = 0x01, // Endpoint is detected - kEndFeats = 0x02, // All feature is decoded - kWaitFeats = 0x03 // Feat is not enough for one chunk inference, wait -}; - -// DecodeResource is thread safe, which can be shared for multiple -// decoding threads -struct DecodeResource { - std::shared_ptr model = nullptr; - std::shared_ptr symbol_table = nullptr; - std::shared_ptr> fst = nullptr; - std::shared_ptr unit_table = nullptr; - std::shared_ptr context_graph = nullptr; - std::shared_ptr post_processor = nullptr; -}; - -// Torch ASR decoder -class AsrDecoder { - public: - AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts); - // @param block: if true, block when feature is not enough for one chunk - // inference. Otherwise, return kWaitFeats. - DecodeState Decode(bool block = true); - void Rescoring(); - void Reset(); - void ResetContinuousDecoding(); - bool DecodedSomething() const { - return !result_.empty() && !result_[0].sentence.empty(); - } - - // This method is used for time benchmark - int num_frames_in_current_chunk() const { - return num_frames_in_current_chunk_; - } - int frame_shift_in_ms() const { - return model_->subsampling_rate() * - feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - int feature_frame_shift_in_ms() const { - return feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - const std::vector& result() const { return result_; } - - private: - DecodeState AdvanceDecoding(bool block = true); - void AttentionRescoring(); - - void UpdateResult(bool finish = false); - - std::shared_ptr feature_pipeline_; - std::shared_ptr model_; - std::shared_ptr post_processor_; - - std::shared_ptr> fst_ = nullptr; - // output symbol table - std::shared_ptr symbol_table_; - // e2e unit symbol table - std::shared_ptr unit_table_ = nullptr; - const DecodeOptions& opts_; - // cache feature - bool start_ = false; - // For continuous decoding - int num_frames_ = 0; - int global_frame_offset_ = 0; - const int time_stamp_gap_ = 100; // timestamp gap between words in a sentence - - std::unique_ptr searcher_; - std::unique_ptr ctc_endpointer_; - - int num_frames_in_current_chunk_ = 0; - std::vector result_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(AsrDecoder); -}; - -} // namespace wenet - -#endif // DECODER_ASR_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_model.cc 
deleted file mode 100644 index 8c7b0fb1195cf07bac6c3ff1bb8cb0e187e977da..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_model.cc +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#include "decoder/asr_model.h" - -#include -#include - -namespace wenet { - -int AsrModel::num_frames_for_chunk(bool start) const { - int num_required_frames = 0; - if (chunk_size_ > 0) { - if (!start) { // First batch - int context = right_context_ + 1; // Add current frame - num_required_frames = (chunk_size_ - 1) * subsampling_rate_ + context; - } else { - num_required_frames = chunk_size_ * subsampling_rate_; - } - } else { - num_required_frames = std::numeric_limits::max(); - } - return num_required_frames; -} - -void AsrModel::CacheFeature( - const std::vector>& chunk_feats) { - // Cache feature for next chunk - const int cached_feature_size = 1 + right_context_ - subsampling_rate_; - if (chunk_feats.size() >= cached_feature_size) { - // TODO(Binbin Zhang): Only deal the case when - // chunk_feats.size() > cached_feature_size here, and it's consistent - // with our current model, refine it later if we have new model or - // new requirements - cached_feature_.resize(cached_feature_size); - for (int i = 0; i < cached_feature_size; ++i) { - cached_feature_[i] = - chunk_feats[chunk_feats.size() - cached_feature_size + i]; - } - } -} - -void AsrModel::ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) { - ctc_prob->clear(); - int num_frames = cached_feature_.size() + chunk_feats.size(); - if (num_frames >= right_context_ + 1) { - this->ForwardEncoderFunc(chunk_feats, ctc_prob); - this->CacheFeature(chunk_feats); - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_model.h deleted file mode 100644 index d100dd818551014fa4769c1766bc3b1b626e8453..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/asr_model.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#ifndef DECODER_ASR_MODEL_H_ -#define DECODER_ASR_MODEL_H_ - -#include -#include -#include -#include - -#include "utils/timer.h" -#include "utils/utils.h" - -namespace wenet { - -class AsrModel { - public: - virtual int right_context() const { return right_context_; } - virtual int subsampling_rate() const { return subsampling_rate_; } - virtual int sos() const { return sos_; } - virtual int eos() const { return eos_; } - virtual bool is_bidirectional_decoder() const { - return is_bidirectional_decoder_; - } - virtual int offset() const { return offset_; } - - // If chunk_size > 0, streaming case. 
Otherwise, none streaming case - virtual void set_chunk_size(int chunk_size) { chunk_size_ = chunk_size; } - virtual void set_num_left_chunks(int num_left_chunks) { - num_left_chunks_ = num_left_chunks; - } - // start: if it is the start chunk of one sentence - virtual int num_frames_for_chunk(bool start) const; - - virtual void Reset() = 0; - - virtual void ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob); - - virtual void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) = 0; - - virtual std::shared_ptr Copy() const = 0; - - protected: - virtual void ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) = 0; - virtual void CacheFeature(const std::vector>& chunk_feats); - - int right_context_ = 1; - int subsampling_rate_ = 1; - int sos_ = 0; - int eos_ = 0; - bool is_bidirectional_decoder_ = false; - int chunk_size_ = 16; - int num_left_chunks_ = -1; // -1 means all left chunks - int offset_ = 0; - - std::vector> cached_feature_; -}; - -} // namespace wenet - -#endif // DECODER_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/context_graph.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/context_graph.cc deleted file mode 100644 index adc59c506de2afa7087815887295e4d8735d2a35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/context_graph.cc +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/context_graph.h" - -#include - -#include "fst/determinize.h" - -#include "utils/string.h" -#include "utils/utils.h" - -namespace wenet { - -ContextGraph::ContextGraph(ContextConfig config) : config_(config) {} - -void ContextGraph::BuildContextGraph( - const std::vector& query_contexts, - const std::shared_ptr& symbol_table) { - CHECK(symbol_table != nullptr) << "Symbols table should not be nullptr!"; - start_tag_id_ = symbol_table->AddSymbol(""); - end_tag_id_ = symbol_table->AddSymbol(""); - symbol_table_ = symbol_table; - if (query_contexts.empty()) { - if (graph_ != nullptr) graph_.reset(); - return; - } - - std::unique_ptr ofst(new fst::StdVectorFst()); - // State 0 is the start state and the final state. - int start_state = ofst->AddState(); - ofst->SetStart(start_state); - ofst->SetFinal(start_state, fst::StdArc::Weight::One()); - - LOG(INFO) << "Contexts count size: " << query_contexts.size(); - int count = 0; - for (const auto& context : query_contexts) { - if (context.size() > config_.max_context_length) { - LOG(INFO) << "Skip long context: " << context; - continue; - } - if (++count > config_.max_contexts) break; - - std::vector words; - // Split context to words by symbol table, and build the context graph. 
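BuildContextGraph above boosts each hot phrase by weighting every arc with (i * incremental_context_score + context_score) * UTF8StringLength(word), and adds an escape arc that subtracts the boost already granted if the phrase is abandoned midway. The sketch below recomputes those weights for one invented two-unit phrase using the ContextConfig defaults:

```cpp
// Illustrative arc weights for one hot phrase, following the formula in the
// deleted BuildContextGraph. The phrase and character lengths are invented;
// the defaults come from ContextConfig (context_score = 3.0,
// incremental_context_score = 0.0).
#include <iostream>
#include <vector>

int main() {
  const float context_score = 3.0f;
  const float incremental_context_score = 0.0f;
  // Pretend the phrase splits into two units of 2 and 3 characters.
  const std::vector<int> unit_lengths = {2, 3};

  float escape_score = 0.0f;
  for (size_t i = 0; i < unit_lengths.size(); ++i) {
    float score =
        (i * incremental_context_score + context_score) * unit_lengths[i];
    std::cout << "arc " << i << " weight = " << score << "\n";
    if (i > 0) {
      // Escape arc cancels the boost already granted to the partial match.
      std::cout << "  escape arc back to start = " << -escape_score << "\n";
    }
    escape_score += score;
  }
  return 0;
}
```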
- bool no_oov = SplitUTF8StringToWords(Trim(context), symbol_table, &words); - if (!no_oov) { - LOG(WARNING) << "Ignore unknown word found during compilation."; - continue; - } - - int prev_state = start_state; - int next_state = start_state; - float escape_score = 0; - for (size_t i = 0; i < words.size(); ++i) { - int word_id = symbol_table_->Find(words[i]); - float score = (i * config_.incremental_context_score - + config_.context_score) * UTF8StringLength(words[i]); - next_state = (i < words.size() - 1) ? ofst->AddState() : start_state; - ofst->AddArc(prev_state, - fst::StdArc(word_id, word_id, score, next_state)); - // Add escape arc to clean the previous context score. - if (i > 0) { - // ilabel and olabel of the escape arc is 0 (). - ofst->AddArc(prev_state, fst::StdArc(0, 0, -escape_score, start_state)); - } - prev_state = next_state; - escape_score += score; - } - } - std::unique_ptr det_fst(new fst::StdVectorFst()); - fst::Determinize(*ofst, det_fst.get()); - graph_ = std::move(det_fst); -} - -int ContextGraph::GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary) { - int next_state = 0; - for (fst::ArcIterator aiter(*graph_, cur_state); !aiter.Done(); - aiter.Next()) { - const fst::StdArc& arc = aiter.Value(); - if (arc.ilabel == 0) { - // escape score, will be overwritten when ilabel equals to word id. - *score = arc.weight.Value(); - } else if (arc.ilabel == word_id) { - next_state = arc.nextstate; - *score = arc.weight.Value(); - if (cur_state == 0) { - *is_start_boundary = true; - } - if (graph_->Final(arc.nextstate) == fst::StdArc::Weight::One()) { - *is_end_boundary = true; - } - break; - } - } - return next_state; -} - -bool ContextGraph::SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - bool no_oov = true; - for (size_t start = 0; start < chars.size();) { - for (size_t end = chars.size(); end > start; --end) { - std::string word; - for (size_t i = start; i < end; i++) { - word += chars[i]; - } - // Skip space. - if (word == " ") { - start = end; - continue; - } - // Add '▁' at the beginning of English word. - if (IsAlpha(word)) { - word = kSpaceSymbol + word; - } - - if (symbol_table->Find(word) != -1) { - words->emplace_back(word); - start = end; - continue; - } - if (end == start + 1) { - ++start; - no_oov = false; - LOG(WARNING) << word << " is oov."; - } - } - } - return no_oov; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/context_graph.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/context_graph.h deleted file mode 100644 index 41b59206987cfe22d421f40506057830b6311f8e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/context_graph.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CONTEXT_GRAPH_H_ -#define DECODER_CONTEXT_GRAPH_H_ - -#include -#include -#include - -#include "fst/compose.h" -#include "fst/fst.h" -#include "fst/vector-fst.h" - -namespace wenet { - -using StateId = fst::StdArc::StateId; - -struct ContextConfig { - int max_contexts = 5000; - int max_context_length = 100; - float context_score = 3.0; - float incremental_context_score = 0.0; -}; - -class ContextGraph { - public: - explicit ContextGraph(ContextConfig config); - void BuildContextGraph(const std::vector& query_context, - const std::shared_ptr& symbol_table); - int GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary); - - int start_tag_id() { return start_tag_id_; } - int end_tag_id() { return end_tag_id_; } - - private: - bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - - int start_tag_id_ = -1; - int end_tag_id_ = -1; - ContextConfig config_; - std::shared_ptr symbol_table_ = nullptr; - std::unique_ptr graph_ = nullptr; - DISALLOW_COPY_AND_ASSIGN(ContextGraph); -}; - -} // namespace wenet - -#endif // DECODER_CONTEXT_GRAPH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_endpoint.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_endpoint.cc deleted file mode 100644 index 4a64dd048f32401ab0dca468836cfac8be943d26..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_endpoint.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_endpoint.h" - -#include - -#include -#include - -#include "utils/log.h" - -namespace wenet { - -CtcEndpoint::CtcEndpoint(const CtcEndpointConfig& config) : config_(config) { - Reset(); -} - -void CtcEndpoint::Reset() { - num_frames_decoded_ = 0; - num_frames_trailing_blank_ = 0; -} - -static bool RuleActivated(const CtcEndpointRule& rule, - const std::string& rule_name, bool decoded_sth, - int trailing_silence, int utterance_length) { - bool ans = (decoded_sth || !rule.must_decoded_sth) && - trailing_silence >= rule.min_trailing_silence && - utterance_length >= rule.min_utterance_length; - if (ans) { - VLOG(2) << "Endpointing rule " << rule_name - << " activated: " << (decoded_sth ? 
"true" : "false") << ',' - << trailing_silence << ',' << utterance_length; - } - return ans; -} - -bool CtcEndpoint::IsEndpoint( - const std::vector>& ctc_log_probs, - bool decoded_something) { - for (int t = 0; t < ctc_log_probs.size(); ++t) { - const auto& logp_t = ctc_log_probs[t]; - float blank_prob = expf(logp_t[config_.blank]); - - num_frames_decoded_++; - if (blank_prob > config_.blank_threshold) { - num_frames_trailing_blank_++; - } else { - num_frames_trailing_blank_ = 0; - } - } - CHECK_GE(num_frames_decoded_, num_frames_trailing_blank_); - CHECK_GT(frame_shift_in_ms_, 0); - int utterance_length = num_frames_decoded_ * frame_shift_in_ms_; - int trailing_silence = num_frames_trailing_blank_ * frame_shift_in_ms_; - if (RuleActivated(config_.rule1, "rule1", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule2, "rule2", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule3, "rule3", decoded_something, trailing_silence, - utterance_length)) - return true; - return false; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_endpoint.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_endpoint.h deleted file mode 100644 index 56d9e08e7d3fab5562028e956f7b1d6ebac7b9e4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_endpoint.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_ENDPOINT_H_ -#define DECODER_CTC_ENDPOINT_H_ - -#include - -namespace wenet { - -struct CtcEndpointRule { - bool must_decoded_sth; - int min_trailing_silence; - int min_utterance_length; - - CtcEndpointRule(bool must_decoded_sth = true, int min_trailing_silence = 1000, - int min_utterance_length = 0) - : must_decoded_sth(must_decoded_sth), - min_trailing_silence(min_trailing_silence), - min_utterance_length(min_utterance_length) {} -}; - -struct CtcEndpointConfig { - /// We consider blank as silence for purposes of endpointing. - int blank = 0; // blank id - float blank_threshold = 0.8; // blank threshold to be silence - /// We support three rules. We terminate decoding if ANY of these rules - /// evaluates to "true". If you want to add more rules, do it by changing this - /// code. If you want to disable a rule, you can set the silence-timeout for - /// that rule to a very large number. - - /// rule1 times out after 5000 ms of silence, even if we decoded nothing. - CtcEndpointRule rule1; - /// rule2 times out after 1000 ms of silence after decoding something. - CtcEndpointRule rule2; - /// rule3 times out after the utterance is 20000 ms long, regardless of - /// anything else. 
- CtcEndpointRule rule3; - - CtcEndpointConfig() - : rule1(false, 5000, 0), rule2(true, 1000, 0), rule3(false, 0, 20000) {} -}; - -class CtcEndpoint { - public: - explicit CtcEndpoint(const CtcEndpointConfig& config); - - void Reset(); - /// This function returns true if this set of endpointing rules thinks we - /// should terminate decoding. - bool IsEndpoint(const std::vector>& ctc_log_probs, - bool decoded_something); - - void frame_shift_in_ms(int frame_shift_in_ms) { - frame_shift_in_ms_ = frame_shift_in_ms; - } - - private: - CtcEndpointConfig config_; - int frame_shift_in_ms_ = -1; - int num_frames_decoded_ = 0; - int num_frames_trailing_blank_ = 0; -}; - -} // namespace wenet - -#endif // DECODER_CTC_ENDPOINT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_prefix_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_prefix_beam_search.cc deleted file mode 100644 index 154c8864ba98255528a33a80a35b18eee8fa5dc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_prefix_beam_search.cc +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/ctc_prefix_beam_search.h" - -#include -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -CtcPrefixBeamSearch::CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : opts_(opts), context_graph_(context_graph) { - Reset(); -} - -void CtcPrefixBeamSearch::Reset() { - hypotheses_.clear(); - likelihood_.clear(); - cur_hyps_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - outputs_.clear(); - abs_time_step_ = 0; - PrefixScore prefix_score; - prefix_score.s = 0.0; - prefix_score.ns = -kFloatMax; - prefix_score.v_s = 0.0; - prefix_score.v_ns = 0.0; - std::vector empty; - cur_hyps_[empty] = prefix_score; - outputs_.emplace_back(empty); - hypotheses_.emplace_back(empty); - likelihood_.emplace_back(prefix_score.total_score()); - times_.emplace_back(empty); -} - -static bool PrefixScoreCompare( - const std::pair, PrefixScore>& a, - const std::pair, PrefixScore>& b) { - return a.second.total_score() > b.second.total_score(); -} - -void CtcPrefixBeamSearch::UpdateOutputs( - const std::pair, PrefixScore>& prefix) { - const std::vector& input = prefix.first; - const std::vector& start_boundaries = prefix.second.start_boundaries; - const std::vector& end_boundaries = prefix.second.end_boundaries; - - std::vector output; - int s = 0; - int e = 0; - for (int i = 0; i < input.size(); ++i) { - if (s < start_boundaries.size() && i == start_boundaries[s]) { - output.emplace_back(context_graph_->start_tag_id()); - ++s; - } - output.emplace_back(input[i]); - if (e < end_boundaries.size() && i == end_boundaries[e]) { - output.emplace_back(context_graph_->end_tag_id()); - ++e; - } - } - outputs_.emplace_back(output); -} - -void CtcPrefixBeamSearch::UpdateHypotheses( - const std::vector, PrefixScore>>& hpys) { - cur_hyps_.clear(); - outputs_.clear(); - hypotheses_.clear(); - likelihood_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - for (auto& item : hpys) { - cur_hyps_[item.first] = item.second; - UpdateOutputs(item); - hypotheses_.emplace_back(std::move(item.first)); - likelihood_.emplace_back(item.second.total_score()); - viterbi_likelihood_.emplace_back(item.second.viterbi_score()); - times_.emplace_back(item.second.times()); - } -} - -// Please refer https://robin1001.github.io/2020/12/11/ctc-search -// for how CTC prefix beam search works, and there is a simple graph demo in -// it. -void CtcPrefixBeamSearch::Search(const std::vector>& logp) { - if (logp.size() == 0) return; - int first_beam_size = - std::min(static_cast(logp[0].size()), opts_.first_beam_size); - for (int t = 0; t < logp.size(); ++t, ++abs_time_step_) { - const std::vector& logp_t = logp[t]; - std::unordered_map, PrefixScore, PrefixHash> next_hyps; - // 1. First beam prune, only select topk candidates - std::vector topk_score; - std::vector topk_index; - TopK(logp_t, first_beam_size, &topk_score, &topk_index); - - // 2. Token passing - for (int i = 0; i < topk_index.size(); ++i) { - int id = topk_index[i]; - auto prob = topk_score[i]; - for (const auto& it : cur_hyps_) { - const std::vector& prefix = it.first; - const PrefixScore& prefix_score = it.second; - // If prefix doesn't exist in next_hyps, next_hyps[prefix] will insert - // PrefixScore(-inf, -inf) by default, since the default constructor - // of PrefixScore will set fields s(blank ending score) and - // ns(none blank ending score) to -inf, respectively. 
- if (id == opts_.blank) { - // Case 0: *a + ε => *a - PrefixScore& next_score = next_hyps[prefix]; - next_score.s = LogAdd(next_score.s, prefix_score.score() + prob); - next_score.v_s = prefix_score.viterbi_score() + prob; - next_score.times_s = prefix_score.times(); - // Prefix not changed, copy the context from prefix. - if (context_graph_ && !next_score.has_context) { - next_score.CopyContext(prefix_score); - next_score.has_context = true; - } - } else if (!prefix.empty() && id == prefix.back()) { - // Case 1: *a + a => *a - PrefixScore& next_score1 = next_hyps[prefix]; - next_score1.ns = LogAdd(next_score1.ns, prefix_score.ns + prob); - if (next_score1.v_ns < prefix_score.v_ns + prob) { - next_score1.v_ns = prefix_score.v_ns + prob; - if (next_score1.cur_token_prob < prob) { - next_score1.cur_token_prob = prob; - next_score1.times_ns = prefix_score.times_ns; - CHECK_GT(next_score1.times_ns.size(), 0); - next_score1.times_ns.back() = abs_time_step_; - } - } - if (context_graph_ && !next_score1.has_context) { - next_score1.CopyContext(prefix_score); - next_score1.has_context = true; - } - - // Case 2: *aε + a => *aa - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score2 = next_hyps[new_prefix]; - next_score2.ns = LogAdd(next_score2.ns, prefix_score.s + prob); - if (next_score2.v_ns < prefix_score.v_s + prob) { - next_score2.v_ns = prefix_score.v_s + prob; - next_score2.cur_token_prob = prob; - next_score2.times_ns = prefix_score.times_s; - next_score2.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score2.has_context) { - // Prefix changed, calculate the context score. - next_score2.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score2.has_context = true; - } - } else { - // Case 3: *a + b => *ab, *aε + b => *ab - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score = next_hyps[new_prefix]; - next_score.ns = LogAdd(next_score.ns, prefix_score.score() + prob); - if (next_score.v_ns < prefix_score.viterbi_score() + prob) { - next_score.v_ns = prefix_score.viterbi_score() + prob; - next_score.cur_token_prob = prob; - next_score.times_ns = prefix_score.times(); - next_score.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score.has_context) { - // Calculate the context score. - next_score.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score.has_context = true; - } - } - } - } - - // 3. Second beam prune, only keep top n best paths - std::vector, PrefixScore>> arr(next_hyps.begin(), - next_hyps.end()); - int second_beam_size = - std::min(static_cast(arr.size()), opts_.second_beam_size); - std::nth_element(arr.begin(), arr.begin() + second_beam_size, arr.end(), - PrefixScoreCompare); - arr.resize(second_beam_size); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // 4. Update cur_hyps_ and get new result - UpdateHypotheses(arr); - } -} - -void CtcPrefixBeamSearch::FinalizeSearch() { UpdateFinalContext(); } - -void CtcPrefixBeamSearch::UpdateFinalContext() { - if (context_graph_ == nullptr) return; - CHECK_EQ(hypotheses_.size(), cur_hyps_.size()); - CHECK_EQ(hypotheses_.size(), likelihood_.size()); - // We should backoff the context score/state when the context is - // not fully matched at the last time. 
- for (const auto& prefix : hypotheses_) { - PrefixScore& prefix_score = cur_hyps_[prefix]; - if (prefix_score.context_state != 0) { - prefix_score.UpdateContext(context_graph_, prefix_score, 0, - prefix.size()); - } - } - std::vector, PrefixScore>> arr(cur_hyps_.begin(), - cur_hyps_.end()); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // Update cur_hyps_ and get new result - UpdateHypotheses(arr); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_prefix_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_prefix_beam_search.h deleted file mode 100644 index f44ec23c37af517c9e45140f89ef7346768f5d35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_prefix_beam_search.h +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_PREFIX_BEAM_SEARCH_H_ -#define DECODER_CTC_PREFIX_BEAM_SEARCH_H_ - -#include -#include -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "utils/utils.h" - -namespace wenet { - -struct CtcPrefixBeamSearchOptions { - int blank = 0; // blank id - int first_beam_size = 10; - int second_beam_size = 10; -}; - -struct PrefixScore { - float s = -kFloatMax; // blank ending score - float ns = -kFloatMax; // none blank ending score - float v_s = -kFloatMax; // viterbi blank ending score - float v_ns = -kFloatMax; // viterbi none blank ending score - float cur_token_prob = -kFloatMax; // prob of current token - std::vector times_s; // times of viterbi blank path - std::vector times_ns; // times of viterbi none blank path - - float score() const { return LogAdd(s, ns); } - float viterbi_score() const { return v_s > v_ns ? v_s : v_ns; } - const std::vector& times() const { - return v_s > v_ns ? 
times_s : times_ns; - } - - bool has_context = false; - int context_state = 0; - float context_score = 0; - std::vector start_boundaries; - std::vector end_boundaries; - - void CopyContext(const PrefixScore& prefix_score) { - context_state = prefix_score.context_state; - context_score = prefix_score.context_score; - start_boundaries = prefix_score.start_boundaries; - end_boundaries = prefix_score.end_boundaries; - } - - void UpdateContext(const std::shared_ptr& context_graph, - const PrefixScore& prefix_score, int word_id, - int prefix_len) { - this->CopyContext(prefix_score); - - float score = 0; - bool is_start_boundary = false; - bool is_end_boundary = false; - - context_state = - context_graph->GetNextState(prefix_score.context_state, word_id, &score, - &is_start_boundary, &is_end_boundary); - context_score += score; - if (is_start_boundary) start_boundaries.emplace_back(prefix_len); - if (is_end_boundary) end_boundaries.emplace_back(prefix_len); - } - - float total_score() const { return score() + context_score; } -}; - -struct PrefixHash { - size_t operator()(const std::vector& prefix) const { - size_t hash_code = 0; - // here we use KB&DR hash code - for (int id : prefix) { - hash_code = id + 31 * hash_code; - } - return hash_code; - } -}; - -class CtcPrefixBeamSearch : public SearchInterface { - public: - explicit CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph = nullptr); - - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kPrefixBeamSearch; } - void UpdateOutputs(const std::pair, PrefixScore>& prefix); - void UpdateHypotheses( - const std::vector, PrefixScore>>& hpys); - void UpdateFinalContext(); - - const std::vector& viterbi_likelihood() const { - return viterbi_likelihood_; - } - const std::vector>& Inputs() const override { - return hypotheses_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - int abs_time_step_ = 0; - - // N-best list and corresponding likelihood_, in sorted order - std::vector> hypotheses_; - std::vector likelihood_; - std::vector viterbi_likelihood_; - std::vector> times_; - - std::unordered_map, PrefixScore, PrefixHash> cur_hyps_; - std::shared_ptr context_graph_ = nullptr; - // Outputs contain the hypotheses_ and tags like: and - std::vector> outputs_; - const CtcPrefixBeamSearchOptions& opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(CtcPrefixBeamSearch); -}; - -} // namespace wenet - -#endif // DECODER_CTC_PREFIX_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_wfst_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_wfst_beam_search.cc deleted file mode 100644 index 10e93f387e87b5f16fb7784d7060c50f227bf58e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_wfst_beam_search.cc +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_wfst_beam_search.h" - -#include - -namespace wenet { - -void DecodableTensorScaled::Reset() { - num_frames_ready_ = 0; - done_ = false; - // Give an empty initialization, will throw error when - // AcceptLoglikes is not called - logp_.clear(); -} - -void DecodableTensorScaled::AcceptLoglikes(const std::vector& logp) { - ++num_frames_ready_; - // TODO(Binbin Zhang): Avoid copy here - logp_ = logp; -} - -float DecodableTensorScaled::LogLikelihood(int32 frame, int32 index) { - CHECK_GT(index, 0); - CHECK_LT(frame, num_frames_ready_); - return scale_ * logp_[index - 1]; -} - -bool DecodableTensorScaled::IsLastFrame(int32 frame) const { - CHECK_LT(frame, num_frames_ready_); - return done_ && (frame == num_frames_ready_ - 1); -} - -int32 DecodableTensorScaled::NumIndices() const { - LOG(FATAL) << "Not implement"; - return 0; -} - -CtcWfstBeamSearch::CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : decodable_(opts.acoustic_scale), - decoder_(fst, opts, context_graph), - context_graph_(context_graph), - opts_(opts) { - Reset(); -} - -void CtcWfstBeamSearch::Reset() { - num_frames_ = 0; - decoded_frames_mapping_.clear(); - is_last_frame_blank_ = false; - last_best_ = 0; - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - decodable_.Reset(); - decoder_.InitDecoding(); -} - -void CtcWfstBeamSearch::Search(const std::vector>& logp) { - if (0 == logp.size()) { - return; - } - // Every time we get the log posterior, we decode it all before return - for (int i = 0; i < logp.size(); i++) { - float blank_score = std::exp(logp[i][0]); - if (blank_score > opts_.blank_skip_thresh * opts_.blank_scale) { - VLOG(3) << "skipping frame " << num_frames_ << " score " << blank_score; - is_last_frame_blank_ = true; - last_frame_prob_ = logp[i]; - } else { - // Get the best symbol - int cur_best = - std::max_element(logp[i].begin(), logp[i].end()) - logp[i].begin(); - // Optional, adding one blank frame if we has skipped it in two same - // symbols - if (cur_best != 0 && is_last_frame_blank_ && cur_best == last_best_) { - decodable_.AcceptLoglikes(last_frame_prob_); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_ - 1); - VLOG(2) << "Adding blank frame at symbol " << cur_best; - } - last_best_ = cur_best; - - decodable_.AcceptLoglikes(logp[i]); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_); - is_last_frame_blank_ = false; - } - num_frames_++; - } - // Get the best path - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - if (decoded_frames_mapping_.size() > 0) { - inputs_.resize(1); - outputs_.resize(1); - likelihood_.resize(1); - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, false); - std::vector alignment; - kaldi::LatticeWeight weight; - fst::GetLinearSymbolSequence(lat, &alignment, &outputs_[0], &weight); - ConvertToInputs(alignment, &inputs_[0]); - RemoveContinuousTags(&outputs_[0]); - VLOG(3) << weight.Value1() << " " << weight.Value2(); - likelihood_[0] = 
-(weight.Value1() + weight.Value2()); - } -} - -void CtcWfstBeamSearch::FinalizeSearch() { - decodable_.SetFinish(); - decoder_.FinalizeDecoding(); - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - if (decoded_frames_mapping_.size() > 0) { - std::vector nbest_lats; - if (opts_.nbest == 1) { - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, true); - nbest_lats.push_back(std::move(lat)); - } else { - // Get N-best path by lattice(CompactLattice) - kaldi::CompactLattice clat; - decoder_.GetLattice(&clat, true); - kaldi::Lattice lat, nbest_lat; - fst::ConvertLattice(clat, &lat); - // TODO(Binbin Zhang): it's n-best word lists here, not character n-best - fst::ShortestPath(lat, &nbest_lat, opts_.nbest); - fst::ConvertNbestToVector(nbest_lat, &nbest_lats); - } - int nbest = nbest_lats.size(); - inputs_.resize(nbest); - outputs_.resize(nbest); - likelihood_.resize(nbest); - times_.resize(nbest); - for (int i = 0; i < nbest; i++) { - kaldi::LatticeWeight weight; - std::vector alignment; - fst::GetLinearSymbolSequence(nbest_lats[i], &alignment, &outputs_[i], - &weight); - ConvertToInputs(alignment, &inputs_[i], ×_[i]); - RemoveContinuousTags(&outputs_[i]); - likelihood_[i] = -(weight.Value1() + weight.Value2()); - } - } -} - -void CtcWfstBeamSearch::ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time) { - input->clear(); - if (time != nullptr) time->clear(); - for (int cur = 0; cur < alignment.size(); ++cur) { - // ignore blank - if (alignment[cur] - 1 == 0) continue; - // merge continuous same label - if (cur > 0 && alignment[cur] == alignment[cur - 1]) continue; - - input->push_back(alignment[cur] - 1); - if (time != nullptr) { - time->push_back(decoded_frames_mapping_[cur]); - } - } -} - -void CtcWfstBeamSearch::RemoveContinuousTags(std::vector* output) { - if (context_graph_) { - for (auto it = output->begin(); it != output->end();) { - if (*it == context_graph_->start_tag_id() || - *it == context_graph_->end_tag_id()) { - if (it + 1 != output->end() && *it == *(it + 1)) { - it = output->erase(it); - continue; - } - } - ++it; - } - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_wfst_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_wfst_beam_search.h deleted file mode 100644 index 204a0c8db1254035b7e3bd4a6e02b65d66b756f3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/ctc_wfst_beam_search.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#ifndef DECODER_CTC_WFST_BEAM_SEARCH_H_ -#define DECODER_CTC_WFST_BEAM_SEARCH_H_ - -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "kaldi/decoder/lattice-faster-online-decoder.h" -#include "utils/utils.h" - -namespace wenet { - -class DecodableTensorScaled : public kaldi::DecodableInterface { - public: - explicit DecodableTensorScaled(float scale = 1.0) : scale_(scale) { Reset(); } - - void Reset(); - int32 NumFramesReady() const override { return num_frames_ready_; } - bool IsLastFrame(int32 frame) const override; - float LogLikelihood(int32 frame, int32 index) override; - int32 NumIndices() const override; - void AcceptLoglikes(const std::vector& logp); - void SetFinish() { done_ = true; } - - private: - int num_frames_ready_ = 0; - float scale_ = 1.0; - bool done_ = false; - std::vector logp_; -}; - -// LatticeFasterDecoderConfig has the following key members -// beam: decoding beam -// max_active: Decoder max active states -// lattice_beam: Lattice generation beam -struct CtcWfstBeamSearchOptions : public kaldi::LatticeFasterDecoderConfig { - float acoustic_scale = 1.0; - float nbest = 10; - // When blank score is greater than this thresh, skip the frame in viterbi - // search - float blank_skip_thresh = 0.98; - float blank_scale = 1.0; -}; - -class CtcWfstBeamSearch : public SearchInterface { - public: - explicit CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph); - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kWfstBeamSearch; } - // For CTC prefix beam search, both inputs and outputs are hypotheses_ - const std::vector>& Inputs() const override { - return inputs_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - // Sub one and remove - void ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time = nullptr); - void RemoveContinuousTags(std::vector* output); - - int num_frames_ = 0; - std::vector decoded_frames_mapping_; - - int last_best_ = 0; // last none blank best id - std::vector last_frame_prob_; - bool is_last_frame_blank_ = false; - std::vector> inputs_, outputs_; - std::vector likelihood_; - std::vector> times_; - DecodableTensorScaled decodable_; - kaldi::LatticeFasterOnlineDecoder decoder_; - std::shared_ptr context_graph_; - const CtcWfstBeamSearchOptions& opts_; -}; - -} // namespace wenet - -#endif // DECODER_CTC_WFST_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/onnx_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/onnx_asr_model.cc deleted file mode 100644 index fc7afc704febbde3b7e350e392dc46763c453e74..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/onnx_asr_model.cc +++ /dev/null @@ -1,430 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/onnx_asr_model.h" - -#include -#include -#include - -#include "utils/string.h" - -namespace wenet { - -Ort::Env OnnxAsrModel::env_ = Ort::Env(ORT_LOGGING_LEVEL_WARNING, ""); -Ort::SessionOptions OnnxAsrModel::session_options_ = Ort::SessionOptions(); - -void OnnxAsrModel::InitEngineThreads(int num_threads) { - session_options_.SetIntraOpNumThreads(num_threads); -} - -void OnnxAsrModel::GetInputOutputInfo( - const std::shared_ptr& session, - std::vector* in_names, std::vector* out_names) { - Ort::AllocatorWithDefaultOptions allocator; - // Input info - int num_nodes = session->GetInputCount(); - in_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetInputName(i, allocator); - Ort::TypeInfo type_info = session->GetInputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tInput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*in_names)[i] = name; - } - // Output info - num_nodes = session->GetOutputCount(); - out_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetOutputName(i, allocator); - Ort::TypeInfo type_info = session->GetOutputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tOutput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*out_names)[i] = name; - } -} - -void OnnxAsrModel::Read(const std::string& model_dir) { - std::string encoder_onnx_path = model_dir + "/encoder.onnx"; - std::string rescore_onnx_path = model_dir + "/decoder.onnx"; - std::string ctc_onnx_path = model_dir + "/ctc.onnx"; - - // 1. Load sessions - try { -#ifdef _MSC_VER - encoder_session_ = std::make_shared( - env_, ToWString(encoder_onnx_path).c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, ToWString(rescore_onnx_path).c_str(), session_options_); - ctc_session_ = std::make_shared( - env_, ToWString(ctc_onnx_path).c_str(), session_options_); -#else - encoder_session_ = std::make_shared( - env_, encoder_onnx_path.c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, rescore_onnx_path.c_str(), session_options_); - ctc_session_ = std::make_shared(env_, ctc_onnx_path.c_str(), - session_options_); -#endif - } catch (std::exception const& e) { - LOG(ERROR) << "error when load onnx model: " << e.what(); - exit(0); - } - - // 2. 
Read metadata - auto model_metadata = encoder_session_->GetModelMetadata(); - - Ort::AllocatorWithDefaultOptions allocator; - encoder_output_size_ = - atoi(model_metadata.LookupCustomMetadataMap("output_size", allocator)); - num_blocks_ = - atoi(model_metadata.LookupCustomMetadataMap("num_blocks", allocator)); - head_ = atoi(model_metadata.LookupCustomMetadataMap("head", allocator)); - cnn_module_kernel_ = atoi( - model_metadata.LookupCustomMetadataMap("cnn_module_kernel", allocator)); - subsampling_rate_ = atoi( - model_metadata.LookupCustomMetadataMap("subsampling_rate", allocator)); - right_context_ = - atoi(model_metadata.LookupCustomMetadataMap("right_context", allocator)); - sos_ = atoi(model_metadata.LookupCustomMetadataMap("sos_symbol", allocator)); - eos_ = atoi(model_metadata.LookupCustomMetadataMap("eos_symbol", allocator)); - is_bidirectional_decoder_ = atoi(model_metadata.LookupCustomMetadataMap( - "is_bidirectional_decoder", allocator)); - chunk_size_ = - atoi(model_metadata.LookupCustomMetadataMap("chunk_size", allocator)); - num_left_chunks_ = - atoi(model_metadata.LookupCustomMetadataMap("left_chunks", allocator)); - - LOG(INFO) << "Onnx Model Info:"; - LOG(INFO) << "\tencoder_output_size " << encoder_output_size_; - LOG(INFO) << "\tnum_blocks " << num_blocks_; - LOG(INFO) << "\thead " << head_; - LOG(INFO) << "\tcnn_module_kernel " << cnn_module_kernel_; - LOG(INFO) << "\tsubsampling_rate " << subsampling_rate_; - LOG(INFO) << "\tright_context " << right_context_; - LOG(INFO) << "\tsos " << sos_; - LOG(INFO) << "\teos " << eos_; - LOG(INFO) << "\tis bidirectional decoder " << is_bidirectional_decoder_; - LOG(INFO) << "\tchunk_size " << chunk_size_; - LOG(INFO) << "\tnum_left_chunks " << num_left_chunks_; - - // 3. Read model nodes - LOG(INFO) << "Onnx Encoder:"; - GetInputOutputInfo(encoder_session_, &encoder_in_names_, &encoder_out_names_); - LOG(INFO) << "Onnx CTC:"; - GetInputOutputInfo(ctc_session_, &ctc_in_names_, &ctc_out_names_); - LOG(INFO) << "Onnx Rescore:"; - GetInputOutputInfo(rescore_session_, &rescore_in_names_, &rescore_out_names_); -} - -OnnxAsrModel::OnnxAsrModel(const OnnxAsrModel& other) { - // metadatas - encoder_output_size_ = other.encoder_output_size_; - num_blocks_ = other.num_blocks_; - head_ = other.head_; - cnn_module_kernel_ = other.cnn_module_kernel_; - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - - // sessions - encoder_session_ = other.encoder_session_; - ctc_session_ = other.ctc_session_; - rescore_session_ = other.rescore_session_; - - // node names - encoder_in_names_ = other.encoder_in_names_; - encoder_out_names_ = other.encoder_out_names_; - ctc_in_names_ = other.ctc_in_names_; - ctc_out_names_ = other.ctc_out_names_; - rescore_in_names_ = other.rescore_in_names_; - rescore_out_names_ = other.rescore_out_names_; -} - -std::shared_ptr OnnxAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void OnnxAsrModel::Reset() { - offset_ = 0; - encoder_outs_.clear(); - cached_feature_.clear(); - // Reset att_cache - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - if (num_left_chunks_ > 0) { - int required_cache_size = chunk_size_ * 
num_left_chunks_; - offset_ = required_cache_size; - att_cache_.resize(num_blocks_ * head_ * required_cache_size * - encoder_output_size_ / head_ * 2, - 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, required_cache_size, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } else { - att_cache_.resize(0, 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, 0, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } - - // Reset cnn_cache - cnn_cache_.resize( - num_blocks_ * encoder_output_size_ * (cnn_module_kernel_ - 1), 0.0); - const int64_t cnn_cache_shape[] = {num_blocks_, 1, encoder_output_size_, - cnn_module_kernel_ - 1}; - cnn_cache_ort_ = Ort::Value::CreateTensor( - memory_info, cnn_cache_.data(), cnn_cache_.size(), cnn_cache_shape, 4); -} - -void OnnxAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - // 1. Prepare onnx required data, splice cached_feature_ and chunk_feats - // chunk - int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - std::vector feats; - for (size_t i = 0; i < cached_feature_.size(); ++i) { - feats.insert(feats.end(), cached_feature_[i].begin(), - cached_feature_[i].end()); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - feats.insert(feats.end(), chunk_feats[i].begin(), chunk_feats[i].end()); - } - const int64_t feats_shape[3] = {1, num_frames, feature_dim}; - Ort::Value feats_ort = Ort::Value::CreateTensor( - memory_info, feats.data(), feats.size(), feats_shape, 3); - // offset - int64_t offset_int64 = static_cast(offset_); - Ort::Value offset_ort = Ort::Value::CreateTensor( - memory_info, &offset_int64, 1, std::vector{}.data(), 0); - // required_cache_size - int64_t required_cache_size = chunk_size_ * num_left_chunks_; - Ort::Value required_cache_size_ort = Ort::Value::CreateTensor( - memory_info, &required_cache_size, 1, std::vector{}.data(), 0); - // att_mask - Ort::Value att_mask_ort{nullptr}; - std::vector att_mask(required_cache_size + chunk_size_, 1); - if (num_left_chunks_ > 0) { - int chunk_idx = offset_ / chunk_size_ - num_left_chunks_; - if (chunk_idx < num_left_chunks_) { - for (int i = 0; i < (num_left_chunks_ - chunk_idx) * chunk_size_; ++i) { - att_mask[i] = 0; - } - } - const int64_t att_mask_shape[] = {1, 1, required_cache_size + chunk_size_}; - att_mask_ort = Ort::Value::CreateTensor( - memory_info, reinterpret_cast(att_mask.data()), att_mask.size(), - att_mask_shape, 3); - } - - // 2. 
Encoder chunk forward - std::vector inputs; - for (auto name : encoder_in_names_) { - if (!strcmp(name, "chunk")) { - inputs.emplace_back(std::move(feats_ort)); - } else if (!strcmp(name, "offset")) { - inputs.emplace_back(std::move(offset_ort)); - } else if (!strcmp(name, "required_cache_size")) { - inputs.emplace_back(std::move(required_cache_size_ort)); - } else if (!strcmp(name, "att_cache")) { - inputs.emplace_back(std::move(att_cache_ort_)); - } else if (!strcmp(name, "cnn_cache")) { - inputs.emplace_back(std::move(cnn_cache_ort_)); - } else if (!strcmp(name, "att_mask")) { - inputs.emplace_back(std::move(att_mask_ort)); - } - } - - std::vector ort_outputs = encoder_session_->Run( - Ort::RunOptions{nullptr}, encoder_in_names_.data(), inputs.data(), - inputs.size(), encoder_out_names_.data(), encoder_out_names_.size()); - - offset_ += static_cast( - ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[1]); - att_cache_ort_ = std::move(ort_outputs[1]); - cnn_cache_ort_ = std::move(ort_outputs[2]); - - std::vector ctc_inputs; - ctc_inputs.emplace_back(std::move(ort_outputs[0])); - - std::vector ctc_ort_outputs = ctc_session_->Run( - Ort::RunOptions{nullptr}, ctc_in_names_.data(), ctc_inputs.data(), - ctc_inputs.size(), ctc_out_names_.data(), ctc_out_names_.size()); - encoder_outs_.push_back(std::move(ctc_inputs[0])); - - float* logp_data = ctc_ort_outputs[0].GetTensorMutableData(); - auto type_info = ctc_ort_outputs[0].GetTensorTypeAndShapeInfo(); - - int num_outputs = type_info.GetShape()[1]; - int output_dim = type_info.GetShape()[2]; - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), logp_data + i * output_dim, - sizeof(float) * output_dim); - } -} - -float OnnxAsrModel::ComputeAttentionScore(const float* prob, - const std::vector& hyp, int eos, - int decode_out_len) { - float score = 0.0f; - for (size_t j = 0; j < hyp.size(); ++j) { - score += *(prob + j * decode_out_len + hyp[j]); - } - score += *(prob + hyp.size() * decode_out_len + eos); - return score; -} - -void OnnxAsrModel::AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - std::vector hyps_lens; - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_lens.emplace_back(static_cast(length)); - } - - std::vector rescore_input; - int encoder_len = 0; - for (int i = 0; i < encoder_outs_.size(); i++) { - float* encoder_outs_data = encoder_outs_[i].GetTensorMutableData(); - auto type_info = encoder_outs_[i].GetTensorTypeAndShapeInfo(); - for (int j = 0; j < type_info.GetElementCount(); j++) { - rescore_input.emplace_back(encoder_outs_data[j]); - } - encoder_len += type_info.GetShape()[1]; - } - - const int64_t decode_input_shape[] = {1, encoder_len, encoder_output_size_}; - - std::vector hyps_pad; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_pad.emplace_back(sos_); - size_t j = 0; - for (; j < hyp.size(); ++j) { - hyps_pad.emplace_back(hyp[j]); - } - if (j == max_hyps_len - 1) { - continue; - } - for (; j < max_hyps_len - 1; ++j) { 
- hyps_pad.emplace_back(0); - } - } - - const int64_t hyps_pad_shape[] = {num_hyps, max_hyps_len}; - - const int64_t hyps_lens_shape[] = {num_hyps}; - - Ort::Value decode_input_tensor_ = Ort::Value::CreateTensor( - memory_info, rescore_input.data(), rescore_input.size(), - decode_input_shape, 3); - Ort::Value hyps_pad_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_pad.data(), hyps_pad.size(), hyps_pad_shape, 2); - Ort::Value hyps_lens_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_lens.data(), hyps_lens.size(), hyps_lens_shape, 1); - - std::vector rescore_inputs; - - rescore_inputs.emplace_back(std::move(hyps_pad_tensor_)); - rescore_inputs.emplace_back(std::move(hyps_lens_tensor_)); - rescore_inputs.emplace_back(std::move(decode_input_tensor_)); - - std::vector rescore_outputs = rescore_session_->Run( - Ort::RunOptions{nullptr}, rescore_in_names_.data(), rescore_inputs.data(), - rescore_inputs.size(), rescore_out_names_.data(), - rescore_out_names_.size()); - - float* decoder_outs_data = rescore_outputs[0].GetTensorMutableData(); - float* r_decoder_outs_data = rescore_outputs[1].GetTensorMutableData(); - - auto type_info = rescore_outputs[0].GetTensorTypeAndShapeInfo(); - int decode_out_len = type_info.GetShape()[2]; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left to right decoder score - score = ComputeAttentionScore( - decoder_outs_data + max_hyps_len * decode_out_len * i, hyp, eos_, - decode_out_len); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore( - r_decoder_outs_data + max_hyps_len * decode_out_len * i, r_hyp, eos_, - decode_out_len); - } - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/onnx_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/onnx_asr_model.h deleted file mode 100644 index f5d9e9a0c61d728f2fb6d45d1428234abae98c90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/onnx_asr_model.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_ONNX_ASR_MODEL_H_ -#define DECODER_ONNX_ASR_MODEL_H_ - -#include -#include -#include - -#include "onnxruntime_cxx_api.h" // NOLINT - -#include "decoder/asr_model.h" -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -class OnnxAsrModel : public AsrModel { - public: - static void InitEngineThreads(int num_threads = 1); - - public: - OnnxAsrModel() = default; - OnnxAsrModel(const OnnxAsrModel& other); - void Read(const std::string& model_dir); - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - void GetInputOutputInfo(const std::shared_ptr& session, - std::vector* in_names, - std::vector* out_names); - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const float* prob, const std::vector& hyp, - int eos, int decode_out_len); - - private: - int encoder_output_size_ = 0; - int num_blocks_ = 0; - int cnn_module_kernel_ = 0; - int head_ = 0; - - // sessions - // NOTE(Mddct): The Env holds the logging state used by all other objects. - // One Env must be created before using any other Onnxruntime functionality. - static Ort::Env env_; // shared environment across threads. - static Ort::SessionOptions session_options_; - std::shared_ptr encoder_session_ = nullptr; - std::shared_ptr rescore_session_ = nullptr; - std::shared_ptr ctc_session_ = nullptr; - - // node names - std::vector encoder_in_names_, encoder_out_names_; - std::vector ctc_in_names_, ctc_out_names_; - std::vector rescore_in_names_, rescore_out_names_; - - // caches - Ort::Value att_cache_ort_{nullptr}; - Ort::Value cnn_cache_ort_{nullptr}; - std::vector encoder_outs_; - // NOTE: Instead of making a copy of the xx_cache, ONNX only maintains - // its data pointer when initializing xx_cache_ort (see https://github.com/ - // microsoft/onnxruntime/blob/master/onnxruntime/core/framework - // /tensor.cc#L102-L129), so we need the following variables to keep - // our data "alive" during the lifetime of decoder. - std::vector att_cache_; - std::vector cnn_cache_; -}; - -} // namespace wenet - -#endif // DECODER_ONNX_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/params.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/params.h deleted file mode 100644 index 3edc877f1bb6d876ca087cab8e4ed00d42e97e63..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/params.h +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_PARAMS_H_ -#define DECODER_PARAMS_H_ - -#include -#include -#include -#include - -#include "decoder/asr_decoder.h" -#ifdef USE_ONNX -#include "decoder/onnx_asr_model.h" -#endif -#ifdef USE_TORCH -#include "decoder/torch_asr_model.h" -#endif -#ifdef USE_XPU -#include "xpu/xpu_asr_model.h" -#endif -#ifdef USE_BPU -#include "bpu/bpu_asr_model.h" -#endif -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/flags.h" -#include "utils/string.h" - -DEFINE_int32(device_id, 0, "set XPU DeviceID for ASR model"); - -// TorchAsrModel flags -DEFINE_string(model_path, "", "pytorch exported model path"); -// OnnxAsrModel flags -DEFINE_string(onnx_dir, "", "directory where the onnx model is saved"); -// XPUAsrModel flags -DEFINE_string(xpu_model_dir, "", - "directory where the XPU model and weights is saved"); -// BPUAsrModel flags -DEFINE_string(bpu_model_dir, "", - "directory where the HORIZON BPU model is saved"); - -// FeaturePipelineConfig flags -DEFINE_int32(num_bins, 80, "num mel bins for fbank feature"); -DEFINE_int32(sample_rate, 16000, "sample rate for audio"); - -// TLG fst -DEFINE_string(fst_path, "", "TLG fst path"); - -// DecodeOptions flags -DEFINE_int32(chunk_size, 16, "decoding chunk size"); -DEFINE_int32(num_left_chunks, -1, "left chunks in decoding"); -DEFINE_double(ctc_weight, 0.5, - "ctc weight when combining ctc score and rescoring score"); -DEFINE_double(rescoring_weight, 1.0, - "rescoring weight when combining ctc score and rescoring score"); -DEFINE_double(reverse_weight, 0.0, - "used for bitransformer rescoring. it must be 0.0 if decoder is" - "conventional transformer decoder, and only reverse_weight > 0.0" - "dose the right to left decoder will be calculated and used"); -DEFINE_int32(max_active, 7000, "max active states in ctc wfst search"); -DEFINE_int32(min_active, 200, "min active states in ctc wfst search"); -DEFINE_double(beam, 16.0, "beam in ctc wfst search"); -DEFINE_double(lattice_beam, 10.0, "lattice beam in ctc wfst search"); -DEFINE_double(acoustic_scale, 1.0, "acoustic scale for ctc wfst search"); -DEFINE_double(blank_skip_thresh, 1.0, - "blank skip thresh for ctc wfst search, 1.0 means no skip"); -DEFINE_double(blank_scale, 1.0, "blank scale for ctc wfst search"); -DEFINE_double(length_penalty, 0.0, - "length penalty ctc wfst search, will not" - "apply on self-loop arc, for balancing the del/ins ratio, " - "suggest set to -3.0"); -DEFINE_int32(nbest, 10, "nbest for ctc wfst or prefix search"); - -// SymbolTable flags -DEFINE_string(dict_path, "", - "dict symbol table path, required when LM is enabled"); -DEFINE_string(unit_path, "", - "e2e model unit symbol table, it is used in both " - "with/without LM scenarios for context/timestamp"); - -// Context flags -DEFINE_string(context_path, "", "context path, is used to build context graph"); -DEFINE_double(context_score, 3.0, "is used to rescore the decoded result"); - -// PostProcessOptions flags -DEFINE_int32(language_type, 0, - "remove spaces according to language type" - "0x00 = kMandarinEnglish, " - "0x01 = kIndoEuropean"); -DEFINE_bool(lowercase, true, "lowercase final result if needed"); - -namespace wenet { -std::shared_ptr InitFeaturePipelineConfigFromFlags() { - auto feature_config = std::make_shared( - FLAGS_num_bins, FLAGS_sample_rate); - return feature_config; -} - -std::shared_ptr InitDecodeOptionsFromFlags() { - auto decode_config = std::make_shared(); - decode_config->chunk_size = FLAGS_chunk_size; - decode_config->num_left_chunks = 
FLAGS_num_left_chunks; - decode_config->ctc_weight = FLAGS_ctc_weight; - decode_config->reverse_weight = FLAGS_reverse_weight; - decode_config->rescoring_weight = FLAGS_rescoring_weight; - decode_config->ctc_wfst_search_opts.max_active = FLAGS_max_active; - decode_config->ctc_wfst_search_opts.min_active = FLAGS_min_active; - decode_config->ctc_wfst_search_opts.beam = FLAGS_beam; - decode_config->ctc_wfst_search_opts.lattice_beam = FLAGS_lattice_beam; - decode_config->ctc_wfst_search_opts.acoustic_scale = FLAGS_acoustic_scale; - decode_config->ctc_wfst_search_opts.blank_skip_thresh = - FLAGS_blank_skip_thresh; - decode_config->ctc_wfst_search_opts.blank_scale = FLAGS_blank_scale; - decode_config->ctc_wfst_search_opts.length_penalty = FLAGS_length_penalty; - decode_config->ctc_wfst_search_opts.nbest = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.first_beam_size = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.second_beam_size = FLAGS_nbest; - return decode_config; -} - -std::shared_ptr InitDecodeResourceFromFlags() { - auto resource = std::make_shared(); - const int kNumGemmThreads = 1; - if (!FLAGS_onnx_dir.empty()) { -#ifdef USE_ONNX - LOG(INFO) << "Reading onnx model "; - OnnxAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_onnx_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DONNX=ON'."; -#endif - } else if (!FLAGS_model_path.empty()) { -#ifdef USE_TORCH - LOG(INFO) << "Reading torch model " << FLAGS_model_path; - TorchAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_model_path); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DTORCH=ON'."; -#endif - } else if (!FLAGS_xpu_model_dir.empty()) { -#ifdef USE_XPU - LOG(INFO) << "Reading XPU WeNet model weight from " << FLAGS_xpu_model_dir; - auto model = std::make_shared(); - model->SetEngineThreads(kNumGemmThreads); - model->SetDeviceId(FLAGS_device_id); - model->Read(FLAGS_xpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DXPU=ON'."; -#endif - } else if (!FLAGS_bpu_model_dir.empty()) { -#ifdef USE_BPU - LOG(INFO) << "Reading Horizon BPU model from " << FLAGS_bpu_model_dir; - auto model = std::make_shared(); - model->Read(FLAGS_bpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DBPU=ON'."; -#endif - } else { - LOG(FATAL) << "Please set ONNX, TORCH, XPU or BPU model path!!!"; - } - - LOG(INFO) << "Reading unit table " << FLAGS_unit_path; - auto unit_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_unit_path)); - CHECK(unit_table != nullptr); - resource->unit_table = unit_table; - - if (!FLAGS_fst_path.empty()) { // With LM - CHECK(!FLAGS_dict_path.empty()); - LOG(INFO) << "Reading fst " << FLAGS_fst_path; - auto fst = std::shared_ptr>( - fst::Fst::Read(FLAGS_fst_path)); - CHECK(fst != nullptr); - resource->fst = fst; - - LOG(INFO) << "Reading symbol table " << FLAGS_dict_path; - auto symbol_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_dict_path)); - CHECK(symbol_table != nullptr); - resource->symbol_table = symbol_table; - } else { // Without LM, symbol_table is the same as unit_table - resource->symbol_table = unit_table; - } - - if (!FLAGS_context_path.empty()) { - LOG(INFO) << "Reading context " << FLAGS_context_path; - std::vector contexts; - std::ifstream infile(FLAGS_context_path); - std::string context; - 
while (getline(infile, context)) { - contexts.emplace_back(Trim(context)); - } - ContextConfig config; - config.context_score = FLAGS_context_score; - resource->context_graph = std::make_shared(config); - resource->context_graph->BuildContextGraph(contexts, - resource->symbol_table); - } - - PostProcessOptions post_process_opts; - post_process_opts.language_type = - FLAGS_language_type == 0 ? kMandarinEnglish : kIndoEuropean; - post_process_opts.lowercase = FLAGS_lowercase; - resource->post_processor = - std::make_shared(std::move(post_process_opts)); - return resource; -} - -} // namespace wenet - -#endif // DECODER_PARAMS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/search_interface.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/search_interface.h deleted file mode 100644 index 25bad26705f8be44561d2c686f50a63035b14bbf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/search_interface.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_SEARCH_INTERFACE_H_ -#define DECODER_SEARCH_INTERFACE_H_ - -namespace wenet { - -#include - -enum SearchType { - kPrefixBeamSearch = 0x00, - kWfstBeamSearch = 0x01, -}; - -class SearchInterface { - public: - virtual ~SearchInterface() {} - virtual void Search(const std::vector>& logp) = 0; - virtual void Reset() = 0; - virtual void FinalizeSearch() = 0; - - virtual SearchType Type() const = 0; - // N-best inputs id - virtual const std::vector>& Inputs() const = 0; - // N-best outputs id - virtual const std::vector>& Outputs() const = 0; - // N-best likelihood - virtual const std::vector& Likelihood() const = 0; - // N-best timestamp - virtual const std::vector>& Times() const = 0; -}; - -} // namespace wenet - -#endif // DECODER_SEARCH_INTERFACE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/torch_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/torch_asr_model.cc deleted file mode 100644 index 3abca283e12f5c173c9511707229ea82b31f26d8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/torch_asr_model.cc +++ /dev/null @@ -1,278 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/torch_asr_model.h" - -#include -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -namespace wenet { - -#ifndef IOS -void TorchAsrModel::InitEngineThreads(int num_threads) { - // For multi-thread performance - at::set_num_threads(num_threads); - VLOG(1) << "Num intra-op threads: " << at::get_num_threads(); -} -#endif - -void TorchAsrModel::Read(const std::string& model_path) { - torch::DeviceType device = at::kCPU; -#ifdef USE_GPU - if (!torch::cuda::is_available()) { - VLOG(1) << "CUDA is not available! Please check your GPU settings"; - throw std::runtime_error("CUDA is not available!"); - } else { - VLOG(1) << "CUDA available! Running on GPU"; - device = at::kCUDA; - } -#endif - torch::jit::script::Module model = torch::jit::load(model_path, device); - model_ = std::make_shared(std::move(model)); - torch::NoGradGuard no_grad; - model_->eval(); - torch::jit::IValue o1 = model_->run_method("subsampling_rate"); - CHECK_EQ(o1.isInt(), true); - subsampling_rate_ = o1.toInt(); - torch::jit::IValue o2 = model_->run_method("right_context"); - CHECK_EQ(o2.isInt(), true); - right_context_ = o2.toInt(); - torch::jit::IValue o3 = model_->run_method("sos_symbol"); - CHECK_EQ(o3.isInt(), true); - sos_ = o3.toInt(); - torch::jit::IValue o4 = model_->run_method("eos_symbol"); - CHECK_EQ(o4.isInt(), true); - eos_ = o4.toInt(); - torch::jit::IValue o5 = model_->run_method("is_bidirectional_decoder"); - CHECK_EQ(o5.isBool(), true); - is_bidirectional_decoder_ = o5.toBool(); - - VLOG(1) << "Torch Model Info:"; - VLOG(1) << "\tsubsampling_rate " << subsampling_rate_; - VLOG(1) << "\tright context " << right_context_; - VLOG(1) << "\tsos " << sos_; - VLOG(1) << "\teos " << eos_; - VLOG(1) << "\tis bidirectional decoder " << is_bidirectional_decoder_; -} - -TorchAsrModel::TorchAsrModel(const TorchAsrModel& other) { - // 1. Init the model info - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - // 2. Model copy, just copy the model ptr since: - // PyTorch allows using multiple CPU threads during TorchScript model - // inference, please see https://pytorch.org/docs/stable/notes/cpu_ - // threading_torchscript_inference.html - model_ = other.model_; - - // NOTE(Binbin Zhang): - // inner states for forward are not copied here. -} - -std::shared_ptr TorchAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void TorchAsrModel::Reset() { - offset_ = 0; - att_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - cnn_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - encoder_outs_.clear(); - cached_feature_.clear(); -} - -void TorchAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - // 1. Prepare libtorch required data, splice cached_feature_ and chunk_feats - // The first dimension is for batchsize, which is 1. 
- int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - torch::Tensor feats = - torch::zeros({1, num_frames, feature_dim}, torch::kFloat); - for (size_t i = 0; i < cached_feature_.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(cached_feature_[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][i] = std::move(row); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(chunk_feats[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][cached_feature_.size() + i] = std::move(row); - } - - // 2. Encoder chunk forward -#ifdef USE_GPU - feats = feats.to(at::kCUDA); - att_cache_ = att_cache_.to(at::kCUDA); - cnn_cache_ = cnn_cache_.to(at::kCUDA); -#endif - int required_cache_size = chunk_size_ * num_left_chunks_; - torch::NoGradGuard no_grad; - std::vector inputs = {feats, offset_, required_cache_size, - att_cache_, cnn_cache_}; - - // Refer interfaces in wenet/transformer/asr_model.py - auto outputs = - model_->get_method("forward_encoder_chunk")(inputs).toTuple()->elements(); - CHECK_EQ(outputs.size(), 3); -#ifdef USE_GPU - torch::Tensor chunk_out = outputs[0].toTensor().to(at::kCPU); - att_cache_ = outputs[1].toTensor().to(at::kCPU); - cnn_cache_ = outputs[2].toTensor().to(at::kCPU); -#else - torch::Tensor chunk_out = outputs[0].toTensor(); - att_cache_ = outputs[1].toTensor(); - cnn_cache_ = outputs[2].toTensor(); -#endif - offset_ += chunk_out.size(1); - - // The first dimension of returned value is for batchsize, which is 1 -#ifdef USE_GPU - chunk_out = chunk_out.to(at::kCUDA); - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor(); - ctc_log_probs = ctc_log_probs.to(at::kCPU)[0]; - encoder_outs_.push_back(std::move(chunk_out.to(at::kCPU))); -#else - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor()[0]; - encoder_outs_.push_back(std::move(chunk_out)); -#endif - - // Copy to output - int num_outputs = ctc_log_probs.size(0); - int output_dim = ctc_log_probs.size(1); - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), ctc_log_probs[i].data_ptr(), - sizeof(float) * output_dim); - } -} - -float TorchAsrModel::ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, - int eos) { - float score = 0.0f; - auto accessor = prob.accessor(); - for (size_t j = 0; j < hyp.size(); ++j) { - score += accessor[j][hyp[j]]; - } - score += accessor[hyp.size()][eos]; - return score; -} - -void TorchAsrModel::AttentionRescoring( - const std::vector>& hyps, float reverse_weight, - std::vector* rescoring_score) { - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - torch::NoGradGuard no_grad; - // Step 1: Prepare input for libtorch - torch::Tensor hyps_length = torch::zeros({num_hyps}, torch::kLong); - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_length[i] = static_cast(length); - } - torch::Tensor hyps_tensor = - torch::zeros({num_hyps, max_hyps_len}, torch::kLong); - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_tensor[i][0] = sos_; - for (size_t j = 0; j < 
hyp.size(); ++j) { - hyps_tensor[i][j + 1] = hyp[j]; - } - } - - // Step 2: Forward attention decoder by hyps and corresponding encoder_outs_ - torch::Tensor encoder_out = torch::cat(encoder_outs_, 1); -#ifdef USE_GPU - hyps_tensor = hyps_tensor.to(at::kCUDA); - hyps_length = hyps_length.to(at::kCUDA); - encoder_out = encoder_out.to(at::kCUDA); -#endif - auto outputs = model_ - ->run_method("forward_attention_decoder", hyps_tensor, - hyps_length, encoder_out, reverse_weight) - .toTuple() - ->elements(); -#ifdef USE_GPU - auto probs = outputs[0].toTensor().to(at::kCPU); - auto r_probs = outputs[1].toTensor().to(at::kCPU); -#else - auto probs = outputs[0].toTensor(); - auto r_probs = outputs[1].toTensor(); -#endif - CHECK_EQ(probs.size(0), num_hyps); - CHECK_EQ(probs.size(1), max_hyps_len); - - // Step 3: Compute rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left-to-right decoder score - score = ComputeAttentionScore(probs[i], hyp, eos_); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - // right-to-left score - CHECK_EQ(r_probs.size(0), num_hyps); - CHECK_EQ(r_probs.size(1), max_hyps_len); - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore(r_probs[i], r_hyp, eos_); - } - - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/torch_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/torch_asr_model.h deleted file mode 100644 index a3cebe08798f1cad60ca4cd73c7b2488173b6114..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/decoder/torch_asr_model.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
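For readers skimming the deleted `TorchAsrModel::AttentionRescoring` above: the scoring reduces to summing per-token attention-decoder log-probabilities (plus an end-of-sentence term) in each direction and blending the two sums with `reverse_weight`. A self-contained sketch of that arithmetic; the log-probability values below are made up purely for illustration, real ones come from the attention decoder.

```cpp
#include <cstdio>
#include <vector>

// Sum log P(token | prefix) along a hypothesis, then add the <eos> term,
// mirroring ComputeAttentionScore() in the deleted torch_asr_model.cc.
static float AttentionScore(const std::vector<std::vector<float>>& logp,
                            const std::vector<int>& hyp, int eos) {
  float score = 0.0f;
  for (size_t j = 0; j < hyp.size(); ++j) score += logp[j][hyp[j]];
  return score + logp[hyp.size()][eos];
}

int main() {
  const int eos = 3;
  const float reverse_weight = 0.3f;  // illustrative value only
  std::vector<int> hyp = {1, 2};

  // Fake per-step distributions: logp[j][v] = log P(v at step j).
  std::vector<std::vector<float>> logp   = {{-2.0f, -0.1f, -3.0f, -4.0f},
                                            {-2.0f, -3.0f, -0.2f, -4.0f},
                                            {-2.0f, -3.0f, -3.0f, -0.3f}};
  std::vector<std::vector<float>> r_logp = {{-2.0f, -3.0f, -0.2f, -4.0f},
                                            {-2.0f, -0.4f, -3.0f, -4.0f},
                                            {-2.0f, -3.0f, -3.0f, -0.1f}};

  float l2r = AttentionScore(logp, hyp, eos);
  std::vector<int> r_hyp(hyp.rbegin(), hyp.rend());  // reversed hypothesis
  float r2l = AttentionScore(r_logp, r_hyp, eos);

  // Same blend as the deleted code: (1 - w) * left-to-right + w * right-to-left.
  float combined = l2r * (1.0f - reverse_weight) + r2l * reverse_weight;
  std::printf("l2r=%.2f r2l=%.2f combined=%.2f\n", l2r, r2l, combined);
  return 0;
}
```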
- -#ifndef DECODER_TORCH_ASR_MODEL_H_ -#define DECODER_TORCH_ASR_MODEL_H_ - -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -#include "decoder/asr_model.h" -#include "utils/utils.h" - -namespace wenet { - -class TorchAsrModel : public AsrModel { - public: -#ifndef IOS - static void InitEngineThreads(int num_threads = 1); -#endif - - public: - using TorchModule = torch::jit::script::Module; - TorchAsrModel() = default; - TorchAsrModel(const TorchAsrModel& other); - void Read(const std::string& model_path); - std::shared_ptr torch_model() const { return model_; } - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, int eos); - - private: - std::shared_ptr model_ = nullptr; - std::vector encoder_outs_; - // transformer/conformer attention cache - torch::Tensor att_cache_ = torch::zeros({0, 0, 0, 0}); - // conformer-only conv_module cache - torch::Tensor cnn_cache_ = torch::zeros({0, 0, 0, 0}); -}; - -} // namespace wenet - -#endif // DECODER_TORCH_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/CMakeLists.txt deleted file mode 100644 index 78872257e43bb9a6ffcedaae977bf0173817ae50..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library(frontend STATIC - feature_pipeline.cc - fft.cc -) -target_link_libraries(frontend PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/fbank.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/fbank.h deleted file mode 100644 index 5a650dc035b8e244388cc1f2e0b9512654de7fda..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/fbank.h +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
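The deleted `TorchAsrModel` header above exposes a `Copy()` that shares the underlying TorchScript module across decoding threads while resetting the per-stream attention/conv caches. A hedged sketch of that pattern, assuming the deleted headers are still on the include path, that `Copy()`'s stripped return type is `std::shared_ptr<AsrModel>` as in upstream WeNet, and that `final.zip` is a placeholder model path:

```cpp
#include <memory>

#include "decoder/torch_asr_model.h"  // deleted header; sketch only

// Each decoding thread takes a shallow copy: Copy() shares the TorchScript
// module (safe for multi-threaded inference) and resets the per-stream caches.
std::shared_ptr<wenet::AsrModel> MakeStreamModel(
    const std::shared_ptr<wenet::TorchAsrModel>& shared_model) {
  return shared_model->Copy();
}

int main() {
  wenet::TorchAsrModel::InitEngineThreads(1);   // intra-op threads per model
  auto model = std::make_shared<wenet::TorchAsrModel>();
  model->Read("final.zip");                     // placeholder TorchScript path
  auto stream_model = MakeStreamModel(model);   // one of these per stream
  return stream_model != nullptr ? 0 : 1;
}
```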
- -#ifndef FRONTEND_FBANK_H_ -#define FRONTEND_FBANK_H_ - -#include -#include -#include -#include -#include - -#include "frontend/fft.h" -#include "utils/log.h" - -namespace wenet { - -// This code is based on kaldi Fbank implementation, please see -// https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.cc -class Fbank { - public: - Fbank(int num_bins, int sample_rate, int frame_length, int frame_shift) - : num_bins_(num_bins), - sample_rate_(sample_rate), - frame_length_(frame_length), - frame_shift_(frame_shift), - use_log_(true), - remove_dc_offset_(true), - generator_(0), - distribution_(0, 1.0), - dither_(0.0) { - fft_points_ = UpperPowerOfTwo(frame_length_); - // generate bit reversal table and trigonometric function table - const int fft_points_4 = fft_points_ / 4; - bitrev_.resize(fft_points_); - sintbl_.resize(fft_points_ + fft_points_4); - make_sintbl(fft_points_, sintbl_.data()); - make_bitrev(fft_points_, bitrev_.data()); - - int num_fft_bins = fft_points_ / 2; - float fft_bin_width = static_cast(sample_rate_) / fft_points_; - int low_freq = 20, high_freq = sample_rate_ / 2; - float mel_low_freq = MelScale(low_freq); - float mel_high_freq = MelScale(high_freq); - float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1); - bins_.resize(num_bins_); - center_freqs_.resize(num_bins_); - for (int bin = 0; bin < num_bins; ++bin) { - float left_mel = mel_low_freq + bin * mel_freq_delta, - center_mel = mel_low_freq + (bin + 1) * mel_freq_delta, - right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; - center_freqs_[bin] = InverseMelScale(center_mel); - std::vector this_bin(num_fft_bins); - int first_index = -1, last_index = -1; - for (int i = 0; i < num_fft_bins; ++i) { - float freq = (fft_bin_width * i); // Center frequency of this fft - // bin. 
- float mel = MelScale(freq); - if (mel > left_mel && mel < right_mel) { - float weight; - if (mel <= center_mel) - weight = (mel - left_mel) / (center_mel - left_mel); - else - weight = (right_mel - mel) / (right_mel - center_mel); - this_bin[i] = weight; - if (first_index == -1) first_index = i; - last_index = i; - } - } - CHECK(first_index != -1 && last_index >= first_index); - bins_[bin].first = first_index; - int size = last_index + 1 - first_index; - bins_[bin].second.resize(size); - for (int i = 0; i < size; ++i) { - bins_[bin].second[i] = this_bin[first_index + i]; - } - } - - // povey window - povey_window_.resize(frame_length_); - double a = M_2PI / (frame_length - 1); - for (int i = 0; i < frame_length; ++i) { - povey_window_[i] = pow(0.5 - 0.5 * cos(a * i), 0.85); - } - } - - void set_use_log(bool use_log) { use_log_ = use_log; } - - void set_remove_dc_offset(bool remove_dc_offset) { - remove_dc_offset_ = remove_dc_offset; - } - - void set_dither(float dither) { dither_ = dither; } - - int num_bins() const { return num_bins_; } - - static inline float InverseMelScale(float mel_freq) { - return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f); - } - - static inline float MelScale(float freq) { - return 1127.0f * logf(1.0f + freq / 700.0f); - } - - static int UpperPowerOfTwo(int n) { - return static_cast(pow(2, ceil(log(n) / log(2)))); - } - - // pre emphasis - void PreEmphasis(float coeff, std::vector* data) const { - if (coeff == 0.0) return; - for (int i = data->size() - 1; i > 0; i--) - (*data)[i] -= coeff * (*data)[i - 1]; - (*data)[0] -= coeff * (*data)[0]; - } - - // Apply povey window on data in place - void Povey(std::vector* data) const { - CHECK_GE(data->size(), povey_window_.size()); - for (size_t i = 0; i < povey_window_.size(); ++i) { - (*data)[i] *= povey_window_[i]; - } - } - - // Compute fbank feat, return num frames - int Compute(const std::vector& wave, - std::vector>* feat) { - int num_samples = wave.size(); - if (num_samples < frame_length_) return 0; - int num_frames = 1 + ((num_samples - frame_length_) / frame_shift_); - feat->resize(num_frames); - std::vector fft_real(fft_points_, 0), fft_img(fft_points_, 0); - std::vector power(fft_points_ / 2); - for (int i = 0; i < num_frames; ++i) { - std::vector data(wave.data() + i * frame_shift_, - wave.data() + i * frame_shift_ + frame_length_); - // optional add noise - if (dither_ != 0.0) { - for (size_t j = 0; j < data.size(); ++j) - data[j] += dither_ * distribution_(generator_); - } - // optinal remove dc offset - if (remove_dc_offset_) { - float mean = 0.0; - for (size_t j = 0; j < data.size(); ++j) mean += data[j]; - mean /= data.size(); - for (size_t j = 0; j < data.size(); ++j) data[j] -= mean; - } - - PreEmphasis(0.97, &data); - Povey(&data); - // copy data to fft_real - memset(fft_img.data(), 0, sizeof(float) * fft_points_); - memset(fft_real.data() + frame_length_, 0, - sizeof(float) * (fft_points_ - frame_length_)); - memcpy(fft_real.data(), data.data(), sizeof(float) * frame_length_); - fft(bitrev_.data(), sintbl_.data(), fft_real.data(), fft_img.data(), - fft_points_); - // power - for (int j = 0; j < fft_points_ / 2; ++j) { - power[j] = fft_real[j] * fft_real[j] + fft_img[j] * fft_img[j]; - } - - (*feat)[i].resize(num_bins_); - // cepstral coefficients, triangle filter array - for (int j = 0; j < num_bins_; ++j) { - float mel_energy = 0.0; - int s = bins_[j].first; - for (size_t k = 0; k < bins_[j].second.size(); ++k) { - mel_energy += bins_[j].second[k] * power[s + k]; - } - // optional use log - if 
(use_log_) { - if (mel_energy < std::numeric_limits::epsilon()) - mel_energy = std::numeric_limits::epsilon(); - mel_energy = logf(mel_energy); - } - - (*feat)[i][j] = mel_energy; - } - } - return num_frames; - } - - private: - int num_bins_; - int sample_rate_; - int frame_length_, frame_shift_; - int fft_points_; - bool use_log_; - bool remove_dc_offset_; - std::vector center_freqs_; - std::vector>> bins_; - std::vector povey_window_; - std::default_random_engine generator_; - std::normal_distribution distribution_; - float dither_; - - // bit reversal table - std::vector bitrev_; - // trigonometric function table - std::vector sintbl_; -}; - -} // namespace wenet - -#endif // FRONTEND_FBANK_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/feature_pipeline.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/feature_pipeline.cc deleted file mode 100644 index ab450b15cd35ebd8101a3bcdec4f963a73bed10c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/feature_pipeline.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
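Based on the deleted `Fbank` interface above, a minimal sketch of computing an 80-dimensional filterbank from raw 16 kHz samples, with the 25 ms / 10 ms window expressed in samples as the deleted `FeaturePipelineConfig` does; the synthetic tone is a stand-in for real audio.

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

#include "frontend/fbank.h"  // deleted header; sketch assumes it is available

int main() {
  const int sample_rate = 16000;
  const int frame_length = sample_rate / 1000 * 25;  // 400 samples = 25 ms
  const int frame_shift  = sample_rate / 1000 * 10;  // 160 samples = 10 ms
  wenet::Fbank fbank(80, sample_rate, frame_length, frame_shift);

  // One second of a 440 Hz tone in int16 float range as placeholder audio.
  const double kPi = 3.14159265358979323846;
  std::vector<float> wave(sample_rate);
  for (int i = 0; i < sample_rate; ++i)
    wave[i] = 32767.0f * std::sin(2.0 * kPi * 440.0 * i / sample_rate);

  std::vector<std::vector<float>> feats;
  int num_frames = fbank.Compute(wave, &feats);  // returns the frame count
  std::printf("frames=%d dim=%d\n", num_frames,
              num_frames > 0 ? static_cast<int>(feats[0].size()) : 0);
  return 0;
}
```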
- -#include "frontend/feature_pipeline.h" - -#include -#include - -namespace wenet { - -FeaturePipeline::FeaturePipeline(const FeaturePipelineConfig& config) - : config_(config), - feature_dim_(config.num_bins), - fbank_(config.num_bins, config.sample_rate, config.frame_length, - config.frame_shift), - num_frames_(0), - input_finished_(false) {} - -void FeaturePipeline::AcceptWaveform(const float* pcm, const int size) { - std::vector> feats; - std::vector waves; - waves.insert(waves.end(), remained_wav_.begin(), remained_wav_.end()); - waves.insert(waves.end(), pcm, pcm + size); - int num_frames = fbank_.Compute(waves, &feats); - feature_queue_.Push(std::move(feats)); - num_frames_ += num_frames; - - int left_samples = waves.size() - config_.frame_shift * num_frames; - remained_wav_.resize(left_samples); - std::copy(waves.begin() + config_.frame_shift * num_frames, waves.end(), - remained_wav_.begin()); - // We are still adding wave, notify input is not finished - finish_condition_.notify_one(); -} - -void FeaturePipeline::AcceptWaveform(const int16_t* pcm, const int size) { - auto* float_pcm = new float[size]; - for (size_t i = 0; i < size; i++) { - float_pcm[i] = static_cast(pcm[i]); - } - this->AcceptWaveform(float_pcm, size); - delete[] float_pcm; -} - -void FeaturePipeline::set_input_finished() { - CHECK(!input_finished_); - { - std::lock_guard lock(mutex_); - input_finished_ = true; - } - finish_condition_.notify_one(); -} - -bool FeaturePipeline::ReadOne(std::vector* feat) { - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or set_input_finished() - finish_condition_.wait(lock); - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - return false; - } - } -} - -bool FeaturePipeline::Read(int num_frames, - std::vector>* feats) { - feats->clear(); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or set_input_finished() - finish_condition_.wait(lock); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. 
- if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - *feats = std::move(feature_queue_.Pop(feature_queue_.Size())); - return false; - } - } -} - -void FeaturePipeline::Reset() { - input_finished_ = false; - num_frames_ = 0; - remained_wav_.clear(); - feature_queue_.Clear(); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/feature_pipeline.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/feature_pipeline.h deleted file mode 100644 index 9918d6b573255795e0e665f0a9598c44be625c19..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/feature_pipeline.h +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef FRONTEND_FEATURE_PIPELINE_H_ -#define FRONTEND_FEATURE_PIPELINE_H_ - -#include -#include -#include -#include - -#include "frontend/fbank.h" -#include "utils/blocking_queue.h" -#include "utils/log.h" - -namespace wenet { - -struct FeaturePipelineConfig { - int num_bins; - int sample_rate; - int frame_length; - int frame_shift; - FeaturePipelineConfig(int num_bins, int sample_rate) - : num_bins(num_bins), // 80 dim fbank - sample_rate(sample_rate) { // 16k sample rate - frame_length = sample_rate / 1000 * 25; // frame length 25ms - frame_shift = sample_rate / 1000 * 10; // frame shift 10ms - } - - void Info() const { - LOG(INFO) << "feature pipeline config" - << " num_bins " << num_bins << " frame_length " << frame_length - << " frame_shift " << frame_shift; - } -}; - -// Typically, FeaturePipeline is used in two threads: one thread A calls -// AcceptWaveform() to add raw wav data and set_input_finished() to notice -// the end of input wav, another thread B (decoder thread) calls Read() to -// consume features.So a BlockingQueue is used to make this class thread safe. - -// The Read() is designed as a blocking method when there is no feature -// in feature_queue_ and the input is not finished. - -// See bin/decoder_main.cc, websocket/websocket_server.cc and -// decoder/torch_asr_decoder.cc for usage - -class FeaturePipeline { - public: - explicit FeaturePipeline(const FeaturePipelineConfig& config); - - // The feature extraction is done in AcceptWaveform(). - void AcceptWaveform(const float* pcm, const int size); - void AcceptWaveform(const int16_t* pcm, const int size); - - // Current extracted frames number. - int num_frames() const { return num_frames_; } - int feature_dim() const { return feature_dim_; } - const FeaturePipelineConfig& config() const { return config_; } - - // The caller should call this method when speech input is end. - // Never call AcceptWaveform() after calling set_input_finished() ! 
- void set_input_finished(); - bool input_finished() const { return input_finished_; } - - // Return False if input is finished and no feature could be read. - // Return True if a feature is read. - // This function is a blocking method. It will block the thread when - // there is no feature in feature_queue_ and the input is not finished. - bool ReadOne(std::vector* feat); - - // Read #num_frames frame features. - // Return False if less than #num_frames features are read and the - // input is finished. - // Return True if #num_frames features are read. - // This function is a blocking method when there is no feature - // in feature_queue_ and the input is not finished. - bool Read(int num_frames, std::vector>* feats); - - void Reset(); - bool IsLastFrame(int frame) const { - return input_finished_ && (frame == num_frames_ - 1); - } - - int NumQueuedFrames() const { return feature_queue_.Size(); } - - private: - const FeaturePipelineConfig& config_; - int feature_dim_; - Fbank fbank_; - - BlockingQueue> feature_queue_; - int num_frames_; - bool input_finished_; - - // The feature extraction is done in AcceptWaveform(). - // This waveform sample points are consumed by frame size. - // The residual waveform sample points after framing are - // kept to be used in next AcceptWaveform() calling. - std::vector remained_wav_; - - // Used to block the Read when there is no feature in feature_queue_ - // and the input is not finished. - mutable std::mutex mutex_; - std::condition_variable finish_condition_; -}; - -} // namespace wenet - -#endif // FRONTEND_FEATURE_PIPELINE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/fft.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/fft.cc deleted file mode 100644 index 9e05f854e79ea733d0411045385e924c2670b7f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/fft.cc +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
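The comment block in the deleted `feature_pipeline.h` above describes a two-thread pattern: one thread feeds PCM via `AcceptWaveform()` and ends with `set_input_finished()`, while the decoder thread blocks on `ReadOne()`. A minimal sketch of that usage, assuming the deleted frontend headers are available and using silence as placeholder audio:

```cpp
#include <cstdint>
#include <thread>
#include <vector>

#include "frontend/feature_pipeline.h"  // deleted header; sketch only

int main() {
  wenet::FeaturePipelineConfig config(80, 16000);  // 80-dim fbank, 16 kHz
  wenet::FeaturePipeline pipeline(config);

  // Producer: push one second of (silent) PCM, then signal end of input.
  std::thread producer([&]() {
    std::vector<int16_t> pcm(16000, 0);
    pipeline.AcceptWaveform(pcm.data(), static_cast<int>(pcm.size()));
    pipeline.set_input_finished();
  });

  // Consumer: ReadOne() blocks until a frame is ready or input is finished.
  int frames = 0;
  std::vector<float> feat;
  while (pipeline.ReadOne(&feat)) ++frames;

  producer.join();
  return frames > 0 ? 0 : 1;
}
```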
- - -#include -#include -#include - -#include "frontend/fft.h" - -namespace wenet { - -void make_sintbl(int n, float* sintbl) { - int i, n2, n4, n8; - float c, s, dc, ds, t; - - n2 = n / 2; - n4 = n / 4; - n8 = n / 8; - t = sin(M_PI / n); - dc = 2 * t * t; - ds = sqrt(dc * (2 - dc)); - t = 2 * dc; - c = sintbl[n4] = 1; - s = sintbl[0] = 0; - for (i = 1; i < n8; ++i) { - c -= dc; - dc += t * c; - s += ds; - ds -= t * s; - sintbl[i] = s; - sintbl[n4 - i] = c; - } - if (n8 != 0) sintbl[n8] = sqrt(0.5); - for (i = 0; i < n4; ++i) sintbl[n2 - i] = sintbl[i]; - for (i = 0; i < n2 + n4; ++i) sintbl[i + n2] = -sintbl[i]; -} - -void make_bitrev(int n, int* bitrev) { - int i, j, k, n2; - - n2 = n / 2; - i = j = 0; - for (;;) { - bitrev[i] = j; - if (++i >= n) break; - k = n2; - while (k <= j) { - j -= k; - k /= 2; - } - j += k; - } -} - -// bitrev: bit reversal table -// sintbl: trigonometric function table -// x:real part -// y:image part -// n: fft length -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n) { - int i, j, k, ik, h, d, k2, n4, inverse; - float t, s, c, dx, dy; - - /* preparation */ - if (n < 0) { - n = -n; - inverse = 1; /* inverse transform */ - } else { - inverse = 0; - } - n4 = n / 4; - if (n == 0) { - return 0; - } - - /* bit reversal */ - for (i = 0; i < n; ++i) { - j = bitrev[i]; - if (i < j) { - t = x[i]; - x[i] = x[j]; - x[j] = t; - t = y[i]; - y[i] = y[j]; - y[j] = t; - } - } - - /* transformation */ - for (k = 1; k < n; k = k2) { - h = 0; - k2 = k + k; - d = n / k2; - for (j = 0; j < k; ++j) { - c = sintbl[h + n4]; - if (inverse) - s = -sintbl[h]; - else - s = sintbl[h]; - for (i = j; i < n; i += k2) { - ik = i + k; - dx = s * y[ik] + c * x[ik]; - dy = c * y[ik] - s * x[ik]; - x[ik] = x[i] - dx; - x[i] += dx; - y[ik] = y[i] - dy; - y[i] += dy; - } - h += d; - } - } - if (inverse) { - /* divide by n in case of the inverse transformation */ - for (i = 0; i < n; ++i) { - x[i] /= n; - y[i] /= n; - } - } - return 0; /* finished successfully */ -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/fft.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/fft.h deleted file mode 100644 index 6b92e406c44b4768eaee6e734f55bb39cd9af28b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/fft.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
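The deleted `fft.cc` above is a radix-2 FFT driven by precomputed tables: `sintbl` needs `n + n/4` entries and `bitrev` needs `n`. A small standalone usage sketch transforming an 8-point unit impulse, whose spectrum should come out flat:

```cpp
#include <cstdio>
#include <vector>

#include "frontend/fft.h"  // deleted header; sketch assumes it is available

int main() {
  const int n = 8;                       // FFT length (power of two)
  std::vector<float> sintbl(n + n / 4);  // trig table, sized n + n/4
  std::vector<int> bitrev(n);            // bit-reversal table, sized n
  wenet::make_sintbl(n, sintbl.data());
  wenet::make_bitrev(n, bitrev.data());

  // Unit impulse: every output bin should be 1 + 0i after the transform.
  std::vector<float> re(n, 0.0f), im(n, 0.0f);
  re[0] = 1.0f;
  wenet::fft(bitrev.data(), sintbl.data(), re.data(), im.data(), n);

  for (int k = 0; k < n; ++k)
    std::printf("X[%d] = %g + %gi\n", k, re[k], im[k]);
  return 0;
}
```

Passing a negative length to `fft()` selects the inverse transform, which divides by `n` on the way out, as the deleted implementation shows.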
- - -#ifndef FRONTEND_FFT_H_ -#define FRONTEND_FFT_H_ - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -namespace wenet { - -// Fast Fourier Transform - -void make_sintbl(int n, float* sintbl); - -void make_bitrev(int n, int* bitrev); - -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n); - -} // namespace wenet - -#endif // FRONTEND_FFT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/wav.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/wav.h deleted file mode 100644 index 688a049a940ebbdc83f24e59134fff22b7b09bfd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/frontend/wav.h +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright (c) 2016 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef FRONTEND_WAV_H_ -#define FRONTEND_WAV_H_ - -#include -#include -#include -#include -#include - -#include - -#include "utils/log.h" - -namespace wenet { - -struct WavHeader { - char riff[4] = {'R', 'I', 'F', 'F'}; - unsigned int size = 0; - char wav[4] = {'W', 'A', 'V', 'E'}; - char fmt[4] = {'f', 'm', 't', ' '}; - unsigned int fmt_size = 16; - uint16_t format = 1; - uint16_t channels = 0; - unsigned int sample_rate = 0; - unsigned int bytes_per_second = 0; - uint16_t block_size = 0; - uint16_t bit = 0; - char data[4] = {'d', 'a', 't', 'a'}; - unsigned int data_size = 0; - - WavHeader() {} - - WavHeader(int num_samples, int num_channel, int sample_rate, - int bits_per_sample) { - data_size = num_samples * num_channel * (bits_per_sample / 8); - size = sizeof(WavHeader) - 8 + data_size; - channels = num_channel; - this->sample_rate = sample_rate; - bytes_per_second = sample_rate * num_channel * (bits_per_sample / 8); - block_size = num_channel * (bits_per_sample / 8); - bit = bits_per_sample; - } -}; - -class WavReader { - public: - WavReader() : data_(nullptr) {} - explicit WavReader(const std::string& filename) { Open(filename); } - - bool Open(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "rb"); - if (NULL == fp) { - LOG(WARNING) << "Error in read " << filename; - return false; - } - - WavHeader header; - fread(&header, 1, sizeof(header), fp); - if (header.fmt_size < 16) { - fprintf(stderr, - "WaveData: expect PCM format data " - "to have fmt chunk of at least size 16.\n"); - return false; - } else if (header.fmt_size > 16) { - int offset = 44 - 8 + header.fmt_size - 16; - fseek(fp, offset, SEEK_SET); - fread(header.data, 8, sizeof(char), fp); - } - // check "RIFF" "WAVE" "fmt " "data" - - // Skip any sub-chunks between "fmt" and "data". Usually there will - // be a single "fact" sub chunk, but on Windows there can also be a - // "list" sub chunk. - while (0 != strncmp(header.data, "data", 4)) { - // We will just ignore the data in these chunks. 
- fseek(fp, header.data_size, SEEK_CUR); - // read next sub chunk - fread(header.data, 8, sizeof(char), fp); - } - - num_channel_ = header.channels; - sample_rate_ = header.sample_rate; - bits_per_sample_ = header.bit; - int num_data = header.data_size / (bits_per_sample_ / 8); - data_ = new float[num_data]; - num_samples_ = num_data / num_channel_; - - for (int i = 0; i < num_data; ++i) { - switch (bits_per_sample_) { - case 8: { - char sample; - fread(&sample, 1, sizeof(char), fp); - data_[i] = static_cast(sample); - break; - } - case 16: { - int16_t sample; - fread(&sample, 1, sizeof(int16_t), fp); - data_[i] = static_cast(sample); - break; - } - case 32: { - int sample; - fread(&sample, 1, sizeof(int), fp); - data_[i] = static_cast(sample); - break; - } - default: - fprintf(stderr, "unsupported quantization bits"); - exit(1); - } - } - fclose(fp); - return true; - } - - int num_channel() const { return num_channel_; } - int sample_rate() const { return sample_rate_; } - int bits_per_sample() const { return bits_per_sample_; } - int num_samples() const { return num_samples_; } - - ~WavReader() { - delete[] data_; - } - - const float* data() const { return data_; } - - private: - int num_channel_; - int sample_rate_; - int bits_per_sample_; - int num_samples_; // sample points per channel - float* data_; -}; - -class WavWriter { - public: - WavWriter(const float* data, int num_samples, int num_channel, - int sample_rate, int bits_per_sample) - : data_(data), - num_samples_(num_samples), - num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample) {} - - void Write(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "wb"); - WavHeader header(num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fwrite(&header, 1, sizeof(header), fp); - - for (int i = 0; i < num_samples_; ++i) { - for (int j = 0; j < num_channel_; ++j) { - switch (bits_per_sample_) { - case 8: { - char sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 16: { - int16_t sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 32: { - int sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - } - } - } - fclose(fp); - } - - private: - const float* data_; - int num_samples_; // total float points in data_ - int num_channel_; - int sample_rate_; - int bits_per_sample_; -}; - -class StreamWavWriter { - public: - StreamWavWriter(int num_channel, int sample_rate, int bits_per_sample) - : num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample), - total_num_samples_(0) {} - - StreamWavWriter(const std::string& filename, int num_channel, - int sample_rate, int bits_per_sample) - : StreamWavWriter(num_channel, sample_rate, bits_per_sample) { - Open(filename); - } - - void Open(const std::string& filename) { - fp_ = fopen(filename.c_str(), "wb"); - fseek(fp_, sizeof(WavHeader), SEEK_SET); - } - - void Write(const int16_t* sample_data, size_t num_samples) { - fwrite(sample_data, sizeof(int16_t), num_samples, fp_); - total_num_samples_ += num_samples; - } - - void Close() { - WavHeader header(total_num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fseek(fp_, 0L, SEEK_SET); - fwrite(&header, 1, sizeof(header), fp_); - fclose(fp_); - } - - private: - FILE* fp_; - int num_channel_; - int sample_rate_; - int bits_per_sample_; - size_t total_num_samples_; -}; - -} 
// namespace wenet - -#endif // FRONTEND_WAV_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/CMakeLists.txt deleted file mode 100644 index 2a152dd0d38cdc17d2758d7dbd542cd974d5f0c6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -# compile wenet.proto -set(PROTO_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -add_custom_command( - OUTPUT ${PROTO_DIR}/wenet.pb.cc - ${PROTO_DIR}/wenet.pb.h - ${PROTO_DIR}/wenet.grpc.pb.cc - ${PROTO_DIR}/wenet.grpc.pb.h - COMMAND ${protobuf_BINARY_DIR}/protoc - ARGS --grpc_out "${PROTO_DIR}" - --cpp_out "${PROTO_DIR}" - -I "${PROTO_DIR}" - --plugin=protoc-gen-grpc=${grpc_BINARY_DIR}/grpc_cpp_plugin - wenet.proto) - -# grpc_server/client -link_directories(${protobuf_BINARY_DIR}/lib) -add_library(wenet_grpc STATIC - grpc_client.cc - grpc_server.cc - wenet.pb.cc - wenet.grpc.pb.cc -) -target_link_libraries(wenet_grpc PUBLIC grpc++ grpc++_reflection decoder) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_client.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_client.cc deleted file mode 100644 index 7a2e3f6f384980b6566468213d3eead43a404070..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_client.cc +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
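Based on the `WavReader`/`WavWriter` interfaces in the deleted `wav.h` above, a short sketch that loads a PCM file and writes it back out unchanged; `in.wav` and `out.wav` are placeholder paths:

```cpp
#include <cstdio>

#include "frontend/wav.h"  // deleted header; sketch only

int main() {
  wenet::WavReader reader;
  if (!reader.Open("in.wav")) {  // placeholder input path
    std::fprintf(stderr, "failed to open in.wav\n");
    return 1;
  }
  std::printf("channels=%d rate=%d bits=%d samples=%d\n",
              reader.num_channel(), reader.sample_rate(),
              reader.bits_per_sample(), reader.num_samples());

  // data() holds num_samples() * num_channel() interleaved float samples.
  wenet::WavWriter writer(reader.data(), reader.num_samples(),
                          reader.num_channel(), reader.sample_rate(),
                          reader.bits_per_sample());
  writer.Write("out.wav");  // placeholder output path
  return 0;
}
```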
- -#include "grpc/grpc_client.h" - -#include "utils/log.h" - -namespace wenet { -using grpc::Channel; -using grpc::ClientContext; -using grpc::ClientReaderWriter; -using grpc::Status; -using wenet::Request; -using wenet::Response; - -GrpcClient::GrpcClient(const std::string& host, int port, int nbest, - bool continuous_decoding) - : host_(host), - port_(port), - nbest_(nbest), - continuous_decoding_(continuous_decoding) { - Connect(); - t_.reset(new std::thread(&GrpcClient::ReadLoopFunc, this)); -} - -void GrpcClient::Connect() { - channel_ = grpc::CreateChannel(host_ + ":" + std::to_string(port_), - grpc::InsecureChannelCredentials()); - stub_ = ASR::NewStub(channel_); - context_ = std::make_shared(); - stream_ = stub_->Recognize(context_.get()); - request_ = std::make_shared(); - response_ = std::make_shared(); - request_->mutable_decode_config()->set_nbest_config(nbest_); - request_->mutable_decode_config()->set_continuous_decoding_config( - continuous_decoding_); - stream_->Write(*request_); -} - -void GrpcClient::SendBinaryData(const void* data, size_t size) { - const int16_t* pdata = reinterpret_cast(data); - request_->set_audio_data(pdata, size); - stream_->Write(*request_); -} - -void GrpcClient::ReadLoopFunc() { - try { - while (stream_->Read(response_.get())) { - for (int i = 0; i < response_->nbest_size(); i++) { - // you can also traverse wordpieces like demonstrated above - LOG(INFO) << i + 1 << "best " << response_->nbest(i).sentence(); - } - if (response_->status() != Response_Status_ok) { - break; - } - if (response_->type() == Response_Type_speech_end) { - done_ = true; - break; - } - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void GrpcClient::Join() { - stream_->WritesDone(); - t_->join(); - Status status = stream_->Finish(); - if (!status.ok()) { - LOG(INFO) << "Recognize rpc failed."; - } -} -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_client.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_client.h deleted file mode 100644 index 36e36a0f5f5ec5bbb818009fe931e863eaa7fd60..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_client.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef GRPC_GRPC_CLIENT_H_ -#define GRPC_GRPC_CLIENT_H_ - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "grpc/wenet.grpc.pb.h" -#include "utils/utils.h" - -namespace wenet { - -using grpc::Channel; -using grpc::ClientContext; -using grpc::ClientReaderWriter; -using wenet::ASR; -using wenet::Request; -using wenet::Response; - -class GrpcClient { - public: - GrpcClient(const std::string& host, int port, int nbest, - bool continuous_decoding); - - void SendBinaryData(const void* data, size_t size); - void ReadLoopFunc(); - void Join(); - bool done() const { return done_; } - - private: - void Connect(); - std::string host_; - int port_; - std::shared_ptr channel_{nullptr}; - std::unique_ptr stub_{nullptr}; - std::shared_ptr context_{nullptr}; - std::unique_ptr> stream_{nullptr}; - std::shared_ptr request_{nullptr}; - std::shared_ptr response_{nullptr}; - int nbest_ = 1; - bool continuous_decoding_ = false; - bool done_ = false; - std::unique_ptr t_{nullptr}; - - WENET_DISALLOW_COPY_AND_ASSIGN(GrpcClient); -}; - -} // namespace wenet - -#endif // GRPC_GRPC_CLIENT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_server.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_server.cc deleted file mode 100644 index 26268bc02a2f2ea56bb24a1eb379a565f693429a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_server.cc +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "grpc/grpc_server.h" - -namespace wenet { - -using grpc::ServerReaderWriter; -using wenet::Request; -using wenet::Response; - -GrpcConnectionHandler::GrpcConnectionHandler( - ServerReaderWriter* stream, - std::shared_ptr request, std::shared_ptr response, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : stream_(std::move(stream)), - request_(std::move(request)), - response_(std::move(response)), - feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - -void GrpcConnectionHandler::OnSpeechStart() { - LOG(INFO) << "Received speech start signal, start reading speech"; - got_start_tag_ = true; - response_->set_status(Response::ok); - response_->set_type(Response::server_ready); - stream_->Write(*response_); - feature_pipeline_ = std::make_shared(*feature_config_); - decoder_ = std::make_shared(feature_pipeline_, decode_resource_, - *decode_config_); - // Start decoder thread - decode_thread_ = std::make_shared( - &GrpcConnectionHandler::DecodeThreadFunc, this); -} - -void GrpcConnectionHandler::OnSpeechEnd() { - LOG(INFO) << "Received speech end signal"; - CHECK(feature_pipeline_ != nullptr); - feature_pipeline_->set_input_finished(); - got_end_tag_ = true; -} - -void GrpcConnectionHandler::OnPartialResult() { - LOG(INFO) << "Partial result"; - response_->set_status(Response::ok); - response_->set_type(Response::partial_result); - stream_->Write(*response_); -} - -void GrpcConnectionHandler::OnFinalResult() { - LOG(INFO) << "Final result"; - response_->set_status(Response::ok); - response_->set_type(Response::final_result); - stream_->Write(*response_); -} - -void GrpcConnectionHandler::OnFinish() { - // Send finish tag - response_->set_status(Response::ok); - response_->set_type(Response::speech_end); - stream_->Write(*response_); -} - -void GrpcConnectionHandler::OnSpeechData() { - // Read binary PCM data - const int16_t* pcm_data = - reinterpret_cast(request_->audio_data().c_str()); - int num_samples = request_->audio_data().length() / sizeof(int16_t); - VLOG(2) << "Received " << num_samples << " samples"; - CHECK(feature_pipeline_ != nullptr); - CHECK(decoder_ != nullptr); - feature_pipeline_->AcceptWaveform(pcm_data, num_samples); -} - -void GrpcConnectionHandler::SerializeResult(bool finish) { - for (const DecodeResult& path : decoder_->result()) { - Response_OneBest* one_best_ = response_->add_nbest(); - one_best_->set_sentence(path.sentence); - if (finish) { - for (const WordPiece& word_piece : path.word_pieces) { - Response_OnePiece* one_piece_ = one_best_->add_wordpieces(); - one_piece_->set_word(word_piece.word); - one_piece_->set_start(word_piece.start); - one_piece_->set_end(word_piece.end); - } - } - if (response_->nbest_size() == nbest_) { - break; - } - } - return; -} - -void GrpcConnectionHandler::DecodeThreadFunc() { - while (true) { - DecodeState state = decoder_->Decode(); - response_->clear_status(); - response_->clear_type(); - response_->clear_nbest(); - if (state == DecodeState::kEndFeats) { - decoder_->Rescoring(); - SerializeResult(true); - OnFinalResult(); - OnFinish(); - stop_recognition_ = true; - break; - } else if (state == DecodeState::kEndpoint) { - decoder_->Rescoring(); - SerializeResult(true); - OnFinalResult(); - // If it's not continuous decoding, continue to do next recognition - // otherwise stop the recognition - if (continuous_decoding_) { - decoder_->ResetContinuousDecoding(); - } else { - 
OnFinish(); - stop_recognition_ = true; - break; - } - } else { - if (decoder_->DecodedSomething()) { - SerializeResult(false); - OnPartialResult(); - } - } - } -} - -void GrpcConnectionHandler::operator()() { - try { - while (stream_->Read(request_.get())) { - if (!got_start_tag_) { - nbest_ = request_->decode_config().nbest_config(); - continuous_decoding_ = - request_->decode_config().continuous_decoding_config(); - OnSpeechStart(); - } else { - OnSpeechData(); - } - } - OnSpeechEnd(); - LOG(INFO) << "Read all pcm data, wait for decoding thread"; - if (decode_thread_ != nullptr) { - decode_thread_->join(); - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -Status GrpcServer::Recognize(ServerContext* context, - ServerReaderWriter* stream) { - LOG(INFO) << "Get Recognize request" << std::endl; - auto request = std::make_shared(); - auto response = std::make_shared(); - GrpcConnectionHandler handler(stream, request, response, feature_config_, - decode_config_, decode_resource_); - std::thread t(std::move(handler)); - t.join(); - return Status::OK; -} -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_server.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_server.h deleted file mode 100644 index 3ab47ce5b15897c2a596d8ef27f2e7c4f8d26a3f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/grpc_server.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef GRPC_GRPC_SERVER_H_ -#define GRPC_GRPC_SERVER_H_ - -#include -#include -#include -#include -#include -#include - -#include "decoder/asr_decoder.h" -#include "frontend/feature_pipeline.h" -#include "utils/log.h" - -#include "grpc/wenet.grpc.pb.h" - -namespace wenet { - -using grpc::ServerContext; -using grpc::ServerReaderWriter; -using grpc::Status; -using wenet::ASR; -using wenet::Request; -using wenet::Response; - -class GrpcConnectionHandler { - public: - GrpcConnectionHandler(ServerReaderWriter* stream, - std::shared_ptr request, - std::shared_ptr response, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource); - void operator()(); - - private: - void OnSpeechStart(); - void OnSpeechEnd(); - void OnFinish(); - void OnSpeechData(); - void OnPartialResult(); - void OnFinalResult(); - void DecodeThreadFunc(); - void SerializeResult(bool finish); - - bool continuous_decoding_ = false; - int nbest_ = 1; - ServerReaderWriter* stream_; - std::shared_ptr request_; - std::shared_ptr response_; - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - - bool got_start_tag_ = false; - bool got_end_tag_ = false; - // When endpoint is detected, stop recognition, and stop receiving data. 
- bool stop_recognition_ = false; - std::shared_ptr feature_pipeline_ = nullptr; - std::shared_ptr decoder_ = nullptr; - std::shared_ptr decode_thread_ = nullptr; -}; - -class GrpcServer final : public ASR::Service { - public: - GrpcServer(std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - Status Recognize(ServerContext* context, - ServerReaderWriter* reader) override; - - private: - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - DISALLOW_COPY_AND_ASSIGN(GrpcServer); -}; - -} // namespace wenet - -#endif // GRPC_GRPC_SERVER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/wenet.proto b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/wenet.proto deleted file mode 100644 index 4c3033c034c513611c9159ff9db42b225be2cc98..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/grpc/wenet.proto +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
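The deleted `GrpcServer` above is an ordinary `ASR::Service` implementation, so bringing it up follows the standard `grpc::ServerBuilder` pattern. A hedged sketch; the `shared_ptr` element types are the ones assumed for the template arguments stripped from the deleted header (as in upstream WeNet), and the configs/resource are expected to be built elsewhere, e.g. from command-line flags:

```cpp
#include <memory>
#include <string>
#include <utility>

#include <grpcpp/grpcpp.h>

#include "grpc/grpc_server.h"  // deleted header; sketch only

void RunServer(int port,
               std::shared_ptr<wenet::FeaturePipelineConfig> feature_config,
               std::shared_ptr<wenet::DecodeOptions> decode_config,
               std::shared_ptr<wenet::DecodeResource> decode_resource) {
  wenet::GrpcServer service(std::move(feature_config),
                            std::move(decode_config),
                            std::move(decode_resource));

  grpc::ServerBuilder builder;
  const std::string address = "0.0.0.0:" + std::to_string(port);
  builder.AddListeningPort(address, grpc::InsecureServerCredentials());
  builder.RegisterService(&service);
  std::unique_ptr<grpc::Server> server(builder.BuildAndStart());
  server->Wait();  // block and serve Recognize() streams until shutdown
}
```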
-syntax = "proto3"; - -option java_package = "ex.grpc"; -option objc_class_prefix = "wenet"; - -package wenet; - -service ASR { - rpc Recognize (stream Request) returns (stream Response) {} -} - -message Request { - - message DecodeConfig { - int32 nbest_config = 1; - bool continuous_decoding_config = 2; - } - - oneof RequestPayload { - DecodeConfig decode_config = 1; - bytes audio_data = 2; - } -} - -message Response { - - message OneBest { - string sentence = 1; - repeated OnePiece wordpieces = 2; - } - - message OnePiece { - string word = 1; - int32 start = 2; - int32 end = 3; - } - - enum Status { - ok = 0; - failed = 1; - } - - enum Type { - server_ready = 0; - partial_result = 1; - final_result = 2; - speech_end = 3; - } - - Status status = 1; - Type type = 2; - repeated OneBest nbest = 3; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/CMakeLists.txt deleted file mode 100644 index b072309e44b90dcee44ea31e9bcbc1741e73f151..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -cmake_minimum_required(VERSION 3.10 FATAL_ERROR) - -project(kaldi) - -# include_directories() is called in the root CMakeLists.txt - -add_library(kaldi-util - base/kaldi-error.cc - base/kaldi-math.cc - util/kaldi-io.cc - util/parse-options.cc - util/simple-io-funcs.cc - util/text-utils.cc -) -target_link_libraries(kaldi-util PUBLIC utils) - -add_library(kaldi-decoder - lat/determinize-lattice-pruned.cc - lat/lattice-functions.cc - decoder/lattice-faster-decoder.cc - decoder/lattice-faster-online-decoder.cc -) -target_link_libraries(kaldi-decoder PUBLIC kaldi-util) - -if(GRAPH_TOOLS) - # Arpa binary - add_executable(arpa2fst - lm/arpa-file-parser.cc - lm/arpa-lm-compiler.cc - lmbin/arpa2fst.cc - ) - target_link_libraries(arpa2fst PUBLIC kaldi-util) - - # FST tools binary - set(FST_BINS - fstaddselfloops - fstdeterminizestar - fstisstochastic - fstminimizeencoded - fsttablecompose - ) - - if(NOT MSVC) - # dl is for dynamic linking, otherwise there is a linking error on linux - link_libraries(dl) - endif() - foreach(name IN LISTS FST_BINS) - add_executable(${name} - fstbin/${name}.cc - fstext/kaldi-fst-io.cc - ) - target_link_libraries(${name} PUBLIC kaldi-util) - endforeach() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/README.md deleted file mode 100644 index 4eb9c9173b747686f00b658afc5e1e0dfdc17e68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/README.md +++ /dev/null @@ -1,21 +0,0 @@ -We use Kaldi decoder to implement TLG based language model integration, -so we copied related files to this directory. -The main changes are: - -1. To minimize the change, we use the same directories tree as Kaldi. - -2. We replace Kaldi log system with glog in the following way. 
- -``` c++ -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_INFO \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) -``` - -3. We lint all the files to satisfy the lint in WeNet. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/io-funcs-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/io-funcs-inl.h deleted file mode 100644 index 9397400833676b323492321183c989cec2f41c3f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/io-funcs-inl.h +++ /dev/null @@ -1,329 +0,0 @@ -// base/io-funcs-inl.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian; -// Johns Hopkins University (Author: Daniel Povey) -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_INL_H_ -#define KALDI_BASE_IO_FUNCS_INL_H_ 1 - -// Do not include this file directly. It is included by base/io-funcs.h - -#include -#include -#include - -namespace kaldi { - -// Template that covers integers. -template -void WriteBasicType(std::ostream &os, bool binary, T t) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char len_c = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(t)); - os.put(len_c); - os.write(reinterpret_cast(&t), sizeof(t)); - } else { - if (sizeof(t) == 1) - os << static_cast(t) << " "; - else - os << t << " "; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteBasicType."; - } -} - -// Template that covers integers. -template -inline void ReadBasicType(std::istream &is, bool binary, T *t) { - KALDI_PARANOID_ASSERT(t != NULL); - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - int len_c_in = is.get(); - if (len_c_in == -1) - KALDI_ERR << "ReadBasicType: encountered end of stream."; - char len_c = static_cast(len_c_in), - len_c_expected = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(*t)); - if (len_c != len_c_expected) { - KALDI_ERR << "ReadBasicType: did not get expected integer type, " - << static_cast(len_c) << " vs. " - << static_cast(len_c_expected) - << ". You can change this code to successfully" - << " read it later, if needed."; - // insert code here to read "wrong" type. Might have a switch statement. 
- } - is.read(reinterpret_cast(t), sizeof(*t)); - } else { - if (sizeof(*t) == 1) { - int16 i; - is >> i; - *t = i; - } else { - is >> *t; - } - } - if (is.fail()) { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << is.peek(); - } -} - -// Template that covers integers. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. - os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz * 2); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector >::const_iterator iter = v.begin(), - end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(iter->first) << ',' - << static_cast(iter->second) << ' '; - else - os << iter->first << ',' << iter->second << ' '; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerPairVector."; - } -} - -// Template that covers integers. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerPairVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz * 2); - } - } else { - std::vector > tmp_v; // use temporary so v doesn't use - // extra memory due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. - int16 next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::make_pair((T)next_t1, (T)next_t2)); - } else { - T next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::pair(next_t1, next_t2)); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. 
- } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerPairVector: read failure at file position " - << is.tellg(); -} - -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. - os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(*iter) << " "; - else - os << *iter << " "; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerVector."; - } -} - -template -inline void ReadIntegerVector(std::istream &is, bool binary, - std::vector *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz); - } - } else { - std::vector tmp_v; // use temporary so v doesn't use extra memory - // due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. - int16 next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back((T)next_t); - } else { - T next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(next_t); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. - } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerVector: read failure at file position " - << is.tellg(); -} - -// Initialize an opened stream for writing by writing an optional binary -// header and modifying the floating-point precision. -inline void InitKaldiOutputStream(std::ostream &os, bool binary) { - // This does not throw exceptions (does not check for errors). - if (binary) { - os.put('\0'); - os.put('B'); - } - // Note, in non-binary mode we may at some point want to mess with - // the precision a bit. - // 7 is a bit more than the precision of float.. - if (os.precision() < 7) os.precision(7); -} - -/// Initialize an opened stream for reading by detecting the binary header and -// setting the "binary" value appropriately. -inline bool InitKaldiInputStream(std::istream &is, bool *binary) { - // Sets the 'binary' variable. 
- // Throws exception in the very unusual situation that stream - // starts with '\0' but not then 'B'. - - if (is.peek() == '\0') { // seems to be binary - is.get(); - if (is.peek() != 'B') { - return false; - } - is.get(); - *binary = true; - return true; - } else { - *binary = false; - return true; - } -} - -} // end namespace kaldi. - -#endif // KALDI_BASE_IO_FUNCS_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/io-funcs.cc deleted file mode 100644 index bd6c350780d1096ff8c452fd00864aa07a30ac65..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/io-funcs.cc +++ /dev/null @@ -1,215 +0,0 @@ -// base/io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" - -namespace kaldi { - -template <> -void WriteBasicType(std::ostream &os, bool binary, bool b) { - os << (b ? "T" : "F"); - if (!binary) os << " "; - if (os.fail()) KALDI_ERR << "Write failure in WriteBasicType"; -} - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b) { - KALDI_PARANOID_ASSERT(b != NULL); - if (!binary) is >> std::ws; // eat up whitespace. 
- char c = is.peek(); - if (c == 'T') { - *b = true; - is.get(); - } else if (c == 'F') { - *b = false; - is.get(); - } else { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << CharToString(c); - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, float f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f) { - KALDI_PARANOID_ASSERT(f != NULL); - if (binary) { - double d; - int c = is.peek(); - if (c == sizeof(*f)) { - is.get(); - is.read(reinterpret_cast(f), sizeof(*f)); - } else if (c == sizeof(d)) { - ReadBasicType(is, binary, &d); - *f = d; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *f; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at file position " - << is.tellg(); - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, double *d) { - KALDI_PARANOID_ASSERT(d != NULL); - if (binary) { - float f; - int c = is.peek(); - if (c == sizeof(*d)) { - is.get(); - is.read(reinterpret_cast(d), sizeof(*d)); - } else if (c == sizeof(f)) { - ReadBasicType(is, binary, &f); - *d = f; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *d; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at file position " - << is.tellg(); - } -} - -void CheckToken(const char *token) { - if (*token == '\0') KALDI_ERR << "Token is empty (not a valid token)"; - const char *orig_token = token; - while (*token != '\0') { - if (::isspace(*token)) - KALDI_ERR << "Token is not a valid token (contains space): '" - << orig_token << "'"; - token++; - } -} - -void WriteToken(std::ostream &os, bool binary, const char *token) { - // binary mode is ignored; - // we use space as termination character in either case. - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - os << token << " "; - if (os.fail()) { - KALDI_ERR << "Write failure in WriteToken."; - } -} - -int Peek(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // eat up whitespace. - return is.peek(); -} - -void WriteToken(std::ostream &os, bool binary, const std::string &token) { - WriteToken(os, binary, token.c_str()); -} - -void ReadToken(std::istream &is, bool binary, std::string *str) { - KALDI_ASSERT(str != NULL); - if (!binary) is >> std::ws; // consume whitespace. - is >> *str; - if (is.fail()) { - KALDI_ERR << "ReadToken, failed to read token at file position " - << is.tellg(); - } - if (!isspace(is.peek())) { - KALDI_ERR << "ReadToken, expected space after token, saw instead " - << CharToString(static_cast(is.peek())) - << ", at file position " << is.tellg(); - } - is.get(); // consume the space. -} - -int PeekToken(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // consume whitespace. - bool read_bracket; - if (static_cast(is.peek()) == '<') { - read_bracket = true; - is.get(); - } else { - read_bracket = false; - } - int ans = is.peek(); - if (read_bracket) { - if (!is.unget()) { - // Clear the bad bit. 
This code can be (and is in fact) reached, since the - // C++ standard does not guarantee that a call to unget() must succeed. - is.clear(); - } - } - return ans; -} - -void ExpectToken(std::istream &is, bool binary, const char *token) { - int pos_at_start = is.tellg(); - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - if (!binary) is >> std::ws; // consume whitespace. - std::string str; - is >> str; - is.get(); // consume the space. - if (is.fail()) { - KALDI_ERR << "Failed to read token [started at file position " - << pos_at_start << "], expected " << token; - } - // The second half of the '&&' expression below is so that if we're expecting - // "", we will accept "Foo>" instead. This is so that the model-reading - // code will tolerate errors in PeekToken where is.unget() failed; search for - // is.clear() in PeekToken() for an explanation. - if (strcmp(str.c_str(), token) != 0 && - !(token[0] == '<' && strcmp(str.c_str(), token + 1) == 0)) { - KALDI_ERR << "Expected token \"" << token << "\", got instead \"" << str - << "\"."; - } -} - -void ExpectToken(std::istream &is, bool binary, const std::string &token) { - ExpectToken(is, binary, token.c_str()); -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/io-funcs.h deleted file mode 100644 index 06ad1e3d2d8dc8385886a7c6653f620642c7c05a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/io-funcs.h +++ /dev/null @@ -1,246 +0,0 @@ -// base/io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_H_ -#define KALDI_BASE_IO_FUNCS_H_ - -// This header only contains some relatively low-level I/O functions. -// The full Kaldi I/O declarations are in ../util/kaldi-io.h -// and ../util/kaldi-table.h -// They were put in util/ in order to avoid making the Matrix library -// dependent on them. - -#include -#include -#include -#include - -#include "base/io-funcs-inl.h" -#include "base/kaldi-common.h" - -namespace kaldi { - -/* - This comment describes the Kaldi approach to I/O. All objects can be written - and read in two modes: binary and text. In addition we want to make the I/O - work if we redefine the typedef "BaseFloat" between floats and doubles. - We also want to have control over whitespace in text mode without affecting - the meaning of the file, for pretty-printing purposes. - - Errors are handled by throwing a KaldiFatalError exception. 
- - For integer and floating-point types (and boolean values): - - WriteBasicType(std::ostream &, bool binary, const T&); - ReadBasicType(std::istream &, bool binary, T*); - - and we expect these functions to be defined in such a way that they work when - the type T changes between float and double, so you can read float into double - and vice versa]. Note that for efficiency and space-saving reasons, the - Vector and Matrix classes do not use these functions [but they preserve the - type interchangeability in their own way] - - For a class (or struct) C: - class C { - .. - Write(std::ostream &, bool binary, [possibly extra optional args for - specific classes]) const; Read(std::istream &, bool binary, [possibly extra - optional args for specific classes]); - .. - } - NOTE: The only actual optional args we used are the "add" arguments in - Vector/Matrix classes, which specify whether we should sum the data already - in the class with the data being read. - - For types which are typedef's involving stl classes, I/O is as follows: - typedef std::vector > MyTypedefName; - - The user should define something like: - - WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t); - ReadMyTypedefName(std::ostream &, bool binary, MyTypedefName *t); - - The user would have to write these functions. - - For a type std::vector: - - void WriteIntegerVector(std::ostream &os, bool binary, const std::vector - &v); void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - - For other types, e.g. vectors of pairs, the user should create a routine of - the type WriteMyTypedefName. This is to avoid introducing confusing templated - functions; we could easily create templated functions to handle most of these - cases but they would have to share the same name. - - It also often happens that the user needs to write/read special tokens as part - of a file. These might be class headers, or separators/identifiers in the - class. We provide special functions for manipulating these. These special - tokens must be nonempty and must not contain any whitespace. - - void WriteToken(std::ostream &os, bool binary, const char*); - void WriteToken(std::ostream &os, bool binary, const std::string & token); - int Peek(std::istream &is, bool binary); - void ReadToken(std::istream &is, bool binary, std::string *str); - void PeekToken(std::istream &is, bool binary, std::string *str); - - WriteToken writes the token and one space (whether in binary or text mode). - - Peek returns the first character of the next token, by consuming whitespace - (in text mode) and then returning the peek() character. It returns -1 at EOF; - it doesn't throw. It's useful if a class can have various forms based on - typedefs and virtual classes, and wants to know which version to read. - - ReadToken allows the caller to obtain the next token. PeekToken works just - like ReadToken, but seeks back to the beginning of the token. A subsequent - call to ReadToken will read the same token again. This is useful when - different object types are written to the same file; using PeekToken one can - decide which of the objects to read. - - There is currently no special functionality for writing/reading strings (where - the strings contain data rather than "special tokens" that are whitespace-free - and nonempty). This is because Kaldi is structured in such a way that strings - don't appear, except as OpenFst symbol table entries (and these have their own - format). 
- - - NOTE: you should not call ReadIntegerType and WriteIntegerType with types, - such as int and size_t, that are machine-independent -- at least not - if you want your file formats to port between machines. Use int32 and - int64 where necessary. There is no way to detect this using compile-time - assertions because C++ only keeps track of the internal representation of - the type. -*/ - -/// \addtogroup io_funcs_basic -/// @{ - -/// WriteBasicType is the name of the write function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void WriteBasicType(std::ostream &os, bool binary, T t); - -/// ReadBasicType is the name of the read function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void ReadBasicType(std::istream &is, bool binary, T *t); - -// Declare specialization for bool. -template <> -void WriteBasicType(std::ostream &os, bool binary, bool b); - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b); - -// Declare specializations for float and double. -template <> -void WriteBasicType(std::ostream &os, bool binary, float f); - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f); - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f); - -template <> -void ReadBasicType(std::istream &is, bool binary, double *f); - -// Define ReadBasicType that accepts an "add" parameter to add to -// the destination. Caution: if used in Read functions, be careful -// to initialize the parameters concerned to zero in the default -// constructor. -template -inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) { - if (!add) { - ReadBasicType(is, binary, t); - } else { - T tmp = T(0); - ReadBasicType(is, binary, &tmp); - *t += tmp; - } -} - -/// Function for writing STL vectors of integer types. -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v); - -/// Function for reading STL vector of integer types. -template -inline void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - -/// Function for writing STL vectors of pairs of integer types. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v); - -/// Function for reading STL vector of pairs of integer types. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v); - -/// The WriteToken functions are for writing nonempty sequences of non-space -/// characters. They are not for general strings. -void WriteToken(std::ostream &os, bool binary, const char *token); -void WriteToken(std::ostream &os, bool binary, const std::string &token); - -/// Peek consumes whitespace (if binary == false) and then returns the peek() -/// value of the stream. -int Peek(std::istream &is, bool binary); - -/// ReadToken gets the next token and puts it in str (exception on failure). If -/// PeekToken() had been previously called, it is possible that the stream had -/// failed to unget the starting '<' character. In this case ReadToken() returns -/// the token string without the leading '<'. You must be prepared to handle -/// this case. ExpectToken() handles this internally, and is not affected. -void ReadToken(std::istream &is, bool binary, std::string *token); - -/// PeekToken will return the first character of the next token, or -1 if end of -/// file. 
It's the same as Peek(), except if the first character is '<' it will -/// skip over it and will return the next character. It will attempt to unget -/// the '<' so the stream is where it was before you did PeekToken(), however, -/// this is not guaranteed (see ReadToken()). -int PeekToken(std::istream &is, bool binary); - -/// ExpectToken tries to read in the given token, and throws an exception -/// on failure. -void ExpectToken(std::istream &is, bool binary, const char *token); -void ExpectToken(std::istream &is, bool binary, const std::string &token); - -/// ExpectPretty attempts to read the text in "token", but only in non-binary -/// mode. Throws exception on failure. It expects an exact match except that -/// arbitrary whitespace matches arbitrary whitespace. -void ExpectPretty(std::istream &is, bool binary, const char *token); -void ExpectPretty(std::istream &is, bool binary, const std::string &token); - -/// @} end "addtogroup io_funcs_basic" - -/// InitKaldiOutputStream initializes an opened stream for writing by writing an -/// optional binary header and modifying the floating-point precision; it will -/// typically not be called by users directly. -inline void InitKaldiOutputStream(std::ostream &os, bool binary); - -/// InitKaldiInputStream initializes an opened stream for reading by detecting -/// the binary header and setting the "binary" value appropriately; -/// It will typically not be called by users directly. -inline bool InitKaldiInputStream(std::istream &is, bool *binary); - -} // end namespace kaldi. -#endif // KALDI_BASE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-common.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-common.h deleted file mode 100644 index eee5f34d7234e7c029e6bb59584d3ee65ff5a875..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-common.h +++ /dev/null @@ -1,41 +0,0 @@ -// base/kaldi-common.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef KALDI_BASE_KALDI_COMMON_H_ -#define KALDI_BASE_KALDI_COMMON_H_ 1 - -#include -#include -#include // C string stuff like strcpy -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-utils.h" -#include "base/kaldi-error.h" -#include "base/kaldi-types.h" -// #include "base/io-funcs.h" -#include "base/kaldi-math.h" -// #include "base/timer.h" - -#endif // KALDI_BASE_KALDI_COMMON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-error.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-error.cc deleted file mode 100644 index 77edc6af6e56bb8fa3431d519e58fda9ee0bac6a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-error.cc +++ /dev/null @@ -1,42 +0,0 @@ -// base/kaldi-error.cc - -// Copyright 2019 LAIX (Yi Sun) -// Copyright 2019 SmartAction LLC (kkm) -// Copyright 2016 Brno University of Technology (author: Karel Vesely) -// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-error.h" - -#include - -namespace kaldi { - -/***** GLOBAL VARIABLES FOR LOGGING *****/ - -int32 g_kaldi_verbose_level = 0; -static std::string program_name; // NOLINT - -void SetProgramName(const char *basename) { - // Using the 'static std::string' for the program name is mostly harmless, - // because (a) Kaldi logging is undefined before main(), and (b) no stdc++ - // string implementation has been found in the wild that would not be just - // an empty string when zero-initialized but not yet constructed. - program_name = basename; -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-error.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-error.h deleted file mode 100644 index 0f65db372b5f05a8017433eed7c95badc819a0a6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-error.h +++ /dev/null @@ -1,57 +0,0 @@ -// base/kaldi-error.h - -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
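The `kaldi-error.h` removed just below (as the deleted `kaldi/README.md` earlier in this hunk explains) routes Kaldi's logging macros onto glog. A minimal, self-contained sketch of what code written against those macros resolves to — assuming only that glog is installed; the `main()` body and the messages are illustrative, not taken from the patch.

```cpp
// Illustrative only: mirrors two of the macro definitions from the deleted
// kaldi-error.h and shows how they behave once they resolve to glog.
#include <glog/logging.h>

#define KALDI_LOG \
  google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream()
#define KALDI_WARN \
  google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream()
#define KALDI_ASSERT(condition) CHECK(condition)

int main(int argc, char* argv[]) {
  FLAGS_logtostderr = true;            // print to stderr instead of log files
  google::InitGoogleLogging(argv[0]);  // analogous role to kaldi::SetProgramName()
  KALDI_LOG << "TLG decoder resources loaded";
  KALDI_WARN << "verbose level is " << 0;
  KALDI_ASSERT(1 + 1 == 2);            // CHECK() aborts the program on failure
  return 0;
}
```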
- -#ifndef KALDI_BASE_KALDI_ERROR_H_ -#define KALDI_BASE_KALDI_ERROR_H_ 1 - -#include "utils/log.h" - -namespace kaldi { - -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_LOG \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) - - -/***** PROGRAM NAME AND VERBOSITY LEVEL *****/ - -/// Called by ParseOptions to set base name (no directory) of the executing -/// program. The name is printed in logging code along with every message, -/// because in our scripts, we often mix together the stderr of many programs. -/// This function is very thread-unsafe. -void SetProgramName(const char *basename); - -/// This is set by util/parse-options.{h,cc} if you set --verbose=? option. -/// Do not use directly, prefer {Get,Set}VerboseLevel(). -extern int32 g_kaldi_verbose_level; - -/// Get verbosity level, usually set via command line '--verbose=' switch. -inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; } - -/// This should be rarely used, except by programs using Kaldi as library; -/// command-line programs set the verbose level automatically from ParseOptions. -inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; } - -} // namespace kaldi - -#endif // KALDI_BASE_KALDI_ERROR_H_ - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-math.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-math.cc deleted file mode 100644 index 175d9f49b6c5216645e90e146f4e2eab5572c342..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-math.cc +++ /dev/null @@ -1,164 +0,0 @@ -// base/kaldi-math.cc - -// Copyright 2009-2011 Microsoft Corporation; Yanmin Qian; -// Saarland University; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-math.h" -#ifndef _MSC_VER -#include -#include -#endif -#include -#include - -namespace kaldi { -// These routines are tested in matrix/matrix-test.cc - -int32 RoundUpToNearestPowerOfTwo(int32 n) { - KALDI_ASSERT(n > 0); - n--; - n |= n >> 1; - n |= n >> 2; - n |= n >> 4; - n |= n >> 8; - n |= n >> 16; - return n+1; -} - -static std::mutex _RandMutex; - -int Rand(struct RandomState* state) { -#if !defined(_POSIX_THREAD_SAFE_FUNCTIONS) - // On Windows and Cygwin, just call Rand() - return rand(); -#else - if (state) { - return rand_r(&(state->seed)); - } else { - std::lock_guard lock(_RandMutex); - return rand(); - } -#endif -} - -RandomState::RandomState() { - // we initialize it as Rand() + 27437 instead of just Rand(), because on some - // systems, e.g. 
at the very least Mac OSX Yosemite and later, it seems to be - // the case that rand_r when initialized with rand() will give you the exact - // same sequence of numbers that rand() will give if you keep calling rand() - // after that initial call. This can cause problems with repeated sequences. - // For example if you initialize two RandomState structs one after the other - // without calling rand() in between, they would give you the same sequence - // offset by one (if we didn't have the "+ 27437" in the code). 27437 is just - // a randomly chosen prime number. - seed = unsigned(Rand()) + 27437; -} - -bool WithProb(BaseFloat prob, struct RandomState* state) { - KALDI_ASSERT(prob >= 0 && prob <= 1.1); // prob should be <= 1.0, - // but we allow slightly larger values that could arise from roundoff in - // previous calculations. - KALDI_COMPILE_TIME_ASSERT(RAND_MAX > 128 * 128); - if (prob == 0) { - return false; - } else if (prob == 1.0) { - return true; - } else if (prob * RAND_MAX < 128.0) { - // prob is very small but nonzero, and the "main algorithm" - // wouldn't work that well. So: with probability 1/128, we - // return WithProb (prob * 128), else return false. - if (Rand(state) < RAND_MAX / 128) { // with probability 128... - // Note: we know that prob * 128.0 < 1.0, because - // we asserted RAND_MAX > 128 * 128. - return WithProb(prob * 128.0); - } else { - return false; - } - } else { - return (Rand(state) < ((RAND_MAX + static_cast(1.0)) * prob)); - } -} - -int32 RandInt(int32 min_val, int32 max_val, struct RandomState* state) { - // This is not exact. - KALDI_ASSERT(max_val >= min_val); - if (max_val == min_val) return min_val; - -#ifdef _MSC_VER - // RAND_MAX is quite small on Windows -> may need to handle larger numbers. - if (RAND_MAX > (max_val-min_val)*8) { - // *8 to avoid large inaccuracies in probability, from the modulus... - return min_val + - ((unsigned int)Rand(state) % (unsigned int)(max_val+1-min_val)); - } else { - if ((unsigned int)(RAND_MAX*RAND_MAX) > - (unsigned int)((max_val+1-min_val)*8)) { - // *8 to avoid inaccuracies in probability, from the modulus... - return min_val + ( (unsigned int)( (Rand(state)+RAND_MAX*Rand(state))) - % (unsigned int)(max_val+1-min_val)); - } else { - KALDI_ERR << "rand_int failed because we do not support such large " - "random numbers. (Extend this function)."; - } - } -#else - return min_val + - (static_cast(Rand(state)) % static_cast(max_val+1-min_val)); -#endif -} - -// Returns poisson-distributed random number. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state) { - // Knuth's algorithm. - KALDI_ASSERT(lambda >= 0); - float L = expf(-lambda), p = 1.0; - int32 k = 0; - do { - k++; - float u = RandUniform(state); - p *= u; - } while (p > L); - return k-1; -} - -void RandGauss2(float *a, float *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float u1 = RandUniform(state); - float u2 = RandUniform(state); - u1 = sqrtf(-2.0f * logf(u1)); - u2 = 2.0f * M_PI * u2; - *a = u1 * cosf(u2); - *b = u1 * sinf(u2); -} - -void RandGauss2(double *a, double *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float a_float, b_float; - // Just because we're using doubles doesn't mean we need super-high-quality - // random numbers, so we just use the floating-point version internally. 
- RandGauss2(&a_float, &b_float, state); - *a = a_float; - *b = b_float; -} - - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-math.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-math.h deleted file mode 100644 index 93c265ee96e704893da26b9083a44a9e60c6c192..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-math.h +++ /dev/null @@ -1,363 +0,0 @@ -// base/kaldi-math.h - -// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Yanmin Qian; -// Jan Silovsky; Saarland University -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_MATH_H_ -#define KALDI_BASE_KALDI_MATH_H_ 1 - -#ifdef _MSC_VER -#include -#endif - -#include -#include -#include - -#include "base/kaldi-types.h" -#include "base/kaldi-common.h" - - -#ifndef DBL_EPSILON -#define DBL_EPSILON 2.2204460492503131e-16 -#endif -#ifndef FLT_EPSILON -#define FLT_EPSILON 1.19209290e-7f -#endif - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif - -#ifndef M_SQRT2 -#define M_SQRT2 1.4142135623730950488016887 -#endif - -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -#ifndef M_SQRT1_2 -#define M_SQRT1_2 0.7071067811865475244008443621048490 -#endif - -#ifndef M_LOG_2PI -#define M_LOG_2PI 1.8378770664093454835606594728112 -#endif - -#ifndef M_LN2 -#define M_LN2 0.693147180559945309417232121458 -#endif - -#ifndef M_LN10 -#define M_LN10 2.302585092994045684017991454684 -#endif - - -#define KALDI_ISNAN std::isnan -#define KALDI_ISINF std::isinf -#define KALDI_ISFINITE(x) std::isfinite(x) - -#if !defined(KALDI_SQR) -# define KALDI_SQR(x) ((x) * (x)) -#endif - -namespace kaldi { - -#if !defined(_MSC_VER) || (_MSC_VER >= 1900) -inline double Exp(double x) { return exp(x); } -#ifndef KALDI_NO_EXPF -inline float Exp(float x) { return expf(x); } -#else -inline float Exp(float x) { return exp(static_cast(x)); } -#endif // KALDI_NO_EXPF -#else -inline double Exp(double x) { return exp(x); } -#if !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -// Microsoft CL v18.0 buggy 64-bit implementation of -// expf() incorrectly returns -inf for exp(-inf). 
-inline float Exp(float x) { return exp(static_cast(x)); } -#else -inline float Exp(float x) { return expf(x); } -#endif // !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) - -inline double Log(double x) { return log(x); } -inline float Log(float x) { return logf(x); } - -#if !defined(_MSC_VER) || (_MSC_VER >= 1700) -inline double Log1p(double x) { return log1p(x); } -inline float Log1p(float x) { return log1pf(x); } -#else -inline double Log1p(double x) { - const double cutoff = 1.0e-08; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} - -inline float Log1p(float x) { - const float cutoff = 1.0e-07; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} -#endif - -static const double kMinLogDiffDouble = Log(DBL_EPSILON); // negative! -static const float kMinLogDiffFloat = Log(FLT_EPSILON); // negative! - -// -infinity -const float kLogZeroFloat = -std::numeric_limits::infinity(); -const double kLogZeroDouble = -std::numeric_limits::infinity(); -const BaseFloat kLogZeroBaseFloat = -std::numeric_limits::infinity(); - -// Returns a random integer between 0 and RAND_MAX, inclusive -int Rand(struct RandomState* state = NULL); - -// State for thread-safe random number generator -struct RandomState { - RandomState(); - unsigned seed; -}; - -// Returns a random integer between first and last inclusive. -int32 RandInt(int32 first, int32 last, struct RandomState* state = NULL); - -// Returns true with probability "prob", -bool WithProb(BaseFloat prob, struct RandomState* state = NULL); -// with 0 <= prob <= 1 [we check this]. -// Internally calls Rand(). This function is carefully implemented so -// that it should work even if prob is very small. - -/// Returns a random number strictly between 0 and 1. -inline float RandUniform(struct RandomState* state = NULL) { - return static_cast((Rand(state) + 1.0) / (RAND_MAX+2.0)); -} - -inline float RandGauss(struct RandomState* state = NULL) { - return static_cast(sqrtf (-2 * Log(RandUniform(state))) - * cosf(2*M_PI*RandUniform(state))); -} - -// Returns poisson-distributed random number. Uses Knuth's algorithm. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state = NULL); - -// Returns a pair of gaussian random numbers. Uses Box-Muller transform -void RandGauss2(float *a, float *b, RandomState *state = NULL); -void RandGauss2(double *a, double *b, RandomState *state = NULL); - -// Also see Vector::RandCategorical(). - -// This is a randomized pruning mechanism that preserves expectations, -// that we typically use to prune posteriors. -template -inline Float RandPrune(Float post, BaseFloat prune_thresh, - struct RandomState* state = NULL) { - KALDI_ASSERT(prune_thresh >= 0.0); - if (post == 0.0 || std::abs(post) >= prune_thresh) - return post; - return (post >= 0 ? 1.0 : -1.0) * - (RandUniform(state) <= fabs(post)/prune_thresh ? prune_thresh : 0.0); -} - -// returns log(exp(x) + exp(y)). -inline double LogAdd(double x, double y) { - double diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffDouble) { - double res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) + exp(y)). 
-inline float LogAdd(float x, float y) { - float diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffFloat) { - float res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) - exp(y)). -inline double LogSub(double x, double y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - double diff = y - x; // Will be negative. - double res = x + Log(1.0 - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroDouble; - return res; -} - - -// returns log(exp(x) - exp(y)). -inline float LogSub(float x, float y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - float diff = y - x; // Will be negative. - float res = x + Log(1.0f - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroFloat; - return res; -} - -/// return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)). -static inline bool ApproxEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. - if (a == b) return true; - float diff = std::abs(a-b); - if (diff == std::numeric_limits::infinity() - || diff != diff) return false; // diff is +inf or nan. - return (diff <= relative_tolerance*(std::abs(a)+std::abs(b))); -} - -/// assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b)) -static inline void AssertEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. - KALDI_ASSERT(ApproxEqual(a, b, relative_tolerance)); -} - - -// RoundUpToNearestPowerOfTwo does the obvious thing. It crashes if n <= 0. -int32 RoundUpToNearestPowerOfTwo(int32 n); - -/// Returns a / b, rounding towards negative infinity in all cases. -static inline int32 DivideRoundingDown(int32 a, int32 b) { - KALDI_ASSERT(b != 0); - if (a * b >= 0) - return a / b; - else if (a < 0) - return (a - b + 1) / b; - else - return (a - b - 1) / b; -} - -template I Gcd(I m, I n) { - if (m == 0 || n == 0) { - if (m == 0 && n == 0) { // gcd not defined, as all integers are divisors. - KALDI_ERR << "Undefined GCD since m = 0, n = 0."; - } - return (m == 0 ? (n > 0 ? n : -n) : ( m > 0 ? m : -m)); - // return absolute value of whichever is nonzero - } - // could use compile-time assertion - // but involves messing with complex template stuff. - KALDI_ASSERT(std::numeric_limits::is_integer); - while (1) { - m %= n; - if (m == 0) return (n > 0 ? n : -n); - n %= m; - if (n == 0) return (m > 0 ? m : -m); - } -} - -/// Returns the least common multiple of two integers. Will -/// crash unless the inputs are positive. -template I Lcm(I m, I n) { - KALDI_ASSERT(m > 0 && n > 0); - I gcd = Gcd(m, n); - return gcd * (m/gcd) * (n/gcd); -} - - -template void Factorize(I m, std::vector *factors) { - // Splits a number into its prime factors, in sorted order from - // least to greatest, with duplication. A very inefficient - // algorithm, which is mainly intended for use in the - // mixed-radix FFT computation (where we assume most factors - // are small). - KALDI_ASSERT(factors != NULL); - KALDI_ASSERT(m >= 1); // Doesn't work for zero or negative numbers. 
- factors->clear(); - I small_factors[10] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 }; - - // First try small factors. - for (I i = 0; i < 10; i++) { - if (m == 1) return; // We're done. - while (m % small_factors[i] == 0) { - m /= small_factors[i]; - factors->push_back(small_factors[i]); - } - } - // Next try all odd numbers starting from 31. - for (I j = 31;; j += 2) { - if (m == 1) return; - while (m % j == 0) { - m /= j; - factors->push_back(j); - } - } -} - -inline double Hypot(double x, double y) { return hypot(x, y); } -inline float Hypot(float x, float y) { return hypotf(x, y); } - - - - -} // namespace kaldi - - -#endif // KALDI_BASE_KALDI_MATH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-types.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-types.h deleted file mode 100644 index 7ebf4f85386192a65e176d8f0ecde9bb348af4a0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-types.h +++ /dev/null @@ -1,75 +0,0 @@ -// base/kaldi-types.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_TYPES_H_ -#define KALDI_BASE_KALDI_TYPES_H_ 1 - -namespace kaldi { -// TYPEDEFS .................................................................. 
-#if (KALDI_DOUBLEPRECISION != 0) -typedef double BaseFloat; -#else -typedef float BaseFloat; -#endif -} - -#ifdef _MSC_VER -#include -#define ssize_t SSIZE_T -#endif - -// we can do this a different way if some platform -// we find in the future lacks stdint.h -#include - -// for discussion on what to do if you need compile kaldi -// without OpenFST, see the bottom of this this file -#include - -namespace kaldi { - using ::int16; - using ::int32; - using ::int64; - using ::uint16; - using ::uint32; - using ::uint64; - typedef float float32; - typedef double double64; -} // end namespace kaldi - -// In a theoretical case you decide compile Kaldi without the OpenFST -// comment the previous namespace statement and uncomment the following -/* -namespace kaldi { - typedef int8_t int8; - typedef int16_t int16; - typedef int32_t int32; - typedef int64_t int64; - - typedef uint8_t uint8; - typedef uint16_t uint16; - typedef uint32_t uint32; - typedef uint64_t uint64; - typedef float float32; - typedef double double64; -} // end namespace kaldi -*/ - -#endif // KALDI_BASE_KALDI_TYPES_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-utils.h deleted file mode 100644 index bd434d09ed92ec94bc4208f53a4416f941edfdb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/base/kaldi-utils.h +++ /dev/null @@ -1,155 +0,0 @@ -// base/kaldi-utils.h - -// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; -// Saarland University; Karel Vesely; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_UTILS_H_ -#define KALDI_BASE_KALDI_UTILS_H_ 1 - -#if defined(_MSC_VER) -# define WIN32_LEAN_AND_MEAN -# define NOMINMAX -# include -#endif - -#ifdef _MSC_VER -#include -#define unlink _unlink -#else -#include -#endif - -#include -#include - -#if defined(_MSC_VER) -#pragma warning(disable: 4244 4056 4305 4800 4267 4996 4756 4661) -#if _MSC_VER < 1400 -#define __restrict__ -#else -#define __restrict__ __restrict -#endif -#endif - -#if defined(_MSC_VER) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = _aligned_malloc(size, align)) -# define KALDI_MEMALIGN_FREE(x) _aligned_free(x) -#elif defined(__CYGWIN__) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = aligned_alloc(align, size)) -# define KALDI_MEMALIGN_FREE(x) free(x) -#else -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (!posix_memalign(pp_orig, align, size) ? *(pp_orig) : NULL) -# define KALDI_MEMALIGN_FREE(x) free(x) -#endif - -#ifdef __ICC -#pragma warning(disable: 383) // ICPC remark we don't want. -#pragma warning(disable: 810) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. 
-#pragma warning(disable: 1418) // ICPC remark we don't want. -#pragma warning(disable: 444) // ICPC remark we don't want. -#pragma warning(disable: 869) // ICPC remark we don't want. -#pragma warning(disable: 1287) // ICPC remark we don't want. -#pragma warning(disable: 279) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. -#endif - - -namespace kaldi { - - -// CharToString prints the character in a human-readable form, for debugging. -std::string CharToString(const char &c); - - -inline int MachineIsLittleEndian() { - int check = 1; - return (*reinterpret_cast(&check) != 0); -} - -// This function kaldi::Sleep() provides a portable way -// to sleep for a possibly fractional -// number of seconds. On Windows it's only accurate to microseconds. -void Sleep(float seconds); -} // namespace kaldi - -#define KALDI_SWAP8(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[7];\ - (reinterpret_cast(&a))[7] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[6];\ - (reinterpret_cast(&a))[6] = t;\ - t = (reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=(reinterpret_cast(&a))[5];\ - (reinterpret_cast(&a))[5] = t;\ - t = (reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3]=(reinterpret_cast(&a))[4];\ - (reinterpret_cast(&a))[4] = t;} while (0) -#define KALDI_SWAP4(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=t;} while (0) -#define KALDI_SWAP2(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1] = t;} while (0) - - -// Makes copy constructor and operator= private. 
-#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \ - type(const type&); \ - void operator = (const type&) - -template class KaldiCompileTimeAssert { }; -template<> class KaldiCompileTimeAssert { - public: - static inline void Check() { } -}; - -#define KALDI_COMPILE_TIME_ASSERT(b) KaldiCompileTimeAssert<(b)>::Check() - -#define KALDI_ASSERT_IS_INTEGER_TYPE(I) \ - KaldiCompileTimeAssert::is_specialized \ - && std::numeric_limits::is_integer>::Check() - -#define KALDI_ASSERT_IS_FLOATING_TYPE(F) \ - KaldiCompileTimeAssert::is_specialized \ - && !std::numeric_limits::is_integer>::Check() - -#if defined(_MSC_VER) -#define KALDI_STRCASECMP _stricmp -#elif defined(__CYGWIN__) -#include -#define KALDI_STRCASECMP strcasecmp -#else -#define KALDI_STRCASECMP strcasecmp -#endif -#ifdef _MSC_VER -# define KALDI_STRTOLL(cur_cstr, end_cstr) _strtoi64(cur_cstr, end_cstr, 10); -#else -# define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10); -#endif - -#endif // KALDI_BASE_KALDI_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-decoder.cc deleted file mode 100644 index 06f77557fa49a23f6a44d07c327a1b3b081c6dec..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-decoder.cc +++ /dev/null @@ -1,1101 +0,0 @@ -// decoder/lattice-faster-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2018 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "decoder/lattice-faster-decoder.h" -// #include "lat/lattice-functions.h" - -namespace kaldi { - -// instantiate this class once for each thing you have to decode. -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : fst_(&fst), - delete_fst_(false), - config_(config), - num_toks_(0), - context_graph_(context_graph) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. -} - -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const LatticeFasterDecoderConfig &config, FST *fst) - : fst_(fst), delete_fst_(true), config_(config), num_toks_(0) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. 
-} - -template -LatticeFasterDecoderTpl::~LatticeFasterDecoderTpl() { - DeleteElems(toks_.Clear()); - ClearActiveTokens(); - if (delete_fst_) delete fst_; -} - -template -void LatticeFasterDecoderTpl::InitDecoding() { - // clean up from last time: - DeleteElems(toks_.Clear()); - cost_offsets_.clear(); - ClearActiveTokens(); - warned_ = false; - num_toks_ = 0; - decoding_finalized_ = false; - final_costs_.clear(); - StateId start_state = fst_->Start(); - KALDI_ASSERT(start_state != fst::kNoStateId); - active_toks_.resize(1); - Token *start_tok = new Token(0.0, 0.0, NULL, NULL, NULL); - active_toks_[0].toks = start_tok; - toks_.Insert(start_state, start_tok); - num_toks_++; - ProcessNonemitting(config_.beam); -} - -// Returns true if any kind of traceback is available (not necessarily from -// a final state). It should only very rarely return false; this indicates -// an unusual search error. -template -bool LatticeFasterDecoderTpl::Decode( - DecodableInterface *decodable) { - InitDecoding(); - // We use 1-based indexing for frames in this decoder (if you view it in - // terms of features), but note that the decodable object uses zero-based - // numbering, which we have to correct for when we call it. - AdvanceDecoding(decodable); - FinalizeDecoding(); - - // Returns true if we have any kind of traceback available (not necessarily - // to the end state; query ReachedFinal() for that). - return !active_toks_.empty() && active_toks_.back().toks != NULL; -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - Lattice raw_lat; - GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, olat); - return (olat->NumStates() != 0); -} - -// Outputs an FST corresponding to the raw, state-level lattice -template -bool LatticeFasterDecoderTpl::GetRawLattice( - Lattice *ofst, bool use_final_probs) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (decoding_finalized_ ? final_costs_ : final_costs_local); - if (!decoding_finalized_ && use_final_probs) - ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - const int32 bucket_count = num_toks_ / 2 + 3; - unordered_map tok_map(bucket_count); - // First create all states. - std::vector token_list; - for (int32 f = 0; f <= num_frames; f++) { - if (active_toks_[f].toks == NULL) { - KALDI_WARN << "GetRawLattice: no tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - TopSortTokens(active_toks_[f].toks, &token_list); - for (size_t i = 0; i < token_list.size(); i++) - if (token_list[i] != NULL) tok_map[token_list[i]] = ofst->AddState(); - } - // The next statement sets the start state of the output FST. Because we - // topologically sorted the tokens, state zero must be the start-state. 
- ofst->SetStart(0); - - KALDI_VLOG(4) << "init:" << num_toks_ / 2 + 3 - << " buckets:" << tok_map.bucket_count() - << " load:" << tok_map.load_factor() - << " max:" << tok_map.max_load_factor(); - // Now create all arcs. - for (int32 f = 0; f <= num_frames; f++) { - for (Token *tok = active_toks_[f].toks; tok != NULL; tok = tok->next) { - StateId cur_state = tok_map[tok]; - for (ForwardLinkT *l = tok->links; l != NULL; l = l->next) { - typename unordered_map::const_iterator iter = - tok_map.find(l->next_tok); - StateId nextstate = iter->second; - KALDI_ASSERT(iter != tok_map.end()); - BaseFloat cost_offset = 0.0; - if (l->ilabel != 0) { // emitting.. - KALDI_ASSERT(f >= 0 && f < cost_offsets_.size()); - cost_offset = cost_offsets_[f]; - } - - StateId state = cur_state; - if (l->is_start_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->start_tag_id(), Weight(0, 0), tmp); - ofst->AddArc(state, arc); - state = tmp; - } - if (l->is_end_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->end_tag_id(), Weight(0, 0), nextstate); - ofst->AddArc(tmp, arc); - nextstate = tmp; - } - - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(state, arc); - } - if (f == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - } - - fst::TopSort(ofst); - return (ofst->NumStates() > 0); -} - -// This function is now deprecated, since now we do determinization from outside -// the LatticeFasterDecoder class. Outputs an FST corresponding to the -// lattice-determinized lattice (one path per word sequence). -template -bool LatticeFasterDecoderTpl::GetLattice( - CompactLattice *ofst, bool use_final_probs) const { - Lattice raw_fst; - GetRawLattice(&raw_fst, use_final_probs); - Invert(&raw_fst); // make it so word labels are on the input. - // (in phase where we get backward-costs). - fst::ILabelCompare ilabel_comp; - ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes - // lattice-determinization more efficient. - - fst::DeterminizeLatticePrunedOptions lat_opts; - lat_opts.max_mem = config_.det_opts.max_mem; - - DeterminizeLatticePruned(raw_fst, config_.lattice_beam, ofst, lat_opts); - raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed. - Connect(ofst); // Remove unreachable states... there might be - // a small number of these, in some cases. - // Note: if something went wrong and the raw lattice was empty, - // we should still get to this point in the code without warnings or failures. - return (ofst->NumStates() != 0); -} - -template -void LatticeFasterDecoderTpl::PossiblyResizeHash(size_t num_toks) { - size_t new_sz = static_cast(static_cast(num_toks) * - config_.hash_ratio); - if (new_sz > toks_.Size()) { - toks_.SetSize(new_sz); - } -} - -/* - A note on the definition of extra_cost. - - extra_cost is used in pruning tokens, to save memory. - - extra_cost can be thought of as a beta (backward) cost assuming - we had set the betas on currently-active tokens to all be the negative - of the alphas for those tokens. (So all currently active tokens would - be on (tied) best paths). - - We can use the extra_cost to accurately prune away tokens that we know will - never appear in the lattice. 
If the extra_cost is greater than the desired - lattice beam, the token would provably never appear in the lattice, so we can - prune away the token. - - (Note: we don't update all the extra_costs every time we update a frame; we - only do it every 'config_.prune_interval' frames). - */ - -// FindOrAddToken either locates a token in hash of toks_, -// or if necessary inserts a new, empty token (i.e. with no forward links) -// for the current frame. [note: it's inserted if necessary into hash toks_ -// and also into the singly linked list of tokens active on this frame -// (whose head is at active_toks_[frame]). -template -inline typename LatticeFasterDecoderTpl::Elem * -LatticeFasterDecoderTpl::FindOrAddToken(StateId state, - int32 frame_plus_one, - BaseFloat tot_cost, - Token *backpointer, - bool *changed) { - // Returns the Token pointer. Sets "changed" (if non-NULL) to true - // if the token was newly created or the cost changed. - KALDI_ASSERT(frame_plus_one < active_toks_.size()); - Token *&toks = active_toks_[frame_plus_one].toks; - Elem *e_found = toks_.Insert(state, NULL); - if (e_found->val == NULL) { // no such token presently. - const BaseFloat extra_cost = 0.0; - // tokens on the currently final frame have zero extra_cost - // as any of them could end up - // on the winning path. - Token *new_tok = new Token(tot_cost, extra_cost, NULL, toks, backpointer); - // NULL: no forward links yet - toks = new_tok; - num_toks_++; - e_found->val = new_tok; - if (changed) *changed = true; - return e_found; - } else { - Token *tok = e_found->val; // There is an existing Token for this state. - if (tok->tot_cost > tot_cost) { // replace old token - tok->tot_cost = tot_cost; - // SetBackpointer() just does tok->backpointer = backpointer in - // the case where Token == BackpointerToken, else nothing. - tok->SetBackpointer(backpointer); - // we don't allocate a new token, the old stays linked in active_toks_ - // we only replace the tot_cost - // in the current frame, there are no forward links (and no extra_cost) - // only in ProcessNonemitting we have to delete forward links - // in case we visit a state for the second time - // those forward links, that lead to this replaced token before: - // they remain and will hopefully be pruned later (PruneForwardLinks...) - if (changed) *changed = true; - } else { - if (changed) *changed = false; - } - return e_found; - } -} - -// prunes outgoing links for all tokens in active_toks_[frame] -// it's called by PruneActiveTokens -// all links, that have link_extra_cost > lattice_beam are pruned -template -void LatticeFasterDecoderTpl::PruneForwardLinks( - int32 frame_plus_one, bool *extra_costs_changed, bool *links_pruned, - BaseFloat delta) { - // delta is the amount by which the extra_costs must change - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - - *extra_costs_changed = false; - *links_pruned = false; - KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size()); - if (active_toks_[frame_plus_one].toks == - NULL) { // empty list; should not happen. - if (!warned_) { - KALDI_WARN << "No tokens alive [doing pruning].. warning first " - "time only for each utterance\n"; - warned_ = true; - } - } - - // We have to iterate until there is no more change, because the links - // are not guaranteed to be in topological order. 
- bool changed = true; // difference new minus old extra cost >= delta ? - while (changed) { - changed = false; - for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL; - tok = tok->next) { - ForwardLinkT *link, *prev_link = NULL; - // will recompute tok_extra_cost for tok. - BaseFloat tok_extra_cost = std::numeric_limits::infinity(); - // tok_extra_cost is the best (min) of link_extra_cost of outgoing links - for (link = tok->links; link != NULL;) { - // See if we need to excise this link... - Token *next_tok = link->next_tok; - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); // difference in brackets is >= 0 - // link_exta_cost is the difference in score between the best paths - // through link source state and through link destination state - KALDI_ASSERT(link_extra_cost == link_extra_cost); // check for NaN - // the graph_cost contatins the context score - // if it's the score of the backoff arc, it should be removed. - if (link->context_score < 0) { - link_extra_cost += link->context_score; - } - if (link_extra_cost > config_.lattice_beam) { // excise link - ForwardLinkT *next_link = link->next; - if (prev_link != NULL) - prev_link->next = next_link; - else - tok->links = next_link; - delete link; - link = next_link; // advance link but leave prev_link the same. - *links_pruned = true; - } else { // keep the link and update the tok_extra_cost if needed. - if (link_extra_cost < 0.0) { // this is just a precaution. - // if (link_extra_cost < -0.01) - // KALDI_WARN << "Negative extra_cost: " << link_extra_cost; - link_extra_cost = 0.0; - } - if (link_extra_cost < tok_extra_cost) - tok_extra_cost = link_extra_cost; - prev_link = link; // move to next link - link = link->next; - } - } // for all outgoing links - if (fabs(tok_extra_cost - tok->extra_cost) > delta) - changed = true; // difference new minus old is bigger than delta - tok->extra_cost = tok_extra_cost; - // will be +infinity or <= lattice_beam_. - // infinity indicates, that no forward link survived pruning - } // for all Token on active_toks_[frame] - if (changed) *extra_costs_changed = true; - - // Note: it's theoretically possible that aggressive compiler - // optimizations could cause an infinite loop here for small delta and - // high-dynamic-range scores. - } // while changed -} - -// PruneForwardLinksFinal is a version of PruneForwardLinks that we call -// on the final frame. If there are final tokens active, it uses -// the final-probs for pruning, otherwise it treats all tokens as final. -template -void LatticeFasterDecoderTpl::PruneForwardLinksFinal() { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame_plus_one = active_toks_.size() - 1; - - if (active_toks_[frame_plus_one].toks == - NULL) // empty list; should not happen. - KALDI_WARN << "No tokens alive at end of file"; - - typedef typename unordered_map::const_iterator IterType; - ComputeFinalCosts(&final_costs_, &final_relative_cost_, &final_best_cost_); - decoding_finalized_ = true; - // We call DeleteElems() as a nicety, not because it's really necessary; - // otherwise there would be a time, after calling PruneTokensForFrame() on the - // final frame, when toks_.GetList() or toks_.Clear() would contain pointers - // to nonexistent tokens. - DeleteElems(toks_.Clear()); - - // Now go through tokens on this frame, pruning forward links... may have to - // iterate a few times until there is no more change, because the list is not - // in topological order. 
This is a modified version of the code in - // PruneForwardLinks, but here we also take account of the final-probs. - bool changed = true; - BaseFloat delta = 1.0e-05; - while (changed) { - changed = false; - for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL; - tok = tok->next) { - ForwardLinkT *link, *prev_link = NULL; - // will recompute tok_extra_cost. It has a term in it that corresponds - // to the "final-prob", so instead of initializing tok_extra_cost to - // infinity below we set it to the difference between the - // (score+final_prob) of this token, and the best such (score+final_prob). - BaseFloat final_cost; - if (final_costs_.empty()) { - final_cost = 0.0; - } else { - IterType iter = final_costs_.find(tok); - if (iter != final_costs_.end()) - final_cost = iter->second; - else - final_cost = std::numeric_limits::infinity(); - } - BaseFloat tok_extra_cost = tok->tot_cost + final_cost - final_best_cost_; - // tok_extra_cost will be a "min" over either directly being final, or - // being indirectly final through other links, and the loop below may - // decrease its value: - for (link = tok->links; link != NULL;) { - // See if we need to excise this link... - Token *next_tok = link->next_tok; - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); - if (link_extra_cost > config_.lattice_beam) { // excise link - ForwardLinkT *next_link = link->next; - if (prev_link != NULL) - prev_link->next = next_link; - else - tok->links = next_link; - delete link; - link = next_link; // advance link but leave prev_link the same. - } else { // keep the link and update the tok_extra_cost if needed. - if (link_extra_cost < 0.0) { // this is just a precaution. - // if (link_extra_cost < -0.01) - // KALDI_WARN << "Negative extra_cost: " << link_extra_cost; - link_extra_cost = 0.0; - } - if (link_extra_cost < tok_extra_cost) - tok_extra_cost = link_extra_cost; - prev_link = link; - link = link->next; - } - } - // prune away tokens worse than lattice_beam above best path. This step - // was not necessary in the non-final case because then, this case - // showed up as having no forward links. Here, the tok_extra_cost has - // an extra component relating to the final-prob. - if (tok_extra_cost > config_.lattice_beam) - tok_extra_cost = std::numeric_limits::infinity(); - // to be pruned in PruneTokensForFrame - - if (!ApproxEqual(tok->extra_cost, tok_extra_cost, delta)) changed = true; - tok->extra_cost = - tok_extra_cost; // will be +infinity or <= lattice_beam_. - } - } // while changed -} - -template -BaseFloat LatticeFasterDecoderTpl::FinalRelativeCost() const { - if (!decoding_finalized_) { - BaseFloat relative_cost; - ComputeFinalCosts(NULL, &relative_cost, NULL); - return relative_cost; - } else { - // we're not allowed to call that function if FinalizeDecoding() has - // been called; return a cached value. - return final_relative_cost_; - } -} - -// Prune away any tokens on this frame that have no forward links. -// [we don't do this in PruneForwardLinks because it would give us -// a problem with dangling pointers]. 
-// It's called by PruneActiveTokens if any forward links have been pruned -template -void LatticeFasterDecoderTpl::PruneTokensForFrame( - int32 frame_plus_one) { - KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size()); - Token *&toks = active_toks_[frame_plus_one].toks; - if (toks == NULL) KALDI_WARN << "No tokens alive [doing pruning]"; - Token *tok, *next_tok, *prev_tok = NULL; - for (tok = toks; tok != NULL; tok = next_tok) { - next_tok = tok->next; - if (tok->extra_cost == std::numeric_limits::infinity()) { - // token is unreachable from end of graph; (no forward links survived) - // excise tok from list and delete tok. - if (prev_tok != NULL) - prev_tok->next = tok->next; - else - toks = tok->next; - delete tok; - num_toks_--; - } else { // fetch next Token - prev_tok = tok; - } - } -} - -// Go backwards through still-alive tokens, pruning them, starting not from -// the current frame (where we want to keep all tokens) but from the frame -// before that. We go backwards through the frames and stop when we reach a -// point where the delta-costs are not changing (and the delta controls when we -// consider a cost to have "not changed"). -template -void LatticeFasterDecoderTpl::PruneActiveTokens(BaseFloat delta) { - int32 cur_frame_plus_one = NumFramesDecoded(); - int32 num_toks_begin = num_toks_; - // The index "f" below represents a "frame plus one", i.e. you'd have to - // subtract one to get the corresponding index for the decodable object. - for (int32 f = cur_frame_plus_one - 1; f >= 0; f--) { - // Reason why we need to prune forward links in this situation: - // (1) we have never pruned them (new TokenList) - // (2) we have not yet pruned the forward links to the next f, - // after any of those tokens have changed their extra_cost. 
- if (active_toks_[f].must_prune_forward_links) { - bool extra_costs_changed = false, links_pruned = false; - PruneForwardLinks(f, &extra_costs_changed, &links_pruned, delta); - if (extra_costs_changed && f > 0) // any token has changed extra_cost - active_toks_[f - 1].must_prune_forward_links = true; - if (links_pruned) // any link was pruned - active_toks_[f].must_prune_tokens = true; - active_toks_[f].must_prune_forward_links = false; // job done - } - if (f + 1 < cur_frame_plus_one && // except for last f (no forward links) - active_toks_[f + 1].must_prune_tokens) { - PruneTokensForFrame(f + 1); - active_toks_[f + 1].must_prune_tokens = false; - } - } - KALDI_VLOG(4) << "PruneActiveTokens: pruned tokens from " << num_toks_begin - << " to " << num_toks_; -} - -template -void LatticeFasterDecoderTpl::ComputeFinalCosts( - unordered_map *final_costs, - BaseFloat *final_relative_cost, BaseFloat *final_best_cost) const { - KALDI_ASSERT(!decoding_finalized_); - if (final_costs != NULL) final_costs->clear(); - const Elem *final_toks = toks_.GetList(); - BaseFloat infinity = std::numeric_limits::infinity(); - BaseFloat best_cost = infinity, best_cost_with_final = infinity; - - while (final_toks != NULL) { - StateId state = final_toks->key; - Token *tok = final_toks->val; - const Elem *next = final_toks->tail; - BaseFloat final_cost = fst_->Final(state).Value(); - BaseFloat cost = tok->tot_cost, cost_with_final = cost + final_cost; - best_cost = std::min(cost, best_cost); - best_cost_with_final = std::min(cost_with_final, best_cost_with_final); - if (final_costs != NULL && final_cost != infinity) - (*final_costs)[tok] = final_cost; - final_toks = next; - } - if (final_relative_cost != NULL) { - if (best_cost == infinity && best_cost_with_final == infinity) { - // Likely this will only happen if there are no tokens surviving. - // This seems the least bad way to handle it. - *final_relative_cost = infinity; - } else { - *final_relative_cost = best_cost_with_final - best_cost; - } - } - if (final_best_cost != NULL) { - if (best_cost_with_final != infinity) { // final-state exists. - *final_best_cost = best_cost_with_final; - } else { // no final-state exists. - *final_best_cost = best_cost; - } - } -} - -template -void LatticeFasterDecoderTpl::AdvanceDecoding( - DecodableInterface *decodable, int32 max_num_frames) { - if (std::is_same >::value) { - // if the type 'FST' is the FST base-class, then see if the FST type of fst_ - // is actually VectorFst or ConstFst. If so, call the AdvanceDecoding() - // function after casting *this to the more specific type. - if (fst_->Type() == "const") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } else if (fst_->Type() == "vector") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } - } - - KALDI_ASSERT(!active_toks_.empty() && !decoding_finalized_ && - "You must call InitDecoding() before AdvanceDecoding"); - int32 num_frames_ready = decodable->NumFramesReady(); - // num_frames_ready must be >= num_frames_decoded, or else - // the number of frames ready must have decreased (which doesn't - // make sense) or the decodable object changed between calls - // (which isn't allowed). 
- KALDI_ASSERT(num_frames_ready >= NumFramesDecoded()); - int32 target_frames_decoded = num_frames_ready; - if (max_num_frames >= 0) - target_frames_decoded = - std::min(target_frames_decoded, NumFramesDecoded() + max_num_frames); - while (NumFramesDecoded() < target_frames_decoded) { - if (NumFramesDecoded() % config_.prune_interval == 0) { - PruneActiveTokens(config_.lattice_beam * config_.prune_scale); - } - BaseFloat cost_cutoff = ProcessEmitting(decodable); - ProcessNonemitting(cost_cutoff); - } -} - -// FinalizeDecoding() is a version of PruneActiveTokens that we call -// (optionally) on the final frame. Takes into account the final-prob of -// tokens. This function used to be called PruneActiveTokensFinal(). -template -void LatticeFasterDecoderTpl::FinalizeDecoding() { - int32 final_frame_plus_one = NumFramesDecoded(); - int32 num_toks_begin = num_toks_; - // PruneForwardLinksFinal() prunes final frame (with final-probs), and - // sets decoding_finalized_. - PruneForwardLinksFinal(); - for (int32 f = final_frame_plus_one - 1; f >= 0; f--) { - bool b1, b2; // values not used. - BaseFloat dontcare = 0.0; // delta of zero means we must always update - PruneForwardLinks(f, &b1, &b2, dontcare); - PruneTokensForFrame(f + 1); - } - PruneTokensForFrame(0); - KALDI_VLOG(4) << "pruned tokens from " << num_toks_begin << " to " - << num_toks_; -} - -/// Gets the weight cutoff. Also counts the active tokens. -template -BaseFloat LatticeFasterDecoderTpl::GetCutoff( - Elem *list_head, size_t *tok_count, BaseFloat *adaptive_beam, - Elem **best_elem) { - BaseFloat best_weight = std::numeric_limits::infinity(); - // positive == high cost == bad. - size_t count = 0; - if (config_.max_active == std::numeric_limits::max() && - config_.min_active == 0) { - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = static_cast(e->val->tot_cost); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - if (adaptive_beam != NULL) *adaptive_beam = config_.beam; - return best_weight + config_.beam; - } else { - tmp_array_.clear(); - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = e->val->tot_cost; - tmp_array_.push_back(w); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - - BaseFloat beam_cutoff = best_weight + config_.beam, - min_active_cutoff = std::numeric_limits::infinity(), - max_active_cutoff = std::numeric_limits::infinity(); - - KALDI_VLOG(6) << "Number of tokens active on frame " << NumFramesDecoded() - << " is " << tmp_array_.size(); - - if (tmp_array_.size() > static_cast(config_.max_active)) { - std::nth_element(tmp_array_.begin(), - tmp_array_.begin() + config_.max_active, - tmp_array_.end()); - max_active_cutoff = tmp_array_[config_.max_active]; - } - if (max_active_cutoff < beam_cutoff) { // max_active is tighter than beam. - if (adaptive_beam) - *adaptive_beam = max_active_cutoff - best_weight + config_.beam_delta; - return max_active_cutoff; - } - if (tmp_array_.size() > static_cast(config_.min_active)) { - if (config_.min_active == 0) { - min_active_cutoff = best_weight; - } else { - std::nth_element( - tmp_array_.begin(), tmp_array_.begin() + config_.min_active, - tmp_array_.size() > static_cast(config_.max_active) - ? 
tmp_array_.begin() + config_.max_active - : tmp_array_.end()); - min_active_cutoff = tmp_array_[config_.min_active]; - } - } - if (min_active_cutoff > beam_cutoff) { // min_active is looser than beam. - if (adaptive_beam) - *adaptive_beam = min_active_cutoff - best_weight + config_.beam_delta; - return min_active_cutoff; - } else { - *adaptive_beam = config_.beam; - return beam_cutoff; - } - } -} - -template -BaseFloat LatticeFasterDecoderTpl::ProcessEmitting( - DecodableInterface *decodable) { - KALDI_ASSERT(active_toks_.size() > 0); - int32 frame = - active_toks_.size() - 1; // frame is the frame-index - // (zero-based) used to get likelihoods - // from the decodable object. - active_toks_.resize(active_toks_.size() + 1); - - Elem *final_toks = - toks_.Clear(); // analogous to swapping prev_toks_ / cur_toks_ - // in simple-decoder.h. Removes the Elems from - // being indexed in the hash in toks_. - Elem *best_elem = NULL; - BaseFloat adaptive_beam; - size_t tok_cnt; - BaseFloat cur_cutoff = - GetCutoff(final_toks, &tok_cnt, &adaptive_beam, &best_elem); - KALDI_VLOG(6) << "Adaptive beam on frame " << NumFramesDecoded() << " is " - << adaptive_beam; - - PossiblyResizeHash( - tok_cnt); // This makes sure the hash is always big enough. - - BaseFloat next_cutoff = std::numeric_limits::infinity(); - // pruning "online" before having seen all tokens - - BaseFloat cost_offset = 0.0; // Used to keep probabilities in a good - // dynamic range. - - // First process the best token to get a hopefully - // reasonably tight bound on the next cutoff. The only - // products of the next block are "next_cutoff" and "cost_offset". - if (best_elem) { - StateId state = best_elem->key; - Token *tok = best_elem->val; - cost_offset = -tok->tot_cost; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. - BaseFloat new_weight = arc.weight.Value() + cost_offset - - decodable->LogLikelihood(frame, arc.ilabel) + - tok->tot_cost; - if (state != arc.nextstate) { - new_weight += config_.length_penalty; - } - if (new_weight + adaptive_beam < next_cutoff) - next_cutoff = new_weight + adaptive_beam; - } - } - } - - // Store the offset on the acoustic likelihoods that we're applying. - // Could just do cost_offsets_.push_back(cost_offset), but we - // do it this way as it's more robust to future code changes. - cost_offsets_.resize(frame + 1, 0.0); - cost_offsets_[frame] = cost_offset; - - // the tokens are now owned here, in final_toks, and the hash is empty. - // 'owned' is a complex thing here; the point is we need to call DeleteElem - // on each elem 'e' to let toks_ know we're done with them. - for (Elem *e = final_toks, *e_tail; e != NULL; e = e_tail) { - // loop this way because we delete "e" as we go. - StateId state = e->key; - Token *tok = e->val; - if (tok->tot_cost <= cur_cutoff) { - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. 
- BaseFloat ac_cost = cost_offset - - decodable->LogLikelihood(frame, arc.ilabel), - graph_cost = arc.weight.Value(); - if (state != arc.nextstate) { - graph_cost += config_.length_penalty; - } - BaseFloat cur_cost = tok->tot_cost, - tot_cost = cur_cost + ac_cost + graph_cost; - if (tot_cost >= next_cutoff) - continue; - else if (tot_cost + adaptive_beam < next_cutoff) - next_cutoff = - tot_cost + adaptive_beam; // prune by best current token - // Note: the frame indexes into active_toks_ are one-based, - // hence the + 1. - Elem *e_next = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, NULL); - // NULL: no change indicator needed - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_next->val->context_state = tok->context_state; - } else { - e_next->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - // Add ForwardLink from tok to next_tok (put on head of list - // tok->links) - tok->links = new ForwardLinkT(e_next->val, arc.ilabel, arc.olabel, - graph_cost, ac_cost, is_start_boundary, - is_end_boundary, tok->links); - tok->links->context_score = context_score; - } - } // for all arcs - } - e_tail = e->tail; - toks_.Delete(e); // delete Elem - } - return next_cutoff; -} - -// static inline -template -void LatticeFasterDecoderTpl::DeleteForwardLinks(Token *tok) { - ForwardLinkT *l = tok->links, *m; - while (l != NULL) { - m = l->next; - delete l; - l = m; - } - tok->links = NULL; -} - -template -void LatticeFasterDecoderTpl::ProcessNonemitting(BaseFloat cutoff) { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame = static_cast(active_toks_.size()) - 2; - // Note: "frame" is the time-index we just processed, or -1 if - // we are processing the nonemitting transitions before the - // first frame (called from InitDecoding()). - - // Processes nonemitting arcs for one frame. Propagates within toks_. - // Note-- this queue structure is not very optimal as - // it may cause us to process states unnecessarily (e.g. more than once), - // but in the baseline code, turning this vector into a set to fix this - // problem did not improve overall speed. - - KALDI_ASSERT(queue_.empty()); - - if (toks_.GetList() == NULL) { - if (!warned_) { - KALDI_WARN << "Error, no surviving tokens: frame is " << frame; - warned_ = true; - } - } - - int before = 0, after = 0; - for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail) { - StateId state = e->key; - if (fst_->NumInputEpsilons(state) != 0) queue_.push_back(e); - ++before; - } - - while (!queue_.empty()) { - ++after; - const Elem *e = queue_.back(); - queue_.pop_back(); - - StateId state = e->key; - Token *tok = - e->val; // would segfault if e is a NULL pointer but this can't happen. - BaseFloat cur_cost = tok->tot_cost; - if (cur_cost >= cutoff) // Don't bother processing successors. - continue; - // If "tok" has any existing forward links, delete them, - // because we're about to regenerate them. This is a kind - // of non-optimality (remember, this is the simple decoder), - // but since most states are emitting it's not a huge issue. - DeleteForwardLinks(tok); // necessary when re-visiting - tok->links = NULL; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel == 0) { // propagate nonemitting only... 
- BaseFloat graph_cost = arc.weight.Value(), - tot_cost = cur_cost + graph_cost; - if (tot_cost < cutoff) { - bool changed; - - Elem *e_new = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, &changed); - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_new->val->context_state = tok->context_state; - } else { - e_new->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - - tok->links = - new ForwardLinkT(e_new->val, 0, arc.olabel, graph_cost, 0, - is_start_boundary, is_end_boundary, tok->links); - tok->links->context_score = context_score; - - // "changed" tells us whether the new token has a different - // cost from before, or is new [if so, add into queue]. - if (changed && fst_->NumInputEpsilons(arc.nextstate) != 0) - queue_.push_back(e_new); - } - } - } // for all arcs - } // while queue not empty - KALDI_VLOG(3) << "ProcessNonemitting " << before << " " << after; -} - -template -void LatticeFasterDecoderTpl::DeleteElems(Elem *list) { - for (Elem *e = list, *e_tail; e != NULL; e = e_tail) { - e_tail = e->tail; - toks_.Delete(e); - } -} - -template -void LatticeFasterDecoderTpl< - FST, Token>::ClearActiveTokens() { // a cleanup routine, at utt end/begin - for (size_t i = 0; i < active_toks_.size(); i++) { - // Delete all tokens alive on this frame, and any forward - // links they may have. - for (Token *tok = active_toks_[i].toks; tok != NULL;) { - DeleteForwardLinks(tok); - Token *next_tok = tok->next; - delete tok; - num_toks_--; - tok = next_tok; - } - } - active_toks_.clear(); - KALDI_ASSERT(num_toks_ == 0); -} - -// static -template -void LatticeFasterDecoderTpl::TopSortTokens( - Token *tok_list, std::vector *topsorted_list) { - unordered_map token2pos; - using std::unordered_set; - typedef typename unordered_map::iterator IterType; - int32 num_toks = 0; - for (Token *tok = tok_list; tok != NULL; tok = tok->next) num_toks++; - int32 cur_pos = 0; - // We assign the tokens numbers num_toks - 1, ... , 2, 1, 0. - // This is likely to be in closer to topological order than - // if we had given them ascending order, because of the way - // new tokens are put at the front of the list. - for (Token *tok = tok_list; tok != NULL; tok = tok->next) - token2pos[tok] = num_toks - ++cur_pos; - - unordered_set reprocess; - - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) { - Token *tok = iter->first; - int32 pos = iter->second; - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - // We only need to consider epsilon links, since non-epsilon links - // transition between frames and this function only needs to sort a list - // of tokens from a single frame. - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { // another token on this - // frame, so must consider it. - int32 next_pos = following_iter->second; - if (next_pos < pos) { // reassign the position of the next Token. - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - // In case we had previously assigned this token to be reprocessed, we can - // erase it from that set because it's "happy now" (we just processed it). - reprocess.erase(tok); - } - - size_t max_loop = 1000000, - loop_count; // max_loop is to detect epsilon cycles. 
- for (loop_count = 0; !reprocess.empty() && loop_count < max_loop; - ++loop_count) { - std::vector reprocess_vec; - for (typename unordered_set::iterator iter = reprocess.begin(); - iter != reprocess.end(); ++iter) - reprocess_vec.push_back(*iter); - reprocess.clear(); - for (typename std::vector::iterator iter = reprocess_vec.begin(); - iter != reprocess_vec.end(); ++iter) { - Token *tok = *iter; - int32 pos = token2pos[tok]; - // Repeat the processing we did above (for comments, see above). - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { - int32 next_pos = following_iter->second; - if (next_pos < pos) { - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - } - } - KALDI_ASSERT(loop_count < max_loop && - "Epsilon loops exist in your decoding " - "graph (this is not allowed!)"); - - topsorted_list->clear(); - topsorted_list->resize(cur_pos, - NULL); // create a list with NULLs in between. - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) - (*topsorted_list)[iter->second] = iter->first; -} - -// Instantiate the template for the combination of token types and FST types -// that we'll need. -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; - -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-decoder.h deleted file mode 100644 index 0152b85447e354b770745b748d266b1ca2d57024..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-decoder.h +++ /dev/null @@ -1,558 +0,0 @@ -// decoder/lattice-faster-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef KALDI_DECODER_LATTICE_FASTER_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_DECODER_H_ - -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "decoder/context_graph.h" -#include "fst/fstlib.h" -#include "fstext/fstext-lib.h" -#include "itf/decodable-itf.h" -#include "lat/determinize-lattice-pruned.h" -#include "lat/kaldi-lattice.h" -#include "util/hash-list.h" - -namespace kaldi { - -struct LatticeFasterDecoderConfig { - BaseFloat beam; - int32 max_active; - int32 min_active; - BaseFloat lattice_beam; - int32 prune_interval; - bool determinize_lattice; // not inspected by this class... used in - // command-line program. - BaseFloat beam_delta; - BaseFloat hash_ratio; - // Note: we don't make prune_scale configurable on the command line, it's not - // a very important parameter. It affects the algorithm that prunes the - // tokens as we go. - BaseFloat prune_scale; - BaseFloat length_penalty; // for balancing the del/ins ratio, suggested -3.0 - - // Most of the options inside det_opts are not actually queried by the - // LatticeFasterDecoder class itself, but by the code that calls it, for - // example in the function DecodeUtteranceLatticeFaster. - fst::DeterminizeLatticePhonePrunedOptions det_opts; - - LatticeFasterDecoderConfig() - : beam(16.0), - max_active(std::numeric_limits::max()), - min_active(200), - lattice_beam(10.0), - prune_interval(25), - determinize_lattice(true), - beam_delta(0.5), - hash_ratio(2.0), - prune_scale(0.1), - length_penalty(0.0) {} - void Register(OptionsItf *opts) { - det_opts.Register(opts); - opts->Register("beam", &beam, - "Decoding beam. Larger->slower, more accurate."); - opts->Register("max-active", &max_active, - "Decoder max active states. Larger->slower; " - "more accurate"); - opts->Register("min-active", &min_active, - "Decoder minimum #active states."); - opts->Register("lattice-beam", &lattice_beam, - "Lattice generation beam. Larger->slower, " - "and deeper lattices"); - opts->Register("prune-interval", &prune_interval, - "Interval (in frames) at " - "which to prune tokens"); - opts->Register( - "determinize-lattice", &determinize_lattice, - "If true, " - "determinize the lattice (lattice-determinization, keeping only " - "best pdf-sequence for each word-sequence)."); - opts->Register( - "beam-delta", &beam_delta, - "Increment used in decoding-- this " - "parameter is obscure and relates to a speedup in the way the " - "max-active constraint is applied. Larger is more accurate."); - opts->Register("hash-ratio", &hash_ratio, - "Setting used in decoder to " - "control hash behavior"); - } - void Check() const { - KALDI_ASSERT(beam > 0.0 && max_active > 1 && lattice_beam > 0.0 && - min_active <= max_active && prune_interval > 0 && - beam_delta > 0.0 && hash_ratio >= 1.0 && prune_scale > 0.0 && - prune_scale < 1.0); - } -}; - -namespace decoder { -// We will template the decoder on the token type as well as the FST type; this -// is a mechanism so that we can use the same underlying decoder code for -// versions of the decoder that support quickly getting the best path -// (LatticeFasterOnlineDecoder, see lattice-faster-online-decoder.h) and also -// those that do not (LatticeFasterDecoder). - -// ForwardLinks are the links from a token to a token on the next frame. -// or sometimes on the current frame (for input-epsilon links). 
-template -struct ForwardLink { - using Label = fst::StdArc::Label; - - Token *next_tok; // the next token [or NULL if represents final-state] - Label ilabel; // ilabel on arc - Label olabel; // olabel on arc - BaseFloat graph_cost; // graph cost of traversing arc (contains LM, etc.) - BaseFloat acoustic_cost; // acoustic cost (pre-scaled) of traversing arc - bool is_start_boundary; - bool is_end_boundary; - float context_score; - ForwardLink *next; // next in singly-linked list of forward arcs (arcs - // in the state-level lattice) from a token. - inline ForwardLink(Token *next_tok, Label ilabel, Label olabel, - BaseFloat graph_cost, BaseFloat acoustic_cost, - bool is_start_boundary, bool is_end_boundary, - ForwardLink *next) - : next_tok(next_tok), - ilabel(ilabel), - olabel(olabel), - graph_cost(graph_cost), - acoustic_cost(acoustic_cost), - is_start_boundary(is_start_boundary), - is_end_boundary(is_end_boundary), - context_score(0), - next(next) {} -}; - -struct StdToken { - using ForwardLinkT = ForwardLink; - using Token = StdToken; - - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. After calling PruneForwardLinks, this equals the - // minimum difference between the cost of the best path that this link is a - // part of, and the cost of the absolute best path, under the assumption that - // any of the currently active states at the decoding front may eventually - // succeed (e.g. if you were to take the currently active states one by one - // and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - Token *next; - - // This function does nothing and should be optimized out; it's needed - // so we can share the regular LatticeFasterDecoderTpl code and the code - // for LatticeFasterOnlineDecoder that supports fast traceback. - inline void SetBackpointer(Token *backpointer) {} - - // This constructor just ignores the 'backpointer' argument. That argument is - // needed so that we can use the same decoder code for LatticeFasterDecoderTpl - // and LatticeFasterOnlineDecoderTpl (which needs backpointers to support a - // fast way to obtain the best path). - inline StdToken(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLinkT *links, - Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - context_state(0), - next(next) {} -}; - -struct BackpointerToken { - using ForwardLinkT = ForwardLink; - using Token = BackpointerToken; - - // BackpointerToken is like Token but also - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. 
After calling PruneForwardLinks, this equals - // the minimum difference between the cost of the best path, and the cost of - // this is on, and the cost of the absolute best path, under the assumption - // that any of the currently active states at the decoding front may - // eventually succeed (e.g. if you were to take the currently active states - // one by one and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - BackpointerToken *next; - - // Best preceding BackpointerToken (could be a on this frame, connected to - // this via an epsilon transition, or on a previous frame). This is only - // required for an efficient GetBestPath function in - // LatticeFasterOnlineDecoderTpl; it plays no part in the lattice generation - // (the "links" list is what stores the forward links, for that). - Token *backpointer; - - inline void SetBackpointer(Token *backpointer) { - this->backpointer = backpointer; - } - - inline BackpointerToken(BaseFloat tot_cost, BaseFloat extra_cost, - ForwardLinkT *links, Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - next(next), - backpointer(backpointer), - context_state(0) {} -}; - -} // namespace decoder - -/** This is the "normal" lattice-generating decoder. - See \ref lattices_generation \ref decoders_faster and \ref decoders_simple - for more information. - - The decoder is templated on the FST type and the token type. The token type - will normally be StdToken, but also may be BackpointerToken which is to - support quick lookup of the current best path (see - lattice-faster-online-decoder.h) - - The FST you invoke this decoder which is expected to equal - Fst::Fst, a.k.a. StdFst, or GrammarFst. If you invoke it with - FST == StdFst and it notices that the actual FST type is - fst::VectorFst or fst::ConstFst, the decoder object - will internally cast itself to one that is templated on those more specific - types; this is an optimization for speed. - */ -template -class LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph); - - // This version of the constructor takes ownership of the fst, and will delete - // it when this object is destroyed. - LatticeFasterDecoderTpl(const LatticeFasterDecoderConfig &config, FST *fst); - - void SetOptions(const LatticeFasterDecoderConfig &config) { - config_ = config; - } - - const LatticeFasterDecoderConfig &GetOptions() const { return config_; } - - ~LatticeFasterDecoderTpl(); - - /// Decodes until there are no more frames left in the "decodable" object.. - /// note, this may block waiting for input if the "decodable" object blocks. - /// Returns true if any kind of traceback is available (not necessarily from a - /// final state). - bool Decode(DecodableInterface *decodable); - - /// says whether a final-state was active on the last frame. 
If it was not, - /// the lattice (or traceback) will end with states that are not final-states. - bool ReachedFinal() const { - return FinalRelativeCost() != std::numeric_limits::infinity(); - } - - /// Outputs an FST corresponding to the single best path through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. Note: this just calls - /// GetRawLattice() and figures out the shortest path. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// Outputs an FST corresponding to the raw, state-level - /// tracebacks. Returns true if result is nonempty. - /// If "use_final_probs" is true AND we reached the final-state - /// of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - /// The raw lattice will be topologically sorted. - /// - /// See also GetRawLatticePruned in lattice-faster-online-decoder.h, - /// which also supports a pruning beam, in case for some reason - /// you want it pruned tighter than the regular lattice beam. - /// We could put that here in future needed. - bool GetRawLattice(Lattice *ofst, bool use_final_probs = true) const; - - /// [Deprecated, users should now use GetRawLattice and determinize it - /// themselves, e.g. using DeterminizeLatticePhonePrunedWrapper]. - /// Outputs an FST corresponding to the lattice-determinized - /// lattice (one path per word sequence). Returns true if result is - /// nonempty. If "use_final_probs" is true AND we reached the final-state of - /// the graph then it will include those as final-probs, else it will treat - /// all final-probs as one. - bool GetLattice(CompactLattice *ofst, bool use_final_probs = true) const; - - /// InitDecoding initializes the decoding, and should only be used if you - /// intend to call AdvanceDecoding(). If you call Decode(), you don't need to - /// call this. You can also call InitDecoding if you have already decoded an - /// utterance and want to start with a new utterance. - void InitDecoding(); - - /// This will decode until there are no more frames ready in the decodable - /// object. You can keep calling it each time more frames become available. - /// If max_num_frames is specified, it specifies the maximum number of frames - /// the function will decode before returning. - void AdvanceDecoding(DecodableInterface *decodable, - int32 max_num_frames = -1); - - /// This function may be optionally called after AdvanceDecoding(), when you - /// do not plan to decode any further. It does an extra pruning step that - /// will help to prune the lattices output by GetLattice and (particularly) - /// GetRawLattice more completely, particularly toward the end of the - /// utterance. If you call this, you cannot call AdvanceDecoding again (it - /// will fail), and you cannot call GetLattice() and related functions with - /// use_final_probs = false. Used to be called PruneActiveTokensFinal(). - void FinalizeDecoding(); - - /// FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives - /// more information. It returns the difference between the best (final-cost - /// plus cost) of any token on the final frame, and the best cost of any token - /// on the final frame. If it is infinity it means no final-states were - /// present on the final frame. It will usually be nonnegative. 
If it not - /// too positive (e.g. < 5 is my first guess, but this is not tested) you can - /// take it as a good indication that we reached the final-state with - /// reasonable likelihood. - BaseFloat FinalRelativeCost() const; - - // Returns the number of frames decoded so far. The value returned changes - // whenever we call ProcessEmitting(). - inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; } - - protected: - // we make things protected instead of private, as code in - // LatticeFasterOnlineDecoderTpl, which inherits from this, also uses the - // internals. - - // Deletes the elements of the singly linked list tok->links. - inline static void DeleteForwardLinks(Token *tok); - - // head of per-frame list of Tokens (list is in topological order), - // and something saying whether we ever pruned it using PruneForwardLinks. - struct TokenList { - Token *toks; - bool must_prune_forward_links; - bool must_prune_tokens; - TokenList() - : toks(NULL), must_prune_forward_links(true), must_prune_tokens(true) {} - }; - - using Elem = typename HashList::Elem; - // Equivalent to: - // struct Elem { - // StateId key; - // Token *val; - // Elem *tail; - // }; - - void PossiblyResizeHash(size_t num_toks); - - // FindOrAddToken either locates a token in hash of toks_, or if necessary - // inserts a new, empty token (i.e. with no forward links) for the current - // frame. [note: it's inserted if necessary into hash toks_ and also into the - // singly linked list of tokens active on this frame (whose head is at - // active_toks_[frame]). The frame_plus_one argument is the acoustic frame - // index plus one, which is used to index into the active_toks_ array. - // Returns the Token pointer. Sets "changed" (if non-NULL) to true if the - // token was newly created or the cost changed. - // If Token == StdToken, the 'backpointer' argument has no purpose (and will - // hopefully be optimized out). - inline Elem *FindOrAddToken(StateId state, int32 frame_plus_one, - BaseFloat tot_cost, Token *backpointer, - bool *changed); - - // prunes outgoing links for all tokens in active_toks_[frame] - // it's called by PruneActiveTokens - // all links, that have link_extra_cost > lattice_beam are pruned - // delta is the amount by which the extra_costs must change - // before we set *extra_costs_changed = true. - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - void PruneForwardLinks(int32 frame_plus_one, bool *extra_costs_changed, - bool *links_pruned, BaseFloat delta); - - // This function computes the final-costs for tokens active on the final - // frame. It outputs to final-costs, if non-NULL, a map from the Token* - // pointer to the final-prob of the corresponding state, for all Tokens - // that correspond to states that have final-probs. This map will be - // empty if there were no final-probs. It outputs to - // final_relative_cost, if non-NULL, the difference between the best - // forward-cost including the final-prob cost, and the best forward-cost - // without including the final-prob cost (this will usually be positive), or - // infinity if there were no final-probs. [c.f. FinalRelativeCost(), which - // outputs this quanitity]. 
It outputs to final_best_cost, if - // non-NULL, the lowest for any token t active on the final frame, of - // forward-cost[t] + final-cost[t], where final-cost[t] is the final-cost in - // the graph of the state corresponding to token t, or the best of - // forward-cost[t] if there were no final-probs active on the final frame. - // You cannot call this after FinalizeDecoding() has been called; in that - // case you should get the answer from class-member variables. - void ComputeFinalCosts(unordered_map *final_costs, - BaseFloat *final_relative_cost, - BaseFloat *final_best_cost) const; - - // PruneForwardLinksFinal is a version of PruneForwardLinks that we call - // on the final frame. If there are final tokens active, it uses - // the final-probs for pruning, otherwise it treats all tokens as final. - void PruneForwardLinksFinal(); - - // Prune away any tokens on this frame that have no forward links. - // [we don't do this in PruneForwardLinks because it would give us - // a problem with dangling pointers]. - // It's called by PruneActiveTokens if any forward links have been pruned - void PruneTokensForFrame(int32 frame_plus_one); - - // Go backwards through still-alive tokens, pruning them if the - // forward+backward cost is more than lat_beam away from the best path. It's - // possible to prove that this is "correct" in the sense that we won't lose - // anything outside of lat_beam, regardless of what happens in the future. - // delta controls when it considers a cost to have changed enough to continue - // going backward and propagating the change. larger delta -> will recurse - // less far. - void PruneActiveTokens(BaseFloat delta); - - /// Gets the weight cutoff. Also counts the active tokens. - BaseFloat GetCutoff(Elem *list_head, size_t *tok_count, - BaseFloat *adaptive_beam, Elem **best_elem); - - /// Processes emitting arcs for one frame. Propagates from prev_toks_ to - /// cur_toks_. Returns the cost cutoff for subsequent ProcessNonemitting() to - /// use. - BaseFloat ProcessEmitting(DecodableInterface *decodable); - - /// Processes nonemitting (epsilon) arcs for one frame. Called after - /// ProcessEmitting() on each frame. The cost cutoff is computed by the - /// preceding ProcessEmitting(). - void ProcessNonemitting(BaseFloat cost_cutoff); - - // HashList defined in ../util/hash-list.h. It actually allows us to maintain - // more than one list (e.g. for current and previous frames), but only one of - // them at a time can be indexed by StateId. It is indexed by frame-index - // plus one, where the frame-index is zero-based, as used in decodable object. - // That is, the emitting probs of frame t are accounted for in tokens at - // toks_[t+1]. The zeroth frame is for nonemitting transition at the start of - // the graph. - HashList toks_; - - std::vector active_toks_; // Lists of tokens, indexed by - // frame (members of TokenList are toks, must_prune_forward_links, - // must_prune_tokens). - std::vector - queue_; // temp variable used in ProcessNonemitting, - std::vector tmp_array_; // used in GetCutoff. - - // fst_ is a pointer to the FST we are decoding from. - const FST *fst_; - // delete_fst_ is true if the pointer fst_ needs to be deleted when this - // object is destroyed. - bool delete_fst_; - - std::vector cost_offsets_; // This contains, for each - // frame, an offset that was added to the acoustic log-likelihoods on that - // frame in order to keep everything in a nice dynamic range i.e. close to - // zero, to reduce roundoff errors. 
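A minimal standalone sketch of the per-frame cost-offset trick described in the comment above, with toy costs and my own variable names rather than the decoder's internals:

```cpp
// Standalone illustration of the cost-offset idea: subtract each frame's best
// acoustic cost while accumulating token costs (keeping them near zero and
// reducing roundoff), remember the offsets, and add them back when the true
// costs are needed, e.g. when writing lattice arc weights.
#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  // Fake acoustic costs (-log likelihoods) for 3 frames x 2 arcs.
  std::vector<std::vector<double>> acoustic_cost = {
      {1050.2, 1051.9}, {1000.1, 998.4}, {1023.0, 1027.5}};

  std::vector<double> cost_offsets;  // analogous role to cost_offsets_
  double running_cost = 0.0;         // cost of a path that follows arc 0

  for (const auto& frame : acoustic_cost) {
    double best = *std::min_element(frame.begin(), frame.end());
    cost_offsets.push_back(best);    // remember what was subtracted
    running_cost += frame[0] - best; // accumulate arc 0's cost minus the offset
  }

  // Recover the true path cost by adding the offsets back.
  double true_cost = running_cost;
  for (double off : cost_offsets) true_cost += off;

  std::cout << "offset-relative cost: " << running_cost << "\n";  // stays small
  std::cout << "true cost:            " << true_cost << "\n";
}
```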
- LatticeFasterDecoderConfig config_; - int32 num_toks_; // current total #toks allocated... - bool warned_; - - /// decoding_finalized_ is true if someone called FinalizeDecoding(). [note, - /// calling this is optional]. If true, it's forbidden to decode more. Also, - /// if this is set, then the output of ComputeFinalCosts() is in the next - /// three variables. The reason we need to do this is that after - /// FinalizeDecoding() calls PruneTokensForFrame() for the final frame, some - /// of the tokens on the last frame are freed, so we free the list from toks_ - /// to avoid having dangling pointers hanging around. - bool decoding_finalized_; - /// For the meaning of the next 3 variables, see the comment for - /// decoding_finalized_ above., and ComputeFinalCosts(). - unordered_map final_costs_; - BaseFloat final_relative_cost_; - BaseFloat final_best_cost_; - - std::shared_ptr context_graph_ = nullptr; - - // There are various cleanup tasks... the toks_ structure contains - // singly linked lists of Token pointers, where Elem is the list type. - // It also indexes them in a hash, indexed by state (this hash is only - // maintained for the most recent frame). toks_.Clear() - // deletes them from the hash and returns the list of Elems. The - // function DeleteElems calls toks_.Delete(elem) for each elem in - // the list, which returns ownership of the Elem to the toks_ structure - // for reuse, but does not delete the Token pointer. The Token pointers - // are reference-counted and are ultimately deleted in PruneTokensForFrame, - // but are also linked together on each frame by their own linked-list, - // using the "next" pointer. We delete them manually. - void DeleteElems(Elem *list); - - // This function takes a singly linked list of tokens for a single frame, and - // outputs a list of them in topological order (it will crash if no such order - // can be found, which will typically be due to decoding graphs with epsilon - // cycles, which are not allowed). Note: the output list may contain NULLs, - // which the caller should pass over; it just happens to be more efficient for - // the algorithm to output a list that contains NULLs. - static void TopSortTokens(Token *tok_list, - std::vector *topsorted_list); - - void ClearActiveTokens(); - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterDecoderTpl); -}; - -typedef LatticeFasterDecoderTpl - LatticeFasterDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-online-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-online-decoder.cc deleted file mode 100644 index 2345b4d129ff905784762e973bad279f2fb55d31..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-online-decoder.cc +++ /dev/null @@ -1,278 +0,0 @@ -// decoder/lattice-faster-online-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2014 IMSL, PKU-HKUST (author: Wei Shi) -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -// see note at the top of lattice-faster-decoder.cc, about how to maintain this -// file in sync with lattice-faster-decoder.cc - -#include -#include -#include -#include - -#include "decoder/lattice-faster-online-decoder.h" - -namespace kaldi { - -template -bool LatticeFasterOnlineDecoderTpl::TestGetBestPath( - bool use_final_probs) const { - Lattice lat1; - { - Lattice raw_lat; - this->GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, &lat1); - } - Lattice lat2; - GetBestPath(&lat2, use_final_probs); - BaseFloat delta = 0.1; - int32 num_paths = 1; - if (!fst::RandEquivalent(lat1, lat2, num_paths, delta, rand())) { - KALDI_WARN << "Best-path test failed"; - return false; - } else { - return true; - } -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterOnlineDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - olat->DeleteStates(); - BaseFloat final_graph_cost; - BestPathIterator iter = BestPathEnd(use_final_probs, &final_graph_cost); - if (iter.Done()) return false; // would have printed warning. - StateId state = olat->AddState(); - olat->SetFinal(state, LatticeWeight(final_graph_cost, 0.0)); - while (!iter.Done()) { - LatticeArc arc; - iter = TraceBackBestPath(iter, &arc); - arc.nextstate = state; - StateId new_state = olat->AddState(); - olat->AddArc(new_state, arc); - state = new_state; - } - olat->SetStart(state); - return true; -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::BestPathEnd( - bool use_final_probs, BaseFloat *final_cost_out) const { - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "BestPathEnd() with use_final_probs == false"; - KALDI_ASSERT(this->NumFramesDecoded() > 0 && - "You cannot call BestPathEnd if no frames were decoded."); - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - // Singly linked list of tokens on last frame (access list through "next" - // pointer). - BaseFloat best_cost = std::numeric_limits::infinity(); - BaseFloat best_final_cost = 0; - Token *best_tok = NULL; - for (Token *tok = this->active_toks_.back().toks; tok != NULL; - tok = tok->next) { - BaseFloat cost = tok->tot_cost, final_cost = 0.0; - if (use_final_probs && !final_costs.empty()) { - // if we are instructed to use final-probs, and any final tokens were - // active on final frame, include the final-prob in the cost of the token. 
- typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) { - final_cost = iter->second; - cost += final_cost; - } else { - cost = std::numeric_limits::infinity(); - } - } - if (cost < best_cost) { - best_cost = cost; - best_tok = tok; - best_final_cost = final_cost; - } - } - if (best_tok == - NULL) { // this should not happen, and is likely a code error or - // caused by infinities in likelihoods, but I'm not making - // it a fatal error for now. - KALDI_WARN << "No final token found."; - } - if (final_cost_out) *final_cost_out = best_final_cost; - return BestPathIterator(best_tok, this->NumFramesDecoded() - 1); -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::TraceBackBestPath(BestPathIterator iter, - LatticeArc *oarc) const { - KALDI_ASSERT(!iter.Done() && oarc != NULL); - Token *tok = static_cast(iter.tok); - int32 cur_t = iter.frame, step_t = 0; - if (tok->backpointer != NULL) { - // retrieve the correct forward link(with the best link cost) - BaseFloat best_cost = std::numeric_limits::infinity(); - ForwardLinkT *link; - for (link = tok->backpointer->links; link != NULL; link = link->next) { - if (link->next_tok == tok) { // this is a link to "tok" - BaseFloat graph_cost = link->graph_cost, - acoustic_cost = link->acoustic_cost; - BaseFloat cost = graph_cost + acoustic_cost; - if (cost < best_cost) { - oarc->ilabel = link->ilabel; - oarc->olabel = link->olabel; - if (link->ilabel != 0) { - KALDI_ASSERT(static_cast(cur_t) < - this->cost_offsets_.size()); - acoustic_cost -= this->cost_offsets_[cur_t]; - step_t = -1; - } else { - step_t = 0; - } - oarc->weight = LatticeWeight(graph_cost, acoustic_cost); - best_cost = cost; - } - } - } - if (link == NULL && - best_cost == - std::numeric_limits::infinity()) { // Did not find - // correct link. - KALDI_ERR << "Error tracing best-path back (likely " - << "bug in token-pruning algorithm)"; - } - } else { - oarc->ilabel = 0; - oarc->olabel = 0; - oarc->weight = LatticeWeight::One(); // zero costs. - } - return BestPathIterator(tok->backpointer, cur_t + step_t); -} - -template -bool LatticeFasterOnlineDecoderTpl::GetRawLatticePruned( - Lattice *ofst, bool use_final_probs, BaseFloat beam) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = this->active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - for (int32 f = 0; f <= num_frames; f++) { - if (this->active_toks_[f].toks == NULL) { - KALDI_WARN << "No tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - } - unordered_map tok_map; - std::queue > tok_queue; - // First initialize the queue and states. 
Put the initial state on the queue; - // this is the last token in the list active_toks_[0].toks. - for (Token *tok = this->active_toks_[0].toks; tok != NULL; tok = tok->next) { - if (tok->next == NULL) { - tok_map[tok] = ofst->AddState(); - ofst->SetStart(tok_map[tok]); - std::pair tok_pair(tok, 0); // #frame = 0 - tok_queue.push(tok_pair); - } - } - - // Next create states for "good" tokens - while (!tok_queue.empty()) { - std::pair cur_tok_pair = tok_queue.front(); - tok_queue.pop(); - Token *cur_tok = cur_tok_pair.first; - int32 cur_frame = cur_tok_pair.second; - KALDI_ASSERT(cur_frame >= 0 && cur_frame <= this->cost_offsets_.size()); - - typename unordered_map::const_iterator iter = - tok_map.find(cur_tok); - KALDI_ASSERT(iter != tok_map.end()); - StateId cur_state = iter->second; - - for (ForwardLinkT *l = cur_tok->links; l != NULL; l = l->next) { - Token *next_tok = l->next_tok; - if (next_tok->extra_cost < beam) { - // so both the current and the next token are good; create the arc - int32 next_frame = l->ilabel == 0 ? cur_frame : cur_frame + 1; - StateId nextstate; - if (tok_map.find(next_tok) == tok_map.end()) { - nextstate = tok_map[next_tok] = ofst->AddState(); - tok_queue.push(std::pair(next_tok, next_frame)); - } else { - nextstate = tok_map[next_tok]; - } - BaseFloat cost_offset = - (l->ilabel != 0 ? this->cost_offsets_[cur_frame] : 0); - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(cur_state, arc); - } - } - if (cur_frame == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(cur_tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - return (ofst->NumStates() != 0); -} - -// Instantiate the template for the FST types that we'll need. -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-online-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-online-decoder.h deleted file mode 100644 index dc50cfa73e6574e9625eda9045c47f674fcbc1e3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/decoder/lattice-faster-online-decoder.h +++ /dev/null @@ -1,131 +0,0 @@ -// decoder/lattice-faster-online-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -// see note at the top of lattice-faster-decoder.h, about how to maintain this -// file in sync with lattice-faster-decoder.h - -#ifndef KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ - -#include "decoder/lattice-faster-decoder.h" - -#include - -namespace kaldi { - -/** LatticeFasterOnlineDecoderTpl is as LatticeFasterDecoderTpl but also - supports an efficient way to get the best path (see the function - BestPathEnd()), which is useful in endpointing and in situations where you - might want to frequently access the best path. - - This is only templated on the FST type, since the Token type is required to - be BackpointerToken. Actually it only makes sense to instantiate - LatticeFasterDecoderTpl with Token == BackpointerToken if you do so - indirectly via this child class. - */ -template -class LatticeFasterOnlineDecoderTpl - : public LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using Token = decoder::BackpointerToken; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterOnlineDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : LatticeFasterDecoderTpl(fst, config, context_graph) {} - - // This version of the initializer takes ownership of 'fst', and will delete - // it when this object is destroyed. - LatticeFasterOnlineDecoderTpl(const LatticeFasterDecoderConfig &config, - FST *fst) - : LatticeFasterDecoderTpl(config, fst) {} - - struct BestPathIterator { - void *tok; - int32 frame; - // note, "frame" is the frame-index of the frame you'll get the - // transition-id for next time, if you call TraceBackBestPath on this - // iterator (assuming it's not an epsilon transition). Note that this - // is one less than you might reasonably expect, e.g. it's -1 for - // the nonemitting transitions before the first frame. - BestPathIterator(void *t, int32 f) : tok(t), frame(f) {} - bool Done() const { return tok == NULL; } - }; - - /// Outputs an FST corresponding to the single best path through the lattice. - /// This is quite efficient because it doesn't get the entire raw lattice and - /// find the best path through it; instead, it uses the BestPathEnd and - /// BestPathIterator so it basically traces it back through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// This function does a self-test of GetBestPath(). Returns true on - /// success; returns false and prints a warning on failure. - bool TestGetBestPath(bool use_final_probs = true) const; - - /// This function returns an iterator that can be used to trace back - /// the best path. 
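A minimal standalone sketch of the backpointer-based traceback that BestPathEnd()/TraceBackBestPath() are documented to perform; the `Tok` struct and labels below are simplified stand-ins, not the decoder's token type:

```cpp
// Standalone illustration: start from the best token on the last frame and
// follow backpointers one link at a time, then reverse to get the path in
// forward order -- the same shape of traversal the iterator API describes.
#include <iostream>
#include <string>
#include <vector>

struct Tok {
  const Tok* backpointer;  // token on the previous frame (nullptr at the start)
  std::string label;       // label on the arc that reached this token
};

int main() {
  // Chain: start -> "a" -> "b" -> "c"; "c" is the best token on the last frame.
  Tok start{nullptr, ""}, a{&start, "a"}, b{&a, "b"}, best{&b, "c"};

  std::vector<std::string> labels;
  for (const Tok* it = &best; it->backpointer != nullptr; it = it->backpointer)
    labels.push_back(it->label);                 // collects c, b, a
  for (auto rit = labels.rbegin(); rit != labels.rend(); ++rit)
    std::cout << *rit << " ";                    // prints: a b c
  std::cout << "\n";
}
```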
If use_final_probs == true and at least one final state - /// survived till the end, it will use the final-probs in working out the best - /// final Token, and will output the final cost to *final_cost (if non-NULL), - /// else it will use only the forward likelihood, and will put zero in - /// *final_cost (if non-NULL). - /// Requires that NumFramesDecoded() > 0. - BestPathIterator BestPathEnd(bool use_final_probs, - BaseFloat *final_cost = NULL) const; - - /// This function can be used in conjunction with BestPathEnd() to trace back - /// the best path one link at a time (e.g. this can be useful in endpoint - /// detection). By "link" we mean a link in the graph; not all links cross - /// frame boundaries, but each time you see a nonzero ilabel you can interpret - /// that as a frame. The return value is the updated iterator. It outputs - /// the ilabel and olabel, and the (graph and acoustic) weight to the "arc" - /// pointer, while leaving its "nextstate" variable unchanged. - BestPathIterator TraceBackBestPath(BestPathIterator iter, - LatticeArc *arc) const; - - /// Behaves the same as GetRawLattice but only processes tokens whose - /// extra_cost is smaller than the best-cost plus the specified beam. - /// It is only worthwhile to call this function if beam is less than - /// the lattice_beam specified in the config; otherwise, it would - /// return essentially the same thing as GetRawLattice, but more slowly. - bool GetRawLatticePruned(Lattice *ofst, bool use_final_probs, - BaseFloat beam) const; - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterOnlineDecoderTpl); -}; - -typedef LatticeFasterOnlineDecoderTpl LatticeFasterOnlineDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstaddselfloops.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstaddselfloops.cc deleted file mode 100644 index 145bf006f2324136c5fea4a8d0012a7a4126c646..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstaddselfloops.cc +++ /dev/null @@ -1,100 +0,0 @@ -// fstbin/fstaddselfloops.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#include "util/simple-io-funcs.h" - -/* some test examples: - pushd ~/tmpdir - ( echo 3; echo 4) > in.list - ( echo 5; echo 6) > out.list - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstaddselfloops in.list out.list - | fstprint ( echo "0 1 0 1"; echo " 0 2 1 0"; echo "1 0"; echo "2 0"; ) | - fstcompile | fstaddselfloops in.list out.list | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Adds self-loops to states of an FST to propagate disambiguation " - "symbols through it\n" - "They are added on each final state and each state with non-epsilon " - "output symbols\n" - "on at least one arc out of the state. Useful in conjunction with " - "predeterminize\n" - "\n" - "Usage: fstaddselfloops in-disambig-list out-disambig-list [in.fst " - "[out.fst] ]\n" - "E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst\n" - "in.list and out.list are lists of integers, one per line, of the\n" - "same length.\n"; - - ParseOptions po(usage); - po.Read(argc, argv); - - if (po.NumArgs() < 2 || po.NumArgs() > 4) { - po.PrintUsage(); - exit(1); - } - - std::string disambig_in_rxfilename = po.GetArg(1), - disambig_out_rxfilename = po.GetArg(2), - fst_in_filename = po.GetOptArg(3), - fst_out_filename = po.GetOptArg(4); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - std::vector disambig_in; - if (!ReadIntegerVectorSimple(disambig_in_rxfilename, &disambig_in)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_in_rxfilename); - - std::vector disambig_out; - if (!ReadIntegerVectorSimple(disambig_out_rxfilename, &disambig_out)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_out_rxfilename); - - if (disambig_in.size() != disambig_out.size()) - KALDI_ERR - << "fstaddselfloops: mismatch in size of disambiguation symbols"; - - AddSelfLoops(fst, disambig_in, disambig_out); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstdeterminizestar.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstdeterminizestar.cc deleted file mode 100644 index e818143025c0fd5d389c28c77715d65711fe63f1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstdeterminizestar.cc +++ /dev/null @@ -1,114 +0,0 @@ -// fstbin/fstdeterminizestar.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#if !defined(_MSC_VER) && !defined(__APPLE__) -#include // Comment this line and the call to signal below if -// it causes compilation problems. It is only to enable a debugging procedure -// when determinization does not terminate. We are disabling this code if -// compiling on Windows because signal.h is not available there, and on -// MacOS due to a problem with in the initial release of Sierra. -#endif - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 1 1 0"; echo "0 0" ) | fstcompile | - fstdeterminizestar | fstprint # this last one fails [correctly]: ( echo "0 0 0 - 1"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - - cd ~/tmpdir - while true; do - fstrand > 1.fst - fstpredeterminize out.lst 1.fst | fstdeterminizestar | fstrmsymbols out.lst - > 2.fst fstequivalent --random=true 1.fst 2.fst || echo "Test failed" echo -n - "." done - - Test of debugging [with non-determinizable input]: - ( echo " 0 0 1 0 1.0"; echo "0 1 1 0"; echo "1 1 1 0 0"; echo "0 2 2 0"; echo - "2"; echo "1" ) | fstcompile | fstdeterminizestar kill -SIGUSR1 [the process-id - of fstdeterminizestar] # prints out a bunch of debugging output showing the - mess it got itself into. -*/ - -bool debug_location = false; -void signal_handler(int) { debug_location = true; } - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Removes epsilons and determinizes in one step\n" - "\n" - "Usage: fstdeterminizestar [in.fst [out.fst] ]\n" - "\n" - "See also: fstdeterminizelog, lattice-determinize\n"; - - float delta = kDelta; - int max_states = -1; - bool use_log = false; - ParseOptions po(usage); - po.Register("use-log", &use_log, "Determinize in log semiring."); - po.Register("delta", &delta, - "Delta value used to determine equivalence of weights."); - po.Register( - "max-states", &max_states, - "Maximum number of states in determinized FST before it will abort."); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_str = po.GetOptArg(1), fst_out_str = po.GetOptArg(2); - - // This enables us to get traceback info from determinization that is - // not seeming to terminate. -#if !defined(_MSC_VER) && !defined(__APPLE__) - signal(SIGUSR1, signal_handler); -#endif - // Normal case: just files. - VectorFst *fst = ReadFstKaldi(fst_in_str); - - ArcSort(fst, ILabelCompare()); // improves speed. 
- if (use_log) { - DeterminizeStarInLog(fst, delta, &debug_location, max_states); - } else { - VectorFst det_fst; - DeterminizeStar(*fst, &det_fst, delta, &debug_location, max_states); - *fst = det_fst; // will do shallow copy and then det_fst goes - // out of scope anyway. - } - WriteFstKaldi(*fst, fst_out_str); - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstisstochastic.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstisstochastic.cc deleted file mode 100644 index 468ed0daa7d37cb9a25cf25264f86e48e137b975..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstisstochastic.cc +++ /dev/null @@ -1,91 +0,0 @@ -// fstbin/fstisstochastic.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -// e.g. of test: -// echo " 0 0" | fstcompile | fstisstochastic -// should return 0 and print "0 0" [meaning, min and -// max weight are one = exp(0)] -// echo " 0 1" | fstcompile | fstisstochastic -// should return 1, not stochastic, and print 1 1 -// (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic should return 0, stochastic; it prints "0 -// -1.78e-07" for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo -// "1 0" ) | fstcompile | fstisstochastic --test-in-log=false should return 1, -// not stochastic in tropical; it prints "0 0.693147" for me (echo "0 0 0 0 0 "; -// echo "0 1 0 0 0 "; echo "1 0" ) | fstcompile | fstisstochastic -// --test-in-log=false should return 0, stochastic in tropical; it prints "0 0" -// for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic --test-in-log=false --delta=1 returns 0 even -// though not stochastic because we gave it an absurdly large delta. 
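A minimal standalone sketch of the stochasticity check the test examples above describe: for each state, the probabilities of its outgoing arcs should sum to one, i.e. the negated log of the sum should be near zero, and the tool reports the min/max deviation. The toy weights below are illustrative and no OpenFst types are used:

```cpp
// Standalone illustration of a per-state stochasticity check in log space.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

int main() {
  // Arc weights are -log(probability); state 0 sums to 1.0, state 1 to 0.9.
  std::vector<std::vector<double>> arcs_per_state = {
      {0.693147, 0.693147},            // exp(-0.693147) = 0.5 twice -> 1.0
      {1.203973, 1.203973, 1.203973}   // 0.3 three times            -> 0.9
  };

  double min_err = 1e30, max_err = -1e30;
  for (const auto& arcs : arcs_per_state) {
    double prob_sum = 0.0;
    for (double w : arcs) prob_sum += std::exp(-w);
    double err = -std::log(prob_sum);  // 0 when the state is stochastic
    min_err = std::min(min_err, err);
    max_err = std::max(max_err, err);
  }
  // A stochastic machine would print roughly "0 0" (cf. the tool's min/max
  // output); a positive max means some state leaks probability mass.
  std::cout << min_err << " " << max_err << "\n";
}
```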
- -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Checks whether an FST is stochastic and exits with success if so.\n" - "Prints out maximum error (in log units).\n" - "\n" - "Usage: fstisstochastic [ in.fst ]\n"; - - float delta = 0.01; - bool test_in_log = true; - - ParseOptions po(usage); - po.Register("delta", &delta, "Maximum error to accept."); - po.Register("test-in-log", &test_in_log, - "Test stochasticity in log semiring."); - po.Read(argc, argv); - - if (po.NumArgs() > 1) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1); - - Fst *fst = ReadFstKaldiGeneric(fst_in_filename); - - bool ans; - StdArc::Weight min, max; - if (test_in_log) - ans = IsStochasticFstInLog(*fst, delta, &min, &max); - else - ans = IsStochasticFst(*fst, delta, &min, &max); - - std::cout << min.Value() << " " << max.Value() << '\n'; - delete fst; - if (ans) - return 0; // success; - else - return 1; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstminimizeencoded.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstminimizeencoded.cc deleted file mode 100644 index ae9ca6d75abe67d9a195572dd6d91ec3c7b44851..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fstminimizeencoded.cc +++ /dev/null @@ -1,74 +0,0 @@ -// fstbin/fstminimizeencoded.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstminimizeencoded | fstprint - ( echo "0 1 0 0"; echo " 0 2 0 0"; echo "1 0"; echo "2 0"; ) | fstcompile | - fstminimizeencoded | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Minimizes FST after encoding [similar to fstminimize, but no " - "weight-pushing]\n" - "\n" - "Usage: fstminimizeencoded [in.fst [out.fst] ]\n"; - - float delta = kDelta; - ParseOptions po(usage); - po.Register("delta", &delta, - "Delta likelihood used for quantization of weights"); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1), - fst_out_filename = po.GetOptArg(2); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - MinimizeEncoded(fst, delta); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fsttablecompose.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fsttablecompose.cc deleted file mode 100644 index bdd476da78b8cb8823c60abf33b5278e05bfd92c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstbin/fsttablecompose.cc +++ /dev/null @@ -1,133 +0,0 @@ -// fstbin/fsttablecompose.cc - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "fstext/table-matcher.h" -#include "util/parse-options.h" - -/* - cd ~/tmpdir - while true; do - fstrand | fstarcsort --sort_type=olabel > 1.fst; fstrand | fstarcsort - > 2.fst fstcompose 1.fst 2.fst > 3a.fst fsttablecompose 1.fst 2.fst > 3b.fst - fstequivalent --random=true 3a.fst 3b.fst || echo "Test failed" - echo -n "." - done - -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - /* - fsttablecompose should always give equivalent results to compose, - but it is more efficient for certain kinds of inputs. 
- In particular, it is useful when, say, the left FST has states - that typically either have epsilon olabels, or - one transition out for each of the possible symbols (as the - olabel). The same with the input symbols of the right-hand FST - is possible. - */ - - const char *usage = - "Composition algorithm [between two FSTs of standard type, in " - "tropical\n" - "semiring] that is more efficient for certain cases-- in particular,\n" - "where one of the FSTs (the left one, if --match-side=left) has large\n" - "out-degree\n" - "\n" - "Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) " - "(fst2-rxfilename|fst2-rspecifier) [(out-rxfilename|out-rspecifier)]\n"; - - ParseOptions po(usage); - - TableComposeOptions opts; - std::string match_side = "left"; - std::string compose_filter = "sequence"; - - po.Register("connect", &opts.connect, "If true, trim FST before output."); - po.Register("match-side", &match_side, - "Side of composition to do table " - "match, one of: \"left\" or \"right\"."); - po.Register("compose-filter", &compose_filter, - "Composition filter to use, " - "one of: \"alt_sequence\", \"auto\", \"match\", \"sequence\""); - - po.Read(argc, argv); - - if (match_side == "left") { - opts.table_match_type = MATCH_OUTPUT; - } else if (match_side == "right") { - opts.table_match_type = MATCH_INPUT; - } else { - KALDI_ERR << "Invalid match-side option: " << match_side; - } - - if (compose_filter == "alt_sequence") { - opts.filter_type = ALT_SEQUENCE_FILTER; - } else if (compose_filter == "auto") { - opts.filter_type = AUTO_FILTER; - } else if (compose_filter == "match") { - opts.filter_type = MATCH_FILTER; - } else if (compose_filter == "sequence") { - opts.filter_type = SEQUENCE_FILTER; - } else { - KALDI_ERR << "Invalid compose-filter option: " << compose_filter; - } - - if (po.NumArgs() < 2 || po.NumArgs() > 3) { - po.PrintUsage(); - exit(1); - } - - std::string fst1_in_str = po.GetArg(1), fst2_in_str = po.GetArg(2), - fst_out_str = po.GetOptArg(3); - - VectorFst *fst1 = ReadFstKaldi(fst1_in_str); - - VectorFst *fst2 = ReadFstKaldi(fst2_in_str); - - // Checks if is olabel sorted and is ilabel sorted. - if (fst1->Properties(fst::kOLabelSorted, true) == 0) { - KALDI_WARN << "The first FST is not olabel sorted."; - } - if (fst2->Properties(fst::kILabelSorted, true) == 0) { - KALDI_WARN << "The second FST is not ilabel sorted."; - } - - VectorFst composed_fst; - - TableCompose(*fst1, *fst2, &composed_fst, opts); - - delete fst1; - delete fst2; - - WriteFstKaldi(composed_fst, fst_out_str); - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstext/determinize-lattice-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstext/determinize-lattice-inl.h deleted file mode 100644 index 0bfbc8f41c7e439b1fac037f60490e04fdcbdd8b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/fstext/determinize-lattice-inl.h +++ /dev/null @@ -1,1357 +0,0 @@ -// fstext/determinize-lattice-inl.h - -// Copyright 2009-2012 Microsoft Corporation -// 2012-2013 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -#define KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -// Do not include this file directly. It is included by determinize-lattice.h - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fst { - -// This class maps back and forth from/to integer id's to sequences of strings. -// used in determinization algorithm. It is constructed in such a way that -// finding the string-id of the successor of (string, next-label) has constant -// time. - -// Note: class IntType, typically int32, is the type of the element in the -// string (typically a template argument of the CompactLatticeWeightTpl). - -template -class LatticeStringRepository { - public: - struct Entry { - const Entry *parent; // NULL for empty string. - IntType i; - inline bool operator==(const Entry &other) const { - return (parent == other.parent && i == other.i); - } - Entry() {} - Entry(const Entry &e) : parent(e.parent), i(e.i) {} - }; - // Note: all Entry* pointers returned in function calls are - // owned by the repository itself, not by the caller! - - // Interface guarantees empty string is NULL. - inline const Entry *EmptyString() { return NULL; } - - // Returns string of "parent" with i appended. Pointer - // owned by repository - const Entry *Successor(const Entry *parent, IntType i) { - new_entry_->parent = parent; - new_entry_->i = i; - - std::pair pr = set_.insert(new_entry_); - if (pr.second) { // Was successfully inserted (was not there). We need to - // replace the element we inserted, which resides on the - // stack, with one from the heap. - const Entry *ans = new_entry_; - new_entry_ = new Entry(); - return ans; - } else { // Was not inserted because an equivalent Entry already - // existed. - return *pr.first; - } - } - - const Entry *Concatenate(const Entry *a, const Entry *b) { - if (a == NULL) - return b; - else if (b == NULL) - return a; - std::vector v; - ConvertToVector(b, &v); - const Entry *ans = a; - for (size_t i = 0; i < v.size(); i++) ans = Successor(ans, v[i]); - return ans; - } - const Entry *CommonPrefix(const Entry *a, const Entry *b) { - std::vector a_vec, b_vec; - ConvertToVector(a, &a_vec); - ConvertToVector(b, &b_vec); - const Entry *ans = NULL; - for (size_t i = 0; - i < a_vec.size() && i < b_vec.size() && a_vec[i] == b_vec[i]; i++) - ans = Successor(ans, a_vec[i]); - return ans; - } - - // removes any elements from b that are not part of - // a common prefix with a. - void ReduceToCommonPrefix(const Entry *a, std::vector *b) { - size_t a_size = Size(a), b_size = b->size(); - while (a_size > b_size) { - a = a->parent; - a_size--; - } - if (b_size > a_size) b_size = a_size; - typename std::vector::iterator b_begin = b->begin(); - while (a_size != 0) { - if (a->i != *(b_begin + a_size - 1)) b_size = a_size - 1; - a = a->parent; - a_size--; - } - if (b_size != b->size()) b->resize(b_size); - } - - // removes the first n elements of a. 
- const Entry *RemovePrefix(const Entry *a, size_t n) { - if (n == 0) return a; - std::vector a_vec; - ConvertToVector(a, &a_vec); - assert(a_vec.size() >= n); - const Entry *ans = NULL; - for (size_t i = n; i < a_vec.size(); i++) ans = Successor(ans, a_vec[i]); - return ans; - } - - // Returns true if a is a prefix of b. If a is prefix of b, - // time taken is |b| - |a|. Else, time taken is |b|. - bool IsPrefixOf(const Entry *a, const Entry *b) const { - if (a == NULL) return true; // empty string prefix of all. - if (a == b) return true; - if (b == NULL) return false; - return IsPrefixOf(a, b->parent); - } - - inline size_t Size(const Entry *entry) const { - size_t ans = 0; - while (entry != NULL) { - ans++; - entry = entry->parent; - } - return ans; - } - - void ConvertToVector(const Entry *entry, std::vector *out) const { - size_t length = Size(entry); - out->resize(length); - if (entry != NULL) { - typename std::vector::reverse_iterator iter = out->rbegin(); - while (entry != NULL) { - *iter = entry->i; - entry = entry->parent; - ++iter; - } - } - } - - const Entry *ConvertFromVector(const std::vector &vec) { - const Entry *e = NULL; - for (size_t i = 0; i < vec.size(); i++) e = Successor(e, vec[i]); - return e; - } - - LatticeStringRepository() { new_entry_ = new Entry; } - - void Destroy() { - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) - delete *iter; - SetType tmp; - tmp.swap(set_); - if (new_entry_) { - delete new_entry_; - new_entry_ = NULL; - } - } - - // Rebuild will rebuild this object, guaranteeing only - // to preserve the Entry values that are in the vector pointed - // to (this list does not have to be unique). The point of - // this is to save memory. - void Rebuild(const std::vector &to_keep) { - SetType tmp_set; - for (typename std::vector::const_iterator iter = - to_keep.begin(); - iter != to_keep.end(); ++iter) - RebuildHelper(*iter, &tmp_set); - // Now delete all elems not in tmp_set. - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) { - if (tmp_set.count(*iter) == 0) - delete (*iter); // delete the Entry; not needed. - } - set_.swap(tmp_set); - } - - ~LatticeStringRepository() { Destroy(); } - int32 MemSize() const { - return set_.size() * sizeof(Entry) * 2; // this is a lower bound - // on the size this structure might take. - } - - private: - class EntryKey { // Hash function object. - public: - inline size_t operator()(const Entry *entry) const { - size_t prime = 49109; - return static_cast(entry->i) + - prime * reinterpret_cast(entry->parent); - } - }; - class EntryEqual { - public: - inline bool operator()(const Entry *e1, const Entry *e2) const { - return (*e1 == *e2); - } - }; - typedef std::unordered_set SetType; - - void RebuildHelper(const Entry *to_add, SetType *tmp_set) { - while (true) { - if (to_add == NULL) return; - typename SetType::iterator iter = tmp_set->find(to_add); - if (iter == tmp_set->end()) { // not in tmp_set. - tmp_set->insert(to_add); - to_add = to_add->parent; // and loop. - } else { - return; - } - } - } - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeStringRepository); - Entry *new_entry_; // We always have a pre-allocated Entry ready to use, - // to avoid unnecessary news and deletes. - SetType set_; -}; - -// class LatticeDeterminizer is templated on the same types that -// CompactLatticeWeight is templated on: the base weight (Weight), typically -// LatticeWeightTpl etc. but could also be e.g. 
TropicalWeight, and the -// IntType, typically int32, used for the output symbols in the compact -// representation of strings [note: the output symbols would usually be -// p.d.f. id's in the anticipated use of this code] It has a special requirement -// on the Weight type: that there should be a Compare function on the weights -// such that Compare(w1, w2) returns -1 if w1 < w2, 0 if w1 == w2, and +1 if w1 -// > w2. This requires that there be a total order on the weights. - -template -class LatticeDeterminizer { - public: - // Output to Gallic acceptor (so the strings go on weights, and there is a 1-1 - // correspondence between our states and the states in ofst. If destroy == - // true, release memory as we go (but we cannot output again). - - typedef CompactLatticeWeightTpl CompactWeight; - typedef ArcTpl - CompactArc; // arc in compact, acceptor form of lattice - typedef ArcTpl Arc; // arc in non-compact version of lattice - - // Output to standard FST with CompactWeightTpl as its weight type - // (the weight stores the original output-symbol strings). If destroy == - // true, release memory as we go (but we cannot output again). - void Output(MutableFst *ofst, bool destroy = true) { - assert(determinized_); - typedef typename Arc::StateId StateId; - StateId nStates = static_cast(output_arcs_.size()); - if (destroy) FreeMostMemory(); - ofst->DeleteStates(); - ofst->SetStart(kNoStateId); - if (nStates == 0) { - return; - } - for (StateId s = 0; s < nStates; s++) { - OutputStateId news = ofst->AddState(); - assert(news == s); - } - ofst->SetStart(0); - // now process transitions. - for (StateId this_state = 0; this_state < nStates; this_state++) { - std::vector &this_vec(output_arcs_[this_state]); - typename std::vector::const_iterator iter = this_vec.begin(), - end = this_vec.end(); - - for (; iter != end; ++iter) { - const TempArc &temp_arc(*iter); - CompactArc new_arc; - std::vector
is not treated as epsilon, create a common end state for - // all transitions accepting the </s>
, since they do not back off. This small - // optimization saves about 2% states in an average grammar. - if (sub_eps_ == 0) { - eos_state_ = fst_->AddState(); - fst_->SetFinal(eos_state_, 0); - } -} - -template -void ArpaLmCompilerImpl::ConsumeNGram(const NGram& ngram, - bool is_highest) { - // Generally, we do the following. Suppose we are adding an n-gram "A B - // C". Then find the node for "A B", add a new node for "A B C", and connect - // them with the arc accepting "C" with the specified weight. Also, add a - // backoff arc from the new "A B C" node to its backoff state "B C". - // - // Two notable exceptions are the highest order n-grams, and final n-grams. - // - // When adding a highest order n-gram (e. g., our "A B C" is in a 3-gram LM), - // the following optimization is performed. There is no point adding a node - // for "A B C" with a "C" arc from "A B", since there will be no other - // arcs ingoing to this node, and an epsilon backoff arc into the backoff - // model "B C", with the weight of \bar{1}. To save a node, create an arc - // accepting "C" directly from "A B" to "B C". This saves as many nodes - // as there are the highest order n-grams, which is typically about half - // the size of a large 3-gram model. - // - // Indeed, this does not apply to n-grams ending in EOS, since they do not - // back off. These are special, as they do not have a back-off state, and - // the node for "(..anything..) " is always final. These are handled - // in one of the two possible ways, If symbols and are being - // replaced by epsilons, neither node nor arc is created, and the logprob - // of the n-gram is applied to its source node as final weight. If and - // are preserved, then a special final node for is allocated and - // used as the destination of the "" acceptor arc. - HistKey heads(ngram.words.begin(), ngram.words.end() - 1); - typename HistoryMap::iterator source_it = history_.find(heads); - if (source_it == history_.end()) { - // There was no "A B", therefore the probability of "A B C" is zero. - // Print a warning and discard current n-gram. - if (parent_->ShouldWarn()) - KALDI_WARN << parent_->LineReference() - << " skipped: no parent (n-1)-gram exists"; - return; - } - - StateId source = source_it->second; - StateId dest; - Symbol sym = ngram.words.back(); - float weight = -ngram.logprob; - if (sym == sub_eps_ || sym == 0) { - KALDI_ERR << " or disambiguation symbol " << sym - << "found in the ARPA file. "; - } - if (sym == eos_symbol_) { - if (sub_eps_ == 0) { - // Keep as a real symbol when not substituting. - dest = eos_state_; - } else { - // Treat as if it was epsilon: mark source final, with the weight - // of the n-gram. - fst_->SetFinal(source, weight); - return; - } - } else { - // For the highest order n-gram, this may find an existing state, for - // non-highest, will create one (unless there are duplicate n-grams - // in the grammar, which cannot be reliably detected if highest order, - // so we better do not do that at all). - dest = AddStateWithBackoff( - HistKey(ngram.words.begin() + (is_highest ? 1 : 0), ngram.words.end()), - -ngram.backoff); - } - - if (sym == bos_symbol_) { - weight = 0; // Accepting is always free. - if (sub_eps_ == 0) { - // is as a real symbol, only accepted in the start state. - source = fst_->AddState(); - fst_->SetStart(source); - } else { - // The new state for unigram history *is* the start state. - fst_->SetStart(dest); - return; - } - } - - // Add arc from source to dest, whichever way it was found. 
- fst_->AddArc(source, fst::StdArc(sym, sym, weight, dest)); - return; -} - -// Find or create a new state for n-gram defined by key, and ensure it has a -// backoff transition. The key is either the current n-gram for all but -// highest orders, or the tails of the n-gram for the highest order. The -// latter arises from the chain-collapsing optimization described above. -template -StateId ArpaLmCompilerImpl::AddStateWithBackoff(HistKey key, - float backoff) { - typename HistoryMap::iterator dest_it = history_.find(key); - if (dest_it != history_.end()) { - // Found an existing state in the history map. Invariant: if the state in - // the map, then its backoff arc is in the FST. We are done. - return dest_it->second; - } - // Otherwise create a new state and its backoff arc, and register in the map. - StateId dest = fst_->AddState(); - history_[key] = dest; - CreateBackoff(key.Tails(), dest, backoff); - return dest; -} - -// Create a backoff arc for a state. Key is a backoff destination that may or -// may not exist. When the destination is not found, naturally fall back to -// the lower order model, and all the way down until one is found (since the -// 0-gram model is always present, the search is guaranteed to terminate). -template -inline void ArpaLmCompilerImpl::CreateBackoff(HistKey key, - StateId state, - float weight) { - typename HistoryMap::iterator dest_it = history_.find(key); - while (dest_it == history_.end()) { - key = key.Tails(); - dest_it = history_.find(key); - } - - // The arc should transduce either or #0 to , depending on the - // epsilon substitution mode. This is the only case when input and output - // label may differ. - fst_->AddArc(state, fst::StdArc(sub_eps_, 0, weight, dest_it->second)); -} - -ArpaLmCompiler::~ArpaLmCompiler() { - if (impl_ != NULL) delete impl_; -} - -void ArpaLmCompiler::HeaderAvailable() { - KALDI_ASSERT(impl_ == NULL); - // Use optimized implementation if the grammar is 4-gram or less, and the - // maximum attained symbol id will fit into the optimized range. - int64 max_symbol = 0; - if (Symbols() != NULL) max_symbol = Symbols()->AvailableKey() - 1; - // If augmenting the symbol table, assume the worst case when all words in - // the model being read are novel. - if (Options().oov_handling == ArpaParseOptions::kAddToSymbols) - max_symbol += NgramCounts()[0]; - - if (NgramCounts().size() <= 4 && max_symbol < OptimizedHistKey::kMaxData) { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - } else { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - KALDI_LOG << "Reverting to slower state tracking because model is large: " - << NgramCounts().size() << "-gram with symbols up to " - << max_symbol; - } -} - -void ArpaLmCompiler::ConsumeNGram(const NGram& ngram) { - // is invalid in tails, in heads of an n-gram. 
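A minimal standalone sketch of the backoff-destination search documented for CreateBackoff() above: drop one order of context from the history until a known state is found, bottoming out at the always-present unigram history. The history map and integer word ids are toy stand-ins for the compiler's HistoryMap, not its actual types:

```cpp
// Standalone illustration of backoff lookup by repeatedly shortening history.
#include <iostream>
#include <map>
#include <vector>

using Hist = std::vector<int>;

int FindBackoffState(Hist key, const std::map<Hist, int>& history) {
  auto it = history.find(key);
  while (it == history.end()) {
    key.erase(key.begin());  // analogous to Tails(): drop one order of context
    it = history.find(key);
  }
  return it->second;
}

int main() {
  std::map<Hist, int> history;
  history[Hist{}] = 0;   // unigram (empty) history is always present
  history[Hist{7}] = 1;  // state for history "7"

  // No state exists for history "5 7", so back off: "5 7" -> "7" -> found.
  std::cout << FindBackoffState({5, 7}, history) << "\n";  // prints 1
}
```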
- for (int i = 0; i < ngram.words.size(); ++i) { - if ((i > 0 && ngram.words[i] == Options().bos_symbol) || - (i + 1 < ngram.words.size() && - ngram.words[i] == Options().eos_symbol)) { - if (ShouldWarn()) - KALDI_WARN << LineReference() - << " skipped: n-gram has invalid BOS/EOS placement"; - return; - } - } - - bool is_highest = ngram.words.size() == NgramCounts().size(); - impl_->ConsumeNGram(ngram, is_highest); -} - -void ArpaLmCompiler::RemoveRedundantStates() { - fst::StdArc::Label backoff_symbol = sub_eps_; - if (backoff_symbol == 0) { - // The method of removing redundant states implemented in this function - // leads to slow determinization of L o G when people use the older style of - // usage of arpa2fst where the --disambig-symbol option was not specified. - // The issue seems to be that it creates a non-deterministic FST, while G is - // supposed to be deterministic. By 'return'ing below, we just disable this - // method if people were using an older script. This method isn't really - // that consequential anyway, and people will move to the newer-style - // scripts (see current utils/format_lm.sh), so this isn't much of a - // problem. - return; - } - - fst::StdArc::StateId num_states = fst_.NumStates(); - - // replace the #0 symbols on the input of arcs out of redundant states (states - // that are not final and have only a backoff arc leaving them), with . - for (fst::StdArc::StateId state = 0; state < num_states; state++) { - if (fst_.NumArcs(state) == 1 && - fst_.Final(state) == fst::TropicalWeight::Zero()) { - fst::MutableArcIterator iter(&fst_, state); - fst::StdArc arc = iter.Value(); - if (arc.ilabel == backoff_symbol) { - arc.ilabel = 0; - iter.SetValue(arc); - } - } - } - - // we could call fst::RemoveEps, and it would have the same effect in normal - // cases, where backoff_symbol != 0 and there are no epsilons in unexpected - // places, but RemoveEpsLocal is a bit safer in case something weird is going - // on; it guarantees not to blow up the FST. - fst::RemoveEpsLocal(&fst_); - KALDI_LOG << "Reduced num-states from " << num_states << " to " - << fst_.NumStates(); -} - -void ArpaLmCompiler::Check() const { - if (fst_.Start() == fst::kNoStateId) { - KALDI_ERR << "Arpa file did not contain the beginning-of-sentence symbol " - << Symbols()->Find(Options().bos_symbol) << "."; - } -} - -void ArpaLmCompiler::ReadComplete() { - fst_.SetInputSymbols(Symbols()); - fst_.SetOutputSymbols(Symbols()); - RemoveRedundantStates(); - Check(); -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/lm/arpa-lm-compiler.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/lm/arpa-lm-compiler.h deleted file mode 100644 index 069c71bd0e6f5acf0b9521ec1ef46796eb31fe4d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/lm/arpa-lm-compiler.h +++ /dev/null @@ -1,63 +0,0 @@ -// lm/arpa-lm-compiler.h - -// Copyright 2009-2011 Gilles Boulianne -// Copyright 2016 Smart Action LLC (kkm) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_LM_ARPA_LM_COMPILER_H_ -#define KALDI_LM_ARPA_LM_COMPILER_H_ - -#include - -#include "lm/arpa-file-parser.h" - -namespace kaldi { - -class ArpaLmCompilerImplInterface; - -class ArpaLmCompiler : public ArpaFileParser { - public: - ArpaLmCompiler(const ArpaParseOptions& options, int sub_eps, - fst::SymbolTable* symbols) - : ArpaFileParser(options, symbols), sub_eps_(sub_eps), impl_(NULL) {} - ~ArpaLmCompiler(); - - const fst::StdVectorFst& Fst() const { return fst_; } - fst::StdVectorFst* MutableFst() { return &fst_; } - - protected: - // ArpaFileParser overrides. - virtual void HeaderAvailable(); - virtual void ConsumeNGram(const NGram& ngram); - virtual void ReadComplete(); - - private: - // this function removes states that only have a backoff arc coming - // out of them. - void RemoveRedundantStates(); - void Check() const; - - int sub_eps_; - ArpaLmCompilerImplInterface* impl_; // Owned. - fst::StdVectorFst fst_; - template - friend class ArpaLmCompilerImpl; -}; - -} // namespace kaldi - -#endif // KALDI_LM_ARPA_LM_COMPILER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/lmbin/arpa2fst.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/lmbin/arpa2fst.cc deleted file mode 100644 index 881a45c5b37810247ea38dae56237f59b5554a9c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/lmbin/arpa2fst.cc +++ /dev/null @@ -1,145 +0,0 @@ -// bin/arpa2fst.cc -// -// Copyright 2009-2011 Gilles Boulianne. -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABILITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "lm/arpa-lm-compiler.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; // NOLINT - try { - const char *usage = - "Convert an ARPA format language model into an FST\n" - "Usage: arpa2fst [opts] \n" - " e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table=" - "data/lang/words.txt lm/input.arpa G.fst\n\n" - "Note: When called without switches, the output G.fst will contain\n" - "an embedded symbol table. This is compatible with the way a previous\n" - "version of arpa2fst worked.\n"; - - ParseOptions po(usage); - - ArpaParseOptions options; - options.Register(&po); - - // Option flags. 
- std::string bos_symbol = ""; - std::string eos_symbol = ""; - std::string disambig_symbol; - std::string read_syms_filename; - std::string write_syms_filename; - bool keep_symbols = false; - bool ilabel_sort = true; - - po.Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol"); - po.Register("eos-symbol", &eos_symbol, "End of sentence symbol"); - po.Register("disambig-symbol", &disambig_symbol, - "Disambiguator. If provided (e. g. #0), used on input side of " - "backoff links, and and are replaced with epsilons"); - po.Register("read-symbol-table", &read_syms_filename, - "Use existing symbol table"); - po.Register("write-symbol-table", &write_syms_filename, - "Write generated symbol table to a file"); - po.Register("keep-symbols", &keep_symbols, - "Store symbol table with FST. Symbols always saved to FST if " - "symbol tables are neither read or written (otherwise symbols " - "would be lost entirely)"); - po.Register("ilabel-sort", &ilabel_sort, "Ilabel-sort the output FST"); - - po.Read(argc, argv); - - if (po.NumArgs() != 1 && po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - std::string arpa_rxfilename = po.GetArg(1), - fst_wxfilename = po.GetOptArg(2); - - int64 disambig_symbol_id = 0; - - fst::SymbolTable *symbols; - if (!read_syms_filename.empty()) { - // Use existing symbols. Required symbols must be in the table. - kaldi::Input kisym(read_syms_filename); - symbols = fst::SymbolTable::ReadText( - kisym.Stream(), PrintableWxfilename(read_syms_filename)); - if (symbols == NULL) - KALDI_ERR << "Could not read symbol table from file " - << read_syms_filename; - - options.oov_handling = ArpaParseOptions::kSkipNGram; - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->Find(disambig_symbol); - if (disambig_symbol_id == -1) // fst::kNoSymbol - KALDI_ERR << "Symbol table " << read_syms_filename - << " has no symbol for " << disambig_symbol; - } - } else { - // Create a new symbol table and populate it from ARPA file. - symbols = new fst::SymbolTable(PrintableWxfilename(fst_wxfilename)); - options.oov_handling = ArpaParseOptions::kAddToSymbols; - symbols->AddSymbol("", 0); - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->AddSymbol(disambig_symbol); - } - } - - // Add or use existing BOS and EOS. - options.bos_symbol = symbols->AddSymbol(bos_symbol); - options.eos_symbol = symbols->AddSymbol(eos_symbol); - - // If producing new (not reading existing) symbols and not saving them, - // need to keep symbols with FST, otherwise they would be lost. - if (read_syms_filename.empty() && write_syms_filename.empty()) - keep_symbols = true; - - // Actually compile LM. - KALDI_ASSERT(symbols != NULL); - ArpaLmCompiler lm_compiler(options, disambig_symbol_id, symbols); - { - Input ki(arpa_rxfilename); - lm_compiler.Read(ki.Stream()); - } - - // Sort the FST in-place if requested by options. - if (ilabel_sort) { - fst::ArcSort(lm_compiler.MutableFst(), fst::StdILabelCompare()); - } - - // Write symbols if requested. - if (!write_syms_filename.empty()) { - kaldi::Output kosym(write_syms_filename, false); - symbols->WriteText(kosym.Stream()); - } - - // Write LM FST. 
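Condensed into a single function, the flow of `main()` above looks roughly like the sketch below. It assumes a Kaldi/OpenFst build context; `CompileArpa` is an illustrative name, the `<eps>`/`<s>`/`</s>` strings are the tool's default symbols, and error handling, the disambiguation symbol, and symbol-table output are omitted.

```cpp
#include <string>
#include <fst/fstlib.h>
#include "lm/arpa-lm-compiler.h"
#include "util/kaldi-io.h"

fst::StdVectorFst CompileArpa(const std::string &arpa_rxfilename) {
  using namespace kaldi;
  fst::SymbolTable symbols("words");
  symbols.AddSymbol("<eps>", 0);                       // epsilon must be id 0
  ArpaParseOptions options;
  options.oov_handling = ArpaParseOptions::kAddToSymbols;
  options.bos_symbol = symbols.AddSymbol("<s>");
  options.eos_symbol = symbols.AddSymbol("</s>");
  ArpaLmCompiler lm_compiler(options, /*sub_eps=*/0, &symbols);
  Input ki(arpa_rxfilename);
  lm_compiler.Read(ki.Stream());                       // parse and compile
  fst::ArcSort(lm_compiler.MutableFst(), fst::StdILabelCompare());
  return lm_compiler.Fst();
}
```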
- bool write_binary = true, write_header = false; - kaldi::Output kofst(fst_wxfilename, write_binary, write_header); - fst::FstWriteOptions wopts(PrintableWxfilename(fst_wxfilename)); - wopts.write_isymbols = wopts.write_osymbols = keep_symbols; - lm_compiler.Fst().Write(kofst.Stream(), wopts); - - delete symbols; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/basic-filebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/basic-filebuf.h deleted file mode 100644 index 22ec891064d5955c8b1d255e0d34781a9f505a38..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/basic-filebuf.h +++ /dev/null @@ -1,952 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// This is a modified version of the std::basic_filebuf from libc++ -// Copyright 20XX LLVM -// (http://libcxx.llvm.org/). -// It allows one to create basic_filebuf from an existing FILE* handle or file -// descriptor. -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source License licenses. See LICENSE.TXT for details (included at the -// bottom). -/////////////////////////////////////////////////////////////////////////////// -#ifndef KALDI_UTIL_BASIC_FILEBUF_H_ -#define KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include - -/////////////////////////////////////////////////////////////////////////////// -namespace kaldi { -/////////////////////////////////////////////////////////////////////////////// -template > -class basic_filebuf : public std::basic_streambuf { - public: - typedef CharT char_type; - typedef Traits traits_type; - typedef typename traits_type::int_type int_type; - typedef typename traits_type::pos_type pos_type; - typedef typename traits_type::off_type off_type; - typedef typename traits_type::state_type state_type; - - basic_filebuf(); - basic_filebuf(basic_filebuf&& rhs); - virtual ~basic_filebuf(); - - basic_filebuf& operator=(basic_filebuf&& rhs); - void swap(basic_filebuf& rhs); - - bool is_open() const; - basic_filebuf* open(const char* s, std::ios_base::openmode mode); - basic_filebuf* open(const std::string& s, std::ios_base::openmode mode); - basic_filebuf* open(int fd, std::ios_base::openmode mode); - basic_filebuf* open(FILE* f, std::ios_base::openmode mode); - basic_filebuf* close(); - - FILE* file() { return this->_M_file; } - int fd() { return fileno(this->_M_file); } - - protected: - int_type underflow() override; - int_type pbackfail(int_type c = traits_type::eof()) override; - int_type overflow(int_type c = traits_type::eof()) override; - std::basic_streambuf* setbuf( - char_type* s, std::streamsize n) override; - pos_type seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - pos_type seekpos(pos_type sp, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - int sync() override; - void imbue(const std::locale& loc) override; - - protected: - char* _M_extbuf; - const char* _M_extbufnext; - const char* _M_extbufend; - char _M_extbuf_min[8]; - size_t _M_ebs; - char_type* _M_intbuf; - size_t _M_ibs; - FILE* _M_file; - const std::codecvt* _M_cv; - 
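The point of this class, per its header comment, is that it can be attached to an already-open `FILE*` or file descriptor, which `std::basic_filebuf` does not allow. A minimal usage sketch (the file name and function name are illustrative; note that `close()` also `fclose()`s the wrapped handle):

```cpp
#include <cstdio>
#include <istream>
#include <string>
#include "util/basic-filebuf.h"

void ReadWrappedFile() {
  FILE *f = std::fopen("lm.arpa", "r");
  if (f == nullptr) return;
  kaldi::basic_filebuf<char> buf;
  buf.open(f, std::ios_base::in);       // adopt the existing FILE* handle
  std::istream is(&buf);
  std::string line;
  while (std::getline(is, line)) {
    // ... process each line ...
  }
  buf.close();                          // also closes the underlying FILE*
}
```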
state_type _M_st; - state_type _M_st_last; - std::ios_base::openmode _M_om; - std::ios_base::openmode _M_cm; - bool _M_owns_eb; - bool _M_owns_ib; - bool _M_always_noconv; - - const char* _M_get_mode(std::ios_base::openmode mode); - bool _M_read_mode(); - void _M_write_mode(); -}; - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf() - : _M_extbuf(nullptr), - _M_extbufnext(nullptr), - _M_extbufend(nullptr), - _M_ebs(0), - _M_intbuf(nullptr), - _M_ibs(0), - _M_file(nullptr), - _M_cv(nullptr), - _M_st(), - _M_st_last(), - _M_om(std::ios_base::openmode(0)), - _M_cm(std::ios_base::openmode(0)), - _M_owns_eb(false), - _M_owns_ib(false), - _M_always_noconv(false) { - if (std::has_facet >( - this->getloc())) { - _M_cv = &std::use_facet >( - this->getloc()); - _M_always_noconv = _M_cv->always_noconv(); - } - setbuf(0, 4096); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf(basic_filebuf&& rhs) - : std::basic_streambuf(rhs) { - if (rhs._M_extbuf == rhs._M_extbuf_min) { - _M_extbuf = _M_extbuf_min; - _M_extbufnext = _M_extbuf + (rhs._M_extbufnext - rhs._M_extbuf); - _M_extbufend = _M_extbuf + (rhs._M_extbufend - rhs._M_extbuf); - } else { - _M_extbuf = rhs._M_extbuf; - _M_extbufnext = rhs._M_extbufnext; - _M_extbufend = rhs._M_extbufend; - } - _M_ebs = rhs._M_ebs; - _M_intbuf = rhs._M_intbuf; - _M_ibs = rhs._M_ibs; - _M_file = rhs._M_file; - _M_cv = rhs._M_cv; - _M_st = rhs._M_st; - _M_st_last = rhs._M_st_last; - _M_om = rhs._M_om; - _M_cm = rhs._M_cm; - _M_owns_eb = rhs._M_owns_eb; - _M_owns_ib = rhs._M_owns_ib; - _M_always_noconv = rhs._M_always_noconv; - if (rhs.pbase()) { - if (rhs.pbase() == rhs._M_intbuf) - this->setp(_M_intbuf, _M_intbuf + (rhs.epptr() - rhs.pbase())); - else - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + - (rhs.epptr() - rhs.pbase())); - this->pbump(rhs.pptr() - rhs.pbase()); - } else if (rhs.eback()) { - if (rhs.eback() == rhs._M_intbuf) - this->setg(_M_intbuf, _M_intbuf + (rhs.gptr() - rhs.eback()), - _M_intbuf + (rhs.egptr() - rhs.eback())); - else - this->setg( - reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (rhs.gptr() - rhs.eback()), - reinterpret_cast(_M_extbuf) + - (rhs.egptr() - rhs.eback())); - } - rhs._M_extbuf = nullptr; - rhs._M_extbufnext = nullptr; - rhs._M_extbufend = nullptr; - rhs._M_ebs = 0; - rhs._M_intbuf = nullptr; - rhs._M_ibs = 0; - rhs._M_file = nullptr; - rhs._M_st = state_type(); - rhs._M_st_last = state_type(); - rhs._M_om = std::ios_base::openmode(0); - rhs._M_cm = std::ios_base::openmode(0); - rhs._M_owns_eb = false; - rhs._M_owns_ib = false; - rhs.setg(0, 0, 0); - rhs.setp(0, 0); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf& basic_filebuf::operator=( - basic_filebuf&& rhs) { - close(); - swap(rhs); - return *this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::~basic_filebuf() { - // try - // { - // close(); - // } - // catch (...) 
- // { - // } - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::swap(basic_filebuf& rhs) { - std::basic_streambuf::swap(rhs); - if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - std::swap(_M_extbuf, rhs._M_extbuf); - std::swap(_M_extbufnext, rhs._M_extbufnext); - std::swap(_M_extbufend, rhs._M_extbufend); - } else { - ptrdiff_t ln = _M_extbufnext - _M_extbuf; - ptrdiff_t le = _M_extbufend - _M_extbuf; - ptrdiff_t rn = rhs._M_extbufnext - rhs._M_extbuf; - ptrdiff_t re = rhs._M_extbufend - rhs._M_extbuf; - if (_M_extbuf == _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - _M_extbuf = rhs._M_extbuf; - rhs._M_extbuf = rhs._M_extbuf_min; - } else if (_M_extbuf != _M_extbuf_min && - rhs._M_extbuf == rhs._M_extbuf_min) { - rhs._M_extbuf = _M_extbuf; - _M_extbuf = _M_extbuf_min; - } - _M_extbufnext = _M_extbuf + rn; - _M_extbufend = _M_extbuf + re; - rhs._M_extbufnext = rhs._M_extbuf + ln; - rhs._M_extbufend = rhs._M_extbuf + le; - } - std::swap(_M_ebs, rhs._M_ebs); - std::swap(_M_intbuf, rhs._M_intbuf); - std::swap(_M_ibs, rhs._M_ibs); - std::swap(_M_file, rhs._M_file); - std::swap(_M_cv, rhs._M_cv); - std::swap(_M_st, rhs._M_st); - std::swap(_M_st_last, rhs._M_st_last); - std::swap(_M_om, rhs._M_om); - std::swap(_M_cm, rhs._M_cm); - std::swap(_M_owns_eb, rhs._M_owns_eb); - std::swap(_M_owns_ib, rhs._M_owns_ib); - std::swap(_M_always_noconv, rhs._M_always_noconv); - if (this->eback() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->gptr() - this->eback(); - ptrdiff_t e = this->egptr() - this->eback(); - this->setg(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + n, - reinterpret_cast(_M_extbuf_min) + e); - } else if (this->pbase() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->pptr() - this->pbase(); - ptrdiff_t e = this->epptr() - this->pbase(); - this->setp(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + e); - this->pbump(n); - } - if (rhs.eback() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.gptr() - rhs.eback(); - ptrdiff_t e = rhs.egptr() - rhs.eback(); - rhs.setg(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + n, - reinterpret_cast(rhs._M_extbuf_min) + e); - } else if (rhs.pbase() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.pptr() - rhs.pbase(); - ptrdiff_t e = rhs.epptr() - rhs.pbase(); - rhs.setp(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + e); - rhs.pbump(n); - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline void swap(basic_filebuf& x, - basic_filebuf& y) { - x.swap(y); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline bool basic_filebuf::is_open() const { - return _M_file != nullptr; -} - -/////////////////////////////////////////////////////////////////////////////// -template -const char* basic_filebuf::_M_get_mode( - std::ios_base::openmode mode) { - switch ((mode & ~std::ios_base::ate) | 0) { - case std::ios_base::out: - case std::ios_base::out | std::ios_base::trunc: - return "w"; - case std::ios_base::out | std::ios_base::app: - case std::ios_base::app: - return "a"; - break; - case std::ios_base::in: - return "r"; - case std::ios_base::in | std::ios_base::out: - return "r+"; - case std::ios_base::in | std::ios_base::out | 
std::ios_base::trunc: - return "w+"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app: - case std::ios_base::in | std::ios_base::app: - return "a+"; - case std::ios_base::out | std::ios_base::binary: - case std::ios_base::out | std::ios_base::trunc | std::ios_base::binary: - return "wb"; - case std::ios_base::out | std::ios_base::app | std::ios_base::binary: - case std::ios_base::app | std::ios_base::binary: - return "ab"; - case std::ios_base::in | std::ios_base::binary: - return "rb"; - case std::ios_base::in | std::ios_base::out | std::ios_base::binary: - return "r+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::trunc | - std::ios_base::binary: - return "w+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app | - std::ios_base::binary: - case std::ios_base::in | std::ios_base::app | std::ios_base::binary: - return "a+b"; - default: - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - const char* s, std::ios_base::openmode mode) { - basic_filebuf* rt = nullptr; - if (_M_file == nullptr) { - const char* md = _M_get_mode(mode); - if (md) { - _M_file = fopen(s, md); - if (_M_file) { - rt = this; - _M_om = mode; - if (mode & std::ios_base::ate) { - if (fseek(_M_file, 0, SEEK_END)) { - fclose(_M_file); - _M_file = nullptr; - rt = nullptr; - } - } - } - } - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf* basic_filebuf::open( - const std::string& s, std::ios_base::openmode mode) { - return open(s.c_str(), mode); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - int fd, std::ios_base::openmode mode) { - const char* md = this->_M_get_mode(mode); - if (md) { - this->_M_file = fdopen(fd, md); - this->_M_om = mode; - return this; - } else { - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - FILE* f, std::ios_base::openmode mode) { - this->_M_file = f; - this->_M_om = mode; - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::close() { - basic_filebuf* rt = nullptr; - if (_M_file) { - rt = this; - std::unique_ptr h(_M_file, fclose); - if (sync()) rt = nullptr; - if (fclose(h.release()) == 0) - _M_file = nullptr; - else - rt = nullptr; - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::underflow() { - if (_M_file == nullptr) return traits_type::eof(); - bool initial = _M_read_mode(); - char_type buf; - if (this->gptr() == nullptr) this->setg(&buf, &buf + 1, &buf + 1); - const size_t unget_sz = - initial ? 
0 : std::min((this->egptr() - this->eback()) / 2, 4); - int_type c = traits_type::eof(); - if (this->gptr() == this->egptr()) { - memmove(this->eback(), this->egptr() - unget_sz, - unget_sz * sizeof(char_type)); - if (_M_always_noconv) { - size_t nmemb = - static_cast(this->egptr() - this->eback() - unget_sz); - nmemb = fread(this->eback() + unget_sz, 1, nmemb, _M_file); - if (nmemb != 0) { - this->setg(this->eback(), this->eback() + unget_sz, - this->eback() + unget_sz + nmemb); - c = traits_type::to_int_type(*this->gptr()); - } - } else { - memmove(_M_extbuf, _M_extbufnext, _M_extbufend - _M_extbufnext); - _M_extbufnext = _M_extbuf + (_M_extbufend - _M_extbufnext); - _M_extbufend = - _M_extbuf + - (_M_extbuf == _M_extbuf_min ? sizeof(_M_extbuf_min) : _M_ebs); - size_t nmemb = - std::min(static_cast(_M_ibs - unget_sz), - static_cast(_M_extbufend - _M_extbufnext)); - std::codecvt_base::result r; - _M_st_last = _M_st; - size_t nr = - fread(reinterpret_cast(const_cast(_M_extbufnext)), - 1, nmemb, _M_file); - if (nr != 0) { - if (!_M_cv) throw std::bad_cast(); - _M_extbufend = _M_extbufnext + nr; - char_type* inext; - r = _M_cv->in(_M_st, _M_extbuf, _M_extbufend, _M_extbufnext, - this->eback() + unget_sz, this->eback() + _M_ibs, inext); - if (r == std::codecvt_base::noconv) { - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf), - const_cast(_M_extbufend)); - c = traits_type::to_int_type(*this->gptr()); - } else if (inext != this->eback() + unget_sz) { - this->setg(this->eback(), this->eback() + unget_sz, inext); - c = traits_type::to_int_type(*this->gptr()); - } - } - } - } else { - c = traits_type::to_int_type(*this->gptr()); - } - if (this->eback() == &buf) this->setg(0, 0, 0); - return c; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::pbackfail(int_type c) { - if (_M_file && this->eback() < this->gptr()) { - if (traits_type::eq_int_type(c, traits_type::eof())) { - this->gbump(-1); - return traits_type::not_eof(c); - } - if ((_M_om & std::ios_base::out) || - traits_type::eq(traits_type::to_char_type(c), this->gptr()[-1])) { - this->gbump(-1); - *this->gptr() = traits_type::to_char_type(c); - return c; - } - } - return traits_type::eof(); -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::overflow(int_type c) { - if (_M_file == nullptr) return traits_type::eof(); - _M_write_mode(); - char_type buf; - char_type* pb_save = this->pbase(); - char_type* epb_save = this->epptr(); - if (!traits_type::eq_int_type(c, traits_type::eof())) { - if (this->pptr() == nullptr) this->setp(&buf, &buf + 1); - *this->pptr() = traits_type::to_char_type(c); - this->pbump(1); - } - if (this->pptr() != this->pbase()) { - if (_M_always_noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), sizeof(char_type), nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else { - char* extbe = _M_extbuf; - std::codecvt_base::result r; - do { - if (!_M_cv) throw std::bad_cast(); - const char_type* e; - r = _M_cv->out(_M_st, this->pbase(), this->pptr(), e, _M_extbuf, - _M_extbuf + _M_ebs, extbe); - if (e == this->pbase()) return traits_type::eof(); - if (r == std::codecvt_base::noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else if (r == std::codecvt_base::ok 
|| - r == std::codecvt_base::partial) { - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - if (r == std::codecvt_base::partial) { - this->setp(const_cast(e), this->pptr()); - this->pbump(this->epptr() - this->pbase()); - } - } else { - return traits_type::eof(); - } - } while (r == std::codecvt_base::partial); - } - this->setp(pb_save, epb_save); - } - return traits_type::not_eof(c); -} - -/////////////////////////////////////////////////////////////////////////////// -template -std::basic_streambuf* basic_filebuf::setbuf( - char_type* s, std::streamsize n) { - this->setg(0, 0, 0); - this->setp(0, 0); - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; - _M_ebs = n; - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv && s) { - _M_extbuf = reinterpret_cast(s); - _M_owns_eb = false; - } else { - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } - } else { - _M_extbuf = _M_extbuf_min; - _M_ebs = sizeof(_M_extbuf_min); - _M_owns_eb = false; - } - if (!_M_always_noconv) { - _M_ibs = std::max(n, sizeof(_M_extbuf_min)); - if (s && _M_ibs >= sizeof(_M_extbuf_min)) { - _M_intbuf = s; - _M_owns_ib = false; - } else { - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } else { - _M_ibs = 0; - _M_intbuf = 0; - _M_owns_ib = false; - } - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode) { - if (!_M_cv) throw std::bad_cast(); - int width = _M_cv->encoding(); - if (_M_file == nullptr || (width <= 0 && off != 0) || sync()) - return pos_type(off_type(-1)); - // width > 0 || off == 0 - int whence; - switch (way) { - case std::ios_base::beg: - whence = SEEK_SET; - break; - case std::ios_base::cur: - whence = SEEK_CUR; - break; - case std::ios_base::end: - whence = SEEK_END; - break; - default: - return pos_type(off_type(-1)); - } -#if _WIN32 - if (fseek(_M_file, width > 0 ? width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftell(_M_file); -#else - if (fseeko(_M_file, width > 0 ? 
width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftello(_M_file); -#endif - r.state(_M_st); - return r; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekpos(pos_type sp, std::ios_base::openmode) { - if (_M_file == nullptr || sync()) return pos_type(off_type(-1)); -#if _WIN32 - if (fseek(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#else - if (fseeko(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#endif - _M_st = sp.state(); - return sp; -} - -/////////////////////////////////////////////////////////////////////////////// -template -int basic_filebuf::sync() { - if (_M_file == nullptr) return 0; - if (!_M_cv) throw std::bad_cast(); - if (_M_cm & std::ios_base::out) { - if (this->pptr() != this->pbase()) - if (overflow() == traits_type::eof()) return -1; - std::codecvt_base::result r; - do { - char* extbe; - r = _M_cv->unshift(_M_st, _M_extbuf, _M_extbuf + _M_ebs, extbe); - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) return -1; - } while (r == std::codecvt_base::partial); - if (r == std::codecvt_base::error) return -1; - if (fflush(_M_file)) return -1; - } else if (_M_cm & std::ios_base::in) { - off_type c; - state_type state = _M_st_last; - bool update_st = false; - if (_M_always_noconv) { - c = this->egptr() - this->gptr(); - } else { - int width = _M_cv->encoding(); - c = _M_extbufend - _M_extbufnext; - if (width > 0) { - c += width * (this->egptr() - this->gptr()); - } else { - if (this->gptr() != this->egptr()) { - const int off = _M_cv->length(state, _M_extbuf, _M_extbufnext, - this->gptr() - this->eback()); - c += _M_extbufnext - _M_extbuf - off; - update_st = true; - } - } - } -#if _WIN32 - if (fseek(_M_file_, -c, SEEK_CUR)) return -1; -#else - if (fseeko(_M_file, -c, SEEK_CUR)) return -1; -#endif - if (update_st) _M_st = state; - _M_extbufnext = _M_extbufend = _M_extbuf; - this->setg(0, 0, 0); - _M_cm = std::ios_base::openmode(0); - } - return 0; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::imbue(const std::locale& loc) { - sync(); - _M_cv = &std::use_facet >(loc); - bool old_anc = _M_always_noconv; - _M_always_noconv = _M_cv->always_noconv(); - if (old_anc != _M_always_noconv) { - this->setg(0, 0, 0); - this->setp(0, 0); - // invariant, char_type is char, else we couldn't get here - // need to dump _M_intbuf - if (_M_always_noconv) { - if (_M_owns_eb) delete[] _M_extbuf; - _M_owns_eb = _M_owns_ib; - _M_ebs = _M_ibs; - _M_extbuf = reinterpret_cast(_M_intbuf); - _M_ibs = 0; - _M_intbuf = nullptr; - _M_owns_ib = false; - } else { // need to obtain an _M_intbuf. 
- // If _M_extbuf is user-supplied, use it, else new _M_intbuf - if (!_M_owns_eb && _M_extbuf != _M_extbuf_min) { - _M_ibs = _M_ebs; - _M_intbuf = reinterpret_cast(_M_extbuf); - _M_owns_ib = false; - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } else { - _M_ibs = _M_ebs; - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -bool basic_filebuf::_M_read_mode() { - if (!(_M_cm & std::ios_base::in)) { - this->setp(0, 0); - if (_M_always_noconv) - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + _M_ebs, - reinterpret_cast(_M_extbuf) + _M_ebs); - else - this->setg(_M_intbuf, _M_intbuf + _M_ibs, _M_intbuf + _M_ibs); - _M_cm = std::ios_base::in; - return true; - } - return false; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::_M_write_mode() { - if (!(_M_cm & std::ios_base::out)) { - this->setg(0, 0, 0); - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv) - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (_M_ebs - 1)); - else - this->setp(_M_intbuf, _M_intbuf + (_M_ibs - 1)); - } else { - this->setp(0, 0); - } - _M_cm = std::ios_base::out; - } -} - -/////////////////////////////////////////////////////////////////////////////// -} // namespace kaldi - -/////////////////////////////////////////////////////////////////////////////// -#endif // KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// - -/* - * ============================================================================ - * libc++ License - * ============================================================================ - * - * The libc++ library is dual licensed under both the University of Illinois - * "BSD-Like" license and the MIT license. As a user of this code you may - * choose to use it under either license. As a contributor, you agree to allow - * your code to be used under both. - * - * Full text of the relevant licenses is included below. - * - * ============================================================================ - * - * University of Illinois/NCSA - * Open Source License - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * All rights reserved. - * - * Developed by: - * - * LLVM Team - * - * University of Illinois at Urbana-Champaign - * - * http://llvm.org - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * with the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimers in the - * documentation and/or other materials provided with the distribution. 
- * - * * Neither the names of the LLVM Team, University of Illinois at - * Urbana-Champaign, nor the names of its contributors may be used to - * endorse or promote products derived from this Software without specific - * prior written permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH - * THE SOFTWARE. - * - * ============================================================================== - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * ============================================================================== - * - * This file is a partial list of people who have contributed to the LLVM/libc++ - * project. If you have contributed a patch or made some other contribution to - * LLVM/libc++, please submit a patch to this file to add yourself, and it will - * be done! - * - * The list is sorted by surname and formatted to allow easy grepping and - * beautification by scripts. The fields are: name (N), email (E), web-address - * (W), PGP key ID and fingerprint (P), description (D), and snail-mail address - * (S). - * - * N: Saleem Abdulrasool - * E: compnerd@compnerd.org - * D: Minor patches and Linux fixes. - * - * N: Dimitry Andric - * E: dimitry@andric.com - * D: Visibility fixes, minor FreeBSD portability patches. - * - * N: Holger Arnold - * E: holgerar@gmail.com - * D: Minor fix. - * - * N: Ruben Van Boxem - * E: vanboxem dot ruben at gmail dot com - * D: Initial Windows patches. - * - * N: David Chisnall - * E: theraven at theravensnest dot org - * D: FreeBSD and Solaris ports, libcxxrt support, some atomics work. - * - * N: Marshall Clow - * E: mclow.lists@gmail.com - * E: marshall@idio.com - * D: C++14 support, patches and bug fixes. - * - * N: Bill Fisher - * E: william.w.fisher@gmail.com - * D: Regex bug fixes. - * - * N: Matthew Dempsky - * E: matthew@dempsky.org - * D: Minor patches and bug fixes. - * - * N: Google Inc. 
- * D: Copyright owner and contributor of the CityHash algorithm - * - * N: Howard Hinnant - * E: hhinnant@apple.com - * D: Architect and primary author of libc++ - * - * N: Hyeon-bin Jeong - * E: tuhertz@gmail.com - * D: Minor patches and bug fixes. - * - * N: Argyrios Kyrtzidis - * E: kyrtzidis@apple.com - * D: Bug fixes. - * - * N: Bruce Mitchener, Jr. - * E: bruce.mitchener@gmail.com - * D: Emscripten-related changes. - * - * N: Michel Morin - * E: mimomorin@gmail.com - * D: Minor patches to is_convertible. - * - * N: Andrew Morrow - * E: andrew.c.morrow@gmail.com - * D: Minor patches and Linux fixes. - * - * N: Arvid Picciani - * E: aep at exys dot org - * D: Minor patches and musl port. - * - * N: Bjorn Reese - * E: breese@users.sourceforge.net - * D: Initial regex prototype - * - * N: Nico Rieck - * E: nico.rieck@gmail.com - * D: Windows fixes - * - * N: Jonathan Sauer - * D: Minor patches, mostly related to constexpr - * - * N: Craig Silverstein - * E: csilvers@google.com - * D: Implemented Cityhash as the string hash function on 64-bit machines - * - * N: Richard Smith - * D: Minor patches. - * - * N: Joerg Sonnenberger - * E: joerg@NetBSD.org - * D: NetBSD port. - * - * N: Stephan Tolksdorf - * E: st@quanttec.com - * D: Minor fix - * - * N: Michael van der Westhuizen - * E: r1mikey at gmail dot com - * - * N: Klaas de Vries - * E: klaas at klaasgaaf dot nl - * D: Minor bug fix. - * - * N: Zhang Xiongpang - * E: zhangxiongpang@gmail.com - * D: Minor patches and bug fixes. - * - * N: Xing Xue - * E: xingxue@ca.ibm.com - * D: AIX port - * - * N: Zhihao Yuan - * E: lichray@gmail.com - * D: Standard compatibility fixes. - * - * N: Jeffrey Yasskin - * E: jyasskin@gmail.com - * E: jyasskin@google.com - * D: Linux fixes. - */ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/const-integer-set-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/const-integer-set-inl.h deleted file mode 100644 index b93846148a3e4595774507f638396ce13393ac0e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/const-integer-set-inl.h +++ /dev/null @@ -1,87 +0,0 @@ -// util/const-integer-set-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_INL_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_INL_H_ - -// Do not include this file directly. It is included by const-integer-set.h - -namespace kaldi { - -template -void ConstIntegerSet::InitInternal() { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - quick_set_.clear(); // just in case we previously had data. 
- if (slow_set_.size() == 0) { - lowest_member_ = (I)1; - highest_member_ = (I)0; - contiguous_ = false; - quick_ = false; - } else { - lowest_member_ = slow_set_.front(); - highest_member_ = slow_set_.back(); - size_t range = highest_member_ + 1 - lowest_member_; - if (range == slow_set_.size()) { - contiguous_ = true; - quick_ = false; - } else { - contiguous_ = false; - // If it would be more compact to store as bool - if (range < slow_set_.size() * 8 * sizeof(I)) { - // (assuming 1 bit per element)... - quick_set_.resize(range, false); - for (size_t i = 0; i < slow_set_.size(); i++) - quick_set_[slow_set_[i] - lowest_member_] = true; - quick_ = true; - } else { - quick_ = false; - } - } - } -} - -template -int ConstIntegerSet::count(I i) const { - if (i < lowest_member_ || i > highest_member_) { - return 0; - } else { - if (contiguous_) return true; - if (quick_) { - return (quick_set_[i - lowest_member_] ? 1 : 0); - } else { - bool ans = std::binary_search(slow_set_.begin(), slow_set_.end(), i); - return (ans ? 1 : 0); - } - } -} - -template -void ConstIntegerSet::Write(std::ostream &os, bool binary) const { - WriteIntegerVector(os, binary, slow_set_); -} - -template -void ConstIntegerSet::Read(std::istream &is, bool binary) { - ReadIntegerVector(is, binary, &slow_set_); - InitInternal(); -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_CONST_INTEGER_SET_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/const-integer-set.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/const-integer-set.h deleted file mode 100644 index 809a56a7c83804bfaa4badb5e28059734bfcad1e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/const-integer-set.h +++ /dev/null @@ -1,96 +0,0 @@ -// util/const-integer-set.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_H_ -#include -#include -#include -#include -#include -#include "util/stl-utils.h" - -/* ConstIntegerSet is a way to efficiently test whether something is in a - supplied set of integers. It can be initialized from a vector or set, but - never changed after that. It either uses a sorted vector or an array of - bool, depending on the input. It behaves like a const version of an STL set, - with only a subset of the functionality, except all the member functions are - upper-case. - - Note that we could get rid of the member slow_set_, but we'd have to - do more work to implement an iterator type. This would save memory. 
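The representation choice made in `InitInternal()` and used by `count()` above boils down to the standalone sketch below (a toy `SmallIntSet`, not the real class): a bitmap when the value range is small relative to the number of elements, otherwise binary search over the sorted vector. The real class additionally special-cases a fully contiguous range and provides iteration and serialization.

```cpp
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

class SmallIntSet {
 public:
  // `sorted_unique` is assumed sorted and duplicate-free (the real class
  // sorts and uniques its input itself).
  explicit SmallIntSet(std::vector<int32_t> sorted_unique)
      : slow_(std::move(sorted_unique)) {
    if (slow_.empty()) return;
    lowest_ = slow_.front();
    size_t range = static_cast<size_t>(slow_.back() - lowest_) + 1;
    if (range < slow_.size() * 8 * sizeof(int32_t)) {  // same threshold as above
      quick_.assign(range, false);
      for (int32_t v : slow_) quick_[v - lowest_] = true;
    }
  }
  int count(int32_t i) const {                         // returns 1 or 0
    if (slow_.empty() || i < lowest_ || i > slow_.back()) return 0;
    if (!quick_.empty()) return quick_[i - lowest_] ? 1 : 0;
    return std::binary_search(slow_.begin(), slow_.end(), i) ? 1 : 0;
  }

 private:
  int32_t lowest_ = 0;
  std::vector<bool> quick_;
  std::vector<int32_t> slow_;
};
```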
-*/ - -namespace kaldi { - -template -class ConstIntegerSet { - public: - ConstIntegerSet() : lowest_member_(1), highest_member_(0) {} - - void Init(const std::vector &input) { - slow_set_ = input; - SortAndUniq(&slow_set_); - InitInternal(); - } - - void Init(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - - explicit ConstIntegerSet(const std::vector &input) : slow_set_(input) { - SortAndUniq(&slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const ConstIntegerSet &other) - : slow_set_(other.slow_set_) { - InitInternal(); - } - - int count(I i) const; // returns 1 or 0. - - typedef typename std::vector::const_iterator iterator; - iterator begin() const { return slow_set_.begin(); } - iterator end() const { return slow_set_.end(); } - size_t size() const { return slow_set_.size(); } - bool empty() const { return slow_set_.empty(); } - - void Write(std::ostream &os, bool binary) const; - void Read(std::istream &is, bool binary); - - private: - I lowest_member_; - I highest_member_; - bool contiguous_; - bool quick_; - std::vector quick_set_; - std::vector slow_set_; - void InitInternal(); -}; - -} // end namespace kaldi - -#include "util/const-integer-set-inl.h" - -#endif // KALDI_UTIL_CONST_INTEGER_SET_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/hash-list-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/hash-list-inl.h deleted file mode 100644 index 063fa7131ec618f0aae9dc30f4edd26c9dcce7fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/hash-list-inl.h +++ /dev/null @@ -1,193 +0,0 @@ -// util/hash-list-inl.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_INL_H_ -#define KALDI_UTIL_HASH_LIST_INL_H_ - -// Do not include this file directly. It is included by fast-hash.h - -namespace kaldi { - -template -HashList::HashList() { - list_head_ = NULL; - bucket_list_tail_ = static_cast(-1); // invalid. - hash_size_ = 0; - freed_head_ = NULL; -} - -template -void HashList::SetSize(size_t size) { - hash_size_ = size; - KALDI_ASSERT(list_head_ == NULL && - bucket_list_tail_ == - static_cast(-1)); // make sure empty. - if (size > buckets_.size()) buckets_.resize(size, HashBucket(0, NULL)); -} - -template -typename HashList::Elem *HashList::Clear() { - // Clears the hashtable and gives ownership of the currently contained list - // to the user. 
- for (size_t cur_bucket = bucket_list_tail_; - cur_bucket != static_cast(-1); - cur_bucket = buckets_[cur_bucket].prev_bucket) { - buckets_[cur_bucket].last_elem = NULL; // this is how we indicate "empty". - } - bucket_list_tail_ = static_cast(-1); - Elem *ans = list_head_; - list_head_ = NULL; - return ans; -} - -template -const typename HashList::Elem *HashList::GetList() const { - return list_head_; -} - -template -inline void HashList::Delete(Elem *e) { - e->tail = freed_head_; - freed_head_ = e; -} - -template -inline typename HashList::Elem *HashList::Find(I key) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - if (bucket.last_elem == NULL) { - return NULL; // empty bucket. - } else { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - return NULL; // Not found. - } -} - -template -inline typename HashList::Elem *HashList::New() { - if (freed_head_) { - Elem *ans = freed_head_; - freed_head_ = freed_head_->tail; - return ans; - } else { - Elem *tmp = new Elem[allocate_block_size_]; - for (size_t i = 0; i + 1 < allocate_block_size_; i++) - tmp[i].tail = tmp + i + 1; - tmp[allocate_block_size_ - 1].tail = NULL; - freed_head_ = tmp; - allocated_.push_back(tmp); - return this->New(); - } -} - -template -HashList::~HashList() { - // First test whether we had any memory leak within the - // HashList, i.e. things for which the user did not call Delete(). - size_t num_in_list = 0, num_allocated = 0; - for (Elem *e = freed_head_; e != NULL; e = e->tail) num_in_list++; - for (size_t i = 0; i < allocated_.size(); i++) { - num_allocated += allocate_block_size_; - delete[] allocated_[i]; - } - if (num_in_list != num_allocated) { - KALDI_WARN << "Possible memory leak: " << num_in_list - << " != " << num_allocated - << ": you might have forgotten to call Delete on " - << "some Elems"; - } -} - -template -inline typename HashList::Elem *HashList::Insert(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - // Check the element is existing or not. - if (bucket.last_elem != NULL) { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - } - - // This is a new element. Insert it. - Elem *elem = New(); - elem->key = key; - elem->val = val; - if (bucket.last_elem == NULL) { // Unoccupied bucket. Insert at - // head of bucket list (which is tail of regular list, they go in - // opposite directions). - if (bucket_list_tail_ == static_cast(-1)) { - // list was empty so this is the first elem. - KALDI_ASSERT(list_head_ == NULL); - list_head_ = elem; - } else { - // link in to the chain of Elems - buckets_[bucket_list_tail_].last_elem->tail = elem; - } - elem->tail = NULL; - bucket.last_elem = elem; - bucket.prev_bucket = bucket_list_tail_; - bucket_list_tail_ = index; - } else { - // Already-occupied bucket. Insert at tail of list of elements within - // the bucket. 
- elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - } - return elem; -} - -template -void HashList::InsertMore(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - Elem *elem = New(); - elem->key = key; - elem->val = val; - - KALDI_ASSERT(bucket.last_elem != NULL); // assume one element is already here - if (bucket.last_elem->key == key) { // standard behavior: add as last element - elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - return; - } - Elem *e = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail); - // find place to insert in linked list - while (e != bucket.last_elem->tail && e->key != key) e = e->tail; - KALDI_ASSERT(e->key == key); // not found? - should not happen - elem->tail = e->tail; - e->tail = elem; -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_HASH_LIST_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/hash-list.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/hash-list.h deleted file mode 100644 index 31cc9bdc4870773475f8c5139539e320746bf5fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/hash-list.h +++ /dev/null @@ -1,146 +0,0 @@ -// util/hash-list.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_H_ -#define KALDI_UTIL_HASH_LIST_H_ - -#include -#include -#include -#include -#include - -#include "base/kaldi-error.h" - -/* This header provides utilities for a structure that's used in a decoder (but - is quite generic in nature so we implement and test it separately). - Basically it's a singly-linked list, but implemented in such a way that we - can quickly search for elements in the list. We give it a slightly richer - interface than just a hash and a list. The idea is that we want to separate - the hash part and the list part: basically, in the decoder, we want to have a - single hash for the current frame and the next frame, because by the time we - need to access the hash for the next frame we no longer need the hash for the - previous frame. So we have an operation that clears the hash but leaves the - list structure intact. We also control memory management inside this object, - to avoid repeated new's/deletes. - - See hash-list-test.cc for an example of how to use this object. -*/ - -namespace kaldi { - -template -class HashList { - public: - struct Elem { - I key; - T val; - Elem *tail; - }; - - /// Constructor takes no arguments. - /// Call SetSize to inform it of the likely size. 
- HashList(); - - /// Clears the hash and gives the head of the current list to the user; - /// ownership is transferred to the user (the user must call Delete() - /// for each element in the list, at his/her leisure). - Elem *Clear(); - - /// Gives the head of the current list to the user. Ownership retained in the - /// class. Caution: in December 2013 the return type was changed to const - /// Elem* and this function was made const. You may need to change some types - /// of local Elem* variables to const if this produces compilation errors. - const Elem *GetList() const; - - /// Think of this like delete(). It is to be called for each Elem in turn - /// after you "obtained ownership" by doing Clear(). This is not the opposite - /// of. Insert, it is the opposite of New. It's really a memory operation. - inline void Delete(Elem *e); - - /// This should probably not be needed to be called directly by the user. - /// Think of it as opposite - /// to Delete(); - inline Elem *New(); - - /// Find tries to find this element in the current list using the hashtable. - /// It returns NULL if not present. The Elem it returns is not owned by the - /// user, it is part of the internal list owned by this object, but the user - /// is free to modify the "val" element. - inline Elem *Find(I key); - - /// Insert inserts a new element into the hashtable/stored list. - /// Because element keys in a hashtable are unique, this operation checks - /// whether each inserted element has a key equivalent to the one of an - /// element already in the hashtable. If so, the element is not inserted, - /// returning an pointer to this existing element. - inline Elem *Insert(I key, T val); - - /// Insert inserts another element with same key into the hashtable/ - /// stored list. - /// By calling this, the user asserts that one element with that key is - /// already present. - /// We insert it that way, that all elements with the same key - /// follow each other. - /// Find() will return the first one of the elements with the same key. - inline void InsertMore(I key, T val); - - /// SetSize tells the object how many hash buckets to allocate (should - /// typically be at least twice the number of objects we expect to go in the - /// structure, for fastest performance). It must be called while the hash - /// is empty (e.g. after Clear() or after initializing the object, but before - /// adding anything to the hash. - void SetSize(size_t sz); - - /// Returns current number of hash buckets. - inline size_t Size() { return hash_size_; } - - ~HashList(); - - private: - struct HashBucket { - size_t prev_bucket; // index to next bucket (-1 if list tail). Note: - // list of buckets goes in opposite direction to list of Elems. - Elem *last_elem; // pointer to last element in this bucket (NULL if empty) - inline HashBucket(size_t i, Elem *e) : prev_bucket(i), last_elem(e) {} - }; - - Elem *list_head_; // head of currently stored list. - size_t bucket_list_tail_; // tail of list of active hash buckets. - - size_t hash_size_; // number of hash buckets. - - std::vector buckets_; - - Elem *freed_head_; // head of list of currently freed elements. [ready for - // allocation] - - std::vector allocated_; // list of allocated blocks. - - static const size_t allocate_block_size_ = 1024; // Number of Elements to - // allocate in one block. Must be largish so storing allocated_ doesn't - // become a problem. 
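Putting the interface above together, a typical per-frame usage (the decoder pattern the header comment describes) looks roughly like this; the key/value types and sizes are illustrative only:

```cpp
#include "util/hash-list.h"

void OneFrame() {
  kaldi::HashList<int, float> toks;
  toks.SetSize(2048);                    // call while empty, ~2x expected entries
  toks.Insert(42, 1.5f);                 // new key: inserted
  kaldi::HashList<int, float>::Elem *e = toks.Find(42);
  if (e != nullptr) e->val = 0.7f;       // "val" may be modified in place
  // End of frame: take ownership of the list, then return each Elem to the pool.
  for (kaldi::HashList<int, float>::Elem *cur = toks.Clear(); cur != nullptr;) {
    kaldi::HashList<int, float>::Elem *next = cur->tail;
    toks.Delete(cur);
    cur = next;
  }
}
```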
-}; - -} // end namespace kaldi - -#include "util/hash-list-inl.h" - -#endif // KALDI_UTIL_HASH_LIST_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-io-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-io-inl.h deleted file mode 100644 index 8b0c92131c4af2113eb33da6f3cfa9dc4dee83e1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-io-inl.h +++ /dev/null @@ -1,40 +0,0 @@ -// util/kaldi-io-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_KALDI_IO_INL_H_ -#define KALDI_UTIL_KALDI_IO_INL_H_ - -#include - -namespace kaldi { - -bool Input::Open(const std::string &rxfilename, bool *binary) { - return OpenInternal(rxfilename, true, binary); -} - -bool Input::OpenTextMode(const std::string &rxfilename) { - return OpenInternal(rxfilename, false, NULL); -} - -bool Input::IsOpen() { return impl_ != NULL; } - -bool Output::IsOpen() { return impl_ != NULL; } - -} // end namespace kaldi. - -#endif // KALDI_UTIL_KALDI_IO_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-io.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-io.cc deleted file mode 100644 index 5f8ec4870138df32f6aca9c12383cf3885411741..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-io.cc +++ /dev/null @@ -1,898 +0,0 @@ -// util/kaldi-io.cc - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
-#include "util/kaldi-io.h" - -#include -#include -#include - -#include - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" -#include "util/kaldi-pipebuf.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -#ifdef KALDI_CYGWIN_COMPAT -#include "util/kaldi-cygwin-io-inl.h" -#define MapOsPath(x) MapCygwinPath(x) -#else // KALDI_CYGWIN_COMPAT -#define MapOsPath(x) x -#endif // KALDI_CYGWIN_COMPAT - -#if defined(_MSC_VER) -static FILE *popen(const char *command, const char *mode) { -#ifdef KALDI_CYGWIN_COMPAT - return kaldi::CygwinCompatPopen(command, mode); -#else // KALDI_CYGWIN_COMPAT - return _popen(command, mode); -#endif // KALDI_CYGWIN_COMPAT -} -#endif // _MSC_VER - -namespace kaldi { - -#ifndef _MSC_VER // on VS, we don't need this type. -// could replace basic_pipebuf with stdio_filebuf on some platforms. -// Would mean we could use less of our own code. -typedef basic_pipebuf PipebufType; -#endif -} // namespace kaldi - -namespace kaldi { - -std::string PrintableRxfilename(const std::string &rxfilename) { - if (rxfilename == "" || rxfilename == "-") { - return "standard input"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return rxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(rxfilename); - } -} - -std::string PrintableWxfilename(const std::string &wxfilename) { - if (wxfilename == "" || wxfilename == "-") { - return "standard output"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return wxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(wxfilename); - } -} - -OutputType ClassifyWxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardOutput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardOutput; - } else if (first_char == '|') { - return kPipeOutput; // An output pipe like "|blah". - } else if (isspace(first_char) || isspace(last_char) || last_char == '|') { - return kNoOutput; // Leading or trailing space: can't interpret this. - // Final '|' would represent an input pipe, not an - // output pipe. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoOutput; - } else if (isdigit(last_char)) { - // This could be a file, but we have to see if it's an offset into a file - // (like foo.ark:4314328), which is not allowed for writing (but is - // allowed for reaching). 
This eliminates some things which would be - // valid UNIX filenames but are not allowed by Kaldi. (Even if we allowed - // such filenames for writing, we woudln't be able to correctly read them). - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') return kNoOutput; - // else it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but we - // check for internal '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify wxfilename with pipe symbol in the" - " wrong place (pipe without | at the beginning?): " - << filename; - return kNoOutput; - } - return kFileOutput; // It matched no other pattern: assume it's a filename. -} - -InputType ClassifyRxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardInput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardInput; - } else if (first_char == '|') { - return kNoInput; // An output pipe like "|blah": not - // valid for input. - } else if (last_char == '|') { - return kPipeInput; - } else if (isspace(first_char) || isspace(last_char)) { - return kNoInput; // We don't allow leading or trailing space in a filename. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoInput; - } else if (isdigit(last_char)) { - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') - return kOffsetFileInput; // Filename is like - // some_file:12345 - // otherwise it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but - // we check for '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified in this case. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify rxfilename with pipe symbol in the" - " wrong place (pipe without | at the end?): " - << filename; - return kNoInput; - } - return kFileInput; // It matched no other pattern: assume it's a filename. -} - -class OutputImplBase { - public: - // Open will open it as a file (no header), and return true - // on success. It cannot be called on an already open stream. 
- virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::ostream &Stream() = 0; - virtual bool Close() = 0; - virtual ~OutputImplBase() {} -}; - -class FileOutputImpl : public OutputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (os_.is_open()) - KALDI_ERR << "FileOutputImpl::Open(), " - << "open called on already open file."; - filename_ = filename; - os_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out); - return os_.is_open(); - } - - virtual std::ostream &Stream() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return os_; - } - - virtual bool Close() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - os_.close(); - return !(os_.fail()); - } - virtual ~FileOutputImpl() { - if (os_.is_open()) { - os_.close(); - if (os_.fail()) KALDI_ERR << "Error closing output file " << filename_; - } - } - - private: - std::string filename_; - std::ofstream os_; -}; - -class StandardOutputImpl : public OutputImplBase { - public: - StandardOutputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardOutputImpl::Open(), " - "open called on already open file."; -#ifdef _MSC_VER - _setmode(_fileno(stdout), binary ? _O_BINARY : _O_TEXT); -#endif - is_open_ = std::cout.good(); - return is_open_; - } - - virtual std::ostream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return std::cout; - } - - virtual bool Close() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Close(), file is not open."; - is_open_ = false; - std::cout << std::flush; - return !(std::cout.fail()); - } - virtual ~StandardOutputImpl() { - if (is_open_) { - std::cout << std::flush; - if (std::cout.fail()) KALDI_ERR << "Error writing to standard output"; - } - } - - private: - bool is_open_; -}; - -class PipeOutputImpl : public OutputImplBase { - public: - PipeOutputImpl() : f_(NULL), os_(NULL) {} - - virtual bool Open(const std::string &wxfilename, bool binary) { - filename_ = wxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(wxfilename.length() != 0 && wxfilename[0] == '|'); // should - // start with '|' - std::string cmd_name(wxfilename, 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "wb" : "w")); -#else - f_ = popen(cmd_name.c_str(), "w"); -#endif - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for writing, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't make the - // destructor try to close the stream when - // we're done. - (binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - os_ = new std::ostream(fb_); -#else - os_ = new std::ofstream(f_); -#endif - return os_->good(); - } - } - - virtual std::ostream &Stream() { - if (os_ == NULL) - KALDI_ERR << "PipeOutputImpl::Stream()," - " object not initialized."; - // I believe this error can only arise from coding error. 
- return *os_; - } - - virtual bool Close() { - if (os_ == NULL) KALDI_ERR << "PipeOutputImpl::Close(), file is not open."; - bool ok = true; - os_->flush(); - if (os_->fail()) ok = false; - delete os_; - os_ = NULL; - int status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return ok; - } - virtual ~PipeOutputImpl() { - if (os_) { - if (!Close()) - KALDI_ERR << "Error writing to pipe " << PrintableWxfilename(filename_); - } - } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::ostream *os_; -}; - -class InputImplBase { - public: - // Open will open it as a file, and return true on success. - // May be called twice only for kOffsetFileInput (otherwise, - // if called twice, we just create a new Input object, to avoid - // having to deal with the extra hassle of reopening with the - // same object. - // Note that we will to call Open with true (binary) for - // for text-mode Kaldi files; the only actual text-mode input - // is for non-Kaldi files. - virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::istream &Stream() = 0; - virtual int32 Close() = 0; // We only need to check failure in the case of - // kPipeInput. - // on close for input streams. - virtual InputType MyType() = 0; // Because if it's kOffsetFileInput, we may - // call Open twice - // (has efficiency benefits). - - virtual ~InputImplBase() {} -}; - -class FileInputImpl : public InputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (is_.is_open()) - KALDI_ERR << "FileInputImpl::Open(), " - << "open called on already open file."; - is_.open( - MapOsPath(filename).c_str(), - binary ? std::ios_base::in | std::ios_base::binary : std::ios_base::in); - return is_.is_open(); - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kFileInput; } - - virtual ~FileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::ifstream is_; -}; - -class StandardInputImpl : public InputImplBase { - public: - StandardInputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardInputImpl::Open(), " - "open called on already open file."; - is_open_ = true; -#ifdef _MSC_VER - _setmode(_fileno(stdin), binary ? _O_BINARY : _O_TEXT); -#endif - return true; // Don't check good() because would be false if - // eof, which may be valid input. - } - - virtual std::istream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. 
- return std::cin; - } - - virtual InputType MyType() { return kStandardInput; } - - virtual int32 Close() { - if (!is_open_) KALDI_ERR << "StandardInputImpl::Close(), file is not open."; - is_open_ = false; - return 0; - } - virtual ~StandardInputImpl() {} - - private: - bool is_open_; -}; - -class PipeInputImpl : public InputImplBase { - public: - PipeInputImpl() : f_(NULL), is_(NULL) {} - - virtual bool Open(const std::string &rxfilename, bool binary) { - filename_ = rxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(rxfilename.length() != 0 && - rxfilename[rxfilename.length() - 1] == - '|'); // should end with '|' - std::string cmd_name(rxfilename, 0, rxfilename.length() - 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "rb" : "r")); -#else - f_ = popen(cmd_name.c_str(), "r"); -#endif - - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for reading, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't lead the - // destructor to close the stream. - (binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - is_ = new std::istream(fb_); -#else - is_ = new std::ifstream(f_); -#endif - if (is_->fail() || is_->bad()) return false; - if (is_->eof()) { - KALDI_WARN << "Pipe opened with command " - << PrintableRxfilename(rxfilename) << " is empty."; - // don't return false: empty may be valid. - } - return true; - } - } - - virtual std::istream &Stream() { - if (is_ == NULL) - KALDI_ERR << "PipeInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return *is_; - } - - virtual int32 Close() { - if (is_ == NULL) KALDI_ERR << "PipeInputImpl::Close(), file is not open."; - delete is_; - is_ = NULL; - int32 status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return status; - } - virtual ~PipeInputImpl() { - if (is_) Close(); - } - virtual InputType MyType() { return kPipeInput; } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::istream *is_; -}; - -/* -#else - -// Just have an empty implementation of the pipe input that crashes if -// called. -class PipeInputImpl: public InputImplBase { - public: - PipeInputImpl() { KALDI_ASSERT(0 && "Pipe input not yet supported on this - platform."); } - virtual bool Open(const std::string, bool) { return 0; } - virtual std::istream &Stream() const { return NULL; } - virtual void Close() {} - virtual InputType MyType() { return kPipeInput; } -}; - -#endif -*/ - -class OffsetFileInputImpl : public InputImplBase { - // This class is a bit more complicated than the - - public: - // splits a filename like /my/file:123 into /my/file and the - // number 123. Crashes if not this format. - static void SplitFilename(const std::string &rxfilename, - std::string *filename, size_t *offset) { - size_t pos = rxfilename.find_last_of(':'); - KALDI_ASSERT(pos != std::string::npos); // would indicate error in calling - // code, as the filename is supposed to be of the correct form at this - // point. 
- *filename = std::string(rxfilename, 0, pos); - std::string number(rxfilename, pos + 1); - bool ans = ConvertStringToInteger(number, offset); - if (!ans) - KALDI_ERR << "Cannot get offset from filename " << rxfilename - << " (possibly you compiled in 32-bit and have a >32-bit" - << " byte offset into a file; you'll have to compile 64-bit."; - } - - bool Seek(size_t offset) { - size_t cur_pos = is_.tellg(); - if (cur_pos == offset) { - return true; - } else if (cur_pos < offset && cur_pos + 100 > offset) { - // We're close enough that it may be faster to just - // read that data, rather than seek. - for (size_t i = cur_pos; i < offset; i++) is_.get(); - return (is_.tellg() == std::streampos(offset)); - } - // Try to actually seek. - is_.seekg(offset, std::ios_base::beg); - if (is_.fail()) { // failbit or badbit is set [error happened] - is_.close(); - return false; // failure. - } else { - is_.clear(); // Clear any failure bits (e.g. eof). - return true; // success. - } - } - - // This Open routine is unusual in that it is designed to work even - // if it was already open. This for efficiency when seeking multiple - // times. - virtual bool Open(const std::string &rxfilename, bool binary) { - if (is_.is_open()) { - // We are opening when we have an already-open file. - // We may have to seek within this file, or else close it and - // open a different one. - std::string tmp_filename; - size_t offset; - SplitFilename(rxfilename, &tmp_filename, &offset); - if (tmp_filename == filename_ && binary == binary_) { // Just seek - is_.clear(); // clear fail bit, etc. - return Seek(offset); - } else { - is_.close(); // don't bother checking error status of is_. - filename_ = tmp_filename; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } else { - size_t offset; - SplitFilename(rxfilename, &filename_, &offset); - binary_ = binary; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kOffsetFileInput; } - - virtual ~OffsetFileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::string filename_; // the actual filename - bool binary_; // true if was opened in binary mode. - std::ifstream is_; -}; - -Output::Output(const std::string &wxfilename, bool binary, bool write_header) - : impl_(NULL) { - if (!Open(wxfilename, binary, write_header)) { - if (impl_) { - delete impl_; - impl_ = NULL; - } - KALDI_ERR << "Error opening output stream " - << PrintableWxfilename(wxfilename); - } -} - -bool Output::Close() { - if (!impl_) { - return false; // error to call Close if not open. 
- } else { - bool ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } -} - -Output::~Output() { - if (impl_) { - bool ok = impl_->Close(); - delete impl_; - impl_ = NULL; - if (!ok) - KALDI_ERR << "Error closing output file " - << PrintableWxfilename(filename_) - << (ClassifyWxfilename(filename_) == kFileOutput - ? " (disk full?)" - : ""); - } -} - -std::ostream &Output::Stream() { // will throw if not open; else returns - // stream. - if (!impl_) KALDI_ERR << "Output::Stream() called but not open."; - return impl_->Stream(); -} - -bool Output::Open(const std::string &wxfn, bool binary, bool header) { - if (IsOpen()) { - if (!Close()) { // Throw here rather than return status, as it's an error - // about something else: if the user wanted to avoid the exception he/she - // could have called Close(). - KALDI_ERR << "Output::Open(), failed to close output stream: " - << PrintableWxfilename(filename_); - } - } - - filename_ = wxfn; - - OutputType type = ClassifyWxfilename(wxfn); - KALDI_ASSERT(impl_ == NULL); - - if (type == kFileOutput) { - impl_ = new FileOutputImpl(); - } else if (type == kStandardOutput) { - impl_ = new StandardOutputImpl(); - } else if (type == kPipeOutput) { - impl_ = new PipeOutputImpl(); - } else { // type == kNoOutput - KALDI_WARN << "Invalid output filename format " - << PrintableWxfilename(wxfn); - return false; - } - if (!impl_->Open(wxfn, binary)) { - delete impl_; - impl_ = NULL; - return false; // failed to open. - } else { // successfully opened it. - if (header) { - InitKaldiOutputStream(impl_->Stream(), binary); - bool ok = impl_->Stream().good(); // still OK? - if (!ok) { - delete impl_; - impl_ = NULL; - return false; - } - return true; - } else { - return true; - } - } -} - -Input::Input(const std::string &rxfilename, bool *binary) : impl_(NULL) { - if (!Open(rxfilename, binary)) { - KALDI_ERR << "Error opening input stream " - << PrintableRxfilename(rxfilename); - } -} - -int32 Input::Close() { - if (impl_) { - int32 ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } else { - return 0; - } -} - -bool Input::OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary) { - InputType type = ClassifyRxfilename(rxfilename); - if (IsOpen()) { - // May have to close the stream first. - if (type == kOffsetFileInput && impl_->MyType() == kOffsetFileInput) { - // We want to use the same object to Open... this is in case - // the files are the same, so we can just seek. - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always open in binary. - delete impl_; - impl_ = NULL; - return false; - } - // read the binary header, if requested. - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; - } else { - Close(); - // and fall through to code below which actually opens the file. - } - } - if (type == kFileInput) { - impl_ = new FileInputImpl(); - } else if (type == kStandardInput) { - impl_ = new StandardInputImpl(); - } else if (type == kPipeInput) { - impl_ = new PipeInputImpl(); - } else if (type == kOffsetFileInput) { - impl_ = new OffsetFileInputImpl(); - } else { // type == kNoInput - KALDI_WARN << "Invalid input filename format " - << PrintableRxfilename(rxfilename); - return false; - } - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always read in binary. 
- delete impl_; - impl_ = NULL; - return false; - } - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; -} - -Input::~Input() { - if (impl_) Close(); -} - -std::istream &Input::Stream() { - if (!IsOpen()) KALDI_ERR << "Input::Stream(), not open."; - return impl_->Stream(); -} - -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-io.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-io.h deleted file mode 100644 index 2175ca8f89ed5f3e3bade26528e924208df692c6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-io.h +++ /dev/null @@ -1,266 +0,0 @@ -// util/kaldi-io.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
-#ifndef KALDI_UTIL_KALDI_IO_H_ -#define KALDI_UTIL_KALDI_IO_H_ - -#ifdef _MSC_VER -#include -#include -#endif -#include // For isspace. -#include -#include -#include "base/kaldi-common.h" -// #include "matrix/kaldi-matrix.h" - -namespace kaldi { - -class OutputImplBase; // Forward decl; defined in a .cc file -class InputImplBase; // Forward decl; defined in a .cc file - -/// \addtogroup io_group -/// @{ - -// The Output and Input classes handle stream-opening for "extended" filenames -// that include actual files, standard-input/standard-output, pipes, and -// offsets into actual files. They also handle reading and writing the -// binary-mode headers for Kaldi files, where applicable. The classes have -// versions of the Open routines that throw and do not throw, depending whether -// the calling code wants to catch the errors or not; there are also versions -// that write (or do not write) the Kaldi binary-mode header that says if it's -// binary mode. Generally files that contain Kaldi objects will have the header -// on, so we know upon reading them whether they have the header. So you would -// use the OpenWithHeader routines for these (or the constructor); but other -// types of objects (e.g. FSTs) would have files without a header so you would -// use OpenNoHeader. - -// We now document the types of extended filenames that we use. -// -// A "wxfilename" is an extended filename for writing. It can take three forms: -// (1) Filename: e.g. "/some/filename", "./a/b/c", "c:\Users\dpovey\My -// Documents\\boo" -// (whatever the actual file-system interprets) -// (2) Standard output: "" or "-" -// (3) A pipe: e.g. "| gzip -c > /tmp/abc.gz" -// -// -// A "rxfilename" is an extended filename for reading. It can take four forms: -// (1) An actual filename, whatever the file-system can read, e.g. "/my/file". -// (2) Standard input: "" or "-" -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) An offset into a file, e.g.: "/mnt/blah/data/1.ark:24871" -// [these are created by the Table and TableWriter classes; I may also write -// a program that creates them for arbitrary files] -// - -// Typical usage: -// ... -// bool binary; -// MyObject.Write(Output(some_filename, binary).Stream(), binary); -// -// ... more extensive example: -// { -// Output ko(some_filename, binary); -// MyObject1.Write(ko.Stream(), binary); -// MyObject2.Write(ko.Stream(), binary); -// } - -enum OutputType { kNoOutput, kFileOutput, kStandardOutput, kPipeOutput }; - -/// ClassifyWxfilename interprets filenames as follows: -/// - kNoOutput: invalid filenames (leading or trailing space, things that look -/// like wspecifiers and rspecifiers or like pipes to read from with leading -/// |. -/// - kFileOutput: Normal filenames -/// - kStandardOutput: The empty string or "-", interpreted as standard output -/// - kPipeOutput: pipes, e.g. "| gzip -c > /tmp/abc.gz" -OutputType ClassifyWxfilename(const std::string &wxfilename); - -enum InputType { - kNoInput, - kFileInput, - kStandardInput, - kOffsetFileInput, - kPipeInput -}; - -/// ClassifyRxfilenames interprets filenames for reading as follows: -/// - kNoInput: invalid filenames (leading or trailing space, things that -/// look like wspecifiers and rspecifiers or pipes to write to -/// with trailing |. -/// - kFileInput: normal filenames -/// - kStandardInput: the empty string or "-" -/// - kPipeInput: e.g. "gunzip -c /tmp/abc.gz |" -/// - kOffsetFileInput: offsets into files, e.g. 
/some/filename:12970 -InputType ClassifyRxfilename(const std::string &rxfilename); - -class Output { - public: - // The normal constructor, provided for convenience. - // Equivalent to calling with default constructor then Open() - // with these arguments. - Output(const std::string &filename, bool binary, bool write_header = true); - - Output() : impl_(NULL) {} - - /// This opens the stream, with the given mode (binary or text). It returns - /// true on success and false on failure. However, it will throw if something - /// was already open and could not be closed (to avoid this, call Close() - /// first. if write_header == true and binary == true, it writes the Kaldi - /// binary-mode header ('\0' then 'B'). You may call Open even if it is - /// already open; it will close the existing stream and reopen (however if - /// closing the old stream failed it will throw). - bool Open(const std::string &wxfilename, bool binary, bool write_header); - - inline bool IsOpen(); // return true if we have an open stream. Does not - // imply stream is good for writing. - - std::ostream &Stream(); // will throw if not open; else returns stream. - - // Close closes the stream. Calling Close is never necessary unless you - // want to avoid exceptions being thrown. There are times when calling - // Close will hurt efficiency (basically, when using offsets into files, - // and using the same Input object), - // but most of the time the user won't be doing this directly, it will - // be done in kaldi-table.{h, cc}, so you don't have to worry about it. - bool Close(); - - // This will throw if stream could not be closed (to check error status, - // call Close()). - ~Output(); - - private: - OutputImplBase *impl_; // non-NULL if open. - std::string filename_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Output); -}; - -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject.Read(ki.Stream(), binary_in); -// -// ... more extensive example: -// -// { -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject1.Read(ki.Stream(), &binary_in); -// MyObject2.Write(ki.Stream(), &binary_in); -// } -// Note that to catch errors you need to use try.. catch. -// Input communicates errors by throwing exceptions. - -// Input interprets four kinds of filenames: -// (1) Normal filenames -// (2) The empty string or "-", interpreted as standard output -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) Offsets into [real] files, e.g. "/my/filename:12049" -// The last one has no correspondence in Output. - -class Input { - public: - /// The normal constructor. Opens the stream in binary mode. - /// Equivalent to calling the default constructor followed by Open(); then, if - /// binary != NULL, it calls ReadHeader(), putting the output in "binary"; it - /// throws on error. - explicit Input(const std::string &rxfilename, bool *contents_binary = NULL); - - Input() : impl_(NULL) {} - - // Open opens the stream for reading (the mode, where relevant, is binary; use - // OpenTextMode for text-mode, we made this a separate function rather than a - // boolean argument, to avoid confusion with Kaldi's text/binary distinction, - // since reading in the file system's text mode is unusual.) If - // contents_binary != NULL, it reads the binary-mode header and puts it in the - // "binary" variable. Returns true on success. If it returns false it will - // not be open. 
You may call Open even if it is already open; it will close - // the existing stream and reopen (however if closing the old stream failed it - // will throw). - inline bool Open(const std::string &rxfilename, bool *contents_binary = NULL); - - // As Open but (if the file system has text/binary modes) opens in text mode; - // you shouldn't ever have to use this as in Kaldi we read even text files in - // binary mode (and ignore the \r). - inline bool OpenTextMode(const std::string &rxfilename); - - // Return true if currently open for reading and Stream() will - // succeed. Does not guarantee that the stream is good. - inline bool IsOpen(); - - // It is never necessary or helpful to call Close, except if - // you are concerned about to many filehandles being open. - // Close does not throw. It returns the exit code as int32 - // in the case of a pipe [kPipeInput], and always zero otherwise. - int32 Close(); - - // Returns the underlying stream. Throws if !IsOpen() - std::istream &Stream(); - - // Destructor does not throw: input streams may legitimately fail so we - // don't worry about the status when we close them. - ~Input(); - - private: - bool OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary); - InputImplBase *impl_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Input); -}; - -template -void ReadKaldiObject(const std::string &filename, C *c) { - bool binary_in; - Input ki(filename, &binary_in); - c->Read(ki.Stream(), binary_in); -} - -// Specialize the template for reading matrices, because we want to be able to -// support reading 'ranges' (row and column ranges), like foo.mat[10:20]. -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); -// -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); - -template -inline void WriteKaldiObject(const C &c, const std::string &filename, - bool binary) { - Output ko(filename, binary); - c.Write(ko.Stream(), binary); -} - -/// PrintableRxfilename turns the rxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard input". -std::string PrintableRxfilename(const std::string &rxfilename); - -/// PrintableWxfilename turns the wxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard output". -std::string PrintableWxfilename(const std::string &wxfilename); - -/// @} - -} // end namespace kaldi. - -#include "util/kaldi-io-inl.h" - -#endif // KALDI_UTIL_KALDI_IO_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-pipebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-pipebuf.h deleted file mode 100644 index bcee80ccb1a6fa8ce3195483ac144c5ff66d2f89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/kaldi-pipebuf.h +++ /dev/null @@ -1,86 +0,0 @@ -// util/kaldi-pipebuf.h - -// Copyright 2009-2011 Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -/** @file kaldi-pipebuf.h - * This is an Kaldi C++ Library header. - */ - -#ifndef KALDI_UTIL_KALDI_PIPEBUF_H_ -#define KALDI_UTIL_KALDI_PIPEBUF_H_ - -#include -#if !defined(_LIBCPP_VERSION) // libc++ -#include -#else -#include "util/basic-filebuf.h" -#endif - -namespace kaldi { -// This class provides a way to initialize a filebuf with a FILE* pointer -// directly; it will not close the file pointer when it is deleted. -// The C++ standard does not allow implementations of C++ to provide -// this constructor within basic_filebuf, which makes it hard to deal -// with pipes using completely native C++. This is a workaround - -#ifdef _MSC_VER -#elif defined(_LIBCPP_VERSION) // libc++ -template > -class basic_pipebuf : public basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : basic_filebuf() { - this->open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - } -}; // class basic_pipebuf -#else -template > -class basic_pipebuf : public std::basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : std::basic_filebuf() { - this->_M_file.sys_open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - this->_M_mode = mode; - this->_M_buf_size = BUFSIZ; - this->_M_allocate_internal_buffer(); - this->_M_reading = false; - this->_M_writing = false; - this->_M_set_buffer(-1); - } -}; // class basic_pipebuf -#endif // _MSC_VER - -} // namespace kaldi - -#endif // KALDI_UTIL_KALDI_PIPEBUF_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/parse-options.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/parse-options.cc deleted file mode 100644 index 1f2ef844d28d67ed58d2e0c9d7c7b674e8209df8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/parse-options.cc +++ /dev/null @@ -1,636 +0,0 @@ -// util/parse-options.cc - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey); -// Frantisek Skala; Arnab Ghoshal -// Copyright 2013 Tanel Alumae -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -namespace kaldi { - -ParseOptions::ParseOptions(const std::string &prefix, OptionsItf *other) - : print_args_(false), help_(false), usage_(""), argc_(0), argv_(NULL) { - ParseOptions *po = dynamic_cast(other); - if (po != NULL && po->other_parser_ != NULL) { - // we get here if this constructor is used twice, recursively. - other_parser_ = po->other_parser_; - } else { - other_parser_ = other; - } - if (po != NULL && po->prefix_ != "") { - prefix_ = po->prefix_ + std::string(".") + prefix; - } else { - prefix_ = prefix; - } -} - -void ParseOptions::Register(const std::string &name, bool *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, int32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, uint32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, float *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, double *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, std::string *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -// old-style, used for registering application-specific parameters -template -void ParseOptions::RegisterTmpl(const std::string &name, T *ptr, - const std::string &doc) { - if (other_parser_ == NULL) { - this->RegisterCommon(name, ptr, doc, false); - } else { - KALDI_ASSERT(prefix_ != "" && - "Cannot use empty prefix when registering with prefix."); - std::string new_name = prefix_ + '.' + name; // name becomes prefix.name - other_parser_->Register(new_name, ptr, doc); - } -} - -// does the common part of the job of registering a parameter -template -void ParseOptions::RegisterCommon(const std::string &name, T *ptr, - const std::string &doc, bool is_standard) { - KALDI_ASSERT(ptr != NULL); - std::string idx = name; - NormalizeArgName(&idx); - if (doc_map_.find(idx) != doc_map_.end()) - KALDI_WARN << "Registering option twice, ignoring second time: " << name; - this->RegisterSpecific(name, idx, ptr, doc, is_standard); -} - -// used to register standard parameters (those that are present in all of the -// applications) -template -void ParseOptions::RegisterStandard(const std::string &name, T *ptr, - const std::string &doc) { - this->RegisterCommon(name, ptr, doc, true); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, bool *b, - const std::string &doc, bool is_standard) { - bool_map_[idx] = b; - doc_map_[idx] = - DocInfo(name, doc + " (bool, default = " + ((*b) ? 
"true)" : "false)"), - is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, int32 *i, - const std::string &doc, bool is_standard) { - int_map_[idx] = i; - std::ostringstream ss; - ss << doc << " (int, default = " << *i << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, uint32 *u, - const std::string &doc, bool is_standard) { - uint_map_[idx] = u; - std::ostringstream ss; - ss << doc << " (uint, default = " << *u << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, float *f, - const std::string &doc, bool is_standard) { - float_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (float, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, double *f, - const std::string &doc, bool is_standard) { - double_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (double, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, std::string *s, - const std::string &doc, bool is_standard) { - string_map_[idx] = s; - doc_map_[idx] = - DocInfo(name, doc + " (string, default = \"" + *s + "\")", is_standard); -} -void ParseOptions::DisableOption(const std::string &name) { - if (argv_ != NULL) - KALDI_ERR << "DisableOption must not be called after calling Read()."; - if (doc_map_.erase(name) == 0) - KALDI_ERR << "Option " << name - << " was not registered so cannot be disabled: "; - bool_map_.erase(name); - int_map_.erase(name); - uint_map_.erase(name); - float_map_.erase(name); - double_map_.erase(name); - string_map_.erase(name); -} - -int ParseOptions::NumArgs() const { return positional_args_.size(); } - -std::string ParseOptions::GetArg(int i) const { - // use KALDI_ERR if code error - if (i < 1 || i > static_cast(positional_args_.size())) - KALDI_ERR << "ParseOptions::GetArg, invalid index " << i; - return positional_args_[i - 1]; -} - -// We currently do not support any other options. -enum ShellType { kBash = 0 }; - -// This can be changed in the code if it ever does need to be changed (as it's -// unlikely that one compilation of this tool-set would use both shells). -static ShellType kShellType = kBash; - -// Returns true if we need to escape a string before putting it into -// a shell (mainly thinking of bash shell, but should work for others) -// This is for the convenience of the user so command-lines that are -// printed out by ParseOptions::Read (with --print-args=true) are -// paste-able into the shell and will run. If you use a different type of -// shell, it might be necessary to change this function. -// But it's mostly a cosmetic issue as it basically affects how -// the program echoes its command-line arguments to the screen. -static bool MustBeQuoted(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - const char *c = str.c_str(); - if (*c == '\0') { - return true; // Must quote empty string - } else { - const char *ok_chars[2]; - - // These seem not to be interpreted as long as there are no other "bad" - // characters involved (e.g. 
"," would be interpreted as part of something - // like a{b,c}, but not on its own. - ok_chars[kBash] = "[]~#^_-+=:.,/"; - - // Just want to make sure that a space character doesn't get automatically - // inserted here via an automated style-checking script, like it did before. - KALDI_ASSERT(!strchr(ok_chars[kBash], ' ')); - - for (; *c != '\0'; c++) { - // For non-alphanumeric characters we have a list of characters which - // are OK. All others are forbidden (this is easier since the shell - // interprets most non-alphanumeric characters). - if (!isalnum(*c)) { - const char *d; - for (d = ok_chars[st]; *d != '\0'; d++) - if (*c == *d) break; - // If not alphanumeric or one of the "ok_chars", it must be escaped. - if (*d == '\0') return true; - } - } - return false; // The string was OK. No quoting or escaping. - } -} - -// Returns a quoted and escaped version of "str" -// which has previously been determined to need escaping. -// Our aim is to print out the command line in such a way that if it's -// pasted into a shell of ShellType "st" (only bash for now), it -// will get passed to the program in the same way. -static std::string QuoteAndEscape(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - // For now we use the following rules: - // In the normal case, we quote with single-quote "'", and to escape - // a single-quote we use the string: '\'' (interpreted as closing the - // single-quote, putting an escaped single-quote from the shell, and - // then reopening the single quote). - char quote_char = '\''; - const char *escape_str = "'\\''"; // e.g. echo 'a'\''b' returns a'b - - // If the string contains single-quotes that would need escaping this - // way, and we determine that the string could be safely double-quoted - // without requiring any escaping, then we double-quote the string. - // This is the case if the characters "`$\ do not appear in the string. - // e.g. see http://www.redhat.com/mirrors/LDP/LDP/abs/html/quotingvar.html - const char *c_str = str.c_str(); - if (strchr(c_str, '\'') && !strpbrk(c_str, "\"`$\\")) { - quote_char = '"'; - escape_str = "\\\""; // should never be accessed. - } - - char buf[2]; - buf[1] = '\0'; - - buf[0] = quote_char; - std::string ans = buf; - const char *c = str.c_str(); - for (; *c != '\0'; c++) { - if (*c == quote_char) { - ans += escape_str; - } else { - buf[0] = *c; - ans += buf; - } - } - buf[0] = quote_char; - ans += buf; - return ans; -} - -// static function -std::string ParseOptions::Escape(const std::string &str) { - return MustBeQuoted(str, kShellType) ? QuoteAndEscape(str, kShellType) : str; -} - -int ParseOptions::Read(int argc, const char *const argv[]) { - argc_ = argc; - argv_ = argv; - std::string key, value; - int i; - if (argc > 0) { - // set global "const char*" g_program_name (name of the program) - // so it can be printed out in error messages; - // it's useful because often the stderr of different programs will - // be mixed together in the same log file. -#ifdef _MSC_VER - const char *c = strrchr(argv[0], '\\'); -#else - const char *c = strrchr(argv[0], '/'); -#endif - SetProgramName(c == NULL ? 
argv[0] : c + 1); - } - // first pass: look for config parameter, look for priority - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // a lone "--" marks the end of named options - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (key.compare("config") == 0) { - ReadConfigFile(value); - } - if (key.compare("help") == 0) { - PrintUsage(); - exit(0); - } - } - } - bool double_dash_seen = false; - // second pass: add the command line options - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // A lone "--" marks the end of named options. - // Skip that option and break the processing of named options - i += 1; - double_dash_seen = true; - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << argv[i]; - } - } else { - break; - } - } - - // process remaining arguments as positional - for (; i < argc; i++) { - if ((std::strcmp(argv[i], "--") == 0) && !double_dash_seen) { - double_dash_seen = true; - } else { - positional_args_.push_back(std::string(argv[i])); - } - } - - // if the user did not suppress this with --print-args = false.... - if (print_args_) { - std::ostringstream strm; - for (int j = 0; j < argc; j++) strm << Escape(argv[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } - return i; -} - -void ParseOptions::PrintUsage(bool print_command_line) { - std::cerr << '\n' << usage_ << '\n'; - DocMapType::iterator it; - // first we print application-specific options - bool app_specific_header_printed = false; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == false) { // application-specific option - if (app_specific_header_printed == false) { // header was not yet printed - std::cerr << "Options:" << '\n'; - app_specific_header_printed = true; - } - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - if (app_specific_header_printed == true) { - std::cerr << '\n'; - } - - // then the standard options - std::cerr << "Standard options:" << '\n'; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == true) { // we have standard option - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - std::cerr << '\n'; - if (print_command_line) { - std::ostringstream strm; - strm << "Command line was: "; - for (int j = 0; j < argc_; j++) strm << Escape(argv_[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } -} - -void ParseOptions::PrintConfig(std::ostream &os) { - os << '\n' << "[[ Configuration of UI-Registered options ]]" << '\n'; - std::string key; - DocMapType::iterator it; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - key = it->first; - os << it->second.name_ << " = "; - if (bool_map_.end() != bool_map_.find(key)) { - os << (*bool_map_[key] ? 
"true" : "false"); - } else if (int_map_.end() != int_map_.find(key)) { - os << (*int_map_[key]); - } else if (uint_map_.end() != uint_map_.find(key)) { - os << (*uint_map_[key]); - } else if (float_map_.end() != float_map_.find(key)) { - os << (*float_map_[key]); - } else if (double_map_.end() != double_map_.find(key)) { - os << (*double_map_[key]); - } else if (string_map_.end() != string_map_.find(key)) { - os << "'" << *string_map_[key] << "'"; - } else { - KALDI_ERR << "PrintConfig: unrecognized option " << key << "[code error]"; - } - os << '\n'; - } - os << '\n'; -} - -void ParseOptions::ReadConfigFile(const std::string &filename) { - std::ifstream is(filename.c_str(), std::ifstream::in); - if (!is.good()) { - KALDI_ERR << "Cannot open config file: " << filename; - } - - std::string line, key, value; - int32 line_number = 0; - while (std::getline(is, line)) { - line_number++; - // trim out the comments - size_t pos; - if ((pos = line.find_first_of('#')) != std::string::npos) { - line.erase(pos); - } - // skip empty lines - Trim(&line); - if (line.length() == 0) continue; - - if (line.substr(0, 2) != "--") { - KALDI_ERR << "Reading config file " << filename << ": line " - << line_number << " does not look like a line " - << "from a Kaldi command-line program's config file: should " - << "be of the form --x=y. Note: config files intended to " - << "be sourced by shell scripts lack the '--'."; - } - - // parse option - bool has_equal_sign; - SplitLongArg(line, &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << line << " in config file " << filename; - } - } -} - -void ParseOptions::SplitLongArg(const std::string &in, std::string *key, - std::string *value, bool *has_equal_sign) { - KALDI_ASSERT(in.substr(0, 2) == "--"); // precondition. - size_t pos = in.find_first_of('=', 0); - if (pos == std::string::npos) { // we allow --option for bools - // defaults to empty. We handle this differently in different cases. - *key = in.substr(2, in.size() - 2); // 2 because starts with --. - *value = ""; - *has_equal_sign = false; - } else if (pos == 2) { // we also don't allow empty keys: --=value - PrintUsage(true); - KALDI_ERR << "Invalid option (no key): " << in; - } else { // normal case: --option=value - *key = in.substr(2, pos - 2); // 2 because starts with --. 
- *value = in.substr(pos + 1); - *has_equal_sign = true; - } -} - -void ParseOptions::NormalizeArgName(std::string *str) { - std::string out; - std::string::iterator it; - - for (it = str->begin(); it != str->end(); ++it) { - if (*it == '_') - out += '-'; // convert _ to - - else - out += std::tolower(*it); - } - *str = out; - - KALDI_ASSERT(str->length() > 0); -} - -bool ParseOptions::SetOption(const std::string &key, const std::string &value, - bool has_equal_sign) { - if (bool_map_.end() != bool_map_.find(key)) { - if (has_equal_sign && value == "") - KALDI_ERR << "Invalid option --" << key << "="; - *(bool_map_[key]) = ToBool(value); - } else if (int_map_.end() != int_map_.find(key)) { - *(int_map_[key]) = ToInt(value); - } else if (uint_map_.end() != uint_map_.find(key)) { - *(uint_map_[key]) = ToUint(value); - } else if (float_map_.end() != float_map_.find(key)) { - *(float_map_[key]) = ToFloat(value); - } else if (double_map_.end() != double_map_.find(key)) { - *(double_map_[key]) = ToDouble(value); - } else if (string_map_.end() != string_map_.find(key)) { - if (!has_equal_sign) - KALDI_ERR << "Invalid option --" << key << " (option format is --x=y)."; - *(string_map_[key]) = value; - } else { - return false; - } - return true; -} - -bool ParseOptions::ToBool(std::string str) { - std::transform(str.begin(), str.end(), str.begin(), ::tolower); - - // allow "" as a valid option for "true", so that --x is the same as --x=true - if ((str.compare("true") == 0) || (str.compare("t") == 0) || - (str.compare("1") == 0) || (str.compare("") == 0)) { - return true; - } - if ((str.compare("false") == 0) || (str.compare("f") == 0) || - (str.compare("0") == 0)) { - return false; - } - // if it is neither true nor false: - PrintUsage(true); - KALDI_ERR << "Invalid format for boolean argument [expected true or false]: " - << str; - return false; // never reached -} - -int32 ParseOptions::ToInt(const std::string &str) { - int32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -uint32 ParseOptions::ToUint(const std::string &str) { - uint32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -float ParseOptions::ToFloat(const std::string &str) { - float ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -double ParseOptions::ToDouble(const std::string &str) { - double ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -// instantiate templates -template void ParseOptions::RegisterTmpl(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, int32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, float *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, double *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterStandard(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - int32 *ptr, - const std::string &doc); 
-template void ParseOptions::RegisterStandard(const std::string &name, - uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - float *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - double *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterCommon(const std::string &name, bool *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, int32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, uint32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, float *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, double *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, - std::string *ptr, - const std::string &doc, - bool is_standard); - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/parse-options.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/parse-options.h deleted file mode 100644 index 93a060f4a411dfd63298a91bb313e0b66d337a75..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/parse-options.h +++ /dev/null @@ -1,265 +0,0 @@ -// util/parse-options.h - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Frantisek Skala; Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_PARSE_OPTIONS_H_ -#define KALDI_UTIL_PARSE_OPTIONS_H_ - -#include -#include -#include - -#include "base/kaldi-common.h" -#include "itf/options-itf.h" - -namespace kaldi { - -/// The class ParseOptions is for parsing command-line options; see -/// \ref parse_options for more documentation. -class ParseOptions : public OptionsItf { - public: - explicit ParseOptions(const char *usage) - : print_args_(true), - help_(false), - usage_(usage), - argc_(0), - argv_(NULL), - prefix_(""), - other_parser_(NULL) { -#if !defined(_MSC_VER) && \ - !defined(__CYGWIN__) // This is just a convenient place to set the stderr - // to line - setlinebuf(stderr); // buffering mode, since it's called at program start. -#endif // This helps ensure different programs' output is not mixed up. 
- RegisterStandard("config", &config_, - "Configuration file to read (this " - "option may be repeated)"); - RegisterStandard("print-args", &print_args_, - "Print the command line arguments (to stderr)"); - RegisterStandard("help", &help_, "Print out usage message"); - RegisterStandard("verbose", &g_kaldi_verbose_level, - "Verbose level (higher->more logging)"); - } - - /** - This is a constructor for the special case where some options are - registered with a prefix to avoid conflicts. The object thus created will - only be used temporarily to register an options class with the original - options parser (which is passed as the *other pointer) using the given - prefix. It should not be used for any other purpose, and the prefix must - not be the empty string. It seems to be the least bad way of implementing - options with prefixes at this point. - Example of usage is: - ParseOptions po; // original ParseOptions object - ParseOptions po_mfcc("mfcc", &po); // object with prefix. - MfccOptions mfcc_opts; - mfcc_opts.Register(&po_mfcc); - The options will now get registered as, e.g., --mfcc.frame-shift=10.0 - instead of just --frame-shift=10.0 - */ - ParseOptions(const std::string &prefix, OptionsItf *other); - - ~ParseOptions() {} - - // Methods from the interface - void Register(const std::string &name, bool *ptr, const std::string &doc); - void Register(const std::string &name, int32 *ptr, const std::string &doc); - void Register(const std::string &name, uint32 *ptr, const std::string &doc); - void Register(const std::string &name, float *ptr, const std::string &doc); - void Register(const std::string &name, double *ptr, const std::string &doc); - void Register(const std::string &name, std::string *ptr, - const std::string &doc); - - /// If called after registering an option and before calling - /// Read(), disables that option from being used. Will crash - /// at runtime if that option had not been registered. - void DisableOption(const std::string &name); - - /// This one is used for registering standard parameters of all the programs - template - void RegisterStandard(const std::string &name, T *ptr, - const std::string &doc); - - /** - Parses the command line options and fills the ParseOptions-registered - variables. This must be called after all the variables were registered!!! - - Initially the variables have implicit values, - then the config file values are set-up, - finally the command line values given. - Returns the first position in argv that was not used. - [typically not useful: use NumParams() and GetParam(). ] - */ - int Read(int argc, const char *const *argv); - - /// Prints the usage documentation [provided in the constructor]. - void PrintUsage(bool print_command_line = false); - /// Prints the actual configuration of all the registered variables - void PrintConfig(std::ostream &os); - - /// Reads the options values from a config file. Must be called after - /// registering all options. This is usually used internally after the - /// standard --config option is used, but it may also be called from a - /// program. - void ReadConfigFile(const std::string &filename); - - /// Number of positional parameters (c.f. argc-1). - int NumArgs() const; - - /// Returns one of the positional parameters; 1-based indexing for argc/argv - /// compatibility. Will crash if param is not >=1 and <=NumArgs(). - std::string GetArg(int param) const; - - std::string GetOptArg(int param) const { - return (param <= NumArgs() ? 
GetArg(param) : ""); - } - - /// The following function will return a possibly quoted and escaped - /// version of "str", according to the current shell. Currently - /// this is just hardwired to bash. It's useful for debug output. - static std::string Escape(const std::string &str); - - private: - /// Template to register various variable types, - /// used for program-specific parameters - template - void RegisterTmpl(const std::string &name, T *ptr, const std::string &doc); - - // Following functions do just the datatype-specific part of the job - /// Register boolean variable - void RegisterSpecific(const std::string &name, const std::string &idx, - bool *b, const std::string &doc, bool is_standard); - /// Register int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - int32 *i, const std::string &doc, bool is_standard); - /// Register unsinged int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - uint32 *u, const std::string &doc, bool is_standard); - /// Register float variable - void RegisterSpecific(const std::string &name, const std::string &idx, - float *f, const std::string &doc, bool is_standard); - /// Register double variable [useful as we change BaseFloat type]. - void RegisterSpecific(const std::string &name, const std::string &idx, - double *f, const std::string &doc, bool is_standard); - /// Register string variable - void RegisterSpecific(const std::string &name, const std::string &idx, - std::string *s, const std::string &doc, - bool is_standard); - - /// Does the actual job for both kinds of parameters - /// Does the common part of the job for all datatypes, - /// then calls RegisterSpecific - template - void RegisterCommon(const std::string &name, T *ptr, const std::string &doc, - bool is_standard); - - /// Set option with name "key" to "value"; will crash if can't do it. - /// "has_equal_sign" is used to allow --x for a boolean option x, - /// and --y=, for a string option y. - bool SetOption(const std::string &key, const std::string &value, - bool has_equal_sign); - - bool ToBool(std::string str); - int32 ToInt(const std::string &str); - uint32 ToUint(const std::string &str); - float ToFloat(const std::string &str); - double ToDouble(const std::string &str); - - // maps for option variables - std::map bool_map_; - std::map int_map_; - std::map uint_map_; - std::map float_map_; - std::map double_map_; - std::map string_map_; - - /** - Structure for options' documentation - */ - struct DocInfo { - DocInfo() {} - DocInfo(const std::string &name, const std::string &usemsg) - : name_(name), use_msg_(usemsg), is_standard_(false) {} - DocInfo(const std::string &name, const std::string &usemsg, - bool is_standard) - : name_(name), use_msg_(usemsg), is_standard_(is_standard) {} - - std::string name_; - std::string use_msg_; - bool is_standard_; - }; - typedef std::map DocMapType; - DocMapType doc_map_; ///< map for the documentation - - bool print_args_; ///< variable for the implicit --print-args parameter - bool help_; ///< variable for the implicit --help parameter - std::string config_; ///< variable for the implicit --config parameter - std::vector positional_args_; - const char *usage_; - int argc_; - const char *const *argv_; - - /// These members are not normally used. 
They are only used when the object - /// is constructed with a prefix - std::string prefix_; - OptionsItf *other_parser_; - - protected: - /// SplitLongArg parses an argument of the form --a=b, --a=, or --a, - /// and sets "has_equal_sign" to true if an equals-sign was parsed.. - /// this is needed in order to correctly allow --x for a boolean option - /// x, and --y= for a string option y, and to disallow --x= and --y. - void SplitLongArg(const std::string &in, std::string *key, std::string *value, - bool *has_equal_sign); - - void NormalizeArgName(std::string *str); -}; - -/// This template is provided for convenience in reading config classes from -/// files; this is not the standard way to read configuration options, but may -/// occasionally be needed. This function assumes the config has a function -/// "void Register(OptionsItf *opts)" which it can call to register the -/// ParseOptions object. -template -void ReadConfigFromFile(const std::string &config_filename, C *c) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << config_filename << "'"; - ParseOptions po(usage_str.str().c_str()); - c->Register(&po); - po.ReadConfigFile(config_filename); -} - -/// This variant of the template ReadConfigFromFile is for if you need to read -/// two config classes from the same file. -template -void ReadConfigsFromFile(const std::string &conf, C1 *c1, C2 *c2) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << conf << "'"; - ParseOptions po(usage_str.str().c_str()); - c1->Register(&po); - c2->Register(&po); - po.ReadConfigFile(conf); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_PARSE_OPTIONS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/simple-io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/simple-io-funcs.cc deleted file mode 100644 index 5ace601b6a2bb186dec78b0b25cb5a3227c48bc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/simple-io-funcs.cc +++ /dev/null @@ -1,80 +0,0 @@ -// util/simple-io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#include "util/simple-io-funcs.h" -#include "util/text-utils.h" - -namespace kaldi { - -bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. 
- if (!ko.Open(wxfilename, false, false)) return false; - for (size_t i = 0; i < list.size(); i++) ko.Stream() << list[i] << '\n'; - return ko.Close(); -} - -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - int32 i; - list->clear(); - while (!(is >> i).fail()) list->push_back(i); - is >> std::ws; - return is.eof(); // should be eof, or junk at end of file. -} - -bool WriteIntegerVectorVectorSimple( - const std::string &wxfilename, - const std::vector > &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. - if (!ko.Open(wxfilename, false, false)) return false; - std::ostream &os = ko.Stream(); - for (size_t i = 0; i < list.size(); i++) { - for (size_t j = 0; j < list[i].size(); j++) { - os << list[i][j]; - if (j + 1 < list[i].size()) os << ' '; - } - os << '\n'; - } - return ko.Close(); -} - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - list->clear(); - std::string line; - while (std::getline(is, line)) { - std::vector v; - if (!SplitStringToIntegers(line, " \t\r", true, &v)) { - list->clear(); - return false; - } - list->push_back(v); - } - return is.eof(); // if we're not at EOF, something weird happened. -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/simple-io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/simple-io-funcs.h deleted file mode 100644 index 1ead12790ba9bd6a44ccdff855918270191b8ebd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/simple-io-funcs.h +++ /dev/null @@ -1,61 +0,0 @@ -// util/simple-io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_SIMPLE_IO_FUNCS_H_ -#define KALDI_UTIL_SIMPLE_IO_FUNCS_H_ - -#include -#include -#include "util/kaldi-io.h" - -// This header contains some utilities for reading some common, simple text -// formats:integers in files, one per line, and integers in files, possibly -// multiple per line. these are not really fully native Kaldi formats; they are -// mostly for small files that might be generated by scripts, and can be read -// all at one time. for longer files of this type, we would probably use the -// Table code. - -namespace kaldi { - -/// WriteToList attempts to write this list of integers, one per line, -/// to the given file, in text format. -/// returns true if succeeded. 
-bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &v); - -/// ReadFromList attempts to read this list of integers, one per line, -/// from the given file, in text format. -/// returns true if succeeded. -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *v); - -// This is a file format like: -// 1 2 -// 3 -// -// 4 5 6 -// etc. -bool WriteIntegerVectorVectorSimple(const std::string &wxfilename, - const std::vector > &v); - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *v); - -} // end namespace kaldi. - -#endif // KALDI_UTIL_SIMPLE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/stl-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/stl-utils.h deleted file mode 100644 index 8a29cd582c77b3078277aa9713b8676032bbc5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/stl-utils.h +++ /dev/null @@ -1,310 +0,0 @@ -// util/stl-utils.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_STL_UTILS_H_ -#define KALDI_UTIL_STL_UTILS_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -using std::unordered_map; -using std::unordered_set; - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Sorts and uniq's (removes duplicates) from a vector. -template -inline void SortAndUniq(std::vector *vec) { - std::sort(vec->begin(), vec->end()); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Returns true if the vector is sorted. -template -inline bool IsSorted(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter < *iter) return false; - iter = next_iter; - } -} - -/// Returns true if the vector is sorted and contains each element -/// only once. -template -inline bool IsSortedAndUniq(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter <= *iter) return false; - iter = next_iter; - } -} - -/// Removes duplicate elements from a sorted list. -template -inline void Uniq(std::vector *vec) { // must be already sorted. 
- KALDI_PARANOID_ASSERT(IsSorted(*vec)); - KALDI_ASSERT(vec); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Copies the elements of a set to a vector. -template -void CopySetToVector(const std::set &s, std::vector *v) { - // copies members of s into v, in sorted order from lowest to highest - // (because the set was in sorted order). - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename std::set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -template -void CopySetToVector(const unordered_set &s, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename unordered_set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -/// Copies the (key, value) pairs in a map to a vector of pairs. -template -void CopyMapToVector(const std::map &m, - std::vector > *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector >::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = std::make_pair(miter->first, miter->second); - // do it like this because of const casting. - } -} - -/// Copies the keys in a map to a vector. -template -void CopyMapKeysToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->first; - } -} - -/// Copies the values in a map to a vector. -template -void CopyMapValuesToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->second; - } -} - -/// Copies the keys in a map to a set. -template -void CopyMapKeysToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) { - s->insert(s->end(), miter->first); - } -} - -/// Copies the values in a map to a set. -template -void CopyMapValuesToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) s->insert(s->end(), miter->second); -} - -/// Copies the contents of a vector to a set. -template -void CopyVectorToSet(const std::vector &v, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) s->insert(s->end(), *iter); - // s->end() is a hint in case v was sorted. will work regardless. -} - -/// Deletes any non-NULL pointers in the vector v, and sets -/// the corresponding entries of v to NULL -template -void DeletePointers(std::vector *v) { - KALDI_ASSERT(v != NULL); - typename std::vector::iterator iter = v->begin(), end = v->end(); - for (; iter != end; ++iter) { - if (*iter != NULL) { - delete *iter; - *iter = NULL; // set to NULL for extra safety. - } - } -} - -/// Returns true if the vector of pointers contains NULL pointers. 
-template -bool ContainsNullPointers(const std::vector &v) { - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) - if (*iter == static_cast(NULL)) return true; - return false; -} - -/// Copies the contents a vector of one type to a vector -/// of another type. -template -void CopyVectorToVector(const std::vector &vec_in, std::vector *vec_out) { - KALDI_ASSERT(vec_out != NULL); - vec_out->resize(vec_in.size()); - for (size_t i = 0; i < vec_in.size(); i++) - (*vec_out)[i] = static_cast(vec_in[i]); -} - -/// A hashing function-object for vectors. -template -struct VectorHasher { // hashing function for vector. - size_t operator()(const std::vector &x) const noexcept { - size_t ans = 0; - typename std::vector::const_iterator iter = x.begin(), end = x.end(); - for (; iter != end; ++iter) { - ans *= kPrime; - ans += *iter; - } - return ans; - } - VectorHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - } - - private: - static const int kPrime = 7853; -}; - -/// A hashing function-object for pairs of ints -template -struct PairHasher { // hashing function for pair - size_t operator()(const std::pair &x) const noexcept { - // 7853 was chosen at random from a list of primes. - return x.first + x.second * 7853; - } - PairHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int1); - KALDI_ASSERT_IS_INTEGER_TYPE(Int2); - } -}; - -/// A hashing function object for strings. -struct StringHasher { // hashing function for std::string - size_t operator()(const std::string &str) const noexcept { - size_t ans = 0, len = str.length(); - const char *c = str.c_str(), *end = c + len; - for (; c != end; c++) { - ans *= kPrime; - ans += *c; - } - return ans; - } - - private: - static const int kPrime = 7853; -}; - -/// Reverses the contents of a vector. -template -inline void ReverseVector(std::vector *vec) { - KALDI_ASSERT(vec != NULL); - size_t sz = vec->size(); - for (size_t i = 0; i < sz / 2; i++) std::swap((*vec)[i], (*vec)[sz - 1 - i]); -} - -/// Comparator object for pairs that compares only the first pair. -template -struct CompareFirstMemberOfPair { - inline bool operator()(const std::pair &p1, const std::pair &p2) { - return p1.first < p2.first; - } -}; - -/// For a vector of pair where I is an integer and F a floating-point or -/// integer type, this function sorts a vector of type vector > on -/// the I value and then merges elements with equal I values, summing these over -/// the F component and then removing any F component with zero value. This -/// is for where the vector of pairs represents a map from the integer to float -/// component, with an "adding" type of semantics for combining the elements. -template -inline void MergePairVectorSumming(std::vector > *vec) { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - CompareFirstMemberOfPair c; - std::sort(vec->begin(), vec->end(), c); // sort on 1st element. - typename std::vector >::iterator out = vec->begin(), - in = vec->begin(), - end = vec->end(); - // special case: while there is nothing to be changed, skip over - // initial input (avoids unnecessary copying). - while (in + 1 < end && in[0].first != in[1].first && in[0].second != 0.0) { - in++; - out++; - } - while (in < end) { - // We reach this point only at the first element of - // each stretch of identical .first elements. - *out = *in; - ++in; - while (in < end && in->first == out->first) { - out->second += in->second; // this is the merge operation. 
- ++in; - } - if (out->second != static_cast(0)) // Don't keep zero elements. - out++; - } - vec->erase(out, end); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_STL_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/text-utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/text-utils.cc deleted file mode 100644 index fd70889644f6b4e14793ddd4f5b0d71a66768699..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/text-utils.cc +++ /dev/null @@ -1,580 +0,0 @@ -// util/text-utils.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "util/text-utils.h" - -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out) { - KALDI_ASSERT(out != NULL); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - F f = 0; - if (!ConvertStringToReal(split[i], &f)) return false; - (*out)[i] = f; - } - return true; -} - -// Instantiate the template above for float and double. 
-template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); -template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out) { - std::string tmp_str; - for (size_t i = 0; i < vec_in.size(); i++) { - if (!omit_empty_strings || !vec_in[i].empty()) { - tmp_str.append(vec_in[i]); - if (i < vec_in.size() - 1) - if (!omit_empty_strings || !vec_in[i + 1].empty()) - tmp_str.append(delim); - } - } - str_out->swap(tmp_str); -} - -void Trim(std::string *str) { - const char *white_chars = " \t\n\r\f\v"; - - std::string::size_type pos = str->find_last_not_of(white_chars); - if (pos != std::string::npos) { - str->erase(pos + 1); - pos = str->find_first_not_of(white_chars); - if (pos != std::string::npos) str->erase(0, pos); - } else { - str->erase(str->begin(), str->end()); - } -} - -bool IsToken(const std::string &token) { - size_t l = token.length(); - if (l == 0) return false; - for (size_t i = 0; i < l; i++) { - unsigned char c = token[i]; - if ((!isprint(c) || isspace(c)) && (isascii(c) || c == (unsigned char)255)) - return false; - // The "&& (isascii(c) || c == 255)" was added so that we won't reject - // non-ASCII characters such as French characters with accents [except for - // 255 which is "nbsp", a form of space]. - } - return true; -} - -void SplitStringOnFirstSpace(const std::string &str, std::string *first, - std::string *rest) { - const char *white_chars = " \t\n\r\f\v"; - typedef std::string::size_type I; - const I npos = std::string::npos; - I first_nonwhite = str.find_first_not_of(white_chars); - if (first_nonwhite == npos) { - first->clear(); - rest->clear(); - return; - } - // next_white is first whitespace after first nonwhitespace. - I next_white = str.find_first_of(white_chars, first_nonwhite); - - if (next_white == npos) { // no more whitespace... - *first = std::string(str, first_nonwhite); - rest->clear(); - return; - } - I next_nonwhite = str.find_first_not_of(white_chars, next_white); - if (next_nonwhite == npos) { - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - rest->clear(); - return; - } - - I last_nonwhite = str.find_last_not_of(white_chars); - KALDI_ASSERT(last_nonwhite != npos); // or coding error. 
- - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - *rest = std::string(str, next_nonwhite, last_nonwhite + 1 - next_nonwhite); -} - -bool IsLine(const std::string &line) { - if (line.find('\n') != std::string::npos) return false; - if (line.empty()) return true; - if (isspace(*(line.begin()))) return false; - if (isspace(*(line.rbegin()))) return false; - std::string::const_iterator iter = line.begin(), end = line.end(); - for (; iter != end; iter++) - if (!isprint(*iter)) return false; - return true; -} - -template -class NumberIstream { - public: - explicit NumberIstream(std::istream &i) : in_(i) {} - - NumberIstream &operator>>(T &x) { - if (!in_.good()) return *this; - in_ >> x; - if (!in_.fail() && RemainderIsOnlySpaces()) return *this; - return ParseOnFail(&x); - } - - private: - std::istream &in_; - - bool RemainderIsOnlySpaces() { - if (in_.tellg() != std::istream::pos_type(-1)) { - std::string rem; - in_ >> rem; - - if (rem.find_first_not_of(' ') != std::string::npos) { - // there is not only spaces - return false; - } - } - - in_.clear(); - return true; - } - - NumberIstream &ParseOnFail(T *x) { - std::string str; - in_.clear(); - in_.seekg(0); - // If the stream is broken even before trying - // to read from it or if there are many tokens, - // it's pointless to try. - if (!(in_ >> str) || !RemainderIsOnlySpaces()) { - in_.setstate(std::ios_base::failbit); - return *this; - } - - std::map inf_nan_map; - // we'll keep just uppercase values. - inf_nan_map["INF"] = std::numeric_limits::infinity(); - inf_nan_map["+INF"] = std::numeric_limits::infinity(); - inf_nan_map["-INF"] = -std::numeric_limits::infinity(); - inf_nan_map["INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["+INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["-INFINITY"] = -std::numeric_limits::infinity(); - inf_nan_map["NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["+NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-NAN"] = -std::numeric_limits::quiet_NaN(); - // MSVC - inf_nan_map["1.#INF"] = std::numeric_limits::infinity(); - inf_nan_map["-1.#INF"] = -std::numeric_limits::infinity(); - inf_nan_map["1.#QNAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-1.#QNAN"] = -std::numeric_limits::quiet_NaN(); - - std::transform(str.begin(), str.end(), str.begin(), ::toupper); - - if (inf_nan_map.find(str) != inf_nan_map.end()) { - *x = inf_nan_map[str]; - } else { - in_.setstate(std::ios_base::failbit); - } - - return *this; - } -}; - -template -bool ConvertStringToReal(const std::string &str, T *out) { - std::istringstream iss(str); - - NumberIstream i(iss); - - i >> *out; - - if (iss.fail()) { - // Number conversion failed. - return false; - } - - return true; -} - -template bool ConvertStringToReal(const std::string &str, float *out); -template bool ConvertStringToReal(const std::string &str, double *out); - -/* - This function is a helper function of StringsApproxEqual. It should be - thought of as a recursive function-- it was designed that way-- but rather - than actually recursing (which would cause problems with stack overflow), we - just set the args and return to the start. - - The 'decimal_places_tolerance' argument is just passed in from outside, - see the documentation for StringsApproxEqual in text-utils.h to see an - explanation. The argument 'places_into_number' provides some information - about the strings 'a' and 'b' that precedes the current pointers. 
- For purposes of this comment, let's define the 'decimal' of a number - as the part that comes after the decimal point, e.g. in '99.123', - '123' would be the decimal. If 'places_into_number' is -1, it means - we're not currently inside some place like that (i.e. it's not the - case that we're pointing to the '1' or the '2' or the '3'). - If it's 0, then we'd be pointing to the first place after the decimal, - '1' in this case. Note if one of the numbers is shorter than the - other, like '99.123' versus '99.1234' and 'a' points to the first '3' - while 'b' points to the second '4', 'places_into_number' referes to the - shorter of the two, i.e. it would be 2 in this example. - - - */ -bool StringsApproxEqualInternal(const char *a, const char *b, - int32 decimal_places_tolerance, - int32 places_into_number) { -start: - char ca = *a, cb = *b; - if (ca == cb) { - if (ca == '\0') { - return true; - } else { - if (places_into_number >= 0) { - if (isdigit(ca)) { - places_into_number++; - } else { - places_into_number = -1; - } - } else { - if (ca == '.') { - places_into_number = 0; - } - } - a++; - b++; - goto start; - } - } else { - if (places_into_number >= decimal_places_tolerance && - (isdigit(ca) || isdigit(cb))) { - // we're potentially willing to accept this difference between the - // strings. - if (isdigit(ca)) a++; - if (isdigit(cb)) b++; - // we'll have advanced at least one of the two strings. - goto start; - } else if (places_into_number >= 0 && - ((ca == '0' && !isdigit(cb)) || (cb == '0' && !isdigit(ca)))) { - // this clause is designed to ensure that, for example, - // "0.1" would count the same as "0.100001". - if (ca == '0') - a++; - else - b++; - places_into_number++; - goto start; - } else { - return false; - } - } -} - -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_tolerance) { - return StringsApproxEqualInternal(a.c_str(), b.c_str(), - decimal_places_tolerance, -1); -} - -bool ConfigLine::ParseLine(const std::string &line) { - data_.clear(); - whole_line_ = line; - if (line.size() == 0) return false; // Empty line - size_t pos = 0, size = line.size(); - while (isspace(line[pos]) && pos < size) pos++; - if (pos == size) return false; // whitespace-only line - size_t first_token_start_pos = pos; - // first get first_token_. - while (!isspace(line[pos]) && pos < size) { - if (line[pos] == '=') { - // If the first block of non-whitespace looks like "foo-bar=...", - // then we ignore it: there is no initial token, and FirstToken() - // is empty. - pos = first_token_start_pos; - break; - } - pos++; - } - first_token_ = - std::string(line, first_token_start_pos, pos - first_token_start_pos); - // first_token_ is expected to be either empty or something like - // "component-node", which actually is a slightly more restrictive set of - // strings than IsValidName() checks for this is a convenient way to check it. - if (!first_token_.empty() && !IsValidName(first_token_)) return false; - - while (pos < size) { - if (isspace(line[pos])) { - pos++; - continue; - } - - // OK, at this point we know that we are pointing at nonspace. - size_t next_equals_sign = line.find_first_of("=", pos); - if (next_equals_sign == pos || next_equals_sign == std::string::npos) { - // we're looking for something like 'key=value'. If there is no equals - // sign, or it's not preceded by something, it's a parsing failure. - return false; - } - std::string key(line, pos, next_equals_sign - pos); - if (!IsValidName(key)) return false; - - // handle any quotes. 
we support key='blah blah' or key="foo bar". - // no escaping is supported. - if (line[next_equals_sign + 1] == '\'' || - line[next_equals_sign + 1] == '"') { - char my_quote = line[next_equals_sign + 1]; - size_t next_quote = line.find_first_of(my_quote, next_equals_sign + 2); - if (next_quote == std::string::npos) { // no matching quote was found. - KALDI_WARN << "No matching quote for " << my_quote - << " in config line '" << line << "'"; - return false; - } else { - std::string value(line, next_equals_sign + 2, - next_quote - next_equals_sign - 2); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = next_quote + 1; - continue; - } - } else { - // we want to be able to parse something like "... input=Offset(a, -1) - // foo=bar": in general, config values with spaces in them, even without - // quoting. - - size_t next_next_equals_sign = - line.find_first_of("=", next_equals_sign + 1), - terminating_space = size; - - if (next_next_equals_sign != - std::string::npos) { // found a later equals sign. - size_t preceding_space = - line.find_last_of(" \t", next_next_equals_sign); - if (preceding_space != std::string::npos && - preceding_space > next_equals_sign) - terminating_space = preceding_space; - } - while (isspace(line[terminating_space - 1]) && terminating_space > 0) - terminating_space--; - - std::string value(line, next_equals_sign + 1, - terminating_space - (next_equals_sign + 1)); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = terminating_space; - } - } - return true; -} - -bool ConfigLine::GetValue(const std::string &key, std::string *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - *value = (it->second).first; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, BaseFloat *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToReal((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, int32 *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToInteger((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, std::vector *value) { - KALDI_ASSERT(value != NULL); - value->clear(); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!SplitStringToIntegers((it->second).first, ":,", true, value)) { - // KALDI_WARN << "Bad option " << (it->second).first; - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, bool *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if ((it->second).first.size() == 0) return false; - switch (((it->second).first)[0]) { - case 'F': - case 'f': - *value = false; - break; - case 'T': - case 't': - *value = true; - break; - default: - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool 
ConfigLine::HasUnusedValues() const { - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) return true; - } - return false; -} - -std::string ConfigLine::UnusedValues() const { - std::string unused_str; - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) { - if (unused_str == "") - unused_str = it->first + "=" + (it->second).first; - else - unused_str += " " + it->first + "=" + (it->second).first; - } - } - return unused_str; -} - -// This is like ExpectToken but for two tokens, and it -// will either accept token1 and then token2, or just token2. -// This is useful in Read functions where the first token -// may already have been consumed. -// void ExpectOneOrTwoTokens(std::istream &is, bool binary, -// const std::string &token1, -// const std::string &token2) { -// KALDI_ASSERT(token1 != token2); -// std::string temp; -// ReadToken(is, binary, &temp); -// if (temp == token1) { -// ExpectToken(is, binary, token2); -// } else { -// if (temp != token2) { -// KALDI_ERR << "Expecting token " << token1 << " or " << token2 -// << " but got " << temp; -// } -// } -// } - -bool IsValidName(const std::string &name) { - if (name.size() == 0) return false; - for (size_t i = 0; i < name.size(); i++) { - if (i == 0 && !isalpha(name[i]) && name[i] != '_') return false; - if (!isalnum(name[i]) && name[i] != '_' && name[i] != '-' && name[i] != '.') - return false; - } - return true; -} - -void ReadConfigLines(std::istream &is, std::vector *lines) { - KALDI_ASSERT(lines != NULL); - std::string line; - while (std::getline(is, line)) { - if (line.size() == 0) continue; - size_t start = line.find_first_not_of(" \t"); - size_t end = line.find_first_of('#'); - if (start == std::string::npos || start == end) continue; - end = line.find_last_not_of(" \t", end - 1); - KALDI_ASSERT(end >= start); - lines->push_back(line.substr(start, end - start + 1)); - } -} - -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines) { - config_lines->resize(lines.size()); - for (size_t i = 0; i < lines.size(); i++) { - bool ret = (*config_lines)[i].ParseLine(lines[i]); - if (!ret) { - KALDI_ERR << "Error parsing config line: " << lines[i]; - } - } -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/text-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/text-utils.h deleted file mode 100644 index bc7763c4aff38214d97cbeda3b29c8717dd65318..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/kaldi/util/text-utils.h +++ /dev/null @@ -1,264 +0,0 @@ -// util/text-utils.h - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_TEXT_UTILS_H_ -#define KALDI_UTIL_TEXT_UTILS_H_ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Split a string using any of the single character delimiters. -/// If omit_empty_strings == true, the output will contain any -/// nonempty strings after splitting on any of the -/// characters in the delimiter. If omit_empty_strings == false, -/// the output will contain n+1 strings if there are n characters -/// in the set "delim" within the input string. In this case -/// the empty string is split to a single empty string. -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -/// Joins the elements of a vector of strings into a single string using -/// "delim" as the delimiter. If omit_empty_strings == true, any empty strings -/// in the vector are skipped. A vector of empty strings results in an empty -/// string on the output. -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out); - -/** - \brief Split a string (e.g. 1:2:3) into a vector of integers. - - \param [in] delim String containing a list of characters, any of which - is allowed as a delimiter. - \param [in] omit_empty_strings If true, empty strings between delimiters are - allowed and will not produce an output integer; if false, - instances of characters in 'delim' that are consecutive or - at the start or end of the string would be an error. - You'll normally want this to be true if 'delim' consists - of spaces, and false otherwise. - \param [out] out The output list of integers. -*/ -template -bool SplitStringToIntegers(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false [but - // should probably be true - // if "delim" is spaces]. - std::vector *out) { - KALDI_ASSERT(out != NULL); - KALDI_ASSERT_IS_INTEGER_TYPE(I); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - const char *this_str = split[i].c_str(); - char *end = NULL; - int64 j = 0; - j = KALDI_STRTOLL(this_str, &end); - if (end == this_str || *end != '\0') { - out->clear(); - return false; - } else { - I jI = static_cast(j); - if (static_cast(jI) != j) { - // output type cannot fit this integer. - out->clear(); - return false; - } - (*out)[i] = jI; - } - } - return true; -} - -// This is defined for F = float and double. -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out); - -/// Converts a string into an integer via strtoll and returns false if there was -/// any kind of problem (i.e. the string was not an integer or contained extra -/// non-whitespace junk, or the integer was too large to fit into the type it is -/// being converted into). Only sets *out if everything was OK and it returns -/// true. 
-template -bool ConvertStringToInteger(const std::string &str, Int *out) { - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - const char *this_str = str.c_str(); - char *end = NULL; - errno = 0; - int64 i = KALDI_STRTOLL(this_str, &end); - if (end != this_str) - while (isspace(*end)) end++; - if (end == this_str || *end != '\0' || errno != 0) return false; - Int iInt = static_cast(i); - if (static_cast(iInt) != i || - (i < 0 && !std::numeric_limits::is_signed)) { - return false; - } - *out = iInt; - return true; -} - -/// ConvertStringToReal converts a string into either float or double -/// and returns false if there was any kind of problem (i.e. the string -/// was not a floating point number or contained extra non-whitespace junk). -/// Be careful- this function will successfully read inf's or nan's. -template -bool ConvertStringToReal(const std::string &str, T *out); - -/// Removes the beginning and trailing whitespaces from a string -void Trim(std::string *str); - -/// Removes leading and trailing white space from the string, then splits on the -/// first section of whitespace found (if present), putting the part before the -/// whitespace in "first" and the rest in "rest". If there is no such space, -/// everything that remains after removing leading and trailing whitespace goes -/// in "first". -void SplitStringOnFirstSpace(const std::string &line, std::string *first, - std::string *rest); - -/// Returns true if "token" is nonempty, and all characters are -/// printable and whitespace-free. -bool IsToken(const std::string &token); - -/// Returns true if "line" is free of \n characters and unprintable -/// characters, and does not contain leading or trailing whitespace. -bool IsLine(const std::string &line); - -/** - This function returns true when two text strings are approximately equal, and - false when they are not. The definition of 'equal' is normal string - equality, except that two substrings like "0.31134" and "0.311341" would be - considered equal. 'decimal_places_tolerance' controls how many digits after - the '.' have to match up. - E.g. StringsApproxEqual("hello 0.23 there", "hello 0.24 there", 2) would - return false because there is a difference in the 2nd decimal, but with - an argument of 1 it would return true. - */ -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_check = 2); - -/** - This class is responsible for parsing input like - hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' - baz="a b c d='a b' e" and giving you access to the fields, in this case - - FirstToken() == "hi-there", and key->value pairs: - - xx->yyy, a->"b c", empty->"", f-oo->"Append(bar, sss)", ba_z->"123", - bing->"a b c", baz->"a b c d='a b' e" - - The first token is optional, if the line started with a key-value pair then - FirstValue() will be empty. - - Note: it can parse value fields with space inside them only if they are free - of the '=' character. If values are going to contain the '=' character, you - need to quote them with either single or double quotes. - - Key values may contain -_a-zA-Z0-9, but must begin with a-zA-Z_. - */ -class ConfigLine { - public: - // Tries to parse the line as a config-file line. Returns false - // if it could not for some reason, e.g. parsing failure. In most cases - // prints no warnings; the user should do this. Does not expect comments. - bool ParseLine(const std::string &line); - - // the GetValue functions are overloaded for various types. 
They return true - // if the key exists with value that can be converted to that type, and false - // otherwise. They also mark the key-value pair as having been read. It is - // not an error to read values twice. - bool GetValue(const std::string &key, std::string *value); - bool GetValue(const std::string &key, BaseFloat *value); - bool GetValue(const std::string &key, int32 *value); - // Values may be separated by ":" or by ",". - bool GetValue(const std::string &key, std::vector *value); - bool GetValue(const std::string &key, bool *value); - - bool HasUnusedValues() const; - /// returns e.g. foo=bar xxx=yyy if foo and xxx were not consumed by one - /// of the GetValue() functions. - std::string UnusedValues() const; - - const std::string &FirstToken() const { return first_token_; } - - const std::string WholeLine() { return whole_line_; } - // use default assignment operator and copy constructor. - private: - std::string whole_line_; - // the first token of the line, e.g. if line is - // foo-bar baz=bing - // then first_token_ would be "foo-bar". - std::string first_token_; - - // data_ maps from key to (value, is-this-value-consumed?). - std::map > data_; -}; - -/// This function is like ExpectToken but for two tokens, and it will either -/// accept token1 and then token2, or just token2. This is useful in Read -/// functions where the first token may already have been consumed. -void ExpectOneOrTwoTokens(std::istream &is, bool binary, - const std::string &token1, const std::string &token2); - -/** - This function reads in a config file and *appends* its contents to a vector - of lines; it is responsible for removing comments (anything after '#') and - stripping out any lines that contain only whitespace after comment removal. - */ -void ReadConfigLines(std::istream &is, std::vector *lines); - -/** - This function converts config-lines from a simple sequence of strings - as output by ReadConfigLines(), into a sequence of first-tokens and - name-value pairs. The general format is: - "command-type bar=baz xx=yyy" - etc., although there are subtleties as to what exactly is allowed, see - documentation for class ConfigLine for details. - This function will die if there was a parsing failure. - */ -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines); - -/// Returns true if 'name' would be a valid name for a component or node in a -/// nnet3Nnet. This is a nonempty string beginning with A-Za-z_, and containing -/// only -/// '-', '_', '.', A-Z, a-z, or 0-9. 
-bool IsValidName(const std::string &name); - -} // namespace kaldi - -#endif // KALDI_UTIL_TEXT_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/CPPLINT.cfg b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/CPPLINT.cfg deleted file mode 100644 index 51ff339c18435a6c3a3be03131080d7b8ab8de86..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/CPPLINT.cfg +++ /dev/null @@ -1 +0,0 @@ -exclude_files=.* diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/CMakeLists.txt deleted file mode 100644 index 04051ef5ae46c04a40c1ffccc98c37fa594ad13e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ - -#-DHAVE_CONFIG_H -I./../include -fno-exceptions -funsigned-char -std=c++11 -MT symbol-table.lo -MD -MP -MF .deps/symbol-table.Tpo -c symbol-table.cc -fno-common -DPIC -o .libs/symbol-table.o - -include_directories(./include/) -install(DIRECTORY include/ DESTINATION include/ - FILES_MATCHING PATTERN "*.h") - -add_subdirectory(lib) - -if(HAVE_SCRIPT) - add_subdirectory(script) -endif(HAVE_SCRIPT) - -if(HAVE_BIN) - add_subdirectory(bin) -endif(HAVE_BIN) - -add_subdirectory(extensions) - -if(BUILD_TESTING) - enable_testing() - add_subdirectory(test) -endif(BUILD_TESTING) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/extensions/special/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/extensions/special/CMakeLists.txt deleted file mode 100644 index 9c71b750a72ffe3c2dafde657273361c3dbae409..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/extensions/special/CMakeLists.txt +++ /dev/null @@ -1,67 +0,0 @@ -file(GLOB HEADER_FILES ../../include/fst/extensions/special/*.h) -message(STATUS "${HEADER_FILES}") - -if(HAVE_BIN) - add_executable(fstspecial-bin - ../../bin/fstconvert.cc - ../../bin/fstconvert-main.cc - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ) - - set_target_properties(fstspecial-bin PROPERTIES - FOLDER special/bin - OUTPUT_NAME fstspecial - ) - - target_link_libraries(fstspecial-bin - fstscript - fst - ${CMAKE_DL_LIBS} - ) -endif(HAVE_BIN) - - -add_library(fstspecial - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ${HEADER_FILES} -) - -set_target_properties(fstspecial PROPERTIES - SOVERSION "${SOVERSION}" - FOLDER special -) -target_link_libraries(fstspecial - fst -) - -set(FST_SPECIAL_INSTALL_TARGETS fstspecial) -if(HAVE_BIN) - list(APPEND FST_SPECIAL_INSTALL_TARGETS fstspecial-bin) -endif() - -install(TARGETS ${FST_SPECIAL_INSTALL_TARGETS} - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib -) - -function (add_module _name) - add_library(${ARGV}) - if (TARGET ${_name}) - target_link_libraries(${_name} fst) - set_target_properties(${_name} - PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true - FOLDER special/modules - ) - endif() - - install(TARGETS ${_name} LIBRARY DESTINATION lib/fst) -endfunction() - -add_module(phi-fst MODULE phi-fst.cc) -add_module(rho-fst MODULE rho-fst.cc) -add_module(sigma-fst MODULE sigma-fst.cc) diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/include/fst/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/include/fst/flags.h deleted file mode 100644 index b5ec8ff7416774a0612ae0fe7e008a630b289dd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/include/fst/flags.h +++ /dev/null @@ -1,228 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style flag handling declarations and inline definitions. - -#ifndef FST_LIB_FLAGS_H_ -#define FST_LIB_FLAGS_H_ - -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include "gflags/gflags.h" -#include "glog/logging.h" - -using std::string; - -// FLAGS USAGE: -// -// Definition example: -// -// DEFINE_int32(length, 0, "length"); -// -// This defines variable FLAGS_length, initialized to 0. -// -// Declaration example: -// -// DECLARE_int32(length); -// -// SET_FLAGS() can be used to set flags from the command line -// using, for example, '--length=2'. -// -// ShowUsage() can be used to print out command and flag usage. - -// #define DECLARE_bool(name) extern bool FLAGS_ ## name -// #define DECLARE_string(name) extern string FLAGS_ ## name -// #define DECLARE_int32(name) extern int32 FLAGS_ ## name -// #define DECLARE_int64(name) extern int64 FLAGS_ ## name -// #define DECLARE_double(name) extern double FLAGS_ ## name - -template -struct FlagDescription { - FlagDescription(T *addr, const char *doc, const char *type, - const char *file, const T val) - : address(addr), - doc_string(doc), - type_name(type), - file_name(file), - default_value(val) {} - - T *address; - const char *doc_string; - const char *type_name; - const char *file_name; - const T default_value; -}; - -template -class FlagRegister { - public: - static FlagRegister *GetRegister() { - static auto reg = new FlagRegister; - return reg; - } - - const FlagDescription &GetFlagDescription(const string &name) const { - fst::MutexLock l(&flag_lock_); - auto it = flag_table_.find(name); - return it != flag_table_.end() ? 
it->second : 0; - } - - void SetDescription(const string &name, - const FlagDescription &desc) { - fst::MutexLock l(&flag_lock_); - flag_table_.insert(make_pair(name, desc)); - } - - bool SetFlag(const string &val, bool *address) const { - if (val == "true" || val == "1" || val.empty()) { - *address = true; - return true; - } else if (val == "false" || val == "0") { - *address = false; - return true; - } - else { - return false; - } - } - - bool SetFlag(const string &val, string *address) const { - *address = val; - return true; - } - - bool SetFlag(const string &val, int32 *address) const { - char *p = 0; - *address = strtol(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, int64 *address) const { - char *p = 0; - *address = strtoll(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, double *address) const { - char *p = 0; - *address = strtod(val.c_str(), &p); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &arg, const string &val) const { - for (typename std::map< string, FlagDescription >::const_iterator it = - flag_table_.begin(); - it != flag_table_.end(); - ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - if (arg == name) - return SetFlag(val, desc.address); - } - return false; - } - - void GetUsage(std::set> *usage_set) const { - for (auto it = flag_table_.begin(); it != flag_table_.end(); ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - string usage = " --" + name; - usage += ": type = "; - usage += desc.type_name; - usage += ", default = "; - usage += GetDefault(desc.default_value) + "\n "; - usage += desc.doc_string; - usage_set->insert(make_pair(desc.file_name, usage)); - } - } - - private: - string GetDefault(bool default_value) const { - return default_value ? "true" : "false"; - } - - string GetDefault(const string &default_value) const { - return "\"" + default_value + "\""; - } - - template - string GetDefault(const V &default_value) const { - std::ostringstream strm; - strm << default_value; - return strm.str(); - } - - mutable fst::Mutex flag_lock_; // Multithreading lock. - std::map> flag_table_; -}; - -template -class FlagRegisterer { - public: - FlagRegisterer(const string &name, const FlagDescription &desc) { - auto registr = FlagRegister::GetRegister(); - registr->SetDescription(name, desc); - } - - private: - FlagRegisterer(const FlagRegisterer &) = delete; - FlagRegisterer &operator=(const FlagRegisterer &) = delete; -}; - - -#define DEFINE_VAR(type, name, value, doc) \ - type FLAGS_ ## name = value; \ - static FlagRegisterer \ - name ## _flags_registerer(#name, FlagDescription(&FLAGS_ ## name, \ - doc, \ - #type, \ - __FILE__, \ - value)) - -// #define DEFINE_bool(name, value, doc) DEFINE_VAR(bool, name, value, doc) -// #define DEFINE_string(name, value, doc) \ -// DEFINE_VAR(string, name, value, doc) -// #define DEFINE_int32(name, value, doc) DEFINE_VAR(int32, name, value, doc) -// #define DEFINE_int64(name, value, doc) DEFINE_VAR(int64, name, value, doc) -// #define DEFINE_double(name, value, doc) DEFINE_VAR(double, name, value, doc) - - -// Temporary directory. 
-DECLARE_string(tmpdir); - -void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags, - const char *src = ""); - -#define SET_FLAGS(usage, argc, argv, rmflags) \ -gflags::ParseCommandLineFlags(argc, argv, true) -// SetFlags(usage, argc, argv, rmflags, __FILE__) - -// Deprecated; for backward compatibility. -inline void InitFst(const char *usage, int *argc, char ***argv, bool rmflags) { - return SetFlags(usage, argc, argv, rmflags); -} - -void ShowUsage(bool long_usage = true); - -#endif // FST_LIB_FLAGS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/include/fst/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/include/fst/log.h deleted file mode 100644 index bf041c58ebfab73d03bb14adf28c7c7916a2217d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/patch/openfst/src/include/fst/log.h +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style logging declarations and inline definitions. - -#ifndef FST_LIB_LOG_H_ -#define FST_LIB_LOG_H_ - -#include -#include -#include - -#include -#include - -using std::string; - -DECLARE_int32(v); - -class LogMessage { - public: - LogMessage(const string &type) : fatal_(type == "FATAL") { - std::cerr << type << ": "; - } - ~LogMessage() { - std::cerr << std::endl; - if(fatal_) - exit(1); - } - std::ostream &stream() { return std::cerr; } - - private: - bool fatal_; -}; - -// #define LOG(type) LogMessage(#type).stream() -// #define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO) - -// Checks -inline void FstCheck(bool x, const char* expr, - const char *file, int line) { - if (!x) { - LOG(FATAL) << "Check failed: \"" << expr - << "\" file: " << file - << " line: " << line; - } -} - -// #define CHECK(x) FstCheck(static_cast(x), #x, __FILE__, __LINE__) -// #define CHECK_EQ(x, y) CHECK((x) == (y)) -// #define CHECK_LT(x, y) CHECK((x) < (y)) -// #define CHECK_GT(x, y) CHECK((x) > (y)) -// #define CHECK_LE(x, y) CHECK((x) <= (y)) -// #define CHECK_GE(x, y) CHECK((x) >= (y)) -// #define CHECK_NE(x, y) CHECK((x) != (y)) - -// Debug checks -// #define DCHECK(x) assert(x) -// #define DCHECK_EQ(x, y) DCHECK((x) == (y)) -// #define DCHECK_LT(x, y) DCHECK((x) < (y)) -// #define DCHECK_GT(x, y) DCHECK((x) > (y)) -// #define DCHECK_LE(x, y) DCHECK((x) <= (y)) -// #define DCHECK_GE(x, y) DCHECK((x) >= (y)) -// #define DCHECK_NE(x, y) DCHECK((x) != (y)) - - -// Ports -#define ATTRIBUTE_DEPRECATED __attribute__((deprecated)) - -#endif // FST_LIB_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/post_processor/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/post_processor/CMakeLists.txt deleted file mode 100644 index 
6113bbc26eb8fe35e4e17ffd1cab382f0fb0f1f8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/post_processor/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_library(post_processor STATIC - post_processor.cc -) -target_link_libraries(post_processor PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/post_processor/post_processor.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/post_processor/post_processor.cc deleted file mode 100644 index 315f62d34cbc441ecbaf7c07667eb35ee61c2c8d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/post_processor/post_processor.cc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#include "post_processor/post_processor.h" - -#include -#include - -#include "utils/string.h" - -namespace wenet { - -std::string PostProcessor::ProcessSpace(const std::string& str) { - std::string result = str; - // 1. remove ' ' if needed - // only spaces between mandarin words need to be removed, please note that - // if str contains '_', we assume that the decoding type must be - // `CtcPrefixBeamSearch` and this branch will do nothing since str must be - // obtained via "".join() (in function `AsrDecoder::UpdateResult()`) - if (opts_.language_type == kMandarinEnglish && !str.empty()) { - result.clear(); - // split str by ' ' - std::vector words; - std::stringstream ss(str); - std::string tmp; - while (ss >> tmp) { - words.push_back(tmp); - } - // check english word - bool is_englishword_prev = false; - bool is_englishword_now = false; - for (std::string& w : words) { - is_englishword_now = CheckEnglishWord(w); - if (is_englishword_prev && is_englishword_now) { - result += (' ' + w); - } else { - result += (w); - } - is_englishword_prev = is_englishword_now; - } - } - // 2. 
replace '_' with ' ' - // this should be done for all cases (both kMandarinEnglish and kIndoEuropean) - result = ProcessBlank(result, opts_.lowercase); - return result; -} - -std::string PostProcessor::Process(const std::string& str, bool finish) { - std::string result; - result = ProcessSpace(str); - // TODO(xcsong): do itn/punctuation if finish == true - return result; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/post_processor/post_processor.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/post_processor/post_processor.h deleted file mode 100644 index 54597845ebc88ad22e1244d2e693e2088cff6d21..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/post_processor/post_processor.h +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#ifndef POST_PROCESSOR_POST_PROCESSOR_H_ -#define POST_PROCESSOR_POST_PROCESSOR_H_ - -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -enum LanguageType { - // spaces between **mandarin words** should be removed. - // cases of processing spaces with mandarin-only, english-only - // and mandarin-english code-switch can be found in post_processor_test.cc - kMandarinEnglish = 0x00, - // spaces should be kept for most of the - // Indo-European languages (i.e., deutsch or english-deutsch code-switch). - // cases of those languages can be found in post_processor_test.cc - kIndoEuropean = 0x01 -}; - -struct PostProcessOptions { - // space options - // The decoded result may contain spaces (' ' or '_'), - // we will process those spaces according to language_type. 
More details can - // be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - LanguageType language_type = kMandarinEnglish; - // whether lowercase letters are required - bool lowercase = true; -}; - -// TODO(xcsong): add itn/punctuation related resource -struct PostProcessResource {}; - -// Post Processor -class PostProcessor { - public: - explicit PostProcessor(PostProcessOptions&& opts) : opts_(std::move(opts)) {} - explicit PostProcessor(const PostProcessOptions& opts) : opts_(opts) {} - // call other functions to do post processing - std::string Process(const std::string& str, bool finish); - // process spaces according to configurations - std::string ProcessSpace(const std::string& str); - // TODO(xcsong): add itn/punctuation - // void InverseTN(const std::string& str); - // void Punctuate(const std::string& str); - - private: - const PostProcessOptions opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(PostProcessor); -}; - -} // namespace wenet - -#endif // POST_PROCESSOR_POST_PROCESSOR_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/CMakeLists.txt deleted file mode 100644 index 145654105350e91a5f9121b47197f5fc60663f5c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -link_libraries(gtest_main gmock) - -add_executable(utils_test utils_test.cc) -target_link_libraries(utils_test PUBLIC utils) -add_test(UTILS_TEST utils_test) - -add_executable(ctc_prefix_beam_search_test ctc_prefix_beam_search_test.cc) -target_link_libraries(ctc_prefix_beam_search_test PUBLIC decoder) -add_test(CTC_PREFIX_BEAM_SEARCH_TEST ctc_prefix_beam_search_test) - -add_executable(post_processor_test post_processor_test.cc) -target_link_libraries(post_processor_test PUBLIC post_processor) -add_test(POST_PROCESSOR_TEST post_processor_test) - - -add_executable(feature_pipeline_test feature_pipeline_test.cc) -target_link_libraries(feature_pipeline_test PUBLIC frontend) -add_test(FEATURE_PIPELINE_TEST feature_pipeline_test) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/ctc_prefix_beam_search_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/ctc_prefix_beam_search_test.cc deleted file mode 100644 index d8f3b65693b934beb33f3a770795f0b6e7ce3456..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/ctc_prefix_beam_search_test.cc +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/ctc_prefix_beam_search.h" - -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include "utils/utils.h" - -TEST(CtcPrefixBeamSearchTest, CtcPrefixBeamSearchLogicTest) { - using ::testing::ElementsAre; - // See https://robin1001.github.io/2020/12/11/ctc-search for the - // graph demonstration of the data - std::vector> data = { - {0.25, 0.40, 0.35}, {0.40, 0.35, 0.25}, {0.10, 0.50, 0.40}}; - // Apply log - for (int i = 0; i < data.size(); i++) { - for (int j = 0; j < data[i].size(); j++) { - data[i][j] = std::log(data[i][j]); - } - } - wenet::CtcPrefixBeamSearchOptions option; - option.first_beam_size = 3; - option.second_beam_size = 3; - wenet::CtcPrefixBeamSearch prefix_beam_search(option); - prefix_beam_search.Search(data); - /* Test case info - | top k | result index | prefix score | viterbi score | timestamp | - |-------|--------------|--------------|---------------|-----------| - | top 1 | [2, 1] | 0.2185 | 0.07 | [0, 2] | - | top 2 | [1, 2] | 0.1550 | 0.064 | [0, 2] | - | top 3 | [1] | 0.1525 | 0.07 | [2] | - */ - const std::vector>& result = prefix_beam_search.Outputs(); - EXPECT_EQ(result.size(), 3); - ASSERT_THAT(result[0], ElementsAre(2, 1)); - ASSERT_THAT(result[1], ElementsAre(1, 2)); - ASSERT_THAT(result[2], ElementsAre(1)); - - const std::vector& likelihood = prefix_beam_search.Likelihood(); - EXPECT_EQ(likelihood.size(), 3); - EXPECT_FLOAT_EQ(std::exp(likelihood[0]), 0.2185); - EXPECT_FLOAT_EQ(std::exp(likelihood[1]), 0.1550); - EXPECT_FLOAT_EQ(std::exp(likelihood[2]), 0.1525); - - const std::vector& viterbi_likelihood = - prefix_beam_search.viterbi_likelihood(); - EXPECT_EQ(viterbi_likelihood.size(), 3); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[0]), 0.07); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[1]), 0.064); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[2]), 0.07); - - const std::vector>& times = prefix_beam_search.Times(); - EXPECT_EQ(times.size(), 3); - ASSERT_THAT(times[0], ElementsAre(0, 2)); - ASSERT_THAT(times[1], ElementsAre(0, 2)); - ASSERT_THAT(times[2], ElementsAre(2)); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/feature_pipeline_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/feature_pipeline_test.cc deleted file mode 100644 index 244ec0735b6086211b476e8d97569e1ee5959bc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/feature_pipeline_test.cc +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) 2022 Roney -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
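> Editor's note: the expected-result table in the CTC prefix beam search test above can be sanity-checked by brute force — with three frames and three symbols there are only 3^3 alignments, so the full CTC marginal ("prefix score") and the best single path ("viterbi score") for the top prefix [2, 1] can be enumerated directly. The sketch below is an illustration added alongside the deleted test, not part of it; lower-ranked beam-search scores are affected by pruning and need not equal the full marginals computed this way.

```cpp
#include <iostream>
#include <map>
#include <vector>

int main() {
  // Toy posteriors from the test: rows are frames, columns are symbols
  // {blank(0), 1, 2}.
  const double p[3][3] = {{0.25, 0.40, 0.35},
                          {0.40, 0.35, 0.25},
                          {0.10, 0.50, 0.40}};
  std::map<std::vector<int>, double> total, best;
  for (int a = 0; a < 3; ++a)
    for (int b = 0; b < 3; ++b)
      for (int c = 0; c < 3; ++c) {
        const int path[3] = {a, b, c};
        const double prob = p[0][a] * p[1][b] * p[2][c];
        // CTC collapse: merge repeats, then drop blanks (symbol 0).
        std::vector<int> collapsed;
        int prev = -1;
        for (int t = 0; t < 3; ++t) {
          if (path[t] != prev && path[t] != 0) collapsed.push_back(path[t]);
          prev = path[t];
        }
        total[collapsed] += prob;                       // marginal over paths
        if (prob > best[collapsed]) best[collapsed] = prob;  // best single path
      }
  const std::vector<int> top1 = {2, 1};
  std::cout << "prefix score  [2,1]: " << total[top1] << "\n";  // ~0.2185
  std::cout << "viterbi score [2,1]: " << best[top1] << "\n";   // 0.07
}
```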
- -#include -#include - -#include "frontend/feature_pipeline.h" -#include "utils/blocking_queue.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -void pushQueue(const std::shared_ptr>& que, - std::vector vec) { - que->Push(vec); -} - -void popQueue(const std::shared_ptr>& que, int num, - int back_data) { - auto pop_data = que->Pop(num); - ASSERT_EQ(pop_data[num - 1], back_data); -} - -TEST(FeaturePipelineTest, BlockingQueueTest) { - auto capacity_queue = std::make_shared>(2); - std::vector test_data{1, 2, 3, 4, 5}; - std::thread push_thread(&pushQueue, capacity_queue, test_data); - ASSERT_EQ(capacity_queue->Pop(), 1); - ASSERT_LE(capacity_queue->Size(), 2); // capacity_queue: 2 or 2,3 - auto pop_data = capacity_queue->Pop(3); // 2,3,4 num > capacity - ASSERT_EQ(pop_data.size(), 3); - ASSERT_EQ(pop_data[2], 4); - push_thread.join(); - ASSERT_EQ(capacity_queue->Size(), 1); // capacity_queue:5 - - std::thread pop_thread(&popQueue, capacity_queue, 3, 0); // num > capacity - capacity_queue->Push(9); // capacity_queue:5,9 - capacity_queue->Push(0); // capacity_queue:5,9,0 - pop_thread.join(); // capacity_queue: - ASSERT_EQ(capacity_queue->Size(), 0); - - pop_data = capacity_queue->Pop(0); - ASSERT_TRUE(pop_data.empty()); -} - -TEST(FeaturePipelineTest, PipelineTest) { - wenet::FeaturePipelineConfig config(80, 8000); - wenet::FeaturePipeline feature_pipeline(config); - int audio_len = 8 * 55; // audio len 55ms,4 frames - std::vector pcm(audio_len, 0); - feature_pipeline.AcceptWaveform(pcm.data(), audio_len); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 4); - - std::vector> out_feats; - auto b = feature_pipeline.Read(2, &out_feats); - ASSERT_TRUE(b); - ASSERT_EQ(out_feats.size(), 2); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 2); - - std::vector out_feat; - b = feature_pipeline.ReadOne(&out_feat); - ASSERT_TRUE(b); - ASSERT_FALSE(out_feat.empty()); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 1); - - feature_pipeline.set_input_finished(); - b = feature_pipeline.Read(2, &out_feats); - ASSERT_FALSE(b); - ASSERT_EQ(out_feats.size(), 1); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 0); - - feature_pipeline.AcceptWaveform(pcm.data(), audio_len); - feature_pipeline.Read(2, &out_feats); - feature_pipeline.Reset(); - feature_pipeline.set_input_finished(); - b = feature_pipeline.Read(2, &out_feats); - ASSERT_FALSE(b); - ASSERT_EQ(out_feats.size(), 0); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 0); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/post_processor_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/post_processor_test.cc deleted file mode 100644 index fa11fa29231032d62389a93fd00b0ec782bf8a3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/post_processor_test.cc +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License - -#include "post_processor/post_processor.h" - -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include "utils/utils.h" - -TEST(PostProcessorTest, ProcessSpacekMandarinEnglishTest) { - wenet::PostProcessOptions opts_lowercase; - wenet::PostProcessor post_processor_lowercase(opts_lowercase); - - wenet::PostProcessOptions opts_uppercase; - opts_uppercase.lowercase = false; - wenet::PostProcessor post_processor_uppercase(opts_uppercase); - - std::vector input = { - // modeling unit: mandarin character - // decode type: CtcPrefixBeamSearch, "".join() - "震东好帅", - // modeling unit: mandarin word - // decode type: CtcWfstBeamSearch, " ".join() - " 吴迪 也 好帅", - // modeling unit: english wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "▁binbin▁is▁also▁handsome", - // modeling unit: english word - // decode type: CtcWfstBeamSearch, " ".join() - " life is short i use wenet", - // modeling unit: mandarin character + english wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "超哥▁is▁the▁most▁handsome", - // modeling unit: mandarin word + english word - // decode type: CtcWfstBeamSearch, " ".join() - " 人生 苦短 i use wenet", - }; - - std::vector result_lowercase = { - "震东好帅", - "吴迪也好帅", - "binbin is also handsome", - "life is short i use wenet", - "超哥 is the most handsome", - "人生苦短i use wenet", - }; - - std::vector result_uppercase = { - "震东好帅", - "吴迪也好帅", - "BINBIN IS ALSO HANDSOME", - "LIFE IS SHORT I USE WENET", - "超哥 IS THE MOST HANDSOME", - "人生苦短I USE WENET", - }; - - for (size_t i = 0; i < input.size(); ++i) { - EXPECT_EQ(post_processor_lowercase.ProcessSpace(input[i]), - result_lowercase[i]); - EXPECT_EQ(post_processor_uppercase.ProcessSpace(input[i]), - result_uppercase[i]); - } -} - -TEST(PostProcessorTest, ProcessSpacekIndoEuropeanTest) { - wenet::PostProcessOptions opts_lowercase; - opts_lowercase.language_type = wenet::kIndoEuropean; - wenet::PostProcessor post_processor_lowercase(opts_lowercase); - - wenet::PostProcessOptions opts_uppercase; - opts_uppercase.language_type = wenet::kIndoEuropean; - opts_uppercase.lowercase = false; - wenet::PostProcessor post_processor_uppercase(opts_uppercase); - - std::vector input = { - // modeling unit: wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "▁zhendong▁ist▁so▁schön", - // modeling unit: word - // decode type: CtcWfstBeamSearch, " ".join() - " zhendong ist so schön"}; - - std::vector result_lowercase = {"zhendong ist so schön", - "zhendong ist so schön"}; - - std::vector result_uppercase = {"ZHENDONG IST SO SCHÖN", - "ZHENDONG IST SO SCHÖN"}; - - for (size_t i = 0; i < input.size(); ++i) { - EXPECT_EQ(post_processor_lowercase.ProcessSpace(input[i]), - result_lowercase[i]); - EXPECT_EQ(post_processor_uppercase.ProcessSpace(input[i]), - result_uppercase[i]); - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/utils_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/utils_test.cc deleted file mode 100644 index 6b2bbac25e000ce854d5e55a50cb51109d62d758..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/test/utils_test.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the 
License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "utils/utils.h" - -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -TEST(UtilsTest, TopKTest) { - using ::testing::ElementsAre; - using ::testing::FloatNear; - using ::testing::Pointwise; - std::vector data = {1, 3, 5, 7, 9, 2, 4, 6, 8, 10}; - std::vector values; - std::vector indices; - wenet::TopK(data, 3, &values, &indices); - EXPECT_THAT(values, Pointwise(FloatNear(1e-8), {10, 9, 8})); - ASSERT_THAT(indices, ElementsAre(9, 4, 8)); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/CMakeLists.txt deleted file mode 100644 index 686362688c050d48224ca0a01e0d24b03d94758a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_library(utils STATIC - string.cc - utils.cc -) - -if(NOT ANDROID) - if(MSVC) - target_link_libraries(utils PUBLIC fst) - else() - target_link_libraries(utils PUBLIC fst dl) - endif() -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/blocking_queue.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/blocking_queue.h deleted file mode 100644 index 9bf0127d9298fbfae2eeebb9431c680fc5dd7647..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/blocking_queue.h +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
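> Editor's note: the TopK expectations in utils_test.cc above pin down the contract of wenet::TopK — the k largest values in descending order, paired with their original indices. Below is a hedged standalone sketch of that contract; the name TopKSketch is hypothetical, and the real implementation lives in the deleted utils/utils.cc rather than in this illustration.

```cpp
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

// Return the k largest values of `data` together with their original
// indices, ordered from largest to smallest (matching the deleted test's
// expectations: values {10, 9, 8}, indices {9, 4, 8}).
void TopKSketch(const std::vector<float>& data, int k,
                std::vector<float>* values, std::vector<int>* indices) {
  std::vector<int> order(data.size());
  std::iota(order.begin(), order.end(), 0);
  std::partial_sort(order.begin(), order.begin() + k, order.end(),
                    [&](int a, int b) { return data[a] > data[b]; });
  values->clear();
  indices->clear();
  for (int i = 0; i < k; ++i) {
    indices->push_back(order[i]);
    values->push_back(data[order[i]]);
  }
}

int main() {
  std::vector<float> data = {1, 3, 5, 7, 9, 2, 4, 6, 8, 10};
  std::vector<float> values;
  std::vector<int> indices;
  TopKSketch(data, 3, &values, &indices);
  for (int i = 0; i < 3; ++i)
    std::cout << indices[i] << ":" << values[i] << (i == 2 ? "\n" : " ");
  // expected output: 9:10 4:9 8:8
}
```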
- -#ifndef UTILS_BLOCKING_QUEUE_H_ -#define UTILS_BLOCKING_QUEUE_H_ - -#include -#include -#include -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -template -class BlockingQueue { - public: - explicit BlockingQueue(size_t capacity = std::numeric_limits::max()) - : capacity_(capacity) {} - - void Push(const T& value) { - { - std::unique_lock lock(mutex_); - while (queue_.size() >= capacity_) { - not_full_condition_.wait(lock); - } - queue_.push(value); - } - not_empty_condition_.notify_one(); - } - - void Push(T&& value) { - { - std::unique_lock lock(mutex_); - while (queue_.size() >= capacity_) { - not_full_condition_.wait(lock); - } - queue_.push(std::move(value)); - } - not_empty_condition_.notify_one(); - } - - void Push(const std::vector& values) { - { - std::unique_lock lock(mutex_); - for (auto& value : values) { - while (queue_.size() >= capacity_) { - not_empty_condition_.notify_one(); - not_full_condition_.wait(lock); - } - queue_.push(value); - } - } - not_empty_condition_.notify_one(); - } - - void Push(std::vector&& values) { - std::unique_lock lock(mutex_); - for (auto& value : values) { - while (queue_.size() >= capacity_) { - not_empty_condition_.notify_one(); - not_full_condition_.wait(lock); - } - queue_.push(std::move(value)); - } - not_empty_condition_.notify_one(); - } - - T Pop() { - std::unique_lock lock(mutex_); - while (queue_.empty()) { - not_empty_condition_.wait(lock); - } - T t(std::move(queue_.front())); - queue_.pop(); - not_full_condition_.notify_one(); - return t; - } - - // num can be greater than capacity,but it needs to be used with care - std::vector Pop(size_t num) { - std::unique_lock lock(mutex_); - std::vector block_data; - while (block_data.size() < num) { - while (queue_.empty()) { - not_full_condition_.notify_one(); - not_empty_condition_.wait(lock); - } - block_data.push_back(std::move(queue_.front())); - queue_.pop(); - } - not_full_condition_.notify_one(); - return block_data; - } - - bool Empty() const { - std::lock_guard lock(mutex_); - return queue_.empty(); - } - - size_t Size() const { - std::lock_guard lock(mutex_); - return queue_.size(); - } - - void Clear() { - while (!Empty()) { - Pop(); - } - } - - private: - size_t capacity_; - mutable std::mutex mutex_; - std::condition_variable not_full_condition_; - std::condition_variable not_empty_condition_; - std::queue queue_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(BlockingQueue); -}; - -} // namespace wenet - -#endif // UTILS_BLOCKING_QUEUE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/file.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/file.h deleted file mode 100644 index 83ad9c8c52fecd334b3549285bf39cd4f59b9f2b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/file.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_FILE_H_ -#define UTILS_FILE_H_ - -#include -#include - -namespace wenet { - -inline bool FileExists(const std::string& path) { - std::ifstream f(path.c_str()); - return f.good(); -} - -} // namespace wenet - -#endif // UTILS_FILE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/flags.h deleted file mode 100644 index 3432aa78847322edec8d6d2aec59ed7ca5352fcd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/flags.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_FLAGS_H_ -#define UTILS_FLAGS_H_ - -// Because openfst is a dynamic library compiled with gflags/glog, we must use -// the gflags/glog from openfst to avoid them linked both statically and -// dynamically into the executable. -#include "fst/flags.h" - -#endif // UTILS_FLAGS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/json.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/json.h deleted file mode 100644 index bf8d94a3e42504139b10daa39b8f8e7a8b2d93cc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/json.h +++ /dev/null @@ -1,754 +0,0 @@ -// Copyright (c) From https://github.com/nbsdx/SimpleJSON -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef UTILS_JSON_H_ -#define UTILS_JSON_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace json { - -using std::deque; -using std::enable_if; -using std::initializer_list; -using std::is_convertible; -using std::is_floating_point; -using std::is_integral; -using std::is_same; -using std::map; -using std::string; - -namespace { // NOLINT -string json_escape(const string& str) { - string output; - for (unsigned i = 0; i < str.length(); ++i) switch (str[i]) { - case '\"': - output += "\\\""; - break; - case '\\': - output += "\\\\"; - break; - case '\b': - output += "\\b"; - break; - case '\f': - output += "\\f"; - break; - case '\n': - output += "\\n"; - break; - case '\r': - output += "\\r"; - break; - case '\t': - output += "\\t"; - break; - default: - output += str[i]; - break; - } - return std::move(output); -} -} // namespace - -class JSON { - union BackingData { - BackingData(double d) : Float(d) {} - BackingData(int l) : Int(l) {} - BackingData(bool b) : Bool(b) {} - BackingData(string s) : String(new string(s)) {} - BackingData() : Int(0) {} - - deque* List; - map* Map; - string* String; - double Float; - int Int; - bool Bool; - } Internal; - - public: - enum class Class { Null, Object, Array, String, Floating, Integral, Boolean }; - - template - class JSONWrapper { - Container* object; - - public: - explicit JSONWrapper(Container* val) : object(val) {} - explicit JSONWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::iterator begin() { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::iterator end() { - return object ? object->end() : typename Container::iterator(); - } - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::const_iterator end() const { - return object ? object->end() : typename Container::iterator(); - } - }; - - template - class JSONConstWrapper { - const Container* object; - - public: - explicit JSONConstWrapper(const Container* val) : object(val) {} - explicit JSONConstWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::const_iterator(); - } - typename Container::const_iterator end() const { - return object ? 
object->end() : typename Container::const_iterator(); - } - }; - - JSON() : Internal(), Type(Class::Null) {} - - explicit JSON(initializer_list list) : JSON() { - SetType(Class::Object); - for (auto i = list.begin(), e = list.end(); i != e; ++i, ++i) - operator[](i->ToString()) = *std::next(i); - } - - JSON(JSON&& other) : Internal(other.Internal), Type(other.Type) { - other.Type = Class::Null; - other.Internal.Map = nullptr; - } - - JSON& operator=(JSON&& other) { - ClearInternal(); - Internal = other.Internal; - Type = other.Type; - other.Internal.Map = nullptr; - other.Type = Class::Null; - return *this; - } - - JSON(const JSON& other) { - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - } - - JSON& operator=(const JSON& other) { - ClearInternal(); - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - return *this; - } - - ~JSON() { - switch (Type) { - case Class::Array: - delete Internal.List; - break; - case Class::Object: - delete Internal.Map; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - template - explicit JSON(T b, typename enable_if::value>::type* = 0) - : Internal(b), Type(Class::Boolean) {} - - template - explicit JSON(T i, typename enable_if::value && - !is_same::value>::type* = 0) - : Internal(static_cast(i)), Type(Class::Integral) {} - - template - explicit JSON(T f, typename enable_if::value>::type* = 0) - : Internal(static_cast(f)), Type(Class::Floating) {} - - template - explicit JSON(T s, - typename enable_if::value>::type* = 0) - : Internal(string(s)), Type(Class::String) {} - - explicit JSON(std::nullptr_t) : Internal(), Type(Class::Null) {} - - static JSON Make(Class type) { - JSON ret; - ret.SetType(type); - return ret; - } - - static JSON Load(const string&); - - template - void append(T arg) { - SetType(Class::Array); - Internal.List->emplace_back(arg); - } - - template - void append(T arg, U... 
args) { - append(arg); - append(args...); - } - - template - typename enable_if::value, JSON&>::type operator=(T b) { - SetType(Class::Boolean); - Internal.Bool = b; - return *this; - } - - template - typename enable_if::value && !is_same::value, - JSON&>::type - operator=(T i) { - SetType(Class::Integral); - Internal.Int = i; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=(T f) { - SetType(Class::Floating); - Internal.Float = f; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=( - T s) { - SetType(Class::String); - *Internal.String = string(s); - return *this; - } - - JSON& operator[](const string& key) { - SetType(Class::Object); - return Internal.Map->operator[](key); - } - - JSON& operator[](unsigned index) { - SetType(Class::Array); - if (index >= Internal.List->size()) Internal.List->resize(index + 1); - return Internal.List->operator[](index); - } - - JSON& at(const string& key) { return operator[](key); } - - const JSON& at(const string& key) const { return Internal.Map->at(key); } - - JSON& at(unsigned index) { return operator[](index); } - - const JSON& at(unsigned index) const { return Internal.List->at(index); } - - int length() const { - if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - bool hasKey(const string& key) const { - if (Type == Class::Object) - return Internal.Map->find(key) != Internal.Map->end(); - return false; - } - - int size() const { - if (Type == Class::Object) - return Internal.Map->size(); - else if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - Class JSONType() const { return Type; } - - /// Functions for getting primitives from the JSON object. - bool IsNull() const { return Type == Class::Null; } - - string ToString() const { - bool b; - return std::move(ToString(&b)); - } - string ToString(bool* ok) const { - *ok = (Type == Class::String); - return *ok ? std::move(json_escape(*Internal.String)) : string(""); - } - - double ToFloat() const { - bool b; - return ToFloat(&b); - } - double ToFloat(bool* ok) const { - *ok = (Type == Class::Floating); - return *ok ? Internal.Float : 0.0; - } - - int ToInt() const { - bool b; - return ToInt(&b); - } - int ToInt(bool* ok) const { - *ok = (Type == Class::Integral); - return *ok ? Internal.Int : 0; - } - - bool ToBool() const { - bool b; - return ToBool(&b); - } - bool ToBool(bool* ok) const { - *ok = (Type == Class::Boolean); - return *ok ? 
Internal.Bool : false; - } - - JSONWrapper> ObjectRange() { - if (Type == Class::Object) - return JSONWrapper>(Internal.Map); - return JSONWrapper>(nullptr); - } - - JSONWrapper> ArrayRange() { - if (Type == Class::Array) return JSONWrapper>(Internal.List); - return JSONWrapper>(nullptr); - } - - JSONConstWrapper> ObjectRange() const { - if (Type == Class::Object) - return JSONConstWrapper>(Internal.Map); - return JSONConstWrapper>(nullptr); - } - - JSONConstWrapper> ArrayRange() const { - if (Type == Class::Array) - return JSONConstWrapper>(Internal.List); - return JSONConstWrapper>(nullptr); - } - - string dump(int depth = 1, string tab = " ") const { - string pad = ""; - for (int i = 0; i < depth; ++i, pad += tab) { - } - - switch (Type) { - case Class::Null: - return "null"; - case Class::Object: { - string s = "{\n"; - bool skip = true; - for (auto& p : *Internal.Map) { - if (!skip) s += ",\n"; - s += (pad + "\"" + p.first + "\" : " + p.second.dump(depth + 1, tab)); - skip = false; - } - s += ("\n" + pad.erase(0, 2) + "}"); - return s; - } - case Class::Array: { - string s = "["; - bool skip = true; - for (auto& p : *Internal.List) { - if (!skip) s += ", "; - s += p.dump(depth + 1, tab); - skip = false; - } - s += "]"; - return s; - } - case Class::String: - return "\"" + json_escape(*Internal.String) + "\""; - case Class::Floating: - return std::to_string(Internal.Float); - case Class::Integral: - return std::to_string(Internal.Int); - case Class::Boolean: - return Internal.Bool ? "true" : "false"; - default: - return ""; - } - return ""; - } - - friend std::ostream& operator<<(std::ostream&, const JSON&); - - private: - void SetType(Class type) { - if (type == Type) return; - - ClearInternal(); - - switch (type) { - case Class::Null: - Internal.Map = nullptr; - break; - case Class::Object: - Internal.Map = new map(); - break; - case Class::Array: - Internal.List = new deque(); - break; - case Class::String: - Internal.String = new string(); - break; - case Class::Floating: - Internal.Float = 0.0; - break; - case Class::Integral: - Internal.Int = 0; - break; - case Class::Boolean: - Internal.Bool = false; - break; - } - - Type = type; - } - - private: - /* beware: only call if YOU know that Internal is allocated. No checks - performed here. This function should be called in a constructed JSON just - before you are going to overwrite Internal... -*/ - void ClearInternal() { - switch (Type) { - case Class::Object: - delete Internal.Map; - break; - case Class::Array: - delete Internal.List; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - private: - Class Type = Class::Null; -}; - -JSON Array() { return std::move(JSON::Make(JSON::Class::Array)); } - -template -JSON Array(T... 
args) { - JSON arr = JSON::Make(JSON::Class::Array); - arr.append(args...); - return std::move(arr); -} - -JSON Object() { return std::move(JSON::Make(JSON::Class::Object)); } - -std::ostream& operator<<(std::ostream& os, const JSON& json) { - os << json.dump(); - return os; -} - -namespace { // NOLINT -JSON parse_next(const string&, size_t&); - -void consume_ws(const string& str, size_t& offset) { // NOLINT - while (isspace(str[offset])) ++offset; -} - -JSON parse_object(const string& str, size_t& offset) { // NOLINT - JSON Object = JSON::Make(JSON::Class::Object); - - ++offset; - consume_ws(str, offset); - if (str[offset] == '}') { - ++offset; - return std::move(Object); - } - - while (true) { - JSON Key = parse_next(str, offset); - consume_ws(str, offset); - if (str[offset] != ':') { - std::cerr << "Error: Object: Expected colon, found '" << str[offset] - << "'\n"; - break; - } - consume_ws(str, ++offset); - JSON Value = parse_next(str, offset); - Object[Key.ToString()] = Value; - - consume_ws(str, offset); - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == '}') { - ++offset; - break; - } else { - std::cerr << "ERROR: Object: Expected comma, found '" << str[offset] - << "'\n"; - break; - } - } - - return std::move(Object); -} - -JSON parse_array(const string& str, size_t& offset) { // NOLINT - JSON Array = JSON::Make(JSON::Class::Array); - unsigned index = 0; - - ++offset; - consume_ws(str, offset); - if (str[offset] == ']') { - ++offset; - return std::move(Array); - } - - while (true) { - Array[index++] = parse_next(str, offset); - consume_ws(str, offset); - - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == ']') { - ++offset; - break; - } else { - std::cerr << "ERROR: Array: Expected ',' or ']', found '" << str[offset] - << "'\n"; - return std::move(JSON::Make(JSON::Class::Array)); - } - } - - return std::move(Array); -} - -JSON parse_string(const string& str, size_t& offset) { // NOLINT - JSON String; - string val; - for (char c = str[++offset]; c != '\"'; c = str[++offset]) { - if (c == '\\') { - switch (str[++offset]) { - case '\"': - val += '\"'; - break; - case '\\': - val += '\\'; - break; - case '/': - val += '/'; - break; - case 'b': - val += '\b'; - break; - case 'f': - val += '\f'; - break; - case 'n': - val += '\n'; - break; - case 'r': - val += '\r'; - break; - case 't': - val += '\t'; - break; - case 'u': { - val += "\\u"; - for (unsigned i = 1; i <= 4; ++i) { - c = str[offset + i]; - if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || - (c >= 'A' && c <= 'F')) { - val += c; - } else { - std::cerr << "ERROR: String: Expected hex character in unicode " - "escape, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::String)); - } - } - offset += 4; - } break; - default: - val += '\\'; - break; - } - } else { - val += c; - } - } - ++offset; - String = val; - return std::move(String); -} - -JSON parse_number(const string& str, size_t& offset) { // NOLINT - JSON Number; - string val, exp_str; - char c; - bool isDouble = false; - int exp = 0; - while (true) { - c = str[offset++]; - if ((c == '-') || (c >= '0' && c <= '9')) { - val += c; - } else if (c == '.') { - val += c; - isDouble = true; - } else { - break; - } - } - if (c == 'E' || c == 'e') { - c = str[offset++]; - if (c == '-') { - ++offset; - exp_str += '-'; - } - while (true) { - c = str[offset++]; - if (c >= '0' && c <= '9') { - exp_str += c; - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: 
Expected a number for exponent, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } else { - break; - } - } - exp = std::stol(exp_str); - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: unexpected character '" << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - --offset; - - if (isDouble) { - Number = std::stod(val) * std::pow(10, exp); - } else { - if (!exp_str.empty()) - Number = std::stol(val) * std::pow(10, exp); - else - Number = std::stol(val); - } - return std::move(Number); -} - -JSON parse_bool(const string& str, size_t& offset) { // NOLINT - JSON Bool; - if (str.substr(offset, 4) == "true") { - Bool = true; - } else if (str.substr(offset, 5) == "false") { - Bool = false; - } else { - std::cerr << "ERROR: Bool: Expected 'true' or 'false', found '" - << str.substr(offset, 5) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += (Bool.ToBool() ? 4 : 5); - return std::move(Bool); -} - -JSON parse_null(const string& str, size_t& offset) { // NOLINT - JSON Null; - if (str.substr(offset, 4) != "null") { - std::cerr << "ERROR: Null: Expected 'null', found '" - << str.substr(offset, 4) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += 4; - return std::move(Null); -} - -JSON parse_next(const string& str, size_t& offset) { // NOLINT - char value; - consume_ws(str, offset); - value = str[offset]; - switch (value) { - case '[': - return std::move(parse_array(str, offset)); - case '{': - return std::move(parse_object(str, offset)); - case '\"': - return std::move(parse_string(str, offset)); - case 't': - case 'f': - return std::move(parse_bool(str, offset)); - case 'n': - return std::move(parse_null(str, offset)); - default: - if ((value <= '9' && value >= '0') || value == '-') - return std::move(parse_number(str, offset)); - } - std::cerr << "ERROR: Parse: Unknown starting character '" << value << "'\n"; - return JSON(); -} -} // namespace - -JSON JSON::Load(const string& str) { - size_t offset = 0; - return std::move(parse_next(str, offset)); -} - -} // namespace json - -#endif // UTILS_JSON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/log.h deleted file mode 100644 index c2bf03f261a8711f74da819d80d68e8eb9fb124a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/log.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_LOG_H_ -#define UTILS_LOG_H_ - -// Because openfst is a dynamic library compiled with gflags/glog, we must use -// the gflags/glog from openfst to avoid them linked both statically and -// dynamically into the executable. 
-#include "fst/log.h" - -#endif // UTILS_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/string.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/string.cc deleted file mode 100644 index 1ab93adf3cac1bc5a42c0b8c6cadbde399678fef..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/string.cc +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "utils/string.h" - -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -void SplitString(const std::string& str, std::vector* strs) { - SplitStringToVector(Trim(str), " \t", true, strs); -} - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars) { - chars->clear(); - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - assert((str[i] & 0xF8) <= 0xF0); - if ((str[i] & 0x80) == 0x00) { - // The first 128 characters (US-ASCII) in UTF-8 format only need one byte. - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - // The next 1,920 characters need two bytes to encode, - // which covers the remainder of almost all Latin-script alphabets. - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - // Three bytes are needed for characters in the rest of - // the Basic Multilingual Plane, which contains virtually all characters - // in common use, including most Chinese, Japanese and Korean characters. - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - // Four bytes are needed for characters in the other planes of Unicode, - // which include less common CJK characters, various historic scripts, - // mathematical symbols, and emoji (pictographic symbols). 
- bytes = 4; - } - chars->push_back(str.substr(i, bytes)); - } -} - -int UTF8StringLength(const std::string& str) { - int len = 0; - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - if ((str[i] & 0x80) == 0x00) { - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - bytes = 4; - } - ++len; - } - return len; -} - -bool CheckEnglishChar(const std::string& ch) { - // all english characters should be encoded in one byte - if (ch.size() != 1) return false; - // english words may contain apostrophe, i.e., "He's" - return isalpha(ch[0]) || ch[0] == '\''; -} - -bool CheckEnglishWord(const std::string& word) { - std::vector chars; - SplitUTF8StringToChars(word, &chars); - for (size_t k = 0; k < chars.size(); k++) { - if (!CheckEnglishChar(chars[k])) { - return false; - } - } - return true; -} - -std::string JoinString(const std::string& c, - const std::vector& strs) { - std::string result; - if (strs.size() > 0) { - for (int i = 0; i < strs.size() - 1; i++) { - result += (strs[i] + c); - } - result += strs.back(); - } - return result; -} - -bool IsAlpha(const std::string& str) { - for (size_t i = 0; i < str.size(); i++) { - if (!isalpha(str[i])) { - return false; - } - } - return true; -} - -std::string ProcessBlank(const std::string& str, bool lowercase) { - std::string result; - if (!str.empty()) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - for (std::string& ch : chars) { - if (ch != kSpaceSymbol) { - result.append(ch); - } else { - // Ignore consecutive space or located in head - if (!result.empty() && result.back() != ' ') { - result.push_back(' '); - } - } - } - // Ignore tailing space - if (!result.empty() && result.back() == ' ') { - result.pop_back(); - } - // NOTE: convert string to wstring - // see issue 745: https://github.com/wenet-e2e/wenet/issues/745 - std::locale loc(""); - std::wstring_convert, wchar_t> converter; - std::wstring wsresult = converter.from_bytes(result); - for (auto& c : wsresult) { - c = lowercase ? tolower(c, loc) : toupper(c, loc); - } - result = converter.to_bytes(wsresult); - } - return result; -} - -std::string Ltrim(const std::string& str) { - size_t start = str.find_first_not_of(WHITESPACE); - return (start == std::string::npos) ? "" : str.substr(start); -} - -std::string Rtrim(const std::string& str) { - size_t end = str.find_last_not_of(WHITESPACE); - return (end == std::string::npos) ? 
"" : str.substr(0, end + 1); -} - -std::string Trim(const std::string& str) { return Rtrim(Ltrim(str)); } - -std::string JoinPath(const std::string& left, const std::string& right) { - std::string path(left); - if (path.size() && path.back() != '/') { - path.push_back('/'); - } - path.append(right); - return path; -} - -#ifdef _MSC_VER -std::wstring ToWString(const std::string& str) { - unsigned len = str.size() * 2; - setlocale(LC_CTYPE, ""); - wchar_t* p = new wchar_t[len]; - mbstowcs(p, str.c_str(), len); - std::wstring wstr(p); - delete[] p; - return wstr; -} -#endif - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/string.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/string.h deleted file mode 100644 index bf7a52ae09bce45ab7e34a5277652d7ae91bae1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/string.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_STRING_H_ -#define UTILS_STRING_H_ - -#include -#include -#include -#include -#include - -#include "fst/symbol-table.h" - -namespace wenet { - -const char WHITESPACE[] = " \n\r\t\f\v"; - -// Split the string with space or tab. -void SplitString(const std::string& str, std::vector* strs); - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out); - -// NOTE(Xingchen Song): we add this function to make it possible to -// support multilingual recipe in the future, in which characters of -// different languages are all encoded in UTF-8 format. -// UTF-8 REF: https://en.wikipedia.org/wiki/UTF-8#Encoding -// Split the UTF-8 string into chars. -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars); - -int UTF8StringLength(const std::string& str); - -// Check whether the UTF-8 char is alphabet or '. -bool CheckEnglishChar(const std::string& ch); - -// Check whether the UTF-8 word is only contains alphabet or '. -bool CheckEnglishWord(const std::string& word); - -std::string JoinString(const std::string& c, - const std::vector& strs); - -bool IsAlpha(const std::string& str); - -// Split the UTF-8 string into words by symbol table. -// Return whether not contains oov. -bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - -// Replace ▁ with space, then remove head, tail and consecutive space. 
-std::string ProcessBlank(const std::string& str, bool lowercase); - -std::string Ltrim(const std::string& str); - -std::string Rtrim(const std::string& str); - -std::string Trim(const std::string& str); - -std::string JoinPath(const std::string& left, const std::string& right); - -#ifdef _MSC_VER -std::wstring ToWString(const std::string& str); -#endif - -} // namespace wenet - -#endif // UTILS_STRING_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/thread_pool.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/thread_pool.h deleted file mode 100644 index a78162995d90bf079ad091cf14cb9f2cd4476d05..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/thread_pool.h +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2012 Jakob Progsch, Václav Zeman - -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. - -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: - -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. - -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. - -// 3. This notice may not be removed or altered from any source -// distribution. - -#ifndef UTILS_THREAD_POOL_H_ -#define UTILS_THREAD_POOL_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -class ThreadPool { - public: - explicit ThreadPool(size_t); - template - auto enqueue(F&& f, Args&&... args) - -> std::future::type>; - ~ThreadPool(); - - private: - // need to keep track of threads so we can join them - std::vector workers; - // the task queue - std::queue > tasks; - - // synchronization - std::mutex queue_mutex; - std::condition_variable condition; - bool stop; -}; - -// the constructor just launches some amount of workers -inline ThreadPool::ThreadPool(size_t threads) : stop(false) { - for (size_t i = 0; i < threads; ++i) - workers.emplace_back([this] { - for (;;) { - std::function task; - - { - std::unique_lock lock(this->queue_mutex); - this->condition.wait( - lock, [this] { return this->stop || !this->tasks.empty(); }); - if (this->stop && this->tasks.empty()) return; - task = std::move(this->tasks.front()); - this->tasks.pop(); - } - - task(); - } - }); -} - -// add new work item to the pool -template -auto ThreadPool::enqueue(F&& f, Args&&... 
args) - -> std::future::type> { - using return_type = typename std::result_of::type; - - auto task = std::make_shared >( - std::bind(std::forward(f), std::forward(args)...)); - - std::future res = task->get_future(); - { - std::unique_lock lock(queue_mutex); - - // don't allow enqueueing after stopping the pool - if (stop) { - throw std::runtime_error("enqueue on stopped ThreadPool"); - } - - tasks.emplace([task]() { (*task)(); }); - } - condition.notify_one(); - return res; -} - -// the destructor joins all threads -inline ThreadPool::~ThreadPool() { - { - std::unique_lock lock(queue_mutex); - stop = true; - } - condition.notify_all(); - for (std::thread& worker : workers) { - worker.join(); - } -} - -#endif // UTILS_THREAD_POOL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/timer.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/timer.h deleted file mode 100644 index 068519f98d140ba0eef68babe2ad2fdcb798c074..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/timer.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_TIMER_H_ -#define UTILS_TIMER_H_ - -#include - -namespace wenet { - -class Timer { - public: - Timer() : time_start_(std::chrono::steady_clock::now()) {} - void Reset() { time_start_ = std::chrono::steady_clock::now(); } - // return int in milliseconds - int Elapsed() const { - auto time_now = std::chrono::steady_clock::now(); - return std::chrono::duration_cast(time_now - - time_start_) - .count(); - } - - private: - std::chrono::time_point time_start_; -}; -} // namespace wenet - -#endif // UTILS_TIMER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/utils.cc deleted file mode 100644 index c37e36c6e9f629e0a4b11cf21a791aefd58b659f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/utils.cc +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "utils/utils.h" - -#include -#include -#include -#include -#include -#include - -#include "utils/log.h" - -namespace wenet { - -float LogAdd(float x, float y) { - static float num_min = -std::numeric_limits::max(); - if (x <= num_min) return y; - if (y <= num_min) return x; - float xmax = std::max(x, y); - return std::log(std::exp(x - xmax) + std::exp(y - xmax)) + xmax; -} - -template -struct ValueComp { - bool operator()(const std::pair& lhs, - const std::pair& rhs) const { - return lhs.first > rhs.first || - (lhs.first == rhs.first && lhs.second < rhs.second); - } -}; - -// We refer the pytorch topk implementation -// https://github.com/pytorch/pytorch/blob/master/caffe2/operators/top_k.cc -template -void TopK(const std::vector& data, int32_t k, std::vector* values, - std::vector* indices) { - std::vector> heap_data; - int n = data.size(); - for (int32_t i = 0; i < k && i < n; ++i) { - heap_data.emplace_back(data[i], i); - } - std::priority_queue, std::vector>, - ValueComp> - pq(ValueComp(), std::move(heap_data)); - for (int32_t i = k; i < n; ++i) { - if (pq.top().first < data[i]) { - pq.pop(); - pq.emplace(data[i], i); - } - } - - values->resize(std::min(k, n)); - indices->resize(std::min(k, n)); - int32_t cur = values->size() - 1; - while (!pq.empty()) { - const auto& item = pq.top(); - (*values)[cur] = item.first; - (*indices)[cur] = item.second; - pq.pop(); - cur -= 1; - } -} - -template void TopK(const std::vector& data, int32_t k, - std::vector* values, - std::vector* indices); - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/utils.h deleted file mode 100644 index f9957c0b6e8ae27d9260e75cf55e786055827801..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/utils/utils.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef UTILS_UTILS_H_ -#define UTILS_UTILS_H_ - -#include -#include -#include - -namespace wenet { - -#define WENET_DISALLOW_COPY_AND_ASSIGN(Type) \ - Type(const Type&) = delete; \ - Type& operator=(const Type&) = delete; - -const float kFloatMax = std::numeric_limits::max(); -// kSpaceSymbol in UTF-8 is: ▁ -const char kSpaceSymbol[] = "\xe2\x96\x81"; - -// Return the sum of two probabilities in log scale -float LogAdd(float x, float y); - -template -void TopK(const std::vector& data, int32_t k, std::vector* values, - std::vector* indices); - -} // namespace wenet - -#endif // UTILS_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/CMakeLists.txt deleted file mode 100644 index 67447c42d977f120fc39cdab0d052b011edd3efe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library(websocket STATIC - websocket_client.cc - websocket_server.cc -) -target_link_libraries(websocket PUBLIC decoder) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_client.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_client.cc deleted file mode 100644 index c0394e6250153e2d59636c9eab62badc4a737d16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_client.cc +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "websocket/websocket_client.h" - -#include "boost/json/src.hpp" - -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from -namespace json = boost::json; - -WebSocketClient::WebSocketClient(const std::string& hostname, int port) - : hostname_(hostname), port_(port) { - Connect(); - t_.reset(new std::thread(&WebSocketClient::ReadLoopFunc, this)); -} - -void WebSocketClient::Connect() { - tcp::resolver resolver{ioc_}; - // Look up the domain name - auto const results = resolver.resolve(hostname_, std::to_string(port_)); - // Make the connection on the IP address we get from a lookup - auto ep = asio::connect(ws_.next_layer(), results); - // Provide the value of the Host HTTP header during the WebSocket handshake. 
- // See https://tools.ietf.org/html/rfc7230#section-5.4 - std::string host = hostname_ + ":" + std::to_string(ep.port()); - // Perform the websocket handshake - ws_.handshake(host, "/"); -} - -void WebSocketClient::SendTextData(const std::string& data) { - ws_.text(true); - ws_.write(asio::buffer(data)); -} - -void WebSocketClient::SendBinaryData(const void* data, size_t size) { - ws_.binary(true); - ws_.write(asio::buffer(data, size)); -} - -void WebSocketClient::Close() { ws_.close(websocket::close_code::normal); } - -void WebSocketClient::ReadLoopFunc() { - try { - while (true) { - beast::flat_buffer buffer; - ws_.read(buffer); - std::string message = beast::buffers_to_string(buffer.data()); - LOG(INFO) << message; - CHECK(ws_.got_text()); - json::object obj = json::parse(message).as_object(); - if (obj["status"] != "ok") { - break; - } - if (obj["type"] == "speech_end") { - done_ = true; - break; - } - } - } catch (beast::system_error const& se) { - // This indicates that the session was closed - if (se.code() != websocket::error::closed) { - LOG(ERROR) << se.code().message(); - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void WebSocketClient::Join() { t_->join(); } - -void WebSocketClient::SendStartSignal() { - // TODO(Binbin Zhang): Add sample rate and other setting support - json::value start_tag = {{"signal", "start"}, - {"nbest", nbest_}, - {"continuous_decoding", continuous_decoding_}}; - std::string start_message = json::serialize(start_tag); - this->SendTextData(start_message); -} - -void WebSocketClient::SendEndSignal() { - json::value end_tag = {{"signal", "end"}}; - std::string end_message = json::serialize(end_tag); - this->SendTextData(end_message); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_client.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_client.h deleted file mode 100644 index 76ec3aa451d31c7ee6b158ce21c8acdc10575eb3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_client.h +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef WEBSOCKET_WEBSOCKET_CLIENT_H_ -#define WEBSOCKET_WEBSOCKET_CLIENT_H_ - -#include -#include -#include -#include - -#include "boost/asio/connect.hpp" -#include "boost/asio/ip/tcp.hpp" -#include "boost/beast/core.hpp" -#include "boost/beast/websocket.hpp" - -#include "utils/utils.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from - -class WebSocketClient { - public: - WebSocketClient(const std::string& host, int port); - - void SendTextData(const std::string& data); - void SendBinaryData(const void* data, size_t size); - void ReadLoopFunc(); - void Close(); - void Join(); - void SendStartSignal(); - void SendEndSignal(); - void set_nbest(int nbest) { nbest_ = nbest; } - void set_continuous_decoding(bool continuous_decoding) { - continuous_decoding_ = continuous_decoding; - } - bool done() const { return done_; } - - private: - void Connect(); - std::string hostname_; - int port_; - int nbest_ = 1; - bool continuous_decoding_ = false; - bool done_ = false; - asio::io_context ioc_; - websocket::stream ws_{ioc_}; - std::unique_ptr t_{nullptr}; - - WENET_DISALLOW_COPY_AND_ASSIGN(WebSocketClient); -}; - -} // namespace wenet - -#endif // WEBSOCKET_WEBSOCKET_CLIENT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_server.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_server.cc deleted file mode 100644 index 52ab088f46d59b9f3f1add1e34d3aceae290f5da..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_server.cc +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "websocket/websocket_server.h" - -#include -#include -#include - -#include "boost/json/src.hpp" -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from -namespace json = boost::json; - -ConnectionHandler::ConnectionHandler( - tcp::socket&& socket, std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource) - : ws_(std::move(socket)), - feature_config_(std::move(feature_config)), - decode_config_(std::move(decode_config)), - decode_resource_(std::move(decode_resource)) {} - -void ConnectionHandler::OnSpeechStart() { - LOG(INFO) << "Received speech start signal, start reading speech"; - got_start_tag_ = true; - json::value rv = {{"status", "ok"}, {"type", "server_ready"}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); - feature_pipeline_ = std::make_shared(*feature_config_); - decoder_ = std::make_shared(feature_pipeline_, decode_resource_, - *decode_config_); - // Start decoder thread - decode_thread_ = - std::make_shared(&ConnectionHandler::DecodeThreadFunc, this); -} - -void ConnectionHandler::OnSpeechEnd() { - LOG(INFO) << "Received speech end signal"; - if (feature_pipeline_ != nullptr) { - feature_pipeline_->set_input_finished(); - } - got_end_tag_ = true; -} - -void ConnectionHandler::OnPartialResult(const std::string& result) { - LOG(INFO) << "Partial result: " << result; - json::value rv = { - {"status", "ok"}, {"type", "partial_result"}, {"nbest", result}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); -} - -void ConnectionHandler::OnFinalResult(const std::string& result) { - LOG(INFO) << "Final result: " << result; - json::value rv = { - {"status", "ok"}, {"type", "final_result"}, {"nbest", result}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); -} - -void ConnectionHandler::OnFinish() { - // Send finish tag - json::value rv = {{"status", "ok"}, {"type", "speech_end"}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); -} - -void ConnectionHandler::OnSpeechData(const beast::flat_buffer& buffer) { - // Read binary PCM data - int num_samples = buffer.size() / sizeof(int16_t); - VLOG(2) << "Received " << num_samples << " samples"; - CHECK(feature_pipeline_ != nullptr); - CHECK(decoder_ != nullptr); - const auto* pcm_data = static_cast(buffer.data().data()); - feature_pipeline_->AcceptWaveform(pcm_data, num_samples); -} - -std::string ConnectionHandler::SerializeResult(bool finish) { - json::array nbest; - for (const DecodeResult& path : decoder_->result()) { - json::object jpath({{"sentence", path.sentence}}); - if (finish) { - json::array word_pieces; - for (const WordPiece& word_piece : path.word_pieces) { - json::object jword_piece({{"word", word_piece.word}, - {"start", word_piece.start}, - {"end", word_piece.end}}); - word_pieces.emplace_back(jword_piece); - } - jpath.emplace("word_pieces", word_pieces); - } - nbest.emplace_back(jpath); - - if (nbest.size() == nbest_) { - break; - } - } - return json::serialize(nbest); -} - -void ConnectionHandler::DecodeThreadFunc() { - try { - while (true) { - DecodeState state = decoder_->Decode(); - if (state == DecodeState::kEndFeats) { - decoder_->Rescoring(); - std::string result = SerializeResult(true); - OnFinalResult(result); - OnFinish(); - stop_recognition_ = true; - break; - } else if (state == 
DecodeState::kEndpoint) { - decoder_->Rescoring(); - std::string result = SerializeResult(true); - OnFinalResult(result); - // If it's not continuous decoding, continue to do next recognition - // otherwise stop the recognition - if (continuous_decoding_) { - decoder_->ResetContinuousDecoding(); - } else { - OnFinish(); - stop_recognition_ = true; - break; - } - } else { - if (decoder_->DecodedSomething()) { - std::string result = SerializeResult(false); - OnPartialResult(result); - } - } - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void ConnectionHandler::OnError(const std::string& message) { - json::value rv = {{"status", "failed"}, {"message", message}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); - // Close websocket - ws_.close(websocket::close_code::normal); -} - -void ConnectionHandler::OnText(const std::string& message) { - json::value v = json::parse(message); - if (v.is_object()) { - json::object obj = v.get_object(); - if (obj.find("signal") != obj.end()) { - json::string signal = obj["signal"].as_string(); - if (signal == "start") { - if (obj.find("nbest") != obj.end()) { - if (obj["nbest"].is_int64()) { - nbest_ = obj["nbest"].as_int64(); - } else { - OnError("integer is expected for nbest option"); - } - } - if (obj.find("continuous_decoding") != obj.end()) { - if (obj["continuous_decoding"].is_bool()) { - continuous_decoding_ = obj["continuous_decoding"].as_bool(); - } else { - OnError( - "boolean true or false is expected for " - "continuous_decoding option"); - } - } - OnSpeechStart(); - } else if (signal == "end") { - OnSpeechEnd(); - } else { - OnError("Unexpected signal type"); - } - } else { - OnError("Wrong message header"); - } - } else { - OnError("Wrong protocol"); - } -} - -void ConnectionHandler::operator()() { - try { - // Accept the websocket handshake - ws_.accept(); - for (;;) { - // This buffer will hold the incoming message - beast::flat_buffer buffer; - // Read a message - ws_.read(buffer); - if (ws_.got_text()) { - std::string message = beast::buffers_to_string(buffer.data()); - LOG(INFO) << message; - OnText(message); - if (got_end_tag_) { - break; - } - } else { - if (!got_start_tag_) { - OnError("Start signal is expected before binary data"); - } else { - if (stop_recognition_) { - break; - } - OnSpeechData(buffer); - } - } - } - - LOG(INFO) << "Read all pcm data, wait for decoding thread"; - if (decode_thread_ != nullptr) { - decode_thread_->join(); - } - } catch (beast::system_error const& se) { - LOG(INFO) << se.code().message(); - // This indicates that the session was closed - if (se.code() == websocket::error::closed) { - OnSpeechEnd(); - } - if (decode_thread_ != nullptr) { - decode_thread_->join(); - } - } catch (std::exception const& e) { - LOG(ERROR) << e.what(); - } -} - -void WebSocketServer::Start() { - try { - auto const address = asio::ip::make_address("0.0.0.0"); - tcp::acceptor acceptor{ioc_, {address, static_cast(port_)}}; - for (;;) { - // This will receive the new connection - tcp::socket socket{ioc_}; - // Block until we get a connection - acceptor.accept(socket); - // Launch the session, transferring ownership of the socket - ConnectionHandler handler(std::move(socket), feature_config_, - decode_config_, decode_resource_); - std::thread t(std::move(handler)); - t.detach(); - } - } catch (const std::exception& e) { - LOG(FATAL) << e.what(); - } -} - -} // namespace wenet diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_server.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_server.h deleted file mode 100644 index a1241834221dcf93c34d6414bd9b5ae40ef1cf38..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/onnxruntime/websocket/websocket_server.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef WEBSOCKET_WEBSOCKET_SERVER_H_ -#define WEBSOCKET_WEBSOCKET_SERVER_H_ - -#include -#include -#include -#include -#include - -#include "boost/asio/connect.hpp" -#include "boost/asio/ip/tcp.hpp" -#include "boost/beast/core.hpp" -#include "boost/beast/websocket.hpp" - -#include "decoder/asr_decoder.h" -#include "frontend/feature_pipeline.h" -#include "utils/log.h" - -namespace wenet { - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace websocket = beast::websocket; // from -namespace asio = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from - -class ConnectionHandler { - public: - ConnectionHandler(tcp::socket&& socket, - std::shared_ptr feature_config, - std::shared_ptr decode_config, - std::shared_ptr decode_resource_); - void operator()(); - - private: - void OnSpeechStart(); - void OnSpeechEnd(); - void OnText(const std::string& message); - void OnFinish(); - void OnSpeechData(const beast::flat_buffer& buffer); - void OnError(const std::string& message); - void OnPartialResult(const std::string& result); - void OnFinalResult(const std::string& result); - void DecodeThreadFunc(); - std::string SerializeResult(bool finish); - - bool continuous_decoding_ = false; - int nbest_ = 1; - websocket::stream ws_; - std::shared_ptr feature_config_; - std::shared_ptr decode_config_; - std::shared_ptr decode_resource_; - - bool got_start_tag_ = false; - bool got_end_tag_ = false; - // When endpoint is detected, stop recognition, and stop receiving data. 
-  bool stop_recognition_ = false;
-  std::shared_ptr feature_pipeline_ = nullptr;
-  std::shared_ptr decoder_ = nullptr;
-  std::shared_ptr decode_thread_ = nullptr;
-};
-
-class WebSocketServer {
- public:
-  WebSocketServer(int port,
-                  std::shared_ptr feature_config,
-                  std::shared_ptr decode_config,
-                  std::shared_ptr decode_resource)
-      : port_(port),
-        feature_config_(std::move(feature_config)),
-        decode_config_(std::move(decode_config)),
-        decode_resource_(std::move(decode_resource)) {}
-
-  void Start();
-
- private:
-  int port_;
-  // The io_context is required for all I/O
-  asio::io_context ioc_{1};
-  std::shared_ptr feature_config_;
-  std::shared_ptr decode_config_;
-  std::shared_ptr decode_resource_;
-  WENET_DISALLOW_COPY_AND_ASSIGN(WebSocketServer);
-};
-
-}  // namespace wenet
-
-#endif  // WEBSOCKET_WEBSOCKET_SERVER_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/CMakeLists.txt
deleted file mode 100644
index 492b84242b48c3ccb2e62e997ebb182e98dbecc2..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/CMakeLists.txt
+++ /dev/null
@@ -1 +0,0 @@
-../libtorch/CMakeLists.txt
\ No newline at end of file
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/README.md
deleted file mode 100644
index 9cb1943e7e7763cb5e17ae62037b2f67e8f1b417..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/README.md
+++ /dev/null
@@ -1,52 +0,0 @@
-# WeNet & Raspberry PI (Cross Compile)
-
-* Step 1. Install cross compile tools in the PC.
-
-``` sh
-sudo apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
-```
-
-Or download, and install the binaries from: https://releases.linaro.org/components/toolchain/binaries/latest-7
-
-
-* Step 2. Export your experiment model to ONNX by https://github.com/wenet-e2e/wenet/blob/main/wenet/bin/export_onnx_cpu.py
-
-``` sh
-exp=exp # Change it to your experiment dir
-onnx_dir=onnx
-python -m wenet.bin.export_onnx_cpu \
-  --config $exp/train.yaml \
-  --checkpoint $exp/final.pt \
-  --chunk_size 16 \
-  --output_dir $onnx_dir \
-  --num_decoding_left_chunks -1
-
-# When it finishes, you can find `encoder.onnx(.quant)`, `ctc.onnx(.quant)`, and `decoder.onnx(.quant)` in the $onnx_dir respectively.
-# We use the quantified to speed up the inference, so rename it without the suffix `.quant`
-```
-
-* Step 3. Build. The build requires cmake 3.14 or above. and Send the binary and libraries to Raspberry PI.
-
-``` sh
-cmake -B build -DONNX=ON -DTORCH=OFF -DWEBSOCKET=OFF -DGRPC=OFF -DCMAKE_TOOLCHAIN_FILE=toolchains/aarch64-linux-gnu.toolchain.cmake
-cmake --build build
-scp build/bin/decoder_main pi@xxx.xxx.xxx:/path/to/wenet
-scp fc_base/onnxruntime-src/lib/libonnxruntime.so* pi@xxx.xxx.xxx:/path/to/wenet
-```
-
-* Step 4. Testing, the RTF(real time factor) is shown in Raspberry PI's console.
-
-``` sh
-cd /path/to/wenet
-export LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH
-export GLOG_logtostderr=1
-export GLOG_v=2
-wav_path=your_test_wav_path
-onnx_dir=your_model_dir
-units=units.txt # Change it to your model units path
-./build/bin/decoder_main \
-  --chunk_size 16 \
-  --wav_path $wav_path \
-  --onnx_dir $onnx_dir \
-  --unit_path $units 2>&1 | tee log.txt
-```
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/CMakeLists.txt
deleted file mode 100644
index 8d61ca8477f0f0b6128f1effe0a2738494b2620f..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-if(TORCH)
-  add_library(wenet_api SHARED wenet_api.cc)
-  target_link_libraries(wenet_api PUBLIC decoder)
-endif()
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/README.md
deleted file mode 100644
index 5eaa13b977eb4836eb930452f4434dc9f2ea4139..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# WeNet API
-
-We refer [vosk](https://github.com/alphacep/vosk-api/blob/master/src/vosk_api.h)
-for the interface design.
-
-
-We are going to implement the following interfaces:
-
-- [x] non-streaming recognition
-- [] streaming recognition
-- [] nbest
-- [] contextual biasing word
-- [] alignment
-- [] language support(post processor)
-- [] label check
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/wenet_api.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/wenet_api.cc
deleted file mode 100644
index cb1e0c8552e0126e2db274a29075578fe351a25f..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/wenet_api.cc
+++ /dev/null
@@ -1,245 +0,0 @@
-// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com)
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
- -#include "api/wenet_api.h" - -#include -#include -#include - -#include "decoder/asr_decoder.h" -#include "decoder/torch_asr_model.h" -#include "post_processor/post_processor.h" -#include "utils/file.h" -#include "utils/json.h" -#include "utils/string.h" - -class Recognizer { - public: - explicit Recognizer(const std::string& model_dir) { - // FeaturePipeline init - feature_config_ = std::make_shared(80, 16000); - feature_pipeline_ = - std::make_shared(*feature_config_); - // Resource init - resource_ = std::make_shared(); - wenet::TorchAsrModel::InitEngineThreads(); - std::string model_path = wenet::JoinPath(model_dir, "final.zip"); - CHECK(wenet::FileExists(model_path)); - - auto model = std::make_shared(); - model->Read(model_path); - resource_->model = model; - - // units.txt: E2E model unit - std::string unit_path = wenet::JoinPath(model_dir, "units.txt"); - CHECK(wenet::FileExists(unit_path)); - resource_->unit_table = std::shared_ptr( - fst::SymbolTable::ReadText(unit_path)); - - std::string fst_path = wenet::JoinPath(model_dir, "TLG.fst"); - if (wenet::FileExists(fst_path)) { // With LM - resource_->fst = std::shared_ptr>( - fst::Fst::Read(fst_path)); - - std::string symbol_path = wenet::JoinPath(model_dir, "words.txt"); - CHECK(wenet::FileExists(symbol_path)); - resource_->symbol_table = std::shared_ptr( - fst::SymbolTable::ReadText(symbol_path)); - } else { // Without LM, symbol_table is the same as unit_table - resource_->symbol_table = resource_->unit_table; - } - - // Context config init - context_config_ = std::make_shared(); - decode_options_ = std::make_shared(); - post_process_opts_ = std::make_shared(); - } - - void Reset() { - if (feature_pipeline_ != nullptr) { - feature_pipeline_->Reset(); - } - if (decoder_ != nullptr) { - decoder_->Reset(); - } - result_.clear(); - } - - void InitDecoder() { - CHECK(decoder_ == nullptr); - // Optional init context graph - if (context_.size() > 0) { - context_config_->context_score = context_score_; - auto context_graph = - std::make_shared(*context_config_); - context_graph->BuildContextGraph(context_, resource_->symbol_table); - resource_->context_graph = context_graph; - } - // PostProcessor - if (language_ == "chs") { // TODO(Binbin Zhang): CJK(chs, jp, kr) - post_process_opts_->language_type = wenet::kMandarinEnglish; - } else { - post_process_opts_->language_type = wenet::kIndoEuropean; - } - resource_->post_processor = - std::make_shared(*post_process_opts_); - // Init decoder - decoder_ = std::make_shared(feature_pipeline_, resource_, - *decode_options_); - } - - void Decode(const char* data, int len, int last) { - using wenet::DecodeState; - // Init decoder when it is called first time - if (decoder_ == nullptr) { - InitDecoder(); - } - // Convert to 16 bits PCM data to float - CHECK_EQ(len % 2, 0); - feature_pipeline_->AcceptWaveform(reinterpret_cast(data), - len / 2); - if (last > 0) { - feature_pipeline_->set_input_finished(); - } - - while (true) { - DecodeState state = decoder_->Decode(false); - if (state == DecodeState::kWaitFeats) { - break; - } else if (state == DecodeState::kEndFeats) { - decoder_->Rescoring(); - UpdateResult(true); - break; - } else if (state == DecodeState::kEndpoint && continuous_decoding_) { - decoder_->Rescoring(); - UpdateResult(true); - decoder_->ResetContinuousDecoding(); - } else { // kEndBatch - UpdateResult(false); - } - } - } - - void UpdateResult(bool final_result) { - json::JSON obj; - obj["type"] = final_result ? "final_result" : "partial_result"; - int nbest = final_result ? 
nbest_ : 1; - obj["nbest"] = json::Array(); - for (int i = 0; i < nbest && i < decoder_->result().size(); i++) { - json::JSON one; - one["sentence"] = decoder_->result()[i].sentence; - if (final_result && enable_timestamp_) { - one["word_pieces"] = json::Array(); - for (const auto& word_piece : decoder_->result()[i].word_pieces) { - json::JSON piece; - piece["word"] = word_piece.word; - piece["start"] = word_piece.start; - piece["end"] = word_piece.end; - one["word_pieces"].append(piece); - } - } - one["sentence"] = decoder_->result()[i].sentence; - obj["nbest"].append(one); - } - result_ = obj.dump(); - } - - const char* GetResult() { return result_.c_str(); } - - void set_nbest(int n) { nbest_ = n; } - void set_enable_timestamp(bool flag) { enable_timestamp_ = flag; } - void AddContext(const char* word) { context_.emplace_back(word); } - void set_context_score(float score) { context_score_ = score; } - void set_language(const char* lang) { language_ = lang; } - void set_continuous_decoding(bool flag) { continuous_decoding_ = flag; } - - private: - // NOTE(Binbin Zhang): All use shared_ptr for clone in the future - std::shared_ptr feature_config_ = nullptr; - std::shared_ptr feature_pipeline_ = nullptr; - std::shared_ptr resource_ = nullptr; - std::shared_ptr decode_options_ = nullptr; - std::shared_ptr decoder_ = nullptr; - std::shared_ptr context_config_ = nullptr; - std::shared_ptr post_process_opts_ = nullptr; - - int nbest_ = 1; - std::string result_; - bool enable_timestamp_ = false; - std::vector context_; - float context_score_; - std::string language_ = "chs"; - bool continuous_decoding_ = false; -}; - -void* wenet_init(const char* model_dir) { - Recognizer* decoder = new Recognizer(model_dir); - return reinterpret_cast(decoder); -} - -void wenet_free(void* decoder) { - delete reinterpret_cast(decoder); -} - -void wenet_reset(void* decoder) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->Reset(); -} - -void wenet_decode(void* decoder, const char* data, int len, int last) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->Decode(data, len, last); -} - -const char* wenet_get_result(void* decoder) { - Recognizer* recognizer = reinterpret_cast(decoder); - return recognizer->GetResult(); -} - -void wenet_set_log_level(int level) { - FLAGS_logtostderr = true; - FLAGS_v = level; -} - -void wenet_set_nbest(void* decoder, int n) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_nbest(n); -} - -void wenet_set_timestamp(void* decoder, int flag) { - Recognizer* recognizer = reinterpret_cast(decoder); - bool enable = flag > 0 ? 
true : false; - recognizer->set_enable_timestamp(enable); -} - -void wenet_add_context(void* decoder, const char* word) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->AddContext(word); -} - -void wenet_set_context_score(void* decoder, float score) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_context_score(score); -} - -void wenet_set_language(void* decoder, const char* lang) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_language(lang); -} - -void wenet_set_continuous_decoding(void* decoder, int flag) { - Recognizer* recognizer = reinterpret_cast(decoder); - recognizer->set_continuous_decoding(flag > 0); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/wenet_api.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/wenet_api.h deleted file mode 100644 index e839aaa40166a6e50d9aa2ac0e697356bd25b941..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/api/wenet_api.h +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef API_WENET_API_H_ -#define API_WENET_API_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Init decoder from the file and returns the object - * - * @param model_dir: the model dir - * @returns model object or NULL if problem occured - */ -void* wenet_init(const char* model_dir); - -/** Free wenet decoder and corresponding resource - */ -void wenet_free(void* decoder); - -/** Reset decoder for next decoding - */ -void wenet_reset(void* decoder); - -/** Decode the input wav data - * @param data: pcm data, encoded as int16_t(16 bits) - * @param len: data length - * @param last: if it is the last package - */ -void wenet_decode(void* decoder, const char* data, int len, int last); - -/** Get decode result in json format - * It returns partial result when last is 0 - * It returns final result when last is 1 - - { - "nbest" : [{ - "sentence" : "are you okay" - "word_pieces" : [{ - "end" : 960, - "start" : 0, - "word" : "are" - }, { - "end" : 1200, - "start" : 960, - "word" : "you" - }, { - ...}] - }, { - "sentence" : "are you ok" - }], - "type" : "final_result" - } - - "type": final_result/partial_result - "nbest": nbest is enabled when n > 1 in final_result - "sentence": the ASR result - "word_pieces": optional, output timestamp when enabled - */ -const char* wenet_get_result(void* decoder); - -/** Set n-best, range 1~10 - * wenet_get_result will return top-n best results - */ -void wenet_set_nbest(void* decoder, int n); - -/** Whether to enable word level timestamp in results - disable it when flag = 0, otherwise enable - */ -void wenet_set_timestamp(void* decoder, int flag); - -/** Add one contextual biasing - */ -void wenet_add_context(void* decoder, const char* word); - -/** Set contextual biasing bonus score - */ -void wenet_set_context_score(void* decoder, float 
score); - -/** Set language, has effect on the postpocessing - * @param: lang, could be chs/en now - */ -void wenet_set_language(void* decoder, const char* lang); - -/** Set log level - * We use glog in wenet, so the level is the glog level - */ -void wenet_set_log_level(int level); - -/** Enable continous decoding or not - * flag > 0: enable, otherwise disable - */ -void wenet_set_continuous_decoding(void* decoder, int flag); - -#ifdef __cplusplus -} -#endif - -#endif // API_WENET_API_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/CMakeLists.txt deleted file mode 100644 index a117b8bcb580c8738a7ce72f88bc10ff0a450e98..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -add_executable(decoder_main decoder_main.cc) -target_link_libraries(decoder_main PUBLIC decoder) - -add_executable(label_checker_main label_checker_main.cc) -target_link_libraries(label_checker_main PUBLIC decoder) - -# if(TORCH) -# add_executable(api_main api_main.cc) -# target_link_libraries(api_main PUBLIC wenet_api) -# endif() - -if(WEBSOCKET) - add_executable(websocket_client_main websocket_client_main.cc) - target_link_libraries(websocket_client_main PUBLIC websocket) - add_executable(websocket_server_main websocket_server_main.cc) - target_link_libraries(websocket_server_main PUBLIC websocket) -endif() - -if(GRPC) - add_executable(grpc_server_main grpc_server_main.cc) - target_link_libraries(grpc_server_main PUBLIC wenet_grpc) - add_executable(grpc_client_main grpc_client_main.cc) - target_link_libraries(grpc_client_main PUBLIC wenet_grpc) -endif() - -if(HTTP) - add_executable(http_client_main http_client_main.cc) - target_link_libraries(http_client_main PUBLIC http) - add_executable(http_server_main http_server_main.cc) - target_link_libraries(http_server_main PUBLIC http) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/api_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/api_main.cc deleted file mode 100644 index 94b20d52a7b8eee5c39a12af4e1e25324d7d880f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/api_main.cc +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "api/wenet_api.h" -#include "frontend/wav.h" -#include "utils/flags.h" - -DEFINE_string(model_dir, "", "model dir path"); -DEFINE_string(wav_path, "", "single wave path"); -DEFINE_bool(enable_timestamp, false, "enable timestamps"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - wenet_set_log_level(2); - - void* decoder = wenet_init(FLAGS_model_dir.c_str()); - wenet_set_timestamp(decoder, FLAGS_enable_timestamp == true ? 1 : 0); - wenet::WavReader wav_reader(FLAGS_wav_path); - std::vector data(wav_reader.num_samples()); - for (int i = 0; i < wav_reader.num_samples(); i++) { - data[i] = static_cast(*(wav_reader.data() + i)); - } - - for (int i = 0; i < 10; i++) { - // Return the final result when last is 1 - wenet_decode(decoder, reinterpret_cast(data.data()), - data.size() * 2, 1); - const char* result = wenet_get_result(decoder); - LOG(INFO) << i << " " << result; - wenet_reset(decoder); - } - wenet_free(decoder); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/decoder_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/decoder_main.cc deleted file mode 100644 index b8f1dbae6b88390504cc9ce63f33dc9bd54a2d6a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/decoder_main.cc +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include - -#include "decoder/params.h" -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/string.h" -#include "utils/thread_pool.h" -#include "utils/timer.h" -#include "utils/utils.h" - -DEFINE_bool(simulate_streaming, false, "simulate streaming input"); -DEFINE_bool(output_nbest, false, "output n-best of decode result"); -DEFINE_string(wav_path, "", "single wave path"); -DEFINE_string(wav_scp, "", "input wav scp"); -DEFINE_string(result, "", "result output file"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); -DEFINE_int32(thread_num, 1, "num of decode thread"); -DEFINE_int32(warmup, 0, "num of warmup decode, 0 means no warmup"); - -std::shared_ptr g_decode_config; -std::shared_ptr g_feature_config; -std::shared_ptr g_decode_resource; - -std::ofstream g_result; -std::mutex g_mutex; -int g_total_waves_dur = 0; -int g_total_decode_time = 0; - -void decode(std::pair wav, bool warmup = false) { - wenet::WavReader wav_reader(wav.second); - int num_samples = wav_reader.num_samples(); - CHECK_EQ(wav_reader.sample_rate(), FLAGS_sample_rate); - - auto feature_pipeline = - std::make_shared(*g_feature_config); - feature_pipeline->AcceptWaveform(wav_reader.data(), num_samples); - feature_pipeline->set_input_finished(); - LOG(INFO) << "num frames " << feature_pipeline->num_frames(); - - wenet::AsrDecoder decoder(feature_pipeline, g_decode_resource, - *g_decode_config); - - int wave_dur = static_cast(static_cast(num_samples) / - wav_reader.sample_rate() * 1000); - int decode_time = 0; - std::string final_result; - while (true) { - wenet::Timer timer; - wenet::DecodeState state = decoder.Decode(); - if (state == wenet::DecodeState::kEndFeats) { - decoder.Rescoring(); - } - int chunk_decode_time = timer.Elapsed(); - decode_time += chunk_decode_time; - if (decoder.DecodedSomething()) { - LOG(INFO) << "Partial result: " << decoder.result()[0].sentence; - } - - if (FLAGS_continuous_decoding && state == wenet::DecodeState::kEndpoint) { - if (decoder.DecodedSomething()) { - decoder.Rescoring(); - LOG(INFO) << "Final result (continuous decoding): " - << decoder.result()[0].sentence; - final_result.append(decoder.result()[0].sentence); - } - decoder.ResetContinuousDecoding(); - } - - if (state == wenet::DecodeState::kEndFeats) { - break; - } else if (FLAGS_chunk_size > 0 && FLAGS_simulate_streaming) { - float frame_shift_in_ms = - static_cast(g_feature_config->frame_shift) / - wav_reader.sample_rate() * 1000; - auto wait_time = - decoder.num_frames_in_current_chunk() * frame_shift_in_ms - - chunk_decode_time; - if (wait_time > 0) { - LOG(INFO) << "Simulate streaming, waiting for " << wait_time << "ms"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(wait_time))); - } - } - } - if (decoder.DecodedSomething()) { - final_result.append(decoder.result()[0].sentence); - } - LOG(INFO) << wav.first << " Final result: " << final_result << std::endl; - LOG(INFO) << "Decoded " << wave_dur << "ms audio taken " << decode_time - << "ms."; - - if (!warmup) { - g_mutex.lock(); - std::ostream& buffer = FLAGS_result.empty() ? 
-    if (!FLAGS_output_nbest) {
-      buffer << wav.first << " " << final_result << std::endl;
-    } else {
-      buffer << "wav " << wav.first << std::endl;
-      auto& results = decoder.result();
-      for (auto& r : results) {
-        if (r.sentence.empty()) continue;
-        buffer << "candidate " << r.score << " " << r.sentence << std::endl;
-      }
-    }
-    g_total_waves_dur += wave_dur;
-    g_total_decode_time += decode_time;
-    g_mutex.unlock();
-  }
-}
-
-int main(int argc, char* argv[]) {
-  gflags::ParseCommandLineFlags(&argc, &argv, false);
-  google::InitGoogleLogging(argv[0]);
-
-  g_decode_config = wenet::InitDecodeOptionsFromFlags();
-  g_feature_config = wenet::InitFeaturePipelineConfigFromFlags();
-  g_decode_resource = wenet::InitDecodeResourceFromFlags();
-
-  if (FLAGS_wav_path.empty() && FLAGS_wav_scp.empty()) {
-    LOG(FATAL) << "Please provide the wave path or the wav scp.";
-  }
-  std::vector<std::pair<std::string, std::string>> waves;
-  if (!FLAGS_wav_path.empty()) {
-    waves.emplace_back(make_pair("test", FLAGS_wav_path));
-  } else {
-    std::ifstream wav_scp(FLAGS_wav_scp);
-    std::string line;
-    while (getline(wav_scp, line)) {
-      std::vector<std::string> strs;
-      wenet::SplitString(line, &strs);
-      CHECK_GE(strs.size(), 2);
-      waves.emplace_back(make_pair(strs[0], strs[1]));
-    }
-
-    if (waves.empty()) {
-      LOG(FATAL) << "Please provide non-empty wav scp.";
-    }
-  }
-
-  if (!FLAGS_result.empty()) {
-    g_result.open(FLAGS_result, std::ios::out);
-  }
-
-  // Warmup
-  if (FLAGS_warmup > 0) {
-    LOG(INFO) << "Warming up...";
-    {
-      ThreadPool pool(FLAGS_thread_num);
-      auto wav = waves[0];
-      for (int i = 0; i < FLAGS_warmup; i++) {
-        pool.enqueue(decode, wav, true);
-      }
-    }
-    LOG(INFO) << "Warmup done.";
-  }
-
-  {
-    ThreadPool pool(FLAGS_thread_num);
-    for (auto& wav : waves) {
-      pool.enqueue(decode, wav, false);
-    }
-  }
-
-  LOG(INFO) << "Total: decoded " << g_total_waves_dur << "ms audio taken "
-            << g_total_decode_time << "ms.";
-  LOG(INFO) << "RTF: " << std::setprecision(4)
-            << static_cast<float>(g_total_decode_time) / g_total_waves_dur;
-  return 0;
-}
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/grpc_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/grpc_client_main.cc
deleted file mode 100644
index f2d226d48d3757c5f095335eff3288f5d227282b..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/grpc_client_main.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu)
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
- -#include "frontend/wav.h" -#include "grpc/grpc_client.h" -#include "utils/flags.h" -#include "utils/timer.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of websocket server"); -DEFINE_int32(port, 10086, "port of websocket server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - wenet::GrpcClient client(FLAGS_hostname, FLAGS_port, FLAGS_nbest, - FLAGS_continuous_decoding); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - std::vector pcm_data(wav_reader.data(), - wav_reader.data() + num_samples); - // Send data every 0.5 second - const float interval = 0.5; - const int sample_interval = interval * sample_rate; - for (int start = 0; start < num_samples; start += sample_interval) { - if (client.done()) { - break; - } - int end = std::min(start + sample_interval, num_samples); - // Convert to short - std::vector data; - data.reserve(end - start); - for (int j = start; j < end; j++) { - data.push_back(static_cast(pcm_data[j])); - } - // Send PCM data - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Send " << data.size() << " samples"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(interval * 1000))); - } - wenet::Timer timer; - - client.Join(); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/grpc_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/grpc_server_main.cc deleted file mode 100644 index b00f3cbade1ee70dadfb49829e9ca73fd50c2be2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/grpc_server_main.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-
-#include <grpcpp/ext/proto_server_reflection_plugin.h>
-#include <grpcpp/grpcpp.h>
-#include <grpcpp/health_check_service_interface.h>
-
-#include "decoder/params.h"
-#include "grpc/grpc_server.h"
-#include "utils/log.h"
-
-DEFINE_int32(port, 10086, "grpc listening port");
-DEFINE_int32(workers, 4, "grpc num workers");
-
-using grpc::Server;
-using grpc::ServerBuilder;
-
-int main(int argc, char* argv[]) {
-  gflags::ParseCommandLineFlags(&argc, &argv, false);
-  google::InitGoogleLogging(argv[0]);
-
-  auto decode_config = wenet::InitDecodeOptionsFromFlags();
-  auto feature_config = wenet::InitFeaturePipelineConfigFromFlags();
-  auto decode_resource = wenet::InitDecodeResourceFromFlags();
-
-  wenet::GrpcServer service(feature_config, decode_config, decode_resource);
-  grpc::EnableDefaultHealthCheckService(true);
-  grpc::reflection::InitProtoReflectionServerBuilderPlugin();
-  ServerBuilder builder;
-  std::string address("0.0.0.0:" + std::to_string(FLAGS_port));
-  builder.AddListeningPort(address, grpc::InsecureServerCredentials());
-  builder.RegisterService(&service);
-  builder.SetSyncServerOption(ServerBuilder::SyncServerOption::NUM_CQS,
-                              FLAGS_workers);
-  std::unique_ptr<Server> server(builder.BuildAndStart());
-  LOG(INFO) << "Listening at port " << FLAGS_port;
-  server->Wait();
-  google::ShutdownGoogleLogging();
-  return 0;
-}
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/http_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/http_client_main.cc
deleted file mode 100644
index b59ee3f5f32bf08552416b183802029ac5d5afa5..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/http_client_main.cc
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu)
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
- -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/timer.h" -#include "http/http_client.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of http server"); -DEFINE_int32(port, 10086, "port of http server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - // Convert to short - std::vector data; - data.reserve(num_samples); - for (int j = 0; j < num_samples; j++) { - data.push_back(static_cast(wav_reader.data()[j])); - } - // Send data - wenet::HttpClient client(FLAGS_hostname, FLAGS_port); - client.set_nbest(FLAGS_nbest); - wenet::Timer timer; - VLOG(2) << "Send " << data.size() << " samples"; - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/http_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/http_server_main.cc deleted file mode 100644 index e30cf2bcdf746c2072f023e90f470ccba5467c2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/http_server_main.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/params.h" -#include "utils/log.h" -#include "http/http_server.h" - -DEFINE_int32(port, 10086, "http listening port"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::HttpServer server(FLAGS_port, feature_config, decode_config, - decode_resource); - LOG(INFO) << "Listening at port " << FLAGS_port; - server.Start(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/label_checker_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/label_checker_main.cc deleted file mode 100644 index e36e3d5c29a38a7ebee80606ebd8e69ae8b1eb96..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/label_checker_main.cc +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include -#include -#include -#include - -#include "decoder/params.h" -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/string.h" - -DEFINE_string(text, "", "kaldi style text input file"); -DEFINE_string(wav_scp, "", "kaldi style wav scp"); -DEFINE_double(is_penalty, 1.0, - "insertion/substitution penalty for align insertion"); -DEFINE_double(del_penalty, 1.0, "deletion penalty for align insertion"); -DEFINE_string(result, "", "result output file"); -DEFINE_string(timestamp, "", "timestamp output file"); - -namespace wenet { - -const char* kDeletion = ""; -// Is: Insertion and substitution -const char* kIsStart = ""; -const char* kIsEnd = ""; - -bool MapToLabel(const std::string& text, - std::shared_ptr symbol_table, - std::vector* labels) { - labels->clear(); - // Split label to char sequence - std::vector chars; - SplitUTF8StringToChars(text, &chars); - for (size_t i = 0; i < chars.size(); i++) { - // ▁ is special symbol for white space - std::string label = chars[i] != " " ? chars[i] : "▁"; - int id = symbol_table->Find(label); - if (id != -1) { // fst::kNoSymbol - // LOG(INFO) << label << " " << id; - labels->push_back(id); - } - } - return true; -} - -std::shared_ptr MakeSymbolTableForFst( - std::shared_ptr isymbol_table) { - LOG(INFO) << isymbol_table; - CHECK(isymbol_table != nullptr); - auto osymbol_table = std::make_shared(); - osymbol_table->AddSymbol("", 0); - CHECK_EQ(isymbol_table->Find(""), 0); - osymbol_table->AddSymbol("", 1); - for (int i = 1; i < isymbol_table->NumSymbols(); i++) { - std::string symbol = isymbol_table->Find(i); - osymbol_table->AddSymbol(symbol, i + 1); - } - osymbol_table->AddSymbol(kDeletion, isymbol_table->NumSymbols() + 1); - osymbol_table->AddSymbol(kIsStart, isymbol_table->NumSymbols() + 2); - osymbol_table->AddSymbol(kIsEnd, isymbol_table->NumSymbols() + 3); - return osymbol_table; -} - -void CompileCtcFst(std::shared_ptr symbol_table, - fst::StdVectorFst* ofst) { - ofst->DeleteStates(); - int start = ofst->AddState(); - ofst->SetStart(start); - CHECK_EQ(symbol_table->Find(""), 0); - CHECK_EQ(symbol_table->Find(""), 1); - ofst->AddArc(start, fst::StdArc(1, 0, 0.0, start)); - // Exclude kDeletion and kInsertion - for (int i = 2; i < symbol_table->NumSymbols() - 3; i++) { - int s = ofst->AddState(); - ofst->AddArc(start, fst::StdArc(i, i, 0.0, s)); - ofst->AddArc(s, fst::StdArc(i, 0, 0.0, s)); - ofst->AddArc(s, fst::StdArc(0, 0, 0.0, start)); - } - ofst->SetFinal(start, fst::StdArc::Weight::One()); - fst::ArcSort(ofst, fst::StdOLabelCompare()); -} - -void CompileAlignFst(std::vector labels, - std::shared_ptr symbol_table, - fst::StdVectorFst* ofst) { - ofst->DeleteStates(); - int deletion = symbol_table->Find(kDeletion); - int insertion_start = symbol_table->Find(kIsStart); - int insertion_end = symbol_table->Find(kIsEnd); - - int start = ofst->AddState(); - ofst->SetStart(start); - // Filler State - int filler_start = ofst->AddState(); - int filler_end = ofst->AddState(); - for (int i = 2; i < symbol_table->NumSymbols() - 3; i++) { - ofst->AddArc(filler_start, fst::StdArc(i, i, FLAGS_is_penalty, 
filler_end)); - } - ofst->AddArc(filler_end, fst::StdArc(0, 0, 0.0, filler_start)); - - int prev = start; - // Alignment path and optional filler - for (size_t i = 0; i < labels.size(); i++) { - int cur = ofst->AddState(); - // 1. Insertion or Substitution - ofst->AddArc(prev, fst::StdArc(0, insertion_start, 0.0, filler_start)); - ofst->AddArc(filler_end, fst::StdArc(0, insertion_end, 0.0, prev)); - // 2. Correct - ofst->AddArc(prev, fst::StdArc(labels[i], labels[i], 0.0, cur)); - // 3. Deletion - ofst->AddArc(prev, fst::StdArc(0, deletion, FLAGS_del_penalty, cur)); - - prev = cur; - } - // Optional add endding filler - ofst->AddArc(prev, fst::StdArc(0, insertion_start, 0.0, filler_start)); - ofst->AddArc(filler_end, fst::StdArc(0, insertion_end, 0.0, prev)); - ofst->SetFinal(prev, fst::StdArc::Weight::One()); - fst::ArcSort(ofst, fst::StdILabelCompare()); -} - -} // namespace wenet - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - CHECK(decode_resource->unit_table != nullptr); - - auto wfst_symbol_table = - wenet::MakeSymbolTableForFst(decode_resource->unit_table); - // wfst_symbol_table->WriteText("fst.txt"); - // Reset symbol_table to on-the-fly generated wfst_symbol_table - decode_resource->symbol_table = wfst_symbol_table; - - // Compile ctc FST - fst::StdVectorFst ctc_fst; - wenet::CompileCtcFst(wfst_symbol_table, &ctc_fst); - // ctc_fst.Write("ctc.fst"); - - std::unordered_map wav_table; - std::ifstream wav_is(FLAGS_wav_scp); - std::string line; - while (std::getline(wav_is, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - CHECK_EQ(strs.size(), 2); - wav_table[strs[0]] = strs[1]; - } - - std::ifstream text_is(FLAGS_text); - std::ofstream result_os(FLAGS_result, std::ios::out); - std::ofstream timestamp_out; - if (!FLAGS_timestamp.empty()) { - timestamp_out.open(FLAGS_timestamp, std::ios::out); - } - std::ostream& timestamp_os = - FLAGS_timestamp.empty() ? 
std::cout : timestamp_out; - - while (std::getline(text_is, line)) { - std::vector strs; - wenet::SplitString(line, &strs); - if (strs.size() < 2) continue; - std::string key = strs[0]; - LOG(INFO) << "Processing " << key; - if (wav_table.find(key) != wav_table.end()) { - strs.erase(strs.begin()); - std::string text = wenet::JoinString(" ", strs); - std::vector labels; - wenet::MapToLabel(text, wfst_symbol_table, &labels); - // Prepare FST for alignment decoding - fst::StdVectorFst align_fst; - wenet::CompileAlignFst(labels, wfst_symbol_table, &align_fst); - // align_fst.Write("align.fst"); - auto decoding_fst = std::make_shared(); - fst::Compose(ctc_fst, align_fst, decoding_fst.get()); - // decoding_fst->Write("decoding.fst"); - // Preapre feature pipeline - wenet::WavReader wav_reader; - if (!wav_reader.Open(wav_table[key])) { - LOG(WARNING) << "Error in reading " << wav_table[key]; - continue; - } - int num_samples = wav_reader.num_samples(); - CHECK_EQ(wav_reader.sample_rate(), FLAGS_sample_rate); - auto feature_pipeline = - std::make_shared(*feature_config); - feature_pipeline->AcceptWaveform(wav_reader.data(), num_samples); - feature_pipeline->set_input_finished(); - decode_resource->fst = decoding_fst; - LOG(INFO) << "num frames " << feature_pipeline->num_frames(); - wenet::AsrDecoder decoder(feature_pipeline, decode_resource, - *decode_config); - while (true) { - wenet::DecodeState state = decoder.Decode(); - if (state == wenet::DecodeState::kEndFeats) { - decoder.Rescoring(); - break; - } - } - std::string final_result; - std::string timestamp_str; - if (decoder.DecodedSomething()) { - const wenet::DecodeResult& result = decoder.result()[0]; - final_result = result.sentence; - std::stringstream ss; - for (const auto& w : result.word_pieces) { - ss << " " << w.word << " " << w.start << " " << w.end; - } - timestamp_str = ss.str(); - } - result_os << key << " " << final_result << std::endl; - timestamp_os << key << " " << timestamp_str << std::endl; - LOG(INFO) << key << " " << final_result; - } else { - LOG(WARNING) << "No wav file for " << key; - } - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/websocket_client_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/websocket_client_main.cc deleted file mode 100644 index 3eaa96069dc5f57673fbb2819bf7d4883e0d5ffa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/websocket_client_main.cc +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "frontend/wav.h" -#include "utils/flags.h" -#include "utils/timer.h" -#include "websocket/websocket_client.h" - -DEFINE_string(hostname, "127.0.0.1", "hostname of websocket server"); -DEFINE_int32(port, 10086, "port of websocket server"); -DEFINE_int32(nbest, 1, "n-best of decode result"); -DEFINE_string(wav_path, "", "test wav file path"); -DEFINE_bool(continuous_decoding, false, "continuous decoding mode"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - wenet::WebSocketClient client(FLAGS_hostname, FLAGS_port); - client.set_nbest(FLAGS_nbest); - client.set_continuous_decoding(FLAGS_continuous_decoding); - client.SendStartSignal(); - - wenet::WavReader wav_reader(FLAGS_wav_path); - const int sample_rate = 16000; - // Only support 16K - CHECK_EQ(wav_reader.sample_rate(), sample_rate); - const int num_samples = wav_reader.num_samples(); - // Send data every 0.5 second - const float interval = 0.5; - const int sample_interval = interval * sample_rate; - for (int start = 0; start < num_samples; start += sample_interval) { - if (client.done()) { - break; - } - int end = std::min(start + sample_interval, num_samples); - // Convert to short - std::vector data; - data.reserve(end - start); - for (int j = start; j < end; j++) { - data.push_back(static_cast(wav_reader.data()[j])); - } - // TODO(Binbin Zhang): Network order? - // Send PCM data - client.SendBinaryData(data.data(), data.size() * sizeof(int16_t)); - VLOG(2) << "Send " << data.size() << " samples"; - std::this_thread::sleep_for( - std::chrono::milliseconds(static_cast(interval * 1000))); - } - wenet::Timer timer; - client.SendEndSignal(); - client.Join(); - VLOG(2) << "Total latency: " << timer.Elapsed() << "ms."; - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/websocket_server_main.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/websocket_server_main.cc deleted file mode 100644 index 796d9d2e6d151f7c08b43d66b7245c58ee086cc2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/bin/websocket_server_main.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "decoder/params.h" -#include "utils/log.h" -#include "websocket/websocket_server.h" - -DEFINE_int32(port, 10086, "websocket listening port"); - -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, false); - google::InitGoogleLogging(argv[0]); - - auto decode_config = wenet::InitDecodeOptionsFromFlags(); - auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); - auto decode_resource = wenet::InitDecodeResourceFromFlags(); - - wenet::WebSocketServer server(FLAGS_port, feature_config, decode_config, - decode_resource); - LOG(INFO) << "Listening at port " << FLAGS_port; - server.Start(); - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/boost.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/boost.cmake deleted file mode 100644 index 8684c0ec43960da213da923dc57416f04301ea2b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/boost.cmake +++ /dev/null @@ -1,10 +0,0 @@ -FetchContent_Declare(boost - URL https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.gz - URL_HASH SHA256=aeb26f80e80945e82ee93e5939baebdca47b9dee80a07d3144be1e1a6a66dd6a -) -FetchContent_MakeAvailable(boost) -include_directories(${boost_SOURCE_DIR}) - -if(MSVC) - add_definitions(-DBOOST_ALL_DYN_LINK -DBOOST_ALL_NO_LIB) -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/bpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/bpu.cmake deleted file mode 100644 index 350d76c19d6f656fb130de09877d649cf49972a4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/bpu.cmake +++ /dev/null @@ -1,30 +0,0 @@ -if(BPU) - if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(EASY_DNN_URL "https://github.com/xingchensong/toolchain_pkg/releases/download/easy_dnn/easy_dnn.0.4.11.tar.gz") - set(URL_HASH "SHA256=a1a6f77d1baae7181d75ec5d37a2ee529ac4e1c4400babd6ceb1c007392a4904") - else() - message(FATAL_ERROR "Unsupported CMake System Processor '${CMAKE_SYSTEM_PROCESSOR}' (expected 'aarch64')") - endif() - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Linux')") - endif() - - FetchContent_Declare(easy_dnn - URL ${EASY_DNN_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(easy_dnn) - include_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/include) - include_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/include) - link_directories(${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn/lib) - link_directories(${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn/lib) - link_directories(${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog/lib) - - add_definitions(-DUSE_BPU) - # NOTE(xcsong): Reasons for adding flag `-fuse-ld=gold`: - # https://stackoverflow.com/questions/59915966/unknown-gcc-linker-error-but-builds-sucessfully/59916438#59916438 - # https://github.com/tensorflow/tensorflow/issues/47849 - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-fuse-ld=gold") -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/gflags.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/gflags.cmake deleted file mode 100644 index 53ae5763b5a8c860b7e64d35b380eee5429f539d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/gflags.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(gflags - URL https://github.com/gflags/gflags/archive/v2.2.2.zip - URL_HASH SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5 -) -FetchContent_MakeAvailable(gflags) -include_directories(${gflags_BINARY_DIR}/include) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/glog.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/glog.cmake deleted file mode 100644 index 447ab4132f669ee2c3a52c37959dd684a39ff21b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/glog.cmake +++ /dev/null @@ -1,6 +0,0 @@ -FetchContent_Declare(glog - URL https://github.com/google/glog/archive/v0.4.0.zip - URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc -) -FetchContent_MakeAvailable(glog) -include_directories(${glog_SOURCE_DIR}/src ${glog_BINARY_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/grpc.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/grpc.cmake deleted file mode 100644 index 644093a4bf8191f3a45b0df0a72c000981c48f58..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/grpc.cmake +++ /dev/null @@ -1,9 +0,0 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/grpc) -# third_party: grpc -# On how to build grpc, you may refer to https://github.com/grpc/grpc -# We recommend manually recursive clone the repo to avoid internet connection problem -FetchContent_Declare(gRPC - GIT_REPOSITORY https://github.com/grpc/grpc - GIT_TAG v1.37.1 -) -FetchContent_MakeAvailable(gRPC) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/gtest.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/gtest.cmake deleted file mode 100644 index 30dc7c1a31d8b83991841a4dc33f61ed078b532a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/gtest.cmake +++ /dev/null @@ -1,8 +0,0 @@ -FetchContent_Declare(googletest - URL https://github.com/google/googletest/archive/release-1.11.0.zip - URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a -) -if(MSVC) - set(gtest_force_shared_crt ON CACHE BOOL "Always use msvcrt.dll" FORCE) -endif() -FetchContent_MakeAvailable(googletest) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/libtorch.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/libtorch.cmake deleted file mode 100644 index 3cd9245b2da52f8be206d27164de5f411bff171b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/libtorch.cmake +++ /dev/null @@ -1,79 +0,0 @@ 
-if(TORCH) - add_definitions(-DUSE_TORCH) - if(NOT ANDROID) - if(GPU) - if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - message(FATAL_ERROR "GPU is supported only Linux, you can use CPU version") - else() - add_definitions(-DUSE_GPU) - endif() - endif() - - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - if(${CMAKE_BUILD_TYPE} MATCHES "Release") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bece54d36377990257e9d028c687c5b6759c5cfec0a0153da83cf6f0f71f648f") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-debug-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=3cc7ba3c3865d86f03d78c2f0878fdbed8b764359476397a5c95cf3bba0d665a") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CXX11_ABI) - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=d52f63577a07adb0bfd6d77c90f7da21896e94f71eb7dcd55ed7835ccb3b2b59") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.12.0%2Bcu113.zip") - set(URL_HASH "SHA256=80f089939de20e68e3fcad4dfa72a26c8bf91b5e77b11042f671f39ebac35865") - endif() - else() - if(NOT GPU) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-1.13.0%2Bcpu.zip") - set(URL_HASH "SHA256=bee1b7be308792aa60fc95a4f5274d9658cb7248002d0e333d49eb81ec88430c") - else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.11.0%2Bcu113.zip") - set(URL_HASH "SHA256=90159ecce3ff451f3ef3f657493b6c7c96759c3b74bbd70c1695f2ea2f81e1ad") - endif() - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-macos-1.13.0.zip") - set(URL_HASH "SHA256=a8f80050b95489b4e002547910410c2c230e9f590ffab2482e19e809afe4f7aa") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") - add_definitions(-DIOS) - else() - message(FATAL_ERROR "Unsupported System '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux', 'Darwin' or 'iOS')") - endif() - - # iOS use LibTorch from pod install - if(NOT IOS) - FetchContent_Declare(libtorch - URL ${LIBTORCH_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(libtorch) - find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS} -DC10_USE_GLOG") - endif() - - if(MSVC) - file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") - file(COPY ${TORCH_DLLS} DESTINATION ${CMAKE_BINARY_DIR}) - endif() - else() - # Change version in runtime/android/app/build.gradle. 
- file(GLOB PYTORCH_INCLUDE_DIRS "${build_DIR}/pytorch_android*.aar/headers") - file(GLOB PYTORCH_LINK_DIRS "${build_DIR}/pytorch_android*.aar/jni/${ANDROID_ABI}") - find_library(PYTORCH_LIBRARY pytorch_jni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - find_library(FBJNI_LIBRARY fbjni - PATHS ${PYTORCH_LINK_DIRS} - NO_CMAKE_FIND_ROOT_PATH - ) - include_directories( - ${PYTORCH_INCLUDE_DIRS} - ${PYTORCH_INCLUDE_DIRS}/torch/csrc/api/include - ) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/onnx.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/onnx.cmake deleted file mode 100644 index bd55402cb2a6024620fa6ff8b5c413207041adfa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/onnx.cmake +++ /dev/null @@ -1,35 +0,0 @@ -if(ONNX) - set(ONNX_VERSION "1.12.0") - if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-win-x64-${ONNX_VERSION}.zip") - set(URL_HASH "SHA256=8b5d61204989350b7904ac277f5fbccd3e6736ddbb6ec001e412723d71c9c176") - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-aarch64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5820d9f343df73c63b6b2b174a1ff62575032e171c9564bcf92060f46827d0ac") - else() - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-x64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=5d503ce8540358b59be26c675e42081be14a3e833a5301926f555451046929c5") - endif() - elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-x86_64-${ONNX_VERSION}.tgz") - set(URL_HASH "SHA256=09b17f712f8c6f19bb63da35d508815b443cbb473e16c6192abfaa297c02f600") - else() - message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux' or 'Darwin')") - endif() - - FetchContent_Declare(onnxruntime - URL ${ONNX_URL} - URL_HASH ${URL_HASH} - ) - FetchContent_MakeAvailable(onnxruntime) - include_directories(${onnxruntime_SOURCE_DIR}/include) - link_directories(${onnxruntime_SOURCE_DIR}/lib) - - if(MSVC) - file(GLOB ONNX_DLLS "${onnxruntime_SOURCE_DIR}/lib/*.dll") - file(COPY ${ONNX_DLLS} DESTINATION ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}) - endif() - - add_definitions(-DUSE_ONNX) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/openfst.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/openfst.cmake deleted file mode 100644 index 490a3da6b571ec228114167fb9c0d9e9b4043bd2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/openfst.cmake +++ /dev/null @@ -1,45 +0,0 @@ -if(NOT ANDROID) - include(gflags) - # We can't build glog with gflags, unless gflags is pre-installed. - # If build glog with pre-installed gflags, there will be conflict. 
- set(WITH_GFLAGS OFF CACHE BOOL "whether build glog with gflags" FORCE) - include(glog) - - if(NOT GRAPH_TOOLS) - set(HAVE_BIN OFF CACHE BOOL "Build the fst binaries" FORCE) - set(HAVE_SCRIPT OFF CACHE BOOL "Build the fstscript" FORCE) - endif() - set(HAVE_COMPACT OFF CACHE BOOL "Build compact" FORCE) - set(HAVE_CONST OFF CACHE BOOL "Build const" FORCE) - set(HAVE_GRM OFF CACHE BOOL "Build grm" FORCE) - set(HAVE_FAR OFF CACHE BOOL "Build far" FORCE) - set(HAVE_PDT OFF CACHE BOOL "Build pdt" FORCE) - set(HAVE_MPDT OFF CACHE BOOL "Build mpdt" FORCE) - set(HAVE_LINEAR OFF CACHE BOOL "Build linear" FORCE) - set(HAVE_LOOKAHEAD OFF CACHE BOOL "Build lookahead" FORCE) - set(HAVE_NGRAM OFF CACHE BOOL "Build ngram" FORCE) - set(HAVE_SPECIAL OFF CACHE BOOL "Build special" FORCE) - - if(MSVC) - add_compile_options(/W0 /wd4244 /wd4267) - endif() - - # "OpenFST port for Windows" builds openfst with cmake for multiple platforms. - # Openfst is compiled with glog/gflags to avoid log and flag conflicts with log and flags in wenet/libtorch. - # To build openfst with gflags and glog, we comment out some vars of {flags, log}.h and flags.cc. - set(openfst_SOURCE_DIR ${fc_base}/openfst-src CACHE PATH "OpenFST source directory") - FetchContent_Declare(openfst - URL https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz - URL_HASH SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e - PATCH_COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/patch/openfst ${openfst_SOURCE_DIR} - ) - FetchContent_MakeAvailable(openfst) - add_dependencies(fst gflags glog) - target_link_libraries(fst PUBLIC gflags_nothreads_static glog) - include_directories(${openfst_SOURCE_DIR}/src/include) -else() - set(openfst_BINARY_DIR ${build_DIR}/wenet-openfst-android-1.0.2.aar/jni) - include_directories(${openfst_BINARY_DIR}/include) - link_directories(${openfst_BINARY_DIR}/${ANDROID_ABI}) - link_libraries(log gflags_nothreads glog fst) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/pybind11.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/pybind11.cmake deleted file mode 100644 index 6bdae202c1c4d94228e5f92dab051c118dba7d3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/pybind11.cmake +++ /dev/null @@ -1,7 +0,0 @@ -FetchContent_Declare(pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.zip - URL_HASH SHA256=d1646e6f70d8a3acb2ddd85ce1ed543b5dd579c68b8fb8e9638282af20edead8 -) -FetchContent_MakeAvailable(pybind11) - -add_subdirectory(${pybind11_SOURCE_DIR}) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/xpu.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/xpu.cmake deleted file mode 100644 index 38418671b0237550cd01d4d95e8743067e113e56..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/cmake/xpu.cmake +++ /dev/null @@ -1,37 +0,0 @@ -if(NOT WIN32) - string(ASCII 27 Esc) - set(ColourReset "${Esc}[m") - set(ColourBold "${Esc}[1m") - set(Red "${Esc}[31m") - set(Green "${Esc}[32m") - set(Yellow "${Esc}[33m") - set(Blue "${Esc}[34m") - set(Magenta "${Esc}[35m") - set(Cyan "${Esc}[36m") - set(White "${Esc}[37m") - set(BoldRed "${Esc}[1;31m") - set(BoldGreen "${Esc}[1;32m") - set(BoldYellow "${Esc}[1;33m") - set(BoldBlue 
"${Esc}[1;34m") - set(BoldMagenta "${Esc}[1;35m") - set(BoldCyan "${Esc}[1;36m") - set(BoldWhite "${Esc}[1;37m") -endif() - -if(XPU) - set(RUNTIME_KUNLUN_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - message(STATUS "RUNTIME_KUNLUN_PATH is ${RUNTIME_KUNLUN_PATH} .\n") - set(KUNLUN_XPU_PATH ${RUNTIME_KUNLUN_PATH}/xpu) - if(NOT DEFINED ENV{XPU_API_PATH}) - message(FATAL_ERROR "${BoldRed}NO ENV{XPU_API_PATH} in your env. Please set XPU_API_PATH.${ColourReset}\n") - else() - set(XPU_API_PATH $ENV{XPU_API_PATH}) - message("set XPU_API_PATH from env_var. Val is $ENV{XPU_API_PATH}.") - endif() - - include_directories(${RUNTIME_KUNLUN_PATH} ${KUNLUN_XPU_PATH}/ - ${XPU_API_PATH}/output/include ${XPU_API_PATH}/../runtime/include) - link_directories(${XPU_API_PATH}/output/so/ ${XPU_API_PATH}/../runtime/output/so/) - - add_definitions(-DUSE_XPU) -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/CMakeLists.txt deleted file mode 100644 index fe03efb288eb1c7ae3d05e896e95855e5865472f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -set(decoder_srcs - asr_decoder.cc - asr_model.cc - context_graph.cc - ctc_prefix_beam_search.cc - ctc_wfst_beam_search.cc - ctc_endpoint.cc -) - -if(NOT TORCH AND NOT ONNX AND NOT XPU AND NOT IOS AND NOT BPU) - message(FATAL_ERROR "Please build with TORCH or ONNX or XPU or IOS or BPU!!!") -endif() -if(TORCH OR IOS) - list(APPEND decoder_srcs torch_asr_model.cc) -endif() -if(ONNX) - list(APPEND decoder_srcs onnx_asr_model.cc) -endif() - -add_library(decoder STATIC ${decoder_srcs}) -target_link_libraries(decoder PUBLIC kaldi-decoder frontend - post_processor utils) - -if(ANDROID) - target_link_libraries(decoder PUBLIC ${PYTORCH_LIBRARY} ${FBJNI_LIBRARY}) -else() - if(TORCH) - target_link_libraries(decoder PUBLIC ${TORCH_LIBRARIES}) - endif() - if(ONNX) - target_link_libraries(decoder PUBLIC onnxruntime) - endif() - if(BPU) - target_link_libraries(decoder PUBLIC bpu_asr_model) - endif() - if(XPU) - target_link_libraries(decoder PUBLIC xpu_conformer) - endif() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_decoder.cc deleted file mode 100644 index 34de7550ea287b37d2cb707e148f5d6853b3d804..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_decoder.cc +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/asr_decoder.h" - -#include - -#include -#include -#include - -#include "utils/timer.h" - -namespace wenet { - -AsrDecoder::AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts) - : feature_pipeline_(std::move(feature_pipeline)), - // Make a copy of the model ASR model since we will change the inner - // status of the model - model_(resource->model->Copy()), - post_processor_(resource->post_processor), - symbol_table_(resource->symbol_table), - fst_(resource->fst), - unit_table_(resource->unit_table), - opts_(opts), - ctc_endpointer_(new CtcEndpoint(opts.ctc_endpoint_config)) { - if (opts_.reverse_weight > 0) { - // Check if model has a right to left decoder - CHECK(model_->is_bidirectional_decoder()); - } - if (nullptr == fst_) { - searcher_.reset(new CtcPrefixBeamSearch(opts.ctc_prefix_search_opts, - resource->context_graph)); - } else { - searcher_.reset(new CtcWfstBeamSearch(*fst_, opts.ctc_wfst_search_opts, - resource->context_graph)); - } - ctc_endpointer_->frame_shift_in_ms(frame_shift_in_ms()); -} - -void AsrDecoder::Reset() { - start_ = false; - result_.clear(); - num_frames_ = 0; - global_frame_offset_ = 0; - model_->Reset(); - searcher_->Reset(); - feature_pipeline_->Reset(); - ctc_endpointer_->Reset(); -} - -void AsrDecoder::ResetContinuousDecoding() { - global_frame_offset_ = num_frames_; - start_ = false; - result_.clear(); - model_->Reset(); - searcher_->Reset(); - ctc_endpointer_->Reset(); -} - -DecodeState AsrDecoder::Decode(bool block) { - return this->AdvanceDecoding(block); -} - -void AsrDecoder::Rescoring() { - // Do attention rescoring - Timer timer; - AttentionRescoring(); - VLOG(2) << "Rescoring cost latency: " << timer.Elapsed() << "ms."; -} - -DecodeState AsrDecoder::AdvanceDecoding(bool block) { - DecodeState state = DecodeState::kEndBatch; - model_->set_chunk_size(opts_.chunk_size); - model_->set_num_left_chunks(opts_.num_left_chunks); - int num_required_frames = model_->num_frames_for_chunk(start_); - std::vector> chunk_feats; - // Return immediately if we do not want to block - if (!block && !feature_pipeline_->input_finished() && - feature_pipeline_->NumQueuedFrames() < num_required_frames) { - return DecodeState::kWaitFeats; - } - // If not okay, that means we reach the end of the input - if (!feature_pipeline_->Read(num_required_frames, &chunk_feats)) { - state = DecodeState::kEndFeats; - } - - num_frames_ += chunk_feats.size(); - VLOG(2) << "Required " << num_required_frames << " get " - << chunk_feats.size(); - Timer timer; - std::vector> ctc_log_probs; - model_->ForwardEncoder(chunk_feats, &ctc_log_probs); - int forward_time = timer.Elapsed(); - if (opts_.ctc_wfst_search_opts.blank_scale != 1.0) { - for (int i = 0; i < ctc_log_probs.size(); i++) { - ctc_log_probs[i][0] = ctc_log_probs[i][0] - + std::log(opts_.ctc_wfst_search_opts.blank_scale); - } - } - timer.Reset(); - searcher_->Search(ctc_log_probs); - int search_time = timer.Elapsed(); - VLOG(3) << "forward takes " << forward_time << " ms, search takes " - << search_time << " ms"; - UpdateResult(); - - if (state != DecodeState::kEndFeats) { - if (ctc_endpointer_->IsEndpoint(ctc_log_probs, DecodedSomething())) { - VLOG(1) << "Endpoint is detected at " << num_frames_; - state = DecodeState::kEndpoint; - } - } - - start_ = true; - return state; -} - -void AsrDecoder::UpdateResult(bool finish) { - const auto& hypotheses = searcher_->Outputs(); - const auto& inputs = searcher_->Inputs(); - const auto& likelihood = 
searcher_->Likelihood(); - const auto& times = searcher_->Times(); - result_.clear(); - - CHECK_EQ(hypotheses.size(), likelihood.size()); - for (size_t i = 0; i < hypotheses.size(); i++) { - const std::vector& hypothesis = hypotheses[i]; - - DecodeResult path; - path.score = likelihood[i]; - int offset = global_frame_offset_ * feature_frame_shift_in_ms(); - for (size_t j = 0; j < hypothesis.size(); j++) { - std::string word = symbol_table_->Find(hypothesis[j]); - // A detailed explanation of this if-else branch can be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - if (searcher_->Type() == kWfstBeamSearch) { - path.sentence += (' ' + word); - } else { - path.sentence += (word); - } - } - - // TimeStamp is only supported in final result - // TimeStamp of the output of CtcWfstBeamSearch may be inaccurate due to - // various FST operations when building the decoding graph. So here we use - // time stamp of the input(e2e model unit), which is more accurate, and it - // requires the symbol table of the e2e model used in training. - if (unit_table_ != nullptr && finish) { - const std::vector& input = inputs[i]; - const std::vector& time_stamp = times[i]; - CHECK_EQ(input.size(), time_stamp.size()); - for (size_t j = 0; j < input.size(); j++) { - std::string word = unit_table_->Find(input[j]); - int start = time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ > 0 - ? time_stamp[j] * frame_shift_in_ms() - time_stamp_gap_ - : 0; - if (j > 0) { - start = (time_stamp[j] - time_stamp[j - 1]) * frame_shift_in_ms() < - time_stamp_gap_ - ? (time_stamp[j - 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : start; - } - int end = time_stamp[j] * frame_shift_in_ms(); - if (j < input.size() - 1) { - end = (time_stamp[j + 1] - time_stamp[j]) * frame_shift_in_ms() < - time_stamp_gap_ - ? 
(time_stamp[j + 1] + time_stamp[j]) / 2 * - frame_shift_in_ms() - : end; - } - WordPiece word_piece(word, offset + start, offset + end); - path.word_pieces.emplace_back(word_piece); - } - } - - if (post_processor_ != nullptr) { - path.sentence = post_processor_->Process(path.sentence, finish); - } - result_.emplace_back(path); - } - - if (DecodedSomething()) { - VLOG(1) << "Partial CTC result " << result_[0].sentence; - } -} - -void AsrDecoder::AttentionRescoring() { - searcher_->FinalizeSearch(); - UpdateResult(true); - // No need to do rescoring - if (0.0 == opts_.rescoring_weight) { - return; - } - // Inputs() returns N-best input ids, which is the basic unit for rescoring - // In CtcPrefixBeamSearch, inputs are the same to outputs - const auto& hypotheses = searcher_->Inputs(); - int num_hyps = hypotheses.size(); - if (num_hyps <= 0) { - return; - } - - std::vector rescoring_score; - model_->AttentionRescoring(hypotheses, opts_.reverse_weight, - &rescoring_score); - - // Combine ctc score and rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - result_[i].score = opts_.rescoring_weight * rescoring_score[i] + - opts_.ctc_weight * result_[i].score; - } - std::sort(result_.begin(), result_.end(), DecodeResult::CompareFunc); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_decoder.h deleted file mode 100644 index df71f5b7bad7b2ffdc69bbd7ab11f576bed464d2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_decoder.h +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_ASR_DECODER_H_ -#define DECODER_ASR_DECODER_H_ - -#include -#include -#include -#include - -#include "fst/fstlib.h" -#include "fst/symbol-table.h" - -#include "decoder/asr_model.h" -#include "decoder/context_graph.h" -#include "decoder/ctc_endpoint.h" -#include "decoder/ctc_prefix_beam_search.h" -#include "decoder/ctc_wfst_beam_search.h" -#include "decoder/search_interface.h" -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/utils.h" - -namespace wenet { - -struct DecodeOptions { - // chunk_size is the frame number of one chunk after subsampling. - // e.g. if subsample rate is 4 and chunk_size = 16, the frames in - // one chunk are 64 = 16*4 - int chunk_size = 16; - int num_left_chunks = -1; - - // final_score = rescoring_weight * rescoring_score + ctc_weight * ctc_score; - // rescoring_score = left_to_right_score * (1 - reverse_weight) + - // right_to_left_score * reverse_weight - // Please note the concept of ctc_scores in the following two search - // methods are different. 
- // For CtcPrefixBeamSearch, it's a sum(prefix) score + context score - // For CtcWfstBeamSearch, it's a max(viterbi) path score + context score - // So we should carefully set ctc_weight according to the search methods. - float ctc_weight = 0.5; - float rescoring_weight = 1.0; - float reverse_weight = 0.0; - CtcEndpointConfig ctc_endpoint_config; - CtcPrefixBeamSearchOptions ctc_prefix_search_opts; - CtcWfstBeamSearchOptions ctc_wfst_search_opts; -}; - -struct WordPiece { - std::string word; - int start = -1; - int end = -1; - - WordPiece(std::string word, int start, int end) - : word(std::move(word)), start(start), end(end) {} -}; - -struct DecodeResult { - float score = -kFloatMax; - std::string sentence; - std::vector word_pieces; - - static bool CompareFunc(const DecodeResult& a, const DecodeResult& b) { - return a.score > b.score; - } -}; - -enum DecodeState { - kEndBatch = 0x00, // End of current decoding batch, normal case - kEndpoint = 0x01, // Endpoint is detected - kEndFeats = 0x02, // All feature is decoded - kWaitFeats = 0x03 // Feat is not enough for one chunk inference, wait -}; - -// DecodeResource is thread safe, which can be shared for multiple -// decoding threads -struct DecodeResource { - std::shared_ptr model = nullptr; - std::shared_ptr symbol_table = nullptr; - std::shared_ptr> fst = nullptr; - std::shared_ptr unit_table = nullptr; - std::shared_ptr context_graph = nullptr; - std::shared_ptr post_processor = nullptr; -}; - -// Torch ASR decoder -class AsrDecoder { - public: - AsrDecoder(std::shared_ptr feature_pipeline, - std::shared_ptr resource, - const DecodeOptions& opts); - // @param block: if true, block when feature is not enough for one chunk - // inference. Otherwise, return kWaitFeats. - DecodeState Decode(bool block = true); - void Rescoring(); - void Reset(); - void ResetContinuousDecoding(); - bool DecodedSomething() const { - return !result_.empty() && !result_[0].sentence.empty(); - } - - // This method is used for time benchmark - int num_frames_in_current_chunk() const { - return num_frames_in_current_chunk_; - } - int frame_shift_in_ms() const { - return model_->subsampling_rate() * - feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - int feature_frame_shift_in_ms() const { - return feature_pipeline_->config().frame_shift * 1000 / - feature_pipeline_->config().sample_rate; - } - const std::vector& result() const { return result_; } - - private: - DecodeState AdvanceDecoding(bool block = true); - void AttentionRescoring(); - - void UpdateResult(bool finish = false); - - std::shared_ptr feature_pipeline_; - std::shared_ptr model_; - std::shared_ptr post_processor_; - - std::shared_ptr> fst_ = nullptr; - // output symbol table - std::shared_ptr symbol_table_; - // e2e unit symbol table - std::shared_ptr unit_table_ = nullptr; - const DecodeOptions& opts_; - // cache feature - bool start_ = false; - // For continuous decoding - int num_frames_ = 0; - int global_frame_offset_ = 0; - const int time_stamp_gap_ = 100; // timestamp gap between words in a sentence - - std::unique_ptr searcher_; - std::unique_ptr ctc_endpointer_; - - int num_frames_in_current_chunk_ = 0; - std::vector result_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(AsrDecoder); -}; - -} // namespace wenet - -#endif // DECODER_ASR_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_model.cc 
deleted file mode 100644 index 8c7b0fb1195cf07bac6c3ff1bb8cb0e187e977da..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_model.cc +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#include "decoder/asr_model.h" - -#include -#include - -namespace wenet { - -int AsrModel::num_frames_for_chunk(bool start) const { - int num_required_frames = 0; - if (chunk_size_ > 0) { - if (!start) { // First batch - int context = right_context_ + 1; // Add current frame - num_required_frames = (chunk_size_ - 1) * subsampling_rate_ + context; - } else { - num_required_frames = chunk_size_ * subsampling_rate_; - } - } else { - num_required_frames = std::numeric_limits::max(); - } - return num_required_frames; -} - -void AsrModel::CacheFeature( - const std::vector>& chunk_feats) { - // Cache feature for next chunk - const int cached_feature_size = 1 + right_context_ - subsampling_rate_; - if (chunk_feats.size() >= cached_feature_size) { - // TODO(Binbin Zhang): Only deal the case when - // chunk_feats.size() > cached_feature_size here, and it's consistent - // with our current model, refine it later if we have new model or - // new requirements - cached_feature_.resize(cached_feature_size); - for (int i = 0; i < cached_feature_size; ++i) { - cached_feature_[i] = - chunk_feats[chunk_feats.size() - cached_feature_size + i]; - } - } -} - -void AsrModel::ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) { - ctc_prob->clear(); - int num_frames = cached_feature_.size() + chunk_feats.size(); - if (num_frames >= right_context_ + 1) { - this->ForwardEncoderFunc(chunk_feats, ctc_prob); - this->CacheFeature(chunk_feats); - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_model.h deleted file mode 100644 index d100dd818551014fa4769c1766bc3b1b626e8453..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/asr_model.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2022 Horizon Robotics. All Rights Reserved. -// Author: binbin.zhang@horizon.ai (Binbin Zhang) - -#ifndef DECODER_ASR_MODEL_H_ -#define DECODER_ASR_MODEL_H_ - -#include -#include -#include -#include - -#include "utils/timer.h" -#include "utils/utils.h" - -namespace wenet { - -class AsrModel { - public: - virtual int right_context() const { return right_context_; } - virtual int subsampling_rate() const { return subsampling_rate_; } - virtual int sos() const { return sos_; } - virtual int eos() const { return eos_; } - virtual bool is_bidirectional_decoder() const { - return is_bidirectional_decoder_; - } - virtual int offset() const { return offset_; } - - // If chunk_size > 0, streaming case. 
Otherwise, none streaming case - virtual void set_chunk_size(int chunk_size) { chunk_size_ = chunk_size; } - virtual void set_num_left_chunks(int num_left_chunks) { - num_left_chunks_ = num_left_chunks; - } - // start: if it is the start chunk of one sentence - virtual int num_frames_for_chunk(bool start) const; - - virtual void Reset() = 0; - - virtual void ForwardEncoder( - const std::vector>& chunk_feats, - std::vector>* ctc_prob); - - virtual void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) = 0; - - virtual std::shared_ptr Copy() const = 0; - - protected: - virtual void ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* ctc_prob) = 0; - virtual void CacheFeature(const std::vector>& chunk_feats); - - int right_context_ = 1; - int subsampling_rate_ = 1; - int sos_ = 0; - int eos_ = 0; - bool is_bidirectional_decoder_ = false; - int chunk_size_ = 16; - int num_left_chunks_ = -1; // -1 means all left chunks - int offset_ = 0; - - std::vector> cached_feature_; -}; - -} // namespace wenet - -#endif // DECODER_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/context_graph.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/context_graph.cc deleted file mode 100644 index adc59c506de2afa7087815887295e4d8735d2a35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/context_graph.cc +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/context_graph.h" - -#include - -#include "fst/determinize.h" - -#include "utils/string.h" -#include "utils/utils.h" - -namespace wenet { - -ContextGraph::ContextGraph(ContextConfig config) : config_(config) {} - -void ContextGraph::BuildContextGraph( - const std::vector& query_contexts, - const std::shared_ptr& symbol_table) { - CHECK(symbol_table != nullptr) << "Symbols table should not be nullptr!"; - start_tag_id_ = symbol_table->AddSymbol(""); - end_tag_id_ = symbol_table->AddSymbol(""); - symbol_table_ = symbol_table; - if (query_contexts.empty()) { - if (graph_ != nullptr) graph_.reset(); - return; - } - - std::unique_ptr ofst(new fst::StdVectorFst()); - // State 0 is the start state and the final state. - int start_state = ofst->AddState(); - ofst->SetStart(start_state); - ofst->SetFinal(start_state, fst::StdArc::Weight::One()); - - LOG(INFO) << "Contexts count size: " << query_contexts.size(); - int count = 0; - for (const auto& context : query_contexts) { - if (context.size() > config_.max_context_length) { - LOG(INFO) << "Skip long context: " << context; - continue; - } - if (++count > config_.max_contexts) break; - - std::vector words; - // Split context to words by symbol table, and build the context graph. 
- bool no_oov = SplitUTF8StringToWords(Trim(context), symbol_table, &words); - if (!no_oov) { - LOG(WARNING) << "Ignore unknown word found during compilation."; - continue; - } - - int prev_state = start_state; - int next_state = start_state; - float escape_score = 0; - for (size_t i = 0; i < words.size(); ++i) { - int word_id = symbol_table_->Find(words[i]); - float score = (i * config_.incremental_context_score - + config_.context_score) * UTF8StringLength(words[i]); - next_state = (i < words.size() - 1) ? ofst->AddState() : start_state; - ofst->AddArc(prev_state, - fst::StdArc(word_id, word_id, score, next_state)); - // Add escape arc to clean the previous context score. - if (i > 0) { - // ilabel and olabel of the escape arc is 0 (). - ofst->AddArc(prev_state, fst::StdArc(0, 0, -escape_score, start_state)); - } - prev_state = next_state; - escape_score += score; - } - } - std::unique_ptr det_fst(new fst::StdVectorFst()); - fst::Determinize(*ofst, det_fst.get()); - graph_ = std::move(det_fst); -} - -int ContextGraph::GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary) { - int next_state = 0; - for (fst::ArcIterator aiter(*graph_, cur_state); !aiter.Done(); - aiter.Next()) { - const fst::StdArc& arc = aiter.Value(); - if (arc.ilabel == 0) { - // escape score, will be overwritten when ilabel equals to word id. - *score = arc.weight.Value(); - } else if (arc.ilabel == word_id) { - next_state = arc.nextstate; - *score = arc.weight.Value(); - if (cur_state == 0) { - *is_start_boundary = true; - } - if (graph_->Final(arc.nextstate) == fst::StdArc::Weight::One()) { - *is_end_boundary = true; - } - break; - } - } - return next_state; -} - -bool ContextGraph::SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - bool no_oov = true; - for (size_t start = 0; start < chars.size();) { - for (size_t end = chars.size(); end > start; --end) { - std::string word; - for (size_t i = start; i < end; i++) { - word += chars[i]; - } - // Skip space. - if (word == " ") { - start = end; - continue; - } - // Add '▁' at the beginning of English word. - if (IsAlpha(word)) { - word = kSpaceSymbol + word; - } - - if (symbol_table->Find(word) != -1) { - words->emplace_back(word); - start = end; - continue; - } - if (end == start + 1) { - ++start; - no_oov = false; - LOG(WARNING) << word << " is oov."; - } - } - } - return no_oov; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/context_graph.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/context_graph.h deleted file mode 100644 index 41b59206987cfe22d421f40506057830b6311f8e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/context_graph.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CONTEXT_GRAPH_H_ -#define DECODER_CONTEXT_GRAPH_H_ - -#include -#include -#include - -#include "fst/compose.h" -#include "fst/fst.h" -#include "fst/vector-fst.h" - -namespace wenet { - -using StateId = fst::StdArc::StateId; - -struct ContextConfig { - int max_contexts = 5000; - int max_context_length = 100; - float context_score = 3.0; - float incremental_context_score = 0.0; -}; - -class ContextGraph { - public: - explicit ContextGraph(ContextConfig config); - void BuildContextGraph(const std::vector& query_context, - const std::shared_ptr& symbol_table); - int GetNextState(int cur_state, int word_id, float* score, - bool* is_start_boundary, bool* is_end_boundary); - - int start_tag_id() { return start_tag_id_; } - int end_tag_id() { return end_tag_id_; } - - private: - bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - - int start_tag_id_ = -1; - int end_tag_id_ = -1; - ContextConfig config_; - std::shared_ptr symbol_table_ = nullptr; - std::unique_ptr graph_ = nullptr; - DISALLOW_COPY_AND_ASSIGN(ContextGraph); -}; - -} // namespace wenet - -#endif // DECODER_CONTEXT_GRAPH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_endpoint.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_endpoint.cc deleted file mode 100644 index 4a64dd048f32401ab0dca468836cfac8be943d26..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_endpoint.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_endpoint.h" - -#include - -#include -#include - -#include "utils/log.h" - -namespace wenet { - -CtcEndpoint::CtcEndpoint(const CtcEndpointConfig& config) : config_(config) { - Reset(); -} - -void CtcEndpoint::Reset() { - num_frames_decoded_ = 0; - num_frames_trailing_blank_ = 0; -} - -static bool RuleActivated(const CtcEndpointRule& rule, - const std::string& rule_name, bool decoded_sth, - int trailing_silence, int utterance_length) { - bool ans = (decoded_sth || !rule.must_decoded_sth) && - trailing_silence >= rule.min_trailing_silence && - utterance_length >= rule.min_utterance_length; - if (ans) { - VLOG(2) << "Endpointing rule " << rule_name - << " activated: " << (decoded_sth ? 
"true" : "false") << ',' - << trailing_silence << ',' << utterance_length; - } - return ans; -} - -bool CtcEndpoint::IsEndpoint( - const std::vector>& ctc_log_probs, - bool decoded_something) { - for (int t = 0; t < ctc_log_probs.size(); ++t) { - const auto& logp_t = ctc_log_probs[t]; - float blank_prob = expf(logp_t[config_.blank]); - - num_frames_decoded_++; - if (blank_prob > config_.blank_threshold) { - num_frames_trailing_blank_++; - } else { - num_frames_trailing_blank_ = 0; - } - } - CHECK_GE(num_frames_decoded_, num_frames_trailing_blank_); - CHECK_GT(frame_shift_in_ms_, 0); - int utterance_length = num_frames_decoded_ * frame_shift_in_ms_; - int trailing_silence = num_frames_trailing_blank_ * frame_shift_in_ms_; - if (RuleActivated(config_.rule1, "rule1", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule2, "rule2", decoded_something, trailing_silence, - utterance_length)) - return true; - if (RuleActivated(config_.rule3, "rule3", decoded_something, trailing_silence, - utterance_length)) - return true; - return false; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_endpoint.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_endpoint.h deleted file mode 100644 index 56d9e08e7d3fab5562028e956f7b1d6ebac7b9e4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_endpoint.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_ENDPOINT_H_ -#define DECODER_CTC_ENDPOINT_H_ - -#include - -namespace wenet { - -struct CtcEndpointRule { - bool must_decoded_sth; - int min_trailing_silence; - int min_utterance_length; - - CtcEndpointRule(bool must_decoded_sth = true, int min_trailing_silence = 1000, - int min_utterance_length = 0) - : must_decoded_sth(must_decoded_sth), - min_trailing_silence(min_trailing_silence), - min_utterance_length(min_utterance_length) {} -}; - -struct CtcEndpointConfig { - /// We consider blank as silence for purposes of endpointing. - int blank = 0; // blank id - float blank_threshold = 0.8; // blank threshold to be silence - /// We support three rules. We terminate decoding if ANY of these rules - /// evaluates to "true". If you want to add more rules, do it by changing this - /// code. If you want to disable a rule, you can set the silence-timeout for - /// that rule to a very large number. - - /// rule1 times out after 5000 ms of silence, even if we decoded nothing. - CtcEndpointRule rule1; - /// rule2 times out after 1000 ms of silence after decoding something. - CtcEndpointRule rule2; - /// rule3 times out after the utterance is 20000 ms long, regardless of - /// anything else. 
- CtcEndpointRule rule3; - - CtcEndpointConfig() - : rule1(false, 5000, 0), rule2(true, 1000, 0), rule3(false, 0, 20000) {} -}; - -class CtcEndpoint { - public: - explicit CtcEndpoint(const CtcEndpointConfig& config); - - void Reset(); - /// This function returns true if this set of endpointing rules thinks we - /// should terminate decoding. - bool IsEndpoint(const std::vector>& ctc_log_probs, - bool decoded_something); - - void frame_shift_in_ms(int frame_shift_in_ms) { - frame_shift_in_ms_ = frame_shift_in_ms; - } - - private: - CtcEndpointConfig config_; - int frame_shift_in_ms_ = -1; - int num_frames_decoded_ = 0; - int num_frames_trailing_blank_ = 0; -}; - -} // namespace wenet - -#endif // DECODER_CTC_ENDPOINT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_prefix_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_prefix_beam_search.cc deleted file mode 100644 index 154c8864ba98255528a33a80a35b18eee8fa5dc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_prefix_beam_search.cc +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/ctc_prefix_beam_search.h" - -#include -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -CtcPrefixBeamSearch::CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : opts_(opts), context_graph_(context_graph) { - Reset(); -} - -void CtcPrefixBeamSearch::Reset() { - hypotheses_.clear(); - likelihood_.clear(); - cur_hyps_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - outputs_.clear(); - abs_time_step_ = 0; - PrefixScore prefix_score; - prefix_score.s = 0.0; - prefix_score.ns = -kFloatMax; - prefix_score.v_s = 0.0; - prefix_score.v_ns = 0.0; - std::vector empty; - cur_hyps_[empty] = prefix_score; - outputs_.emplace_back(empty); - hypotheses_.emplace_back(empty); - likelihood_.emplace_back(prefix_score.total_score()); - times_.emplace_back(empty); -} - -static bool PrefixScoreCompare( - const std::pair, PrefixScore>& a, - const std::pair, PrefixScore>& b) { - return a.second.total_score() > b.second.total_score(); -} - -void CtcPrefixBeamSearch::UpdateOutputs( - const std::pair, PrefixScore>& prefix) { - const std::vector& input = prefix.first; - const std::vector& start_boundaries = prefix.second.start_boundaries; - const std::vector& end_boundaries = prefix.second.end_boundaries; - - std::vector output; - int s = 0; - int e = 0; - for (int i = 0; i < input.size(); ++i) { - if (s < start_boundaries.size() && i == start_boundaries[s]) { - output.emplace_back(context_graph_->start_tag_id()); - ++s; - } - output.emplace_back(input[i]); - if (e < end_boundaries.size() && i == end_boundaries[e]) { - output.emplace_back(context_graph_->end_tag_id()); - ++e; - } - } - outputs_.emplace_back(output); -} - -void CtcPrefixBeamSearch::UpdateHypotheses( - const std::vector, PrefixScore>>& hpys) { - cur_hyps_.clear(); - outputs_.clear(); - hypotheses_.clear(); - likelihood_.clear(); - viterbi_likelihood_.clear(); - times_.clear(); - for (auto& item : hpys) { - cur_hyps_[item.first] = item.second; - UpdateOutputs(item); - hypotheses_.emplace_back(std::move(item.first)); - likelihood_.emplace_back(item.second.total_score()); - viterbi_likelihood_.emplace_back(item.second.viterbi_score()); - times_.emplace_back(item.second.times()); - } -} - -// Please refer https://robin1001.github.io/2020/12/11/ctc-search -// for how CTC prefix beam search works, and there is a simple graph demo in -// it. -void CtcPrefixBeamSearch::Search(const std::vector>& logp) { - if (logp.size() == 0) return; - int first_beam_size = - std::min(static_cast(logp[0].size()), opts_.first_beam_size); - for (int t = 0; t < logp.size(); ++t, ++abs_time_step_) { - const std::vector& logp_t = logp[t]; - std::unordered_map, PrefixScore, PrefixHash> next_hyps; - // 1. First beam prune, only select topk candidates - std::vector topk_score; - std::vector topk_index; - TopK(logp_t, first_beam_size, &topk_score, &topk_index); - - // 2. Token passing - for (int i = 0; i < topk_index.size(); ++i) { - int id = topk_index[i]; - auto prob = topk_score[i]; - for (const auto& it : cur_hyps_) { - const std::vector& prefix = it.first; - const PrefixScore& prefix_score = it.second; - // If prefix doesn't exist in next_hyps, next_hyps[prefix] will insert - // PrefixScore(-inf, -inf) by default, since the default constructor - // of PrefixScore will set fields s(blank ending score) and - // ns(none blank ending score) to -inf, respectively. 
- if (id == opts_.blank) { - // Case 0: *a + ε => *a - PrefixScore& next_score = next_hyps[prefix]; - next_score.s = LogAdd(next_score.s, prefix_score.score() + prob); - next_score.v_s = prefix_score.viterbi_score() + prob; - next_score.times_s = prefix_score.times(); - // Prefix not changed, copy the context from prefix. - if (context_graph_ && !next_score.has_context) { - next_score.CopyContext(prefix_score); - next_score.has_context = true; - } - } else if (!prefix.empty() && id == prefix.back()) { - // Case 1: *a + a => *a - PrefixScore& next_score1 = next_hyps[prefix]; - next_score1.ns = LogAdd(next_score1.ns, prefix_score.ns + prob); - if (next_score1.v_ns < prefix_score.v_ns + prob) { - next_score1.v_ns = prefix_score.v_ns + prob; - if (next_score1.cur_token_prob < prob) { - next_score1.cur_token_prob = prob; - next_score1.times_ns = prefix_score.times_ns; - CHECK_GT(next_score1.times_ns.size(), 0); - next_score1.times_ns.back() = abs_time_step_; - } - } - if (context_graph_ && !next_score1.has_context) { - next_score1.CopyContext(prefix_score); - next_score1.has_context = true; - } - - // Case 2: *aε + a => *aa - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score2 = next_hyps[new_prefix]; - next_score2.ns = LogAdd(next_score2.ns, prefix_score.s + prob); - if (next_score2.v_ns < prefix_score.v_s + prob) { - next_score2.v_ns = prefix_score.v_s + prob; - next_score2.cur_token_prob = prob; - next_score2.times_ns = prefix_score.times_s; - next_score2.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score2.has_context) { - // Prefix changed, calculate the context score. - next_score2.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score2.has_context = true; - } - } else { - // Case 3: *a + b => *ab, *aε + b => *ab - std::vector new_prefix(prefix); - new_prefix.emplace_back(id); - PrefixScore& next_score = next_hyps[new_prefix]; - next_score.ns = LogAdd(next_score.ns, prefix_score.score() + prob); - if (next_score.v_ns < prefix_score.viterbi_score() + prob) { - next_score.v_ns = prefix_score.viterbi_score() + prob; - next_score.cur_token_prob = prob; - next_score.times_ns = prefix_score.times(); - next_score.times_ns.emplace_back(abs_time_step_); - } - if (context_graph_ && !next_score.has_context) { - // Calculate the context score. - next_score.UpdateContext(context_graph_, prefix_score, id, - prefix.size()); - next_score.has_context = true; - } - } - } - } - - // 3. Second beam prune, only keep top n best paths - std::vector, PrefixScore>> arr(next_hyps.begin(), - next_hyps.end()); - int second_beam_size = - std::min(static_cast(arr.size()), opts_.second_beam_size); - std::nth_element(arr.begin(), arr.begin() + second_beam_size, arr.end(), - PrefixScoreCompare); - arr.resize(second_beam_size); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // 4. Update cur_hyps_ and get new result - UpdateHypotheses(arr); - } -} - -void CtcPrefixBeamSearch::FinalizeSearch() { UpdateFinalContext(); } - -void CtcPrefixBeamSearch::UpdateFinalContext() { - if (context_graph_ == nullptr) return; - CHECK_EQ(hypotheses_.size(), cur_hyps_.size()); - CHECK_EQ(hypotheses_.size(), likelihood_.size()); - // We should backoff the context score/state when the context is - // not fully matched at the last time. 
- for (const auto& prefix : hypotheses_) { - PrefixScore& prefix_score = cur_hyps_[prefix]; - if (prefix_score.context_state != 0) { - prefix_score.UpdateContext(context_graph_, prefix_score, 0, - prefix.size()); - } - } - std::vector, PrefixScore>> arr(cur_hyps_.begin(), - cur_hyps_.end()); - std::sort(arr.begin(), arr.end(), PrefixScoreCompare); - - // Update cur_hyps_ and get new result - UpdateHypotheses(arr); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_prefix_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_prefix_beam_search.h deleted file mode 100644 index f44ec23c37af517c9e45140f89ef7346768f5d35..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_prefix_beam_search.h +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_CTC_PREFIX_BEAM_SEARCH_H_ -#define DECODER_CTC_PREFIX_BEAM_SEARCH_H_ - -#include -#include -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "utils/utils.h" - -namespace wenet { - -struct CtcPrefixBeamSearchOptions { - int blank = 0; // blank id - int first_beam_size = 10; - int second_beam_size = 10; -}; - -struct PrefixScore { - float s = -kFloatMax; // blank ending score - float ns = -kFloatMax; // none blank ending score - float v_s = -kFloatMax; // viterbi blank ending score - float v_ns = -kFloatMax; // viterbi none blank ending score - float cur_token_prob = -kFloatMax; // prob of current token - std::vector times_s; // times of viterbi blank path - std::vector times_ns; // times of viterbi none blank path - - float score() const { return LogAdd(s, ns); } - float viterbi_score() const { return v_s > v_ns ? v_s : v_ns; } - const std::vector& times() const { - return v_s > v_ns ? 
times_s : times_ns; - } - - bool has_context = false; - int context_state = 0; - float context_score = 0; - std::vector start_boundaries; - std::vector end_boundaries; - - void CopyContext(const PrefixScore& prefix_score) { - context_state = prefix_score.context_state; - context_score = prefix_score.context_score; - start_boundaries = prefix_score.start_boundaries; - end_boundaries = prefix_score.end_boundaries; - } - - void UpdateContext(const std::shared_ptr& context_graph, - const PrefixScore& prefix_score, int word_id, - int prefix_len) { - this->CopyContext(prefix_score); - - float score = 0; - bool is_start_boundary = false; - bool is_end_boundary = false; - - context_state = - context_graph->GetNextState(prefix_score.context_state, word_id, &score, - &is_start_boundary, &is_end_boundary); - context_score += score; - if (is_start_boundary) start_boundaries.emplace_back(prefix_len); - if (is_end_boundary) end_boundaries.emplace_back(prefix_len); - } - - float total_score() const { return score() + context_score; } -}; - -struct PrefixHash { - size_t operator()(const std::vector& prefix) const { - size_t hash_code = 0; - // here we use KB&DR hash code - for (int id : prefix) { - hash_code = id + 31 * hash_code; - } - return hash_code; - } -}; - -class CtcPrefixBeamSearch : public SearchInterface { - public: - explicit CtcPrefixBeamSearch( - const CtcPrefixBeamSearchOptions& opts, - const std::shared_ptr& context_graph = nullptr); - - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kPrefixBeamSearch; } - void UpdateOutputs(const std::pair, PrefixScore>& prefix); - void UpdateHypotheses( - const std::vector, PrefixScore>>& hpys); - void UpdateFinalContext(); - - const std::vector& viterbi_likelihood() const { - return viterbi_likelihood_; - } - const std::vector>& Inputs() const override { - return hypotheses_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - int abs_time_step_ = 0; - - // N-best list and corresponding likelihood_, in sorted order - std::vector> hypotheses_; - std::vector likelihood_; - std::vector viterbi_likelihood_; - std::vector> times_; - - std::unordered_map, PrefixScore, PrefixHash> cur_hyps_; - std::shared_ptr context_graph_ = nullptr; - // Outputs contain the hypotheses_ and tags like: and - std::vector> outputs_; - const CtcPrefixBeamSearchOptions& opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(CtcPrefixBeamSearch); -}; - -} // namespace wenet - -#endif // DECODER_CTC_PREFIX_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_wfst_beam_search.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_wfst_beam_search.cc deleted file mode 100644 index 10e93f387e87b5f16fb7784d7060c50f227bf58e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_wfst_beam_search.cc +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "decoder/ctc_wfst_beam_search.h" - -#include - -namespace wenet { - -void DecodableTensorScaled::Reset() { - num_frames_ready_ = 0; - done_ = false; - // Give an empty initialization, will throw error when - // AcceptLoglikes is not called - logp_.clear(); -} - -void DecodableTensorScaled::AcceptLoglikes(const std::vector& logp) { - ++num_frames_ready_; - // TODO(Binbin Zhang): Avoid copy here - logp_ = logp; -} - -float DecodableTensorScaled::LogLikelihood(int32 frame, int32 index) { - CHECK_GT(index, 0); - CHECK_LT(frame, num_frames_ready_); - return scale_ * logp_[index - 1]; -} - -bool DecodableTensorScaled::IsLastFrame(int32 frame) const { - CHECK_LT(frame, num_frames_ready_); - return done_ && (frame == num_frames_ready_ - 1); -} - -int32 DecodableTensorScaled::NumIndices() const { - LOG(FATAL) << "Not implement"; - return 0; -} - -CtcWfstBeamSearch::CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph) - : decodable_(opts.acoustic_scale), - decoder_(fst, opts, context_graph), - context_graph_(context_graph), - opts_(opts) { - Reset(); -} - -void CtcWfstBeamSearch::Reset() { - num_frames_ = 0; - decoded_frames_mapping_.clear(); - is_last_frame_blank_ = false; - last_best_ = 0; - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - decodable_.Reset(); - decoder_.InitDecoding(); -} - -void CtcWfstBeamSearch::Search(const std::vector>& logp) { - if (0 == logp.size()) { - return; - } - // Every time we get the log posterior, we decode it all before return - for (int i = 0; i < logp.size(); i++) { - float blank_score = std::exp(logp[i][0]); - if (blank_score > opts_.blank_skip_thresh * opts_.blank_scale) { - VLOG(3) << "skipping frame " << num_frames_ << " score " << blank_score; - is_last_frame_blank_ = true; - last_frame_prob_ = logp[i]; - } else { - // Get the best symbol - int cur_best = - std::max_element(logp[i].begin(), logp[i].end()) - logp[i].begin(); - // Optional, adding one blank frame if we has skipped it in two same - // symbols - if (cur_best != 0 && is_last_frame_blank_ && cur_best == last_best_) { - decodable_.AcceptLoglikes(last_frame_prob_); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_ - 1); - VLOG(2) << "Adding blank frame at symbol " << cur_best; - } - last_best_ = cur_best; - - decodable_.AcceptLoglikes(logp[i]); - decoder_.AdvanceDecoding(&decodable_, 1); - decoded_frames_mapping_.push_back(num_frames_); - is_last_frame_blank_ = false; - } - num_frames_++; - } - // Get the best path - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - if (decoded_frames_mapping_.size() > 0) { - inputs_.resize(1); - outputs_.resize(1); - likelihood_.resize(1); - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, false); - std::vector alignment; - kaldi::LatticeWeight weight; - fst::GetLinearSymbolSequence(lat, &alignment, &outputs_[0], &weight); - ConvertToInputs(alignment, &inputs_[0]); - RemoveContinuousTags(&outputs_[0]); - VLOG(3) << weight.Value1() << " " << weight.Value2(); - likelihood_[0] = 
-(weight.Value1() + weight.Value2()); - } -} - -void CtcWfstBeamSearch::FinalizeSearch() { - decodable_.SetFinish(); - decoder_.FinalizeDecoding(); - inputs_.clear(); - outputs_.clear(); - likelihood_.clear(); - times_.clear(); - if (decoded_frames_mapping_.size() > 0) { - std::vector nbest_lats; - if (opts_.nbest == 1) { - kaldi::Lattice lat; - decoder_.GetBestPath(&lat, true); - nbest_lats.push_back(std::move(lat)); - } else { - // Get N-best path by lattice(CompactLattice) - kaldi::CompactLattice clat; - decoder_.GetLattice(&clat, true); - kaldi::Lattice lat, nbest_lat; - fst::ConvertLattice(clat, &lat); - // TODO(Binbin Zhang): it's n-best word lists here, not character n-best - fst::ShortestPath(lat, &nbest_lat, opts_.nbest); - fst::ConvertNbestToVector(nbest_lat, &nbest_lats); - } - int nbest = nbest_lats.size(); - inputs_.resize(nbest); - outputs_.resize(nbest); - likelihood_.resize(nbest); - times_.resize(nbest); - for (int i = 0; i < nbest; i++) { - kaldi::LatticeWeight weight; - std::vector alignment; - fst::GetLinearSymbolSequence(nbest_lats[i], &alignment, &outputs_[i], - &weight); - ConvertToInputs(alignment, &inputs_[i], ×_[i]); - RemoveContinuousTags(&outputs_[i]); - likelihood_[i] = -(weight.Value1() + weight.Value2()); - } - } -} - -void CtcWfstBeamSearch::ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time) { - input->clear(); - if (time != nullptr) time->clear(); - for (int cur = 0; cur < alignment.size(); ++cur) { - // ignore blank - if (alignment[cur] - 1 == 0) continue; - // merge continuous same label - if (cur > 0 && alignment[cur] == alignment[cur - 1]) continue; - - input->push_back(alignment[cur] - 1); - if (time != nullptr) { - time->push_back(decoded_frames_mapping_[cur]); - } - } -} - -void CtcWfstBeamSearch::RemoveContinuousTags(std::vector* output) { - if (context_graph_) { - for (auto it = output->begin(); it != output->end();) { - if (*it == context_graph_->start_tag_id() || - *it == context_graph_->end_tag_id()) { - if (it + 1 != output->end() && *it == *(it + 1)) { - it = output->erase(it); - continue; - } - } - ++it; - } - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_wfst_beam_search.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_wfst_beam_search.h deleted file mode 100644 index 204a0c8db1254035b7e3bd4a6e02b65d66b756f3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/ctc_wfst_beam_search.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#ifndef DECODER_CTC_WFST_BEAM_SEARCH_H_ -#define DECODER_CTC_WFST_BEAM_SEARCH_H_ - -#include -#include - -#include "decoder/context_graph.h" -#include "decoder/search_interface.h" -#include "kaldi/decoder/lattice-faster-online-decoder.h" -#include "utils/utils.h" - -namespace wenet { - -class DecodableTensorScaled : public kaldi::DecodableInterface { - public: - explicit DecodableTensorScaled(float scale = 1.0) : scale_(scale) { Reset(); } - - void Reset(); - int32 NumFramesReady() const override { return num_frames_ready_; } - bool IsLastFrame(int32 frame) const override; - float LogLikelihood(int32 frame, int32 index) override; - int32 NumIndices() const override; - void AcceptLoglikes(const std::vector& logp); - void SetFinish() { done_ = true; } - - private: - int num_frames_ready_ = 0; - float scale_ = 1.0; - bool done_ = false; - std::vector logp_; -}; - -// LatticeFasterDecoderConfig has the following key members -// beam: decoding beam -// max_active: Decoder max active states -// lattice_beam: Lattice generation beam -struct CtcWfstBeamSearchOptions : public kaldi::LatticeFasterDecoderConfig { - float acoustic_scale = 1.0; - float nbest = 10; - // When blank score is greater than this thresh, skip the frame in viterbi - // search - float blank_skip_thresh = 0.98; - float blank_scale = 1.0; -}; - -class CtcWfstBeamSearch : public SearchInterface { - public: - explicit CtcWfstBeamSearch( - const fst::Fst& fst, const CtcWfstBeamSearchOptions& opts, - const std::shared_ptr& context_graph); - void Search(const std::vector>& logp) override; - void Reset() override; - void FinalizeSearch() override; - SearchType Type() const override { return SearchType::kWfstBeamSearch; } - // For CTC prefix beam search, both inputs and outputs are hypotheses_ - const std::vector>& Inputs() const override { - return inputs_; - } - const std::vector>& Outputs() const override { - return outputs_; - } - const std::vector& Likelihood() const override { return likelihood_; } - const std::vector>& Times() const override { return times_; } - - private: - // Sub one and remove - void ConvertToInputs(const std::vector& alignment, - std::vector* input, - std::vector* time = nullptr); - void RemoveContinuousTags(std::vector* output); - - int num_frames_ = 0; - std::vector decoded_frames_mapping_; - - int last_best_ = 0; // last none blank best id - std::vector last_frame_prob_; - bool is_last_frame_blank_ = false; - std::vector> inputs_, outputs_; - std::vector likelihood_; - std::vector> times_; - DecodableTensorScaled decodable_; - kaldi::LatticeFasterOnlineDecoder decoder_; - std::shared_ptr context_graph_; - const CtcWfstBeamSearchOptions& opts_; -}; - -} // namespace wenet - -#endif // DECODER_CTC_WFST_BEAM_SEARCH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/onnx_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/onnx_asr_model.cc deleted file mode 100644 index fc7afc704febbde3b7e350e392dc46763c453e74..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/onnx_asr_model.cc +++ /dev/null @@ -1,430 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/onnx_asr_model.h" - -#include -#include -#include - -#include "utils/string.h" - -namespace wenet { - -Ort::Env OnnxAsrModel::env_ = Ort::Env(ORT_LOGGING_LEVEL_WARNING, ""); -Ort::SessionOptions OnnxAsrModel::session_options_ = Ort::SessionOptions(); - -void OnnxAsrModel::InitEngineThreads(int num_threads) { - session_options_.SetIntraOpNumThreads(num_threads); -} - -void OnnxAsrModel::GetInputOutputInfo( - const std::shared_ptr& session, - std::vector* in_names, std::vector* out_names) { - Ort::AllocatorWithDefaultOptions allocator; - // Input info - int num_nodes = session->GetInputCount(); - in_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetInputName(i, allocator); - Ort::TypeInfo type_info = session->GetInputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tInput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*in_names)[i] = name; - } - // Output info - num_nodes = session->GetOutputCount(); - out_names->resize(num_nodes); - for (int i = 0; i < num_nodes; ++i) { - char* name = session->GetOutputName(i, allocator); - Ort::TypeInfo type_info = session->GetOutputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - ONNXTensorElementDataType type = tensor_info.GetElementType(); - std::vector node_dims = tensor_info.GetShape(); - std::stringstream shape; - for (auto j : node_dims) { - shape << j; - shape << " "; - } - LOG(INFO) << "\tOutput " << i << " : name=" << name << " type=" << type - << " dims=" << shape.str(); - (*out_names)[i] = name; - } -} - -void OnnxAsrModel::Read(const std::string& model_dir) { - std::string encoder_onnx_path = model_dir + "/encoder.onnx"; - std::string rescore_onnx_path = model_dir + "/decoder.onnx"; - std::string ctc_onnx_path = model_dir + "/ctc.onnx"; - - // 1. Load sessions - try { -#ifdef _MSC_VER - encoder_session_ = std::make_shared( - env_, ToWString(encoder_onnx_path).c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, ToWString(rescore_onnx_path).c_str(), session_options_); - ctc_session_ = std::make_shared( - env_, ToWString(ctc_onnx_path).c_str(), session_options_); -#else - encoder_session_ = std::make_shared( - env_, encoder_onnx_path.c_str(), session_options_); - rescore_session_ = std::make_shared( - env_, rescore_onnx_path.c_str(), session_options_); - ctc_session_ = std::make_shared(env_, ctc_onnx_path.c_str(), - session_options_); -#endif - } catch (std::exception const& e) { - LOG(ERROR) << "error when load onnx model: " << e.what(); - exit(0); - } - - // 2. 
Read metadata - auto model_metadata = encoder_session_->GetModelMetadata(); - - Ort::AllocatorWithDefaultOptions allocator; - encoder_output_size_ = - atoi(model_metadata.LookupCustomMetadataMap("output_size", allocator)); - num_blocks_ = - atoi(model_metadata.LookupCustomMetadataMap("num_blocks", allocator)); - head_ = atoi(model_metadata.LookupCustomMetadataMap("head", allocator)); - cnn_module_kernel_ = atoi( - model_metadata.LookupCustomMetadataMap("cnn_module_kernel", allocator)); - subsampling_rate_ = atoi( - model_metadata.LookupCustomMetadataMap("subsampling_rate", allocator)); - right_context_ = - atoi(model_metadata.LookupCustomMetadataMap("right_context", allocator)); - sos_ = atoi(model_metadata.LookupCustomMetadataMap("sos_symbol", allocator)); - eos_ = atoi(model_metadata.LookupCustomMetadataMap("eos_symbol", allocator)); - is_bidirectional_decoder_ = atoi(model_metadata.LookupCustomMetadataMap( - "is_bidirectional_decoder", allocator)); - chunk_size_ = - atoi(model_metadata.LookupCustomMetadataMap("chunk_size", allocator)); - num_left_chunks_ = - atoi(model_metadata.LookupCustomMetadataMap("left_chunks", allocator)); - - LOG(INFO) << "Onnx Model Info:"; - LOG(INFO) << "\tencoder_output_size " << encoder_output_size_; - LOG(INFO) << "\tnum_blocks " << num_blocks_; - LOG(INFO) << "\thead " << head_; - LOG(INFO) << "\tcnn_module_kernel " << cnn_module_kernel_; - LOG(INFO) << "\tsubsampling_rate " << subsampling_rate_; - LOG(INFO) << "\tright_context " << right_context_; - LOG(INFO) << "\tsos " << sos_; - LOG(INFO) << "\teos " << eos_; - LOG(INFO) << "\tis bidirectional decoder " << is_bidirectional_decoder_; - LOG(INFO) << "\tchunk_size " << chunk_size_; - LOG(INFO) << "\tnum_left_chunks " << num_left_chunks_; - - // 3. Read model nodes - LOG(INFO) << "Onnx Encoder:"; - GetInputOutputInfo(encoder_session_, &encoder_in_names_, &encoder_out_names_); - LOG(INFO) << "Onnx CTC:"; - GetInputOutputInfo(ctc_session_, &ctc_in_names_, &ctc_out_names_); - LOG(INFO) << "Onnx Rescore:"; - GetInputOutputInfo(rescore_session_, &rescore_in_names_, &rescore_out_names_); -} - -OnnxAsrModel::OnnxAsrModel(const OnnxAsrModel& other) { - // metadatas - encoder_output_size_ = other.encoder_output_size_; - num_blocks_ = other.num_blocks_; - head_ = other.head_; - cnn_module_kernel_ = other.cnn_module_kernel_; - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - - // sessions - encoder_session_ = other.encoder_session_; - ctc_session_ = other.ctc_session_; - rescore_session_ = other.rescore_session_; - - // node names - encoder_in_names_ = other.encoder_in_names_; - encoder_out_names_ = other.encoder_out_names_; - ctc_in_names_ = other.ctc_in_names_; - ctc_out_names_ = other.ctc_out_names_; - rescore_in_names_ = other.rescore_in_names_; - rescore_out_names_ = other.rescore_out_names_; -} - -std::shared_ptr OnnxAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void OnnxAsrModel::Reset() { - offset_ = 0; - encoder_outs_.clear(); - cached_feature_.clear(); - // Reset att_cache - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - if (num_left_chunks_ > 0) { - int required_cache_size = chunk_size_ * 
num_left_chunks_; - offset_ = required_cache_size; - att_cache_.resize(num_blocks_ * head_ * required_cache_size * - encoder_output_size_ / head_ * 2, - 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, required_cache_size, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } else { - att_cache_.resize(0, 0.0); - const int64_t att_cache_shape[] = {num_blocks_, head_, 0, - encoder_output_size_ / head_ * 2}; - att_cache_ort_ = Ort::Value::CreateTensor( - memory_info, att_cache_.data(), att_cache_.size(), att_cache_shape, 4); - } - - // Reset cnn_cache - cnn_cache_.resize( - num_blocks_ * encoder_output_size_ * (cnn_module_kernel_ - 1), 0.0); - const int64_t cnn_cache_shape[] = {num_blocks_, 1, encoder_output_size_, - cnn_module_kernel_ - 1}; - cnn_cache_ort_ = Ort::Value::CreateTensor( - memory_info, cnn_cache_.data(), cnn_cache_.size(), cnn_cache_shape, 4); -} - -void OnnxAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - // 1. Prepare onnx required data, splice cached_feature_ and chunk_feats - // chunk - int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - std::vector feats; - for (size_t i = 0; i < cached_feature_.size(); ++i) { - feats.insert(feats.end(), cached_feature_[i].begin(), - cached_feature_[i].end()); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - feats.insert(feats.end(), chunk_feats[i].begin(), chunk_feats[i].end()); - } - const int64_t feats_shape[3] = {1, num_frames, feature_dim}; - Ort::Value feats_ort = Ort::Value::CreateTensor( - memory_info, feats.data(), feats.size(), feats_shape, 3); - // offset - int64_t offset_int64 = static_cast(offset_); - Ort::Value offset_ort = Ort::Value::CreateTensor( - memory_info, &offset_int64, 1, std::vector{}.data(), 0); - // required_cache_size - int64_t required_cache_size = chunk_size_ * num_left_chunks_; - Ort::Value required_cache_size_ort = Ort::Value::CreateTensor( - memory_info, &required_cache_size, 1, std::vector{}.data(), 0); - // att_mask - Ort::Value att_mask_ort{nullptr}; - std::vector att_mask(required_cache_size + chunk_size_, 1); - if (num_left_chunks_ > 0) { - int chunk_idx = offset_ / chunk_size_ - num_left_chunks_; - if (chunk_idx < num_left_chunks_) { - for (int i = 0; i < (num_left_chunks_ - chunk_idx) * chunk_size_; ++i) { - att_mask[i] = 0; - } - } - const int64_t att_mask_shape[] = {1, 1, required_cache_size + chunk_size_}; - att_mask_ort = Ort::Value::CreateTensor( - memory_info, reinterpret_cast(att_mask.data()), att_mask.size(), - att_mask_shape, 3); - } - - // 2. 
Encoder chunk forward - std::vector inputs; - for (auto name : encoder_in_names_) { - if (!strcmp(name, "chunk")) { - inputs.emplace_back(std::move(feats_ort)); - } else if (!strcmp(name, "offset")) { - inputs.emplace_back(std::move(offset_ort)); - } else if (!strcmp(name, "required_cache_size")) { - inputs.emplace_back(std::move(required_cache_size_ort)); - } else if (!strcmp(name, "att_cache")) { - inputs.emplace_back(std::move(att_cache_ort_)); - } else if (!strcmp(name, "cnn_cache")) { - inputs.emplace_back(std::move(cnn_cache_ort_)); - } else if (!strcmp(name, "att_mask")) { - inputs.emplace_back(std::move(att_mask_ort)); - } - } - - std::vector ort_outputs = encoder_session_->Run( - Ort::RunOptions{nullptr}, encoder_in_names_.data(), inputs.data(), - inputs.size(), encoder_out_names_.data(), encoder_out_names_.size()); - - offset_ += static_cast( - ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[1]); - att_cache_ort_ = std::move(ort_outputs[1]); - cnn_cache_ort_ = std::move(ort_outputs[2]); - - std::vector ctc_inputs; - ctc_inputs.emplace_back(std::move(ort_outputs[0])); - - std::vector ctc_ort_outputs = ctc_session_->Run( - Ort::RunOptions{nullptr}, ctc_in_names_.data(), ctc_inputs.data(), - ctc_inputs.size(), ctc_out_names_.data(), ctc_out_names_.size()); - encoder_outs_.push_back(std::move(ctc_inputs[0])); - - float* logp_data = ctc_ort_outputs[0].GetTensorMutableData(); - auto type_info = ctc_ort_outputs[0].GetTensorTypeAndShapeInfo(); - - int num_outputs = type_info.GetShape()[1]; - int output_dim = type_info.GetShape()[2]; - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), logp_data + i * output_dim, - sizeof(float) * output_dim); - } -} - -float OnnxAsrModel::ComputeAttentionScore(const float* prob, - const std::vector& hyp, int eos, - int decode_out_len) { - float score = 0.0f; - for (size_t j = 0; j < hyp.size(); ++j) { - score += *(prob + j * decode_out_len + hyp[j]); - } - score += *(prob + hyp.size() * decode_out_len + eos); - return score; -} - -void OnnxAsrModel::AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) { - Ort::MemoryInfo memory_info = - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - std::vector hyps_lens; - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_lens.emplace_back(static_cast(length)); - } - - std::vector rescore_input; - int encoder_len = 0; - for (int i = 0; i < encoder_outs_.size(); i++) { - float* encoder_outs_data = encoder_outs_[i].GetTensorMutableData(); - auto type_info = encoder_outs_[i].GetTensorTypeAndShapeInfo(); - for (int j = 0; j < type_info.GetElementCount(); j++) { - rescore_input.emplace_back(encoder_outs_data[j]); - } - encoder_len += type_info.GetShape()[1]; - } - - const int64_t decode_input_shape[] = {1, encoder_len, encoder_output_size_}; - - std::vector hyps_pad; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_pad.emplace_back(sos_); - size_t j = 0; - for (; j < hyp.size(); ++j) { - hyps_pad.emplace_back(hyp[j]); - } - if (j == max_hyps_len - 1) { - continue; - } - for (; j < max_hyps_len - 1; ++j) { 
- hyps_pad.emplace_back(0); - } - } - - const int64_t hyps_pad_shape[] = {num_hyps, max_hyps_len}; - - const int64_t hyps_lens_shape[] = {num_hyps}; - - Ort::Value decode_input_tensor_ = Ort::Value::CreateTensor( - memory_info, rescore_input.data(), rescore_input.size(), - decode_input_shape, 3); - Ort::Value hyps_pad_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_pad.data(), hyps_pad.size(), hyps_pad_shape, 2); - Ort::Value hyps_lens_tensor_ = Ort::Value::CreateTensor( - memory_info, hyps_lens.data(), hyps_lens.size(), hyps_lens_shape, 1); - - std::vector rescore_inputs; - - rescore_inputs.emplace_back(std::move(hyps_pad_tensor_)); - rescore_inputs.emplace_back(std::move(hyps_lens_tensor_)); - rescore_inputs.emplace_back(std::move(decode_input_tensor_)); - - std::vector rescore_outputs = rescore_session_->Run( - Ort::RunOptions{nullptr}, rescore_in_names_.data(), rescore_inputs.data(), - rescore_inputs.size(), rescore_out_names_.data(), - rescore_out_names_.size()); - - float* decoder_outs_data = rescore_outputs[0].GetTensorMutableData(); - float* r_decoder_outs_data = rescore_outputs[1].GetTensorMutableData(); - - auto type_info = rescore_outputs[0].GetTensorTypeAndShapeInfo(); - int decode_out_len = type_info.GetShape()[2]; - - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left to right decoder score - score = ComputeAttentionScore( - decoder_outs_data + max_hyps_len * decode_out_len * i, hyp, eos_, - decode_out_len); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore( - r_decoder_outs_data + max_hyps_len * decode_out_len * i, r_hyp, eos_, - decode_out_len); - } - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/onnx_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/onnx_asr_model.h deleted file mode 100644 index f5d9e9a0c61d728f2fb6d45d1428234abae98c90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/onnx_asr_model.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 ZeXuan Li (lizexuan@huya.com) -// Xingchen Song(sxc19@mails.tsinghua.edu.cn) -// hamddct@gmail.com (Mddct) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_ONNX_ASR_MODEL_H_ -#define DECODER_ONNX_ASR_MODEL_H_ - -#include -#include -#include - -#include "onnxruntime_cxx_api.h" // NOLINT - -#include "decoder/asr_model.h" -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -class OnnxAsrModel : public AsrModel { - public: - static void InitEngineThreads(int num_threads = 1); - - public: - OnnxAsrModel() = default; - OnnxAsrModel(const OnnxAsrModel& other); - void Read(const std::string& model_dir); - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - void GetInputOutputInfo(const std::shared_ptr& session, - std::vector* in_names, - std::vector* out_names); - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const float* prob, const std::vector& hyp, - int eos, int decode_out_len); - - private: - int encoder_output_size_ = 0; - int num_blocks_ = 0; - int cnn_module_kernel_ = 0; - int head_ = 0; - - // sessions - // NOTE(Mddct): The Env holds the logging state used by all other objects. - // One Env must be created before using any other Onnxruntime functionality. - static Ort::Env env_; // shared environment across threads. - static Ort::SessionOptions session_options_; - std::shared_ptr encoder_session_ = nullptr; - std::shared_ptr rescore_session_ = nullptr; - std::shared_ptr ctc_session_ = nullptr; - - // node names - std::vector encoder_in_names_, encoder_out_names_; - std::vector ctc_in_names_, ctc_out_names_; - std::vector rescore_in_names_, rescore_out_names_; - - // caches - Ort::Value att_cache_ort_{nullptr}; - Ort::Value cnn_cache_ort_{nullptr}; - std::vector encoder_outs_; - // NOTE: Instead of making a copy of the xx_cache, ONNX only maintains - // its data pointer when initializing xx_cache_ort (see https://github.com/ - // microsoft/onnxruntime/blob/master/onnxruntime/core/framework - // /tensor.cc#L102-L129), so we need the following variables to keep - // our data "alive" during the lifetime of decoder. - std::vector att_cache_; - std::vector cnn_cache_; -}; - -} // namespace wenet - -#endif // DECODER_ONNX_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/params.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/params.h deleted file mode 100644 index 3edc877f1bb6d876ca087cab8e4ed00d42e97e63..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/params.h +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_PARAMS_H_ -#define DECODER_PARAMS_H_ - -#include -#include -#include -#include - -#include "decoder/asr_decoder.h" -#ifdef USE_ONNX -#include "decoder/onnx_asr_model.h" -#endif -#ifdef USE_TORCH -#include "decoder/torch_asr_model.h" -#endif -#ifdef USE_XPU -#include "xpu/xpu_asr_model.h" -#endif -#ifdef USE_BPU -#include "bpu/bpu_asr_model.h" -#endif -#include "frontend/feature_pipeline.h" -#include "post_processor/post_processor.h" -#include "utils/flags.h" -#include "utils/string.h" - -DEFINE_int32(device_id, 0, "set XPU DeviceID for ASR model"); - -// TorchAsrModel flags -DEFINE_string(model_path, "", "pytorch exported model path"); -// OnnxAsrModel flags -DEFINE_string(onnx_dir, "", "directory where the onnx model is saved"); -// XPUAsrModel flags -DEFINE_string(xpu_model_dir, "", - "directory where the XPU model and weights is saved"); -// BPUAsrModel flags -DEFINE_string(bpu_model_dir, "", - "directory where the HORIZON BPU model is saved"); - -// FeaturePipelineConfig flags -DEFINE_int32(num_bins, 80, "num mel bins for fbank feature"); -DEFINE_int32(sample_rate, 16000, "sample rate for audio"); - -// TLG fst -DEFINE_string(fst_path, "", "TLG fst path"); - -// DecodeOptions flags -DEFINE_int32(chunk_size, 16, "decoding chunk size"); -DEFINE_int32(num_left_chunks, -1, "left chunks in decoding"); -DEFINE_double(ctc_weight, 0.5, - "ctc weight when combining ctc score and rescoring score"); -DEFINE_double(rescoring_weight, 1.0, - "rescoring weight when combining ctc score and rescoring score"); -DEFINE_double(reverse_weight, 0.0, - "used for bitransformer rescoring. it must be 0.0 if decoder is" - "conventional transformer decoder, and only reverse_weight > 0.0" - "dose the right to left decoder will be calculated and used"); -DEFINE_int32(max_active, 7000, "max active states in ctc wfst search"); -DEFINE_int32(min_active, 200, "min active states in ctc wfst search"); -DEFINE_double(beam, 16.0, "beam in ctc wfst search"); -DEFINE_double(lattice_beam, 10.0, "lattice beam in ctc wfst search"); -DEFINE_double(acoustic_scale, 1.0, "acoustic scale for ctc wfst search"); -DEFINE_double(blank_skip_thresh, 1.0, - "blank skip thresh for ctc wfst search, 1.0 means no skip"); -DEFINE_double(blank_scale, 1.0, "blank scale for ctc wfst search"); -DEFINE_double(length_penalty, 0.0, - "length penalty ctc wfst search, will not" - "apply on self-loop arc, for balancing the del/ins ratio, " - "suggest set to -3.0"); -DEFINE_int32(nbest, 10, "nbest for ctc wfst or prefix search"); - -// SymbolTable flags -DEFINE_string(dict_path, "", - "dict symbol table path, required when LM is enabled"); -DEFINE_string(unit_path, "", - "e2e model unit symbol table, it is used in both " - "with/without LM scenarios for context/timestamp"); - -// Context flags -DEFINE_string(context_path, "", "context path, is used to build context graph"); -DEFINE_double(context_score, 3.0, "is used to rescore the decoded result"); - -// PostProcessOptions flags -DEFINE_int32(language_type, 0, - "remove spaces according to language type" - "0x00 = kMandarinEnglish, " - "0x01 = kIndoEuropean"); -DEFINE_bool(lowercase, true, "lowercase final result if needed"); - -namespace wenet { -std::shared_ptr InitFeaturePipelineConfigFromFlags() { - auto feature_config = std::make_shared( - FLAGS_num_bins, FLAGS_sample_rate); - return feature_config; -} - -std::shared_ptr InitDecodeOptionsFromFlags() { - auto decode_config = std::make_shared(); - decode_config->chunk_size = FLAGS_chunk_size; - decode_config->num_left_chunks = 
FLAGS_num_left_chunks; - decode_config->ctc_weight = FLAGS_ctc_weight; - decode_config->reverse_weight = FLAGS_reverse_weight; - decode_config->rescoring_weight = FLAGS_rescoring_weight; - decode_config->ctc_wfst_search_opts.max_active = FLAGS_max_active; - decode_config->ctc_wfst_search_opts.min_active = FLAGS_min_active; - decode_config->ctc_wfst_search_opts.beam = FLAGS_beam; - decode_config->ctc_wfst_search_opts.lattice_beam = FLAGS_lattice_beam; - decode_config->ctc_wfst_search_opts.acoustic_scale = FLAGS_acoustic_scale; - decode_config->ctc_wfst_search_opts.blank_skip_thresh = - FLAGS_blank_skip_thresh; - decode_config->ctc_wfst_search_opts.blank_scale = FLAGS_blank_scale; - decode_config->ctc_wfst_search_opts.length_penalty = FLAGS_length_penalty; - decode_config->ctc_wfst_search_opts.nbest = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.first_beam_size = FLAGS_nbest; - decode_config->ctc_prefix_search_opts.second_beam_size = FLAGS_nbest; - return decode_config; -} - -std::shared_ptr InitDecodeResourceFromFlags() { - auto resource = std::make_shared(); - const int kNumGemmThreads = 1; - if (!FLAGS_onnx_dir.empty()) { -#ifdef USE_ONNX - LOG(INFO) << "Reading onnx model "; - OnnxAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_onnx_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DONNX=ON'."; -#endif - } else if (!FLAGS_model_path.empty()) { -#ifdef USE_TORCH - LOG(INFO) << "Reading torch model " << FLAGS_model_path; - TorchAsrModel::InitEngineThreads(kNumGemmThreads); - auto model = std::make_shared(); - model->Read(FLAGS_model_path); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DTORCH=ON'."; -#endif - } else if (!FLAGS_xpu_model_dir.empty()) { -#ifdef USE_XPU - LOG(INFO) << "Reading XPU WeNet model weight from " << FLAGS_xpu_model_dir; - auto model = std::make_shared(); - model->SetEngineThreads(kNumGemmThreads); - model->SetDeviceId(FLAGS_device_id); - model->Read(FLAGS_xpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DXPU=ON'."; -#endif - } else if (!FLAGS_bpu_model_dir.empty()) { -#ifdef USE_BPU - LOG(INFO) << "Reading Horizon BPU model from " << FLAGS_bpu_model_dir; - auto model = std::make_shared(); - model->Read(FLAGS_bpu_model_dir); - resource->model = model; -#else - LOG(FATAL) << "Please rebuild with cmake options '-DBPU=ON'."; -#endif - } else { - LOG(FATAL) << "Please set ONNX, TORCH, XPU or BPU model path!!!"; - } - - LOG(INFO) << "Reading unit table " << FLAGS_unit_path; - auto unit_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_unit_path)); - CHECK(unit_table != nullptr); - resource->unit_table = unit_table; - - if (!FLAGS_fst_path.empty()) { // With LM - CHECK(!FLAGS_dict_path.empty()); - LOG(INFO) << "Reading fst " << FLAGS_fst_path; - auto fst = std::shared_ptr>( - fst::Fst::Read(FLAGS_fst_path)); - CHECK(fst != nullptr); - resource->fst = fst; - - LOG(INFO) << "Reading symbol table " << FLAGS_dict_path; - auto symbol_table = std::shared_ptr( - fst::SymbolTable::ReadText(FLAGS_dict_path)); - CHECK(symbol_table != nullptr); - resource->symbol_table = symbol_table; - } else { // Without LM, symbol_table is the same as unit_table - resource->symbol_table = unit_table; - } - - if (!FLAGS_context_path.empty()) { - LOG(INFO) << "Reading context " << FLAGS_context_path; - std::vector contexts; - std::ifstream infile(FLAGS_context_path); - std::string context; - 
while (getline(infile, context)) { - contexts.emplace_back(Trim(context)); - } - ContextConfig config; - config.context_score = FLAGS_context_score; - resource->context_graph = std::make_shared(config); - resource->context_graph->BuildContextGraph(contexts, - resource->symbol_table); - } - - PostProcessOptions post_process_opts; - post_process_opts.language_type = - FLAGS_language_type == 0 ? kMandarinEnglish : kIndoEuropean; - post_process_opts.lowercase = FLAGS_lowercase; - resource->post_processor = - std::make_shared(std::move(post_process_opts)); - return resource; -} - -} // namespace wenet - -#endif // DECODER_PARAMS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/search_interface.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/search_interface.h deleted file mode 100644 index 25bad26705f8be44561d2c686f50a63035b14bbf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/search_interface.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef DECODER_SEARCH_INTERFACE_H_ -#define DECODER_SEARCH_INTERFACE_H_ - -namespace wenet { - -#include - -enum SearchType { - kPrefixBeamSearch = 0x00, - kWfstBeamSearch = 0x01, -}; - -class SearchInterface { - public: - virtual ~SearchInterface() {} - virtual void Search(const std::vector>& logp) = 0; - virtual void Reset() = 0; - virtual void FinalizeSearch() = 0; - - virtual SearchType Type() const = 0; - // N-best inputs id - virtual const std::vector>& Inputs() const = 0; - // N-best outputs id - virtual const std::vector>& Outputs() const = 0; - // N-best likelihood - virtual const std::vector& Likelihood() const = 0; - // N-best timestamp - virtual const std::vector>& Times() const = 0; -}; - -} // namespace wenet - -#endif // DECODER_SEARCH_INTERFACE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/torch_asr_model.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/torch_asr_model.cc deleted file mode 100644 index 3abca283e12f5c173c9511707229ea82b31f26d8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/torch_asr_model.cc +++ /dev/null @@ -1,278 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "decoder/torch_asr_model.h" - -#include -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -namespace wenet { - -#ifndef IOS -void TorchAsrModel::InitEngineThreads(int num_threads) { - // For multi-thread performance - at::set_num_threads(num_threads); - VLOG(1) << "Num intra-op threads: " << at::get_num_threads(); -} -#endif - -void TorchAsrModel::Read(const std::string& model_path) { - torch::DeviceType device = at::kCPU; -#ifdef USE_GPU - if (!torch::cuda::is_available()) { - VLOG(1) << "CUDA is not available! Please check your GPU settings"; - throw std::runtime_error("CUDA is not available!"); - } else { - VLOG(1) << "CUDA available! Running on GPU"; - device = at::kCUDA; - } -#endif - torch::jit::script::Module model = torch::jit::load(model_path, device); - model_ = std::make_shared(std::move(model)); - torch::NoGradGuard no_grad; - model_->eval(); - torch::jit::IValue o1 = model_->run_method("subsampling_rate"); - CHECK_EQ(o1.isInt(), true); - subsampling_rate_ = o1.toInt(); - torch::jit::IValue o2 = model_->run_method("right_context"); - CHECK_EQ(o2.isInt(), true); - right_context_ = o2.toInt(); - torch::jit::IValue o3 = model_->run_method("sos_symbol"); - CHECK_EQ(o3.isInt(), true); - sos_ = o3.toInt(); - torch::jit::IValue o4 = model_->run_method("eos_symbol"); - CHECK_EQ(o4.isInt(), true); - eos_ = o4.toInt(); - torch::jit::IValue o5 = model_->run_method("is_bidirectional_decoder"); - CHECK_EQ(o5.isBool(), true); - is_bidirectional_decoder_ = o5.toBool(); - - VLOG(1) << "Torch Model Info:"; - VLOG(1) << "\tsubsampling_rate " << subsampling_rate_; - VLOG(1) << "\tright context " << right_context_; - VLOG(1) << "\tsos " << sos_; - VLOG(1) << "\teos " << eos_; - VLOG(1) << "\tis bidirectional decoder " << is_bidirectional_decoder_; -} - -TorchAsrModel::TorchAsrModel(const TorchAsrModel& other) { - // 1. Init the model info - right_context_ = other.right_context_; - subsampling_rate_ = other.subsampling_rate_; - sos_ = other.sos_; - eos_ = other.eos_; - is_bidirectional_decoder_ = other.is_bidirectional_decoder_; - chunk_size_ = other.chunk_size_; - num_left_chunks_ = other.num_left_chunks_; - offset_ = other.offset_; - // 2. Model copy, just copy the model ptr since: - // PyTorch allows using multiple CPU threads during TorchScript model - // inference, please see https://pytorch.org/docs/stable/notes/cpu_ - // threading_torchscript_inference.html - model_ = other.model_; - - // NOTE(Binbin Zhang): - // inner states for forward are not copied here. -} - -std::shared_ptr TorchAsrModel::Copy() const { - auto asr_model = std::make_shared(*this); - // Reset the inner states for new decoding - asr_model->Reset(); - return asr_model; -} - -void TorchAsrModel::Reset() { - offset_ = 0; - att_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - cnn_cache_ = std::move(torch::zeros({0, 0, 0, 0})); - encoder_outs_.clear(); - cached_feature_.clear(); -} - -void TorchAsrModel::ForwardEncoderFunc( - const std::vector>& chunk_feats, - std::vector>* out_prob) { - // 1. Prepare libtorch required data, splice cached_feature_ and chunk_feats - // The first dimension is for batchsize, which is 1. 
- int num_frames = cached_feature_.size() + chunk_feats.size(); - const int feature_dim = chunk_feats[0].size(); - torch::Tensor feats = - torch::zeros({1, num_frames, feature_dim}, torch::kFloat); - for (size_t i = 0; i < cached_feature_.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(cached_feature_[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][i] = std::move(row); - } - for (size_t i = 0; i < chunk_feats.size(); ++i) { - torch::Tensor row = - torch::from_blob(const_cast(chunk_feats[i].data()), - {feature_dim}, torch::kFloat) - .clone(); - feats[0][cached_feature_.size() + i] = std::move(row); - } - - // 2. Encoder chunk forward -#ifdef USE_GPU - feats = feats.to(at::kCUDA); - att_cache_ = att_cache_.to(at::kCUDA); - cnn_cache_ = cnn_cache_.to(at::kCUDA); -#endif - int required_cache_size = chunk_size_ * num_left_chunks_; - torch::NoGradGuard no_grad; - std::vector inputs = {feats, offset_, required_cache_size, - att_cache_, cnn_cache_}; - - // Refer interfaces in wenet/transformer/asr_model.py - auto outputs = - model_->get_method("forward_encoder_chunk")(inputs).toTuple()->elements(); - CHECK_EQ(outputs.size(), 3); -#ifdef USE_GPU - torch::Tensor chunk_out = outputs[0].toTensor().to(at::kCPU); - att_cache_ = outputs[1].toTensor().to(at::kCPU); - cnn_cache_ = outputs[2].toTensor().to(at::kCPU); -#else - torch::Tensor chunk_out = outputs[0].toTensor(); - att_cache_ = outputs[1].toTensor(); - cnn_cache_ = outputs[2].toTensor(); -#endif - offset_ += chunk_out.size(1); - - // The first dimension of returned value is for batchsize, which is 1 -#ifdef USE_GPU - chunk_out = chunk_out.to(at::kCUDA); - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor(); - ctc_log_probs = ctc_log_probs.to(at::kCPU)[0]; - encoder_outs_.push_back(std::move(chunk_out.to(at::kCPU))); -#else - torch::Tensor ctc_log_probs = - model_->run_method("ctc_activation", chunk_out).toTensor()[0]; - encoder_outs_.push_back(std::move(chunk_out)); -#endif - - // Copy to output - int num_outputs = ctc_log_probs.size(0); - int output_dim = ctc_log_probs.size(1); - out_prob->resize(num_outputs); - for (int i = 0; i < num_outputs; i++) { - (*out_prob)[i].resize(output_dim); - memcpy((*out_prob)[i].data(), ctc_log_probs[i].data_ptr(), - sizeof(float) * output_dim); - } -} - -float TorchAsrModel::ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, - int eos) { - float score = 0.0f; - auto accessor = prob.accessor(); - for (size_t j = 0; j < hyp.size(); ++j) { - score += accessor[j][hyp[j]]; - } - score += accessor[hyp.size()][eos]; - return score; -} - -void TorchAsrModel::AttentionRescoring( - const std::vector>& hyps, float reverse_weight, - std::vector* rescoring_score) { - CHECK(rescoring_score != nullptr); - int num_hyps = hyps.size(); - rescoring_score->resize(num_hyps, 0.0f); - - if (num_hyps == 0) { - return; - } - // No encoder output - if (encoder_outs_.size() == 0) { - return; - } - - torch::NoGradGuard no_grad; - // Step 1: Prepare input for libtorch - torch::Tensor hyps_length = torch::zeros({num_hyps}, torch::kLong); - int max_hyps_len = 0; - for (size_t i = 0; i < num_hyps; ++i) { - int length = hyps[i].size() + 1; - max_hyps_len = std::max(length, max_hyps_len); - hyps_length[i] = static_cast(length); - } - torch::Tensor hyps_tensor = - torch::zeros({num_hyps, max_hyps_len}, torch::kLong); - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - hyps_tensor[i][0] = sos_; - for (size_t j = 0; j < 
hyp.size(); ++j) { - hyps_tensor[i][j + 1] = hyp[j]; - } - } - - // Step 2: Forward attention decoder by hyps and corresponding encoder_outs_ - torch::Tensor encoder_out = torch::cat(encoder_outs_, 1); -#ifdef USE_GPU - hyps_tensor = hyps_tensor.to(at::kCUDA); - hyps_length = hyps_length.to(at::kCUDA); - encoder_out = encoder_out.to(at::kCUDA); -#endif - auto outputs = model_ - ->run_method("forward_attention_decoder", hyps_tensor, - hyps_length, encoder_out, reverse_weight) - .toTuple() - ->elements(); -#ifdef USE_GPU - auto probs = outputs[0].toTensor().to(at::kCPU); - auto r_probs = outputs[1].toTensor().to(at::kCPU); -#else - auto probs = outputs[0].toTensor(); - auto r_probs = outputs[1].toTensor(); -#endif - CHECK_EQ(probs.size(0), num_hyps); - CHECK_EQ(probs.size(1), max_hyps_len); - - // Step 3: Compute rescoring score - for (size_t i = 0; i < num_hyps; ++i) { - const std::vector& hyp = hyps[i]; - float score = 0.0f; - // left-to-right decoder score - score = ComputeAttentionScore(probs[i], hyp, eos_); - // Optional: Used for right to left score - float r_score = 0.0f; - if (is_bidirectional_decoder_ && reverse_weight > 0) { - // right-to-left score - CHECK_EQ(r_probs.size(0), num_hyps); - CHECK_EQ(r_probs.size(1), max_hyps_len); - std::vector r_hyp(hyp.size()); - std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin()); - // right to left decoder score - r_score = ComputeAttentionScore(r_probs[i], r_hyp, eos_); - } - - // combined left-to-right and right-to-left score - (*rescoring_score)[i] = - score * (1 - reverse_weight) + r_score * reverse_weight; - } -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/torch_asr_model.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/torch_asr_model.h deleted file mode 100644 index a3cebe08798f1cad60ca4cd73c7b2488173b6114..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/decoder/torch_asr_model.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu) -// 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef DECODER_TORCH_ASR_MODEL_H_ -#define DECODER_TORCH_ASR_MODEL_H_ - -#include -#include -#include - -#include "torch/script.h" -#ifndef IOS -#include "torch/torch.h" -#endif - -#include "decoder/asr_model.h" -#include "utils/utils.h" - -namespace wenet { - -class TorchAsrModel : public AsrModel { - public: -#ifndef IOS - static void InitEngineThreads(int num_threads = 1); -#endif - - public: - using TorchModule = torch::jit::script::Module; - TorchAsrModel() = default; - TorchAsrModel(const TorchAsrModel& other); - void Read(const std::string& model_path); - std::shared_ptr torch_model() const { return model_; } - void Reset() override; - void AttentionRescoring(const std::vector>& hyps, - float reverse_weight, - std::vector* rescoring_score) override; - std::shared_ptr Copy() const override; - - protected: - void ForwardEncoderFunc(const std::vector>& chunk_feats, - std::vector>* ctc_prob) override; - - float ComputeAttentionScore(const torch::Tensor& prob, - const std::vector& hyp, int eos); - - private: - std::shared_ptr model_ = nullptr; - std::vector encoder_outs_; - // transformer/conformer attention cache - torch::Tensor att_cache_ = torch::zeros({0, 0, 0, 0}); - // conformer-only conv_module cache - torch::Tensor cnn_cache_ = torch::zeros({0, 0, 0, 0}); -}; - -} // namespace wenet - -#endif // DECODER_TORCH_ASR_MODEL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/CMakeLists.txt deleted file mode 100644 index 78872257e43bb9a6ffcedaae977bf0173817ae50..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library(frontend STATIC - feature_pipeline.cc - fft.cc -) -target_link_libraries(frontend PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/fbank.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/fbank.h deleted file mode 100644 index 5a650dc035b8e244388cc1f2e0b9512654de7fda..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/fbank.h +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef FRONTEND_FBANK_H_ -#define FRONTEND_FBANK_H_ - -#include -#include -#include -#include -#include - -#include "frontend/fft.h" -#include "utils/log.h" - -namespace wenet { - -// This code is based on kaldi Fbank implementation, please see -// https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.cc -class Fbank { - public: - Fbank(int num_bins, int sample_rate, int frame_length, int frame_shift) - : num_bins_(num_bins), - sample_rate_(sample_rate), - frame_length_(frame_length), - frame_shift_(frame_shift), - use_log_(true), - remove_dc_offset_(true), - generator_(0), - distribution_(0, 1.0), - dither_(0.0) { - fft_points_ = UpperPowerOfTwo(frame_length_); - // generate bit reversal table and trigonometric function table - const int fft_points_4 = fft_points_ / 4; - bitrev_.resize(fft_points_); - sintbl_.resize(fft_points_ + fft_points_4); - make_sintbl(fft_points_, sintbl_.data()); - make_bitrev(fft_points_, bitrev_.data()); - - int num_fft_bins = fft_points_ / 2; - float fft_bin_width = static_cast(sample_rate_) / fft_points_; - int low_freq = 20, high_freq = sample_rate_ / 2; - float mel_low_freq = MelScale(low_freq); - float mel_high_freq = MelScale(high_freq); - float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1); - bins_.resize(num_bins_); - center_freqs_.resize(num_bins_); - for (int bin = 0; bin < num_bins; ++bin) { - float left_mel = mel_low_freq + bin * mel_freq_delta, - center_mel = mel_low_freq + (bin + 1) * mel_freq_delta, - right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; - center_freqs_[bin] = InverseMelScale(center_mel); - std::vector this_bin(num_fft_bins); - int first_index = -1, last_index = -1; - for (int i = 0; i < num_fft_bins; ++i) { - float freq = (fft_bin_width * i); // Center frequency of this fft - // bin. 
- float mel = MelScale(freq); - if (mel > left_mel && mel < right_mel) { - float weight; - if (mel <= center_mel) - weight = (mel - left_mel) / (center_mel - left_mel); - else - weight = (right_mel - mel) / (right_mel - center_mel); - this_bin[i] = weight; - if (first_index == -1) first_index = i; - last_index = i; - } - } - CHECK(first_index != -1 && last_index >= first_index); - bins_[bin].first = first_index; - int size = last_index + 1 - first_index; - bins_[bin].second.resize(size); - for (int i = 0; i < size; ++i) { - bins_[bin].second[i] = this_bin[first_index + i]; - } - } - - // povey window - povey_window_.resize(frame_length_); - double a = M_2PI / (frame_length - 1); - for (int i = 0; i < frame_length; ++i) { - povey_window_[i] = pow(0.5 - 0.5 * cos(a * i), 0.85); - } - } - - void set_use_log(bool use_log) { use_log_ = use_log; } - - void set_remove_dc_offset(bool remove_dc_offset) { - remove_dc_offset_ = remove_dc_offset; - } - - void set_dither(float dither) { dither_ = dither; } - - int num_bins() const { return num_bins_; } - - static inline float InverseMelScale(float mel_freq) { - return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f); - } - - static inline float MelScale(float freq) { - return 1127.0f * logf(1.0f + freq / 700.0f); - } - - static int UpperPowerOfTwo(int n) { - return static_cast(pow(2, ceil(log(n) / log(2)))); - } - - // pre emphasis - void PreEmphasis(float coeff, std::vector* data) const { - if (coeff == 0.0) return; - for (int i = data->size() - 1; i > 0; i--) - (*data)[i] -= coeff * (*data)[i - 1]; - (*data)[0] -= coeff * (*data)[0]; - } - - // Apply povey window on data in place - void Povey(std::vector* data) const { - CHECK_GE(data->size(), povey_window_.size()); - for (size_t i = 0; i < povey_window_.size(); ++i) { - (*data)[i] *= povey_window_[i]; - } - } - - // Compute fbank feat, return num frames - int Compute(const std::vector& wave, - std::vector>* feat) { - int num_samples = wave.size(); - if (num_samples < frame_length_) return 0; - int num_frames = 1 + ((num_samples - frame_length_) / frame_shift_); - feat->resize(num_frames); - std::vector fft_real(fft_points_, 0), fft_img(fft_points_, 0); - std::vector power(fft_points_ / 2); - for (int i = 0; i < num_frames; ++i) { - std::vector data(wave.data() + i * frame_shift_, - wave.data() + i * frame_shift_ + frame_length_); - // optional add noise - if (dither_ != 0.0) { - for (size_t j = 0; j < data.size(); ++j) - data[j] += dither_ * distribution_(generator_); - } - // optinal remove dc offset - if (remove_dc_offset_) { - float mean = 0.0; - for (size_t j = 0; j < data.size(); ++j) mean += data[j]; - mean /= data.size(); - for (size_t j = 0; j < data.size(); ++j) data[j] -= mean; - } - - PreEmphasis(0.97, &data); - Povey(&data); - // copy data to fft_real - memset(fft_img.data(), 0, sizeof(float) * fft_points_); - memset(fft_real.data() + frame_length_, 0, - sizeof(float) * (fft_points_ - frame_length_)); - memcpy(fft_real.data(), data.data(), sizeof(float) * frame_length_); - fft(bitrev_.data(), sintbl_.data(), fft_real.data(), fft_img.data(), - fft_points_); - // power - for (int j = 0; j < fft_points_ / 2; ++j) { - power[j] = fft_real[j] * fft_real[j] + fft_img[j] * fft_img[j]; - } - - (*feat)[i].resize(num_bins_); - // cepstral coefficients, triangle filter array - for (int j = 0; j < num_bins_; ++j) { - float mel_energy = 0.0; - int s = bins_[j].first; - for (size_t k = 0; k < bins_[j].second.size(); ++k) { - mel_energy += bins_[j].second[k] * power[s + k]; - } - // optional use log - if 
(use_log_) { - if (mel_energy < std::numeric_limits::epsilon()) - mel_energy = std::numeric_limits::epsilon(); - mel_energy = logf(mel_energy); - } - - (*feat)[i][j] = mel_energy; - } - } - return num_frames; - } - - private: - int num_bins_; - int sample_rate_; - int frame_length_, frame_shift_; - int fft_points_; - bool use_log_; - bool remove_dc_offset_; - std::vector center_freqs_; - std::vector>> bins_; - std::vector povey_window_; - std::default_random_engine generator_; - std::normal_distribution distribution_; - float dither_; - - // bit reversal table - std::vector bitrev_; - // trigonometric function table - std::vector sintbl_; -}; - -} // namespace wenet - -#endif // FRONTEND_FBANK_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/feature_pipeline.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/feature_pipeline.cc deleted file mode 100644 index ab450b15cd35ebd8101a3bcdec4f963a73bed10c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/feature_pipeline.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "frontend/feature_pipeline.h" - -#include -#include - -namespace wenet { - -FeaturePipeline::FeaturePipeline(const FeaturePipelineConfig& config) - : config_(config), - feature_dim_(config.num_bins), - fbank_(config.num_bins, config.sample_rate, config.frame_length, - config.frame_shift), - num_frames_(0), - input_finished_(false) {} - -void FeaturePipeline::AcceptWaveform(const float* pcm, const int size) { - std::vector> feats; - std::vector waves; - waves.insert(waves.end(), remained_wav_.begin(), remained_wav_.end()); - waves.insert(waves.end(), pcm, pcm + size); - int num_frames = fbank_.Compute(waves, &feats); - feature_queue_.Push(std::move(feats)); - num_frames_ += num_frames; - - int left_samples = waves.size() - config_.frame_shift * num_frames; - remained_wav_.resize(left_samples); - std::copy(waves.begin() + config_.frame_shift * num_frames, waves.end(), - remained_wav_.begin()); - // We are still adding wave, notify input is not finished - finish_condition_.notify_one(); -} - -void FeaturePipeline::AcceptWaveform(const int16_t* pcm, const int size) { - auto* float_pcm = new float[size]; - for (size_t i = 0; i < size; i++) { - float_pcm[i] = static_cast(pcm[i]); - } - this->AcceptWaveform(float_pcm, size); - delete[] float_pcm; -} - -void FeaturePipeline::set_input_finished() { - CHECK(!input_finished_); - { - std::lock_guard lock(mutex_); - input_finished_ = true; - } - finish_condition_.notify_one(); -} - -bool FeaturePipeline::ReadOne(std::vector* feat) { - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or set_input_finished() - finish_condition_.wait(lock); - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. - if (!feature_queue_.Empty()) { - *feat = std::move(feature_queue_.Pop()); - return true; - } else { - return false; - } - } -} - -bool FeaturePipeline::Read(int num_frames, - std::vector>* feats) { - feats->clear(); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - std::unique_lock lock(mutex_); - while (!input_finished_) { - // This will release the lock and wait for notify_one() - // from AcceptWaveform() or set_input_finished() - finish_condition_.wait(lock); - if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } - } - CHECK(input_finished_); - // Double check queue.empty, see issue#893 for detailed discussions. 
- if (feature_queue_.Size() >= num_frames) { - *feats = std::move(feature_queue_.Pop(num_frames)); - return true; - } else { - *feats = std::move(feature_queue_.Pop(feature_queue_.Size())); - return false; - } - } -} - -void FeaturePipeline::Reset() { - input_finished_ = false; - num_frames_ = 0; - remained_wav_.clear(); - feature_queue_.Clear(); -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/feature_pipeline.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/feature_pipeline.h deleted file mode 100644 index 9918d6b573255795e0e665f0a9598c44be625c19..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/feature_pipeline.h +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) 2017 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef FRONTEND_FEATURE_PIPELINE_H_ -#define FRONTEND_FEATURE_PIPELINE_H_ - -#include -#include -#include -#include - -#include "frontend/fbank.h" -#include "utils/blocking_queue.h" -#include "utils/log.h" - -namespace wenet { - -struct FeaturePipelineConfig { - int num_bins; - int sample_rate; - int frame_length; - int frame_shift; - FeaturePipelineConfig(int num_bins, int sample_rate) - : num_bins(num_bins), // 80 dim fbank - sample_rate(sample_rate) { // 16k sample rate - frame_length = sample_rate / 1000 * 25; // frame length 25ms - frame_shift = sample_rate / 1000 * 10; // frame shift 10ms - } - - void Info() const { - LOG(INFO) << "feature pipeline config" - << " num_bins " << num_bins << " frame_length " << frame_length - << " frame_shift " << frame_shift; - } -}; - -// Typically, FeaturePipeline is used in two threads: one thread A calls -// AcceptWaveform() to add raw wav data and set_input_finished() to notice -// the end of input wav, another thread B (decoder thread) calls Read() to -// consume features.So a BlockingQueue is used to make this class thread safe. - -// The Read() is designed as a blocking method when there is no feature -// in feature_queue_ and the input is not finished. - -// See bin/decoder_main.cc, websocket/websocket_server.cc and -// decoder/torch_asr_decoder.cc for usage - -class FeaturePipeline { - public: - explicit FeaturePipeline(const FeaturePipelineConfig& config); - - // The feature extraction is done in AcceptWaveform(). - void AcceptWaveform(const float* pcm, const int size); - void AcceptWaveform(const int16_t* pcm, const int size); - - // Current extracted frames number. - int num_frames() const { return num_frames_; } - int feature_dim() const { return feature_dim_; } - const FeaturePipelineConfig& config() const { return config_; } - - // The caller should call this method when speech input is end. - // Never call AcceptWaveform() after calling set_input_finished() ! 
- void set_input_finished(); - bool input_finished() const { return input_finished_; } - - // Return False if input is finished and no feature could be read. - // Return True if a feature is read. - // This function is a blocking method. It will block the thread when - // there is no feature in feature_queue_ and the input is not finished. - bool ReadOne(std::vector* feat); - - // Read #num_frames frame features. - // Return False if less than #num_frames features are read and the - // input is finished. - // Return True if #num_frames features are read. - // This function is a blocking method when there is no feature - // in feature_queue_ and the input is not finished. - bool Read(int num_frames, std::vector>* feats); - - void Reset(); - bool IsLastFrame(int frame) const { - return input_finished_ && (frame == num_frames_ - 1); - } - - int NumQueuedFrames() const { return feature_queue_.Size(); } - - private: - const FeaturePipelineConfig& config_; - int feature_dim_; - Fbank fbank_; - - BlockingQueue> feature_queue_; - int num_frames_; - bool input_finished_; - - // The feature extraction is done in AcceptWaveform(). - // This waveform sample points are consumed by frame size. - // The residual waveform sample points after framing are - // kept to be used in next AcceptWaveform() calling. - std::vector remained_wav_; - - // Used to block the Read when there is no feature in feature_queue_ - // and the input is not finished. - mutable std::mutex mutex_; - std::condition_variable finish_condition_; -}; - -} // namespace wenet - -#endif // FRONTEND_FEATURE_PIPELINE_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/fft.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/fft.cc deleted file mode 100644 index 9e05f854e79ea733d0411045385e924c2670b7f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/fft.cc +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include -#include -#include - -#include "frontend/fft.h" - -namespace wenet { - -void make_sintbl(int n, float* sintbl) { - int i, n2, n4, n8; - float c, s, dc, ds, t; - - n2 = n / 2; - n4 = n / 4; - n8 = n / 8; - t = sin(M_PI / n); - dc = 2 * t * t; - ds = sqrt(dc * (2 - dc)); - t = 2 * dc; - c = sintbl[n4] = 1; - s = sintbl[0] = 0; - for (i = 1; i < n8; ++i) { - c -= dc; - dc += t * c; - s += ds; - ds -= t * s; - sintbl[i] = s; - sintbl[n4 - i] = c; - } - if (n8 != 0) sintbl[n8] = sqrt(0.5); - for (i = 0; i < n4; ++i) sintbl[n2 - i] = sintbl[i]; - for (i = 0; i < n2 + n4; ++i) sintbl[i + n2] = -sintbl[i]; -} - -void make_bitrev(int n, int* bitrev) { - int i, j, k, n2; - - n2 = n / 2; - i = j = 0; - for (;;) { - bitrev[i] = j; - if (++i >= n) break; - k = n2; - while (k <= j) { - j -= k; - k /= 2; - } - j += k; - } -} - -// bitrev: bit reversal table -// sintbl: trigonometric function table -// x:real part -// y:image part -// n: fft length -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n) { - int i, j, k, ik, h, d, k2, n4, inverse; - float t, s, c, dx, dy; - - /* preparation */ - if (n < 0) { - n = -n; - inverse = 1; /* inverse transform */ - } else { - inverse = 0; - } - n4 = n / 4; - if (n == 0) { - return 0; - } - - /* bit reversal */ - for (i = 0; i < n; ++i) { - j = bitrev[i]; - if (i < j) { - t = x[i]; - x[i] = x[j]; - x[j] = t; - t = y[i]; - y[i] = y[j]; - y[j] = t; - } - } - - /* transformation */ - for (k = 1; k < n; k = k2) { - h = 0; - k2 = k + k; - d = n / k2; - for (j = 0; j < k; ++j) { - c = sintbl[h + n4]; - if (inverse) - s = -sintbl[h]; - else - s = sintbl[h]; - for (i = j; i < n; i += k2) { - ik = i + k; - dx = s * y[ik] + c * x[ik]; - dy = c * y[ik] - s * x[ik]; - x[ik] = x[i] - dx; - x[i] += dx; - y[ik] = y[i] - dy; - y[i] += dy; - } - h += d; - } - } - if (inverse) { - /* divide by n in case of the inverse transformation */ - for (i = 0; i < n; ++i) { - x[i] /= n; - y[i] /= n; - } - } - return 0; /* finished successfully */ -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/fft.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/fft.h deleted file mode 100644 index 6b92e406c44b4768eaee6e734f55bb39cd9af28b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/fft.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2016 Network -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#ifndef FRONTEND_FFT_H_ -#define FRONTEND_FFT_H_ - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -namespace wenet { - -// Fast Fourier Transform - -void make_sintbl(int n, float* sintbl); - -void make_bitrev(int n, int* bitrev); - -int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n); - -} // namespace wenet - -#endif // FRONTEND_FFT_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/wav.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/wav.h deleted file mode 100644 index 688a049a940ebbdc83f24e59134fff22b7b09bfd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/frontend/wav.h +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright (c) 2016 Personal (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef FRONTEND_WAV_H_ -#define FRONTEND_WAV_H_ - -#include -#include -#include -#include -#include - -#include - -#include "utils/log.h" - -namespace wenet { - -struct WavHeader { - char riff[4] = {'R', 'I', 'F', 'F'}; - unsigned int size = 0; - char wav[4] = {'W', 'A', 'V', 'E'}; - char fmt[4] = {'f', 'm', 't', ' '}; - unsigned int fmt_size = 16; - uint16_t format = 1; - uint16_t channels = 0; - unsigned int sample_rate = 0; - unsigned int bytes_per_second = 0; - uint16_t block_size = 0; - uint16_t bit = 0; - char data[4] = {'d', 'a', 't', 'a'}; - unsigned int data_size = 0; - - WavHeader() {} - - WavHeader(int num_samples, int num_channel, int sample_rate, - int bits_per_sample) { - data_size = num_samples * num_channel * (bits_per_sample / 8); - size = sizeof(WavHeader) - 8 + data_size; - channels = num_channel; - this->sample_rate = sample_rate; - bytes_per_second = sample_rate * num_channel * (bits_per_sample / 8); - block_size = num_channel * (bits_per_sample / 8); - bit = bits_per_sample; - } -}; - -class WavReader { - public: - WavReader() : data_(nullptr) {} - explicit WavReader(const std::string& filename) { Open(filename); } - - bool Open(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "rb"); - if (NULL == fp) { - LOG(WARNING) << "Error in read " << filename; - return false; - } - - WavHeader header; - fread(&header, 1, sizeof(header), fp); - if (header.fmt_size < 16) { - fprintf(stderr, - "WaveData: expect PCM format data " - "to have fmt chunk of at least size 16.\n"); - return false; - } else if (header.fmt_size > 16) { - int offset = 44 - 8 + header.fmt_size - 16; - fseek(fp, offset, SEEK_SET); - fread(header.data, 8, sizeof(char), fp); - } - // check "RIFF" "WAVE" "fmt " "data" - - // Skip any sub-chunks between "fmt" and "data". Usually there will - // be a single "fact" sub chunk, but on Windows there can also be a - // "list" sub chunk. - while (0 != strncmp(header.data, "data", 4)) { - // We will just ignore the data in these chunks. 
- fseek(fp, header.data_size, SEEK_CUR); - // read next sub chunk - fread(header.data, 8, sizeof(char), fp); - } - - num_channel_ = header.channels; - sample_rate_ = header.sample_rate; - bits_per_sample_ = header.bit; - int num_data = header.data_size / (bits_per_sample_ / 8); - data_ = new float[num_data]; - num_samples_ = num_data / num_channel_; - - for (int i = 0; i < num_data; ++i) { - switch (bits_per_sample_) { - case 8: { - char sample; - fread(&sample, 1, sizeof(char), fp); - data_[i] = static_cast(sample); - break; - } - case 16: { - int16_t sample; - fread(&sample, 1, sizeof(int16_t), fp); - data_[i] = static_cast(sample); - break; - } - case 32: { - int sample; - fread(&sample, 1, sizeof(int), fp); - data_[i] = static_cast(sample); - break; - } - default: - fprintf(stderr, "unsupported quantization bits"); - exit(1); - } - } - fclose(fp); - return true; - } - - int num_channel() const { return num_channel_; } - int sample_rate() const { return sample_rate_; } - int bits_per_sample() const { return bits_per_sample_; } - int num_samples() const { return num_samples_; } - - ~WavReader() { - delete[] data_; - } - - const float* data() const { return data_; } - - private: - int num_channel_; - int sample_rate_; - int bits_per_sample_; - int num_samples_; // sample points per channel - float* data_; -}; - -class WavWriter { - public: - WavWriter(const float* data, int num_samples, int num_channel, - int sample_rate, int bits_per_sample) - : data_(data), - num_samples_(num_samples), - num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample) {} - - void Write(const std::string& filename) { - FILE* fp = fopen(filename.c_str(), "wb"); - WavHeader header(num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fwrite(&header, 1, sizeof(header), fp); - - for (int i = 0; i < num_samples_; ++i) { - for (int j = 0; j < num_channel_; ++j) { - switch (bits_per_sample_) { - case 8: { - char sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 16: { - int16_t sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - case 32: { - int sample = static_cast(data_[i * num_channel_ + j]); - fwrite(&sample, 1, sizeof(sample), fp); - break; - } - } - } - } - fclose(fp); - } - - private: - const float* data_; - int num_samples_; // total float points in data_ - int num_channel_; - int sample_rate_; - int bits_per_sample_; -}; - -class StreamWavWriter { - public: - StreamWavWriter(int num_channel, int sample_rate, int bits_per_sample) - : num_channel_(num_channel), - sample_rate_(sample_rate), - bits_per_sample_(bits_per_sample), - total_num_samples_(0) {} - - StreamWavWriter(const std::string& filename, int num_channel, - int sample_rate, int bits_per_sample) - : StreamWavWriter(num_channel, sample_rate, bits_per_sample) { - Open(filename); - } - - void Open(const std::string& filename) { - fp_ = fopen(filename.c_str(), "wb"); - fseek(fp_, sizeof(WavHeader), SEEK_SET); - } - - void Write(const int16_t* sample_data, size_t num_samples) { - fwrite(sample_data, sizeof(int16_t), num_samples, fp_); - total_num_samples_ += num_samples; - } - - void Close() { - WavHeader header(total_num_samples_, num_channel_, sample_rate_, - bits_per_sample_); - fseek(fp_, 0L, SEEK_SET); - fwrite(&header, 1, sizeof(header), fp_); - fclose(fp_); - } - - private: - FILE* fp_; - int num_channel_; - int sample_rate_; - int bits_per_sample_; - size_t total_num_samples_; -}; - -} 
// namespace wenet - -#endif // FRONTEND_WAV_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/CMakeLists.txt deleted file mode 100644 index b072309e44b90dcee44ea31e9bcbc1741e73f151..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -cmake_minimum_required(VERSION 3.10 FATAL_ERROR) - -project(kaldi) - -# include_directories() is called in the root CMakeLists.txt - -add_library(kaldi-util - base/kaldi-error.cc - base/kaldi-math.cc - util/kaldi-io.cc - util/parse-options.cc - util/simple-io-funcs.cc - util/text-utils.cc -) -target_link_libraries(kaldi-util PUBLIC utils) - -add_library(kaldi-decoder - lat/determinize-lattice-pruned.cc - lat/lattice-functions.cc - decoder/lattice-faster-decoder.cc - decoder/lattice-faster-online-decoder.cc -) -target_link_libraries(kaldi-decoder PUBLIC kaldi-util) - -if(GRAPH_TOOLS) - # Arpa binary - add_executable(arpa2fst - lm/arpa-file-parser.cc - lm/arpa-lm-compiler.cc - lmbin/arpa2fst.cc - ) - target_link_libraries(arpa2fst PUBLIC kaldi-util) - - # FST tools binary - set(FST_BINS - fstaddselfloops - fstdeterminizestar - fstisstochastic - fstminimizeencoded - fsttablecompose - ) - - if(NOT MSVC) - # dl is for dynamic linking, otherwise there is a linking error on linux - link_libraries(dl) - endif() - foreach(name IN LISTS FST_BINS) - add_executable(${name} - fstbin/${name}.cc - fstext/kaldi-fst-io.cc - ) - target_link_libraries(${name} PUBLIC kaldi-util) - endforeach() -endif() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/README.md deleted file mode 100644 index 4eb9c9173b747686f00b658afc5e1e0dfdc17e68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/README.md +++ /dev/null @@ -1,21 +0,0 @@ -We use Kaldi decoder to implement TLG based language model integration, -so we copied related files to this directory. -The main changes are: - -1. To minimize the change, we use the same directories tree as Kaldi. - -2. We replace Kaldi log system with glog in the following way. - -``` c++ -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_INFO \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) -``` - -3. We lint all the files to satisfy the lint in WeNet. 
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/io-funcs-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/io-funcs-inl.h deleted file mode 100644 index 9397400833676b323492321183c989cec2f41c3f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/io-funcs-inl.h +++ /dev/null @@ -1,329 +0,0 @@ -// base/io-funcs-inl.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian; -// Johns Hopkins University (Author: Daniel Povey) -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_INL_H_ -#define KALDI_BASE_IO_FUNCS_INL_H_ 1 - -// Do not include this file directly. It is included by base/io-funcs.h - -#include -#include -#include - -namespace kaldi { - -// Template that covers integers. -template -void WriteBasicType(std::ostream &os, bool binary, T t) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char len_c = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(t)); - os.put(len_c); - os.write(reinterpret_cast(&t), sizeof(t)); - } else { - if (sizeof(t) == 1) - os << static_cast(t) << " "; - else - os << t << " "; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteBasicType."; - } -} - -// Template that covers integers. -template -inline void ReadBasicType(std::istream &is, bool binary, T *t) { - KALDI_PARANOID_ASSERT(t != NULL); - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - int len_c_in = is.get(); - if (len_c_in == -1) - KALDI_ERR << "ReadBasicType: encountered end of stream."; - char len_c = static_cast(len_c_in), - len_c_expected = (std::numeric_limits::is_signed ? 1 : -1) * - static_cast(sizeof(*t)); - if (len_c != len_c_expected) { - KALDI_ERR << "ReadBasicType: did not get expected integer type, " - << static_cast(len_c) << " vs. " - << static_cast(len_c_expected) - << ". You can change this code to successfully" - << " read it later, if needed."; - // insert code here to read "wrong" type. Might have a switch statement. - } - is.read(reinterpret_cast(t), sizeof(*t)); - } else { - if (sizeof(*t) == 1) { - int16 i; - is >> i; - *t = i; - } else { - is >> *t; - } - } - if (is.fail()) { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << is.peek(); - } -} - -// Template that covers integers. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. 
- os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz * 2); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector >::const_iterator iter = v.begin(), - end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(iter->first) << ',' - << static_cast(iter->second) << ' '; - else - os << iter->first << ',' << iter->second << ' '; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerPairVector."; - } -} - -// Template that covers integers. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerPairVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz * 2); - } - } else { - std::vector > tmp_v; // use temporary so v doesn't use - // extra memory due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. - int16 next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::make_pair((T)next_t1, (T)next_t2)); - } else { - T next_t1, next_t2; - is >> next_t1; - if (is.fail()) goto bad; - if (is.peek() != static_cast(',')) - KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw " - << is.peek() << ", at file position " << is.tellg(); - is.get(); // consume the ','. - is >> next_t2 >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(std::pair(next_t1, next_t2)); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. - } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerPairVector: read failure at file position " - << is.tellg(); -} - -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v) { - // Compile time assertion that this is not called with a wrong type. - KALDI_ASSERT_IS_INTEGER_TYPE(T); - if (binary) { - char sz = sizeof(T); // this is currently just a check. 
- os.write(&sz, 1); - int32 vecsz = static_cast(v.size()); - KALDI_ASSERT((size_t)vecsz == v.size()); - os.write(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (vecsz != 0) { - os.write(reinterpret_cast(&(v[0])), sizeof(T) * vecsz); - } - } else { - // focus here is on prettiness of text form rather than - // efficiency of reading-in. - // reading-in is dominated by low-level operations anyway: - // for efficiency use binary. - os << "[ "; - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) { - if (sizeof(T) == 1) - os << static_cast(*iter) << " "; - else - os << *iter << " "; - } - os << "]\n"; - } - if (os.fail()) { - KALDI_ERR << "Write failure in WriteIntegerVector."; - } -} - -template -inline void ReadIntegerVector(std::istream &is, bool binary, - std::vector *v) { - KALDI_ASSERT_IS_INTEGER_TYPE(T); - KALDI_ASSERT(v != NULL); - if (binary) { - int sz = is.peek(); - if (sz == sizeof(T)) { - is.get(); - } else { // this is currently just a check. - KALDI_ERR << "ReadIntegerVector: expected to see type of size " - << sizeof(T) << ", saw instead " << sz << ", at file position " - << is.tellg(); - } - int32 vecsz; - is.read(reinterpret_cast(&vecsz), sizeof(vecsz)); - if (is.fail() || vecsz < 0) goto bad; - v->resize(vecsz); - if (vecsz > 0) { - is.read(reinterpret_cast(&((*v)[0])), sizeof(T) * vecsz); - } - } else { - std::vector tmp_v; // use temporary so v doesn't use extra memory - // due to resizing. - is >> std::ws; - if (is.peek() != static_cast('[')) { - KALDI_ERR << "ReadIntegerVector: expected to see [, saw " << is.peek() - << ", at file position " << is.tellg(); - } - is.get(); // consume the '['. - is >> std::ws; // consume whitespace. - while (is.peek() != static_cast(']')) { - if (sizeof(T) == 1) { // read/write chars as numbers. - int16 next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back((T)next_t); - } else { - T next_t; - is >> next_t >> std::ws; - if (is.fail()) - goto bad; - else - tmp_v.push_back(next_t); - } - } - is.get(); // get the final ']'. - *v = tmp_v; // could use std::swap to use less temporary memory, but this - // uses less permanent memory. - } - if (!is.fail()) return; -bad: - KALDI_ERR << "ReadIntegerVector: read failure at file position " - << is.tellg(); -} - -// Initialize an opened stream for writing by writing an optional binary -// header and modifying the floating-point precision. -inline void InitKaldiOutputStream(std::ostream &os, bool binary) { - // This does not throw exceptions (does not check for errors). - if (binary) { - os.put('\0'); - os.put('B'); - } - // Note, in non-binary mode we may at some point want to mess with - // the precision a bit. - // 7 is a bit more than the precision of float.. - if (os.precision() < 7) os.precision(7); -} - -/// Initialize an opened stream for reading by detecting the binary header and -// setting the "binary" value appropriately. -inline bool InitKaldiInputStream(std::istream &is, bool *binary) { - // Sets the 'binary' variable. - // Throws exception in the very unusual situation that stream - // starts with '\0' but not then 'B'. - - if (is.peek() == '\0') { // seems to be binary - is.get(); - if (is.peek() != 'B') { - return false; - } - is.get(); - *binary = true; - return true; - } else { - *binary = false; - return true; - } -} - -} // end namespace kaldi. 
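> Editor's aside, not part of this patch: the templated helpers removed above (`WriteBasicType`, `ReadBasicType`, `WriteIntegerVector`, `ReadIntegerVector`, `InitKaldiOutputStream`, `InitKaldiInputStream`) form a small round-trip I/O convention. The sketch below shows that round trip through a string stream; it assumes the usual Kaldi headers are on the include path, and the `main` wrapper is purely illustrative.

```cpp
// Minimal sketch, assuming base/io-funcs.h and the kaldi::int32 typedef are available.
#include <sstream>
#include <vector>
#include "base/io-funcs.h"

int main() {
  std::ostringstream os;
  const bool binary = true;
  kaldi::InitKaldiOutputStream(os, binary);          // emits the "\0B" binary header
  kaldi::WriteBasicType(os, binary, kaldi::int32(42));
  std::vector<kaldi::int32> v = {1, 2, 3};
  kaldi::WriteIntegerVector(os, binary, v);

  std::istringstream is(os.str());
  bool is_binary = false;
  kaldi::InitKaldiInputStream(is, &is_binary);       // detects the header, sets is_binary = true
  kaldi::int32 x;
  kaldi::ReadBasicType(is, is_binary, &x);           // x == 42
  std::vector<kaldi::int32> v_in;
  kaldi::ReadIntegerVector(is, is_binary, &v_in);    // v_in == {1, 2, 3}
  return 0;
}
```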
- -#endif // KALDI_BASE_IO_FUNCS_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/io-funcs.cc deleted file mode 100644 index bd6c350780d1096ff8c452fd00864aa07a30ac65..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/io-funcs.cc +++ /dev/null @@ -1,215 +0,0 @@ -// base/io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" - -namespace kaldi { - -template <> -void WriteBasicType(std::ostream &os, bool binary, bool b) { - os << (b ? "T" : "F"); - if (!binary) os << " "; - if (os.fail()) KALDI_ERR << "Write failure in WriteBasicType"; -} - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b) { - KALDI_PARANOID_ASSERT(b != NULL); - if (!binary) is >> std::ws; // eat up whitespace. - char c = is.peek(); - if (c == 'T') { - *b = true; - is.get(); - } else if (c == 'F') { - *b = false; - is.get(); - } else { - KALDI_ERR << "Read failure in ReadBasicType, file position is " - << is.tellg() << ", next char is " << CharToString(c); - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, float f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f) { - if (binary) { - char c = sizeof(f); - os.put(c); - os.write(reinterpret_cast(&f), sizeof(f)); - } else { - os << f << " "; - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f) { - KALDI_PARANOID_ASSERT(f != NULL); - if (binary) { - double d; - int c = is.peek(); - if (c == sizeof(*f)) { - is.get(); - is.read(reinterpret_cast(f), sizeof(*f)); - } else if (c == sizeof(d)) { - ReadBasicType(is, binary, &d); - *f = d; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *f; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at file position " - << is.tellg(); - } -} - -template <> -void ReadBasicType(std::istream &is, bool binary, double *d) { - KALDI_PARANOID_ASSERT(d != NULL); - if (binary) { - float f; - int c = is.peek(); - if (c == sizeof(*d)) { - is.get(); - is.read(reinterpret_cast(d), sizeof(*d)); - } else if (c == sizeof(f)) { - ReadBasicType(is, binary, &f); - *d = f; - } else { - KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek() - << ", at file position " << is.tellg(); - } - } else { - is >> *d; - } - if (is.fail()) { - KALDI_ERR << "ReadBasicType: failed to read, at file position " - << is.tellg(); - } -} - 
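> Editor's aside, not part of this patch: the float/double specializations above key the on-disk width to a leading size byte, so a value written in one precision can be read back in the other. A minimal sketch under the same header assumptions as before; the function name is illustrative.

```cpp
// Minimal sketch: cross-precision read, assuming base/io-funcs.h.
#include <sstream>
#include "base/io-funcs.h"

void CrossPrecisionRead() {
  std::ostringstream os;
  kaldi::WriteBasicType(os, /*binary=*/true, 3.14);  // 3.14 is a double: size byte 8, then 8 raw bytes
  std::istringstream is(os.str());
  float f = 0.0f;
  kaldi::ReadBasicType(is, /*binary=*/true, &f);     // sees size 8, reads a double, narrows to float
}
```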
-void CheckToken(const char *token) { - if (*token == '\0') KALDI_ERR << "Token is empty (not a valid token)"; - const char *orig_token = token; - while (*token != '\0') { - if (::isspace(*token)) - KALDI_ERR << "Token is not a valid token (contains space): '" - << orig_token << "'"; - token++; - } -} - -void WriteToken(std::ostream &os, bool binary, const char *token) { - // binary mode is ignored; - // we use space as termination character in either case. - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - os << token << " "; - if (os.fail()) { - KALDI_ERR << "Write failure in WriteToken."; - } -} - -int Peek(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // eat up whitespace. - return is.peek(); -} - -void WriteToken(std::ostream &os, bool binary, const std::string &token) { - WriteToken(os, binary, token.c_str()); -} - -void ReadToken(std::istream &is, bool binary, std::string *str) { - KALDI_ASSERT(str != NULL); - if (!binary) is >> std::ws; // consume whitespace. - is >> *str; - if (is.fail()) { - KALDI_ERR << "ReadToken, failed to read token at file position " - << is.tellg(); - } - if (!isspace(is.peek())) { - KALDI_ERR << "ReadToken, expected space after token, saw instead " - << CharToString(static_cast(is.peek())) - << ", at file position " << is.tellg(); - } - is.get(); // consume the space. -} - -int PeekToken(std::istream &is, bool binary) { - if (!binary) is >> std::ws; // consume whitespace. - bool read_bracket; - if (static_cast(is.peek()) == '<') { - read_bracket = true; - is.get(); - } else { - read_bracket = false; - } - int ans = is.peek(); - if (read_bracket) { - if (!is.unget()) { - // Clear the bad bit. This code can be (and is in fact) reached, since the - // C++ standard does not guarantee that a call to unget() must succeed. - is.clear(); - } - } - return ans; -} - -void ExpectToken(std::istream &is, bool binary, const char *token) { - int pos_at_start = is.tellg(); - KALDI_ASSERT(token != NULL); - CheckToken(token); // make sure it's valid (can be read back) - if (!binary) is >> std::ws; // consume whitespace. - std::string str; - is >> str; - is.get(); // consume the space. - if (is.fail()) { - KALDI_ERR << "Failed to read token [started at file position " - << pos_at_start << "], expected " << token; - } - // The second half of the '&&' expression below is so that if we're expecting - // "", we will accept "Foo>" instead. This is so that the model-reading - // code will tolerate errors in PeekToken where is.unget() failed; search for - // is.clear() in PeekToken() for an explanation. 
- if (strcmp(str.c_str(), token) != 0 && - !(token[0] == '<' && strcmp(str.c_str(), token + 1) == 0)) { - KALDI_ERR << "Expected token \"" << token << "\", got instead \"" << str - << "\"."; - } -} - -void ExpectToken(std::istream &is, bool binary, const std::string &token) { - ExpectToken(is, binary, token.c_str()); -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/io-funcs.h deleted file mode 100644 index 06ad1e3d2d8dc8385886a7c6653f620642c7c05a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/io-funcs.h +++ /dev/null @@ -1,246 +0,0 @@ -// base/io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_IO_FUNCS_H_ -#define KALDI_BASE_IO_FUNCS_H_ - -// This header only contains some relatively low-level I/O functions. -// The full Kaldi I/O declarations are in ../util/kaldi-io.h -// and ../util/kaldi-table.h -// They were put in util/ in order to avoid making the Matrix library -// dependent on them. - -#include -#include -#include -#include - -#include "base/io-funcs-inl.h" -#include "base/kaldi-common.h" - -namespace kaldi { - -/* - This comment describes the Kaldi approach to I/O. All objects can be written - and read in two modes: binary and text. In addition we want to make the I/O - work if we redefine the typedef "BaseFloat" between floats and doubles. - We also want to have control over whitespace in text mode without affecting - the meaning of the file, for pretty-printing purposes. - - Errors are handled by throwing a KaldiFatalError exception. - - For integer and floating-point types (and boolean values): - - WriteBasicType(std::ostream &, bool binary, const T&); - ReadBasicType(std::istream &, bool binary, T*); - - and we expect these functions to be defined in such a way that they work when - the type T changes between float and double, so you can read float into double - and vice versa]. Note that for efficiency and space-saving reasons, the - Vector and Matrix classes do not use these functions [but they preserve the - type interchangeability in their own way] - - For a class (or struct) C: - class C { - .. - Write(std::ostream &, bool binary, [possibly extra optional args for - specific classes]) const; Read(std::istream &, bool binary, [possibly extra - optional args for specific classes]); - .. - } - NOTE: The only actual optional args we used are the "add" arguments in - Vector/Matrix classes, which specify whether we should sum the data already - in the class with the data being read. 
- - For types which are typedef's involving stl classes, I/O is as follows: - typedef std::vector > MyTypedefName; - - The user should define something like: - - WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t); - ReadMyTypedefName(std::ostream &, bool binary, MyTypedefName *t); - - The user would have to write these functions. - - For a type std::vector: - - void WriteIntegerVector(std::ostream &os, bool binary, const std::vector - &v); void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - - For other types, e.g. vectors of pairs, the user should create a routine of - the type WriteMyTypedefName. This is to avoid introducing confusing templated - functions; we could easily create templated functions to handle most of these - cases but they would have to share the same name. - - It also often happens that the user needs to write/read special tokens as part - of a file. These might be class headers, or separators/identifiers in the - class. We provide special functions for manipulating these. These special - tokens must be nonempty and must not contain any whitespace. - - void WriteToken(std::ostream &os, bool binary, const char*); - void WriteToken(std::ostream &os, bool binary, const std::string & token); - int Peek(std::istream &is, bool binary); - void ReadToken(std::istream &is, bool binary, std::string *str); - void PeekToken(std::istream &is, bool binary, std::string *str); - - WriteToken writes the token and one space (whether in binary or text mode). - - Peek returns the first character of the next token, by consuming whitespace - (in text mode) and then returning the peek() character. It returns -1 at EOF; - it doesn't throw. It's useful if a class can have various forms based on - typedefs and virtual classes, and wants to know which version to read. - - ReadToken allows the caller to obtain the next token. PeekToken works just - like ReadToken, but seeks back to the beginning of the token. A subsequent - call to ReadToken will read the same token again. This is useful when - different object types are written to the same file; using PeekToken one can - decide which of the objects to read. - - There is currently no special functionality for writing/reading strings (where - the strings contain data rather than "special tokens" that are whitespace-free - and nonempty). This is because Kaldi is structured in such a way that strings - don't appear, except as OpenFst symbol table entries (and these have their own - format). - - - NOTE: you should not call ReadIntegerType and WriteIntegerType with types, - such as int and size_t, that are machine-independent -- at least not - if you want your file formats to port between machines. Use int32 and - int64 where necessary. There is no way to detect this using compile-time - assertions because C++ only keeps track of the internal representation of - the type. -*/ - -/// \addtogroup io_funcs_basic -/// @{ - -/// WriteBasicType is the name of the write function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void WriteBasicType(std::ostream &os, bool binary, T t); - -/// ReadBasicType is the name of the read function for bool, integer types, -/// and floating-point types. They all throw on error. -template -void ReadBasicType(std::istream &is, bool binary, T *t); - -// Declare specialization for bool. 
-template <> -void WriteBasicType(std::ostream &os, bool binary, bool b); - -template <> -void ReadBasicType(std::istream &is, bool binary, bool *b); - -// Declare specializations for float and double. -template <> -void WriteBasicType(std::ostream &os, bool binary, float f); - -template <> -void WriteBasicType(std::ostream &os, bool binary, double f); - -template <> -void ReadBasicType(std::istream &is, bool binary, float *f); - -template <> -void ReadBasicType(std::istream &is, bool binary, double *f); - -// Define ReadBasicType that accepts an "add" parameter to add to -// the destination. Caution: if used in Read functions, be careful -// to initialize the parameters concerned to zero in the default -// constructor. -template -inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) { - if (!add) { - ReadBasicType(is, binary, t); - } else { - T tmp = T(0); - ReadBasicType(is, binary, &tmp); - *t += tmp; - } -} - -/// Function for writing STL vectors of integer types. -template -inline void WriteIntegerVector(std::ostream &os, bool binary, - const std::vector &v); - -/// Function for reading STL vector of integer types. -template -inline void ReadIntegerVector(std::istream &is, bool binary, std::vector *v); - -/// Function for writing STL vectors of pairs of integer types. -template -inline void WriteIntegerPairVector(std::ostream &os, bool binary, - const std::vector > &v); - -/// Function for reading STL vector of pairs of integer types. -template -inline void ReadIntegerPairVector(std::istream &is, bool binary, - std::vector > *v); - -/// The WriteToken functions are for writing nonempty sequences of non-space -/// characters. They are not for general strings. -void WriteToken(std::ostream &os, bool binary, const char *token); -void WriteToken(std::ostream &os, bool binary, const std::string &token); - -/// Peek consumes whitespace (if binary == false) and then returns the peek() -/// value of the stream. -int Peek(std::istream &is, bool binary); - -/// ReadToken gets the next token and puts it in str (exception on failure). If -/// PeekToken() had been previously called, it is possible that the stream had -/// failed to unget the starting '<' character. In this case ReadToken() returns -/// the token string without the leading '<'. You must be prepared to handle -/// this case. ExpectToken() handles this internally, and is not affected. -void ReadToken(std::istream &is, bool binary, std::string *token); - -/// PeekToken will return the first character of the next token, or -1 if end of -/// file. It's the same as Peek(), except if the first character is '<' it will -/// skip over it and will return the next character. It will attempt to unget -/// the '<' so the stream is where it was before you did PeekToken(), however, -/// this is not guaranteed (see ReadToken()). -int PeekToken(std::istream &is, bool binary); - -/// ExpectToken tries to read in the given token, and throws an exception -/// on failure. -void ExpectToken(std::istream &is, bool binary, const char *token); -void ExpectToken(std::istream &is, bool binary, const std::string &token); - -/// ExpectPretty attempts to read the text in "token", but only in non-binary -/// mode. Throws exception on failure. It expects an exact match except that -/// arbitrary whitespace matches arbitrary whitespace. 
-void ExpectPretty(std::istream &is, bool binary, const char *token); -void ExpectPretty(std::istream &is, bool binary, const std::string &token); - -/// @} end "addtogroup io_funcs_basic" - -/// InitKaldiOutputStream initializes an opened stream for writing by writing an -/// optional binary header and modifying the floating-point precision; it will -/// typically not be called by users directly. -inline void InitKaldiOutputStream(std::ostream &os, bool binary); - -/// InitKaldiInputStream initializes an opened stream for reading by detecting -/// the binary header and setting the "binary" value appropriately; -/// It will typically not be called by users directly. -inline bool InitKaldiInputStream(std::istream &is, bool *binary); - -} // end namespace kaldi. -#endif // KALDI_BASE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-common.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-common.h deleted file mode 100644 index eee5f34d7234e7c029e6bb59584d3ee65ff5a875..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-common.h +++ /dev/null @@ -1,41 +0,0 @@ -// base/kaldi-common.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_COMMON_H_ -#define KALDI_BASE_KALDI_COMMON_H_ 1 - -#include -#include -#include // C string stuff like strcpy -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-utils.h" -#include "base/kaldi-error.h" -#include "base/kaldi-types.h" -// #include "base/io-funcs.h" -#include "base/kaldi-math.h" -// #include "base/timer.h" - -#endif // KALDI_BASE_KALDI_COMMON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-error.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-error.cc deleted file mode 100644 index 77edc6af6e56bb8fa3431d519e58fda9ee0bac6a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-error.cc +++ /dev/null @@ -1,42 +0,0 @@ -// base/kaldi-error.cc - -// Copyright 2019 LAIX (Yi Sun) -// Copyright 2019 SmartAction LLC (kkm) -// Copyright 2016 Brno University of Technology (author: Karel Vesely) -// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-error.h" - -#include - -namespace kaldi { - -/***** GLOBAL VARIABLES FOR LOGGING *****/ - -int32 g_kaldi_verbose_level = 0; -static std::string program_name; // NOLINT - -void SetProgramName(const char *basename) { - // Using the 'static std::string' for the program name is mostly harmless, - // because (a) Kaldi logging is undefined before main(), and (b) no stdc++ - // string implementation has been found in the wild that would not be just - // an empty string when zero-initialized but not yet constructed. - program_name = basename; -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-error.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-error.h deleted file mode 100644 index 0f65db372b5f05a8017433eed7c95badc819a0a6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-error.h +++ /dev/null @@ -1,57 +0,0 @@ -// base/kaldi-error.h - -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_ERROR_H_ -#define KALDI_BASE_KALDI_ERROR_H_ 1 - -#include "utils/log.h" - -namespace kaldi { - -#define KALDI_WARN \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() -#define KALDI_ERR \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() -#define KALDI_LOG \ - google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() -#define KALDI_VLOG(v) VLOG(v) - -#define KALDI_ASSERT(condition) CHECK(condition) - - -/***** PROGRAM NAME AND VERBOSITY LEVEL *****/ - -/// Called by ParseOptions to set base name (no directory) of the executing -/// program. The name is printed in logging code along with every message, -/// because in our scripts, we often mix together the stderr of many programs. -/// This function is very thread-unsafe. -void SetProgramName(const char *basename); - -/// This is set by util/parse-options.{h,cc} if you set --verbose=? option. -/// Do not use directly, prefer {Get,Set}VerboseLevel(). -extern int32 g_kaldi_verbose_level; - -/// Get verbosity level, usually set via command line '--verbose=' switch. -inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; } - -/// This should be rarely used, except by programs using Kaldi as library; -/// command-line programs set the verbose level automatically from ParseOptions. 
-inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; } - -} // namespace kaldi - -#endif // KALDI_BASE_KALDI_ERROR_H_ - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-math.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-math.cc deleted file mode 100644 index 175d9f49b6c5216645e90e146f4e2eab5572c342..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-math.cc +++ /dev/null @@ -1,164 +0,0 @@ -// base/kaldi-math.cc - -// Copyright 2009-2011 Microsoft Corporation; Yanmin Qian; -// Saarland University; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-math.h" -#ifndef _MSC_VER -#include -#include -#endif -#include -#include - -namespace kaldi { -// These routines are tested in matrix/matrix-test.cc - -int32 RoundUpToNearestPowerOfTwo(int32 n) { - KALDI_ASSERT(n > 0); - n--; - n |= n >> 1; - n |= n >> 2; - n |= n >> 4; - n |= n >> 8; - n |= n >> 16; - return n+1; -} - -static std::mutex _RandMutex; - -int Rand(struct RandomState* state) { -#if !defined(_POSIX_THREAD_SAFE_FUNCTIONS) - // On Windows and Cygwin, just call Rand() - return rand(); -#else - if (state) { - return rand_r(&(state->seed)); - } else { - std::lock_guard lock(_RandMutex); - return rand(); - } -#endif -} - -RandomState::RandomState() { - // we initialize it as Rand() + 27437 instead of just Rand(), because on some - // systems, e.g. at the very least Mac OSX Yosemite and later, it seems to be - // the case that rand_r when initialized with rand() will give you the exact - // same sequence of numbers that rand() will give if you keep calling rand() - // after that initial call. This can cause problems with repeated sequences. - // For example if you initialize two RandomState structs one after the other - // without calling rand() in between, they would give you the same sequence - // offset by one (if we didn't have the "+ 27437" in the code). 27437 is just - // a randomly chosen prime number. - seed = unsigned(Rand()) + 27437; -} - -bool WithProb(BaseFloat prob, struct RandomState* state) { - KALDI_ASSERT(prob >= 0 && prob <= 1.1); // prob should be <= 1.0, - // but we allow slightly larger values that could arise from roundoff in - // previous calculations. - KALDI_COMPILE_TIME_ASSERT(RAND_MAX > 128 * 128); - if (prob == 0) { - return false; - } else if (prob == 1.0) { - return true; - } else if (prob * RAND_MAX < 128.0) { - // prob is very small but nonzero, and the "main algorithm" - // wouldn't work that well. So: with probability 1/128, we - // return WithProb (prob * 128), else return false. - if (Rand(state) < RAND_MAX / 128) { // with probability 128... - // Note: we know that prob * 128.0 < 1.0, because - // we asserted RAND_MAX > 128 * 128. 
- return WithProb(prob * 128.0); - } else { - return false; - } - } else { - return (Rand(state) < ((RAND_MAX + static_cast(1.0)) * prob)); - } -} - -int32 RandInt(int32 min_val, int32 max_val, struct RandomState* state) { - // This is not exact. - KALDI_ASSERT(max_val >= min_val); - if (max_val == min_val) return min_val; - -#ifdef _MSC_VER - // RAND_MAX is quite small on Windows -> may need to handle larger numbers. - if (RAND_MAX > (max_val-min_val)*8) { - // *8 to avoid large inaccuracies in probability, from the modulus... - return min_val + - ((unsigned int)Rand(state) % (unsigned int)(max_val+1-min_val)); - } else { - if ((unsigned int)(RAND_MAX*RAND_MAX) > - (unsigned int)((max_val+1-min_val)*8)) { - // *8 to avoid inaccuracies in probability, from the modulus... - return min_val + ( (unsigned int)( (Rand(state)+RAND_MAX*Rand(state))) - % (unsigned int)(max_val+1-min_val)); - } else { - KALDI_ERR << "rand_int failed because we do not support such large " - "random numbers. (Extend this function)."; - } - } -#else - return min_val + - (static_cast(Rand(state)) % static_cast(max_val+1-min_val)); -#endif -} - -// Returns poisson-distributed random number. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state) { - // Knuth's algorithm. - KALDI_ASSERT(lambda >= 0); - float L = expf(-lambda), p = 1.0; - int32 k = 0; - do { - k++; - float u = RandUniform(state); - p *= u; - } while (p > L); - return k-1; -} - -void RandGauss2(float *a, float *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float u1 = RandUniform(state); - float u2 = RandUniform(state); - u1 = sqrtf(-2.0f * logf(u1)); - u2 = 2.0f * M_PI * u2; - *a = u1 * cosf(u2); - *b = u1 * sinf(u2); -} - -void RandGauss2(double *a, double *b, RandomState *state) { - KALDI_ASSERT(a); - KALDI_ASSERT(b); - float a_float, b_float; - // Just because we're using doubles doesn't mean we need super-high-quality - // random numbers, so we just use the floating-point version internally. - RandGauss2(&a_float, &b_float, state); - *a = a_float; - *b = b_float; -} - - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-math.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-math.h deleted file mode 100644 index 93c265ee96e704893da26b9083a44a9e60c6c192..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-math.h +++ /dev/null @@ -1,363 +0,0 @@ -// base/kaldi-math.h - -// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Yanmin Qian; -// Jan Silovsky; Saarland University -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
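> Editor's aside, not part of this patch: the random-number helpers deleted in kaldi-math.cc above (`RandomState`, `RandInt`, `WithProb`, `RandGauss2`) are typically used as in the sketch below; it assumes base/kaldi-math.h is available, and the function name is illustrative.

```cpp
// Minimal sketch of the RNG helpers defined in kaldi-math.cc.
#include "base/kaldi-math.h"

void RngSketch() {
  kaldi::RandomState state;                        // per-thread seed for rand_r()
  kaldi::int32 r = kaldi::RandInt(0, 9, &state);   // uniform integer in [0, 9]
  bool keep = kaldi::WithProb(0.5, &state);        // true with probability 0.5
  float a, b;
  kaldi::RandGauss2(&a, &b, &state);               // pair of N(0, 1) samples (Box-Muller)
  (void)r; (void)keep; (void)a; (void)b;
}
```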
- -#ifndef KALDI_BASE_KALDI_MATH_H_ -#define KALDI_BASE_KALDI_MATH_H_ 1 - -#ifdef _MSC_VER -#include -#endif - -#include -#include -#include - -#include "base/kaldi-types.h" -#include "base/kaldi-common.h" - - -#ifndef DBL_EPSILON -#define DBL_EPSILON 2.2204460492503131e-16 -#endif -#ifndef FLT_EPSILON -#define FLT_EPSILON 1.19209290e-7f -#endif - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif - -#ifndef M_SQRT2 -#define M_SQRT2 1.4142135623730950488016887 -#endif - -#ifndef M_2PI -#define M_2PI 6.283185307179586476925286766559005 -#endif - -#ifndef M_SQRT1_2 -#define M_SQRT1_2 0.7071067811865475244008443621048490 -#endif - -#ifndef M_LOG_2PI -#define M_LOG_2PI 1.8378770664093454835606594728112 -#endif - -#ifndef M_LN2 -#define M_LN2 0.693147180559945309417232121458 -#endif - -#ifndef M_LN10 -#define M_LN10 2.302585092994045684017991454684 -#endif - - -#define KALDI_ISNAN std::isnan -#define KALDI_ISINF std::isinf -#define KALDI_ISFINITE(x) std::isfinite(x) - -#if !defined(KALDI_SQR) -# define KALDI_SQR(x) ((x) * (x)) -#endif - -namespace kaldi { - -#if !defined(_MSC_VER) || (_MSC_VER >= 1900) -inline double Exp(double x) { return exp(x); } -#ifndef KALDI_NO_EXPF -inline float Exp(float x) { return expf(x); } -#else -inline float Exp(float x) { return exp(static_cast(x)); } -#endif // KALDI_NO_EXPF -#else -inline double Exp(double x) { return exp(x); } -#if !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -// Microsoft CL v18.0 buggy 64-bit implementation of -// expf() incorrectly returns -inf for exp(-inf). -inline float Exp(float x) { return exp(static_cast(x)); } -#else -inline float Exp(float x) { return expf(x); } -#endif // !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64) -#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) - -inline double Log(double x) { return log(x); } -inline float Log(float x) { return logf(x); } - -#if !defined(_MSC_VER) || (_MSC_VER >= 1700) -inline double Log1p(double x) { return log1p(x); } -inline float Log1p(float x) { return log1pf(x); } -#else -inline double Log1p(double x) { - const double cutoff = 1.0e-08; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} - -inline float Log1p(float x) { - const float cutoff = 1.0e-07; - if (x < cutoff) - return x - 0.5 * x * x; - else - return Log(1.0 + x); -} -#endif - -static const double kMinLogDiffDouble = Log(DBL_EPSILON); // negative! -static const float kMinLogDiffFloat = Log(FLT_EPSILON); // negative! - -// -infinity -const float kLogZeroFloat = -std::numeric_limits::infinity(); -const double kLogZeroDouble = -std::numeric_limits::infinity(); -const BaseFloat kLogZeroBaseFloat = -std::numeric_limits::infinity(); - -// Returns a random integer between 0 and RAND_MAX, inclusive -int Rand(struct RandomState* state = NULL); - -// State for thread-safe random number generator -struct RandomState { - RandomState(); - unsigned seed; -}; - -// Returns a random integer between first and last inclusive. -int32 RandInt(int32 first, int32 last, struct RandomState* state = NULL); - -// Returns true with probability "prob", -bool WithProb(BaseFloat prob, struct RandomState* state = NULL); -// with 0 <= prob <= 1 [we check this]. -// Internally calls Rand(). This function is carefully implemented so -// that it should work even if prob is very small. - -/// Returns a random number strictly between 0 and 1. 
-inline float RandUniform(struct RandomState* state = NULL) { - return static_cast((Rand(state) + 1.0) / (RAND_MAX+2.0)); -} - -inline float RandGauss(struct RandomState* state = NULL) { - return static_cast(sqrtf (-2 * Log(RandUniform(state))) - * cosf(2*M_PI*RandUniform(state))); -} - -// Returns poisson-distributed random number. Uses Knuth's algorithm. -// Take care: this takes time proportional -// to lambda. Faster algorithms exist but are more complex. -int32 RandPoisson(float lambda, struct RandomState* state = NULL); - -// Returns a pair of gaussian random numbers. Uses Box-Muller transform -void RandGauss2(float *a, float *b, RandomState *state = NULL); -void RandGauss2(double *a, double *b, RandomState *state = NULL); - -// Also see Vector::RandCategorical(). - -// This is a randomized pruning mechanism that preserves expectations, -// that we typically use to prune posteriors. -template -inline Float RandPrune(Float post, BaseFloat prune_thresh, - struct RandomState* state = NULL) { - KALDI_ASSERT(prune_thresh >= 0.0); - if (post == 0.0 || std::abs(post) >= prune_thresh) - return post; - return (post >= 0 ? 1.0 : -1.0) * - (RandUniform(state) <= fabs(post)/prune_thresh ? prune_thresh : 0.0); -} - -// returns log(exp(x) + exp(y)). -inline double LogAdd(double x, double y) { - double diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffDouble) { - double res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) + exp(y)). -inline float LogAdd(float x, float y) { - float diff; - - if (x < y) { - diff = x - y; - x = y; - } else { - diff = y - x; - } - // diff is negative. x is now the larger one. - - if (diff >= kMinLogDiffFloat) { - float res; - res = x + Log1p(Exp(diff)); - return res; - } else { - return x; // return the larger one. - } -} - - -// returns log(exp(x) - exp(y)). -inline double LogSub(double x, double y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - double diff = y - x; // Will be negative. - double res = x + Log(1.0 - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroDouble; - return res; -} - - -// returns log(exp(x) - exp(y)). -inline float LogSub(float x, float y) { - if (y >= x) { // Throws exception if y>=x. - if (y == x) - return kLogZeroDouble; - else - KALDI_ERR << "Cannot subtract a larger from a smaller number."; - } - - float diff = y - x; // Will be negative. - float res = x + Log(1.0f - Exp(diff)); - - // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision - if (KALDI_ISNAN(res)) - return kLogZeroFloat; - return res; -} - -/// return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)). -static inline bool ApproxEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. - if (a == b) return true; - float diff = std::abs(a-b); - if (diff == std::numeric_limits::infinity() - || diff != diff) return false; // diff is +inf or nan. - return (diff <= relative_tolerance*(std::abs(a)+std::abs(b))); -} - -/// assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b)) -static inline void AssertEqual(float a, float b, - float relative_tolerance = 0.001) { - // a==b handles infinities. 
- KALDI_ASSERT(ApproxEqual(a, b, relative_tolerance)); -} - - -// RoundUpToNearestPowerOfTwo does the obvious thing. It crashes if n <= 0. -int32 RoundUpToNearestPowerOfTwo(int32 n); - -/// Returns a / b, rounding towards negative infinity in all cases. -static inline int32 DivideRoundingDown(int32 a, int32 b) { - KALDI_ASSERT(b != 0); - if (a * b >= 0) - return a / b; - else if (a < 0) - return (a - b + 1) / b; - else - return (a - b - 1) / b; -} - -template I Gcd(I m, I n) { - if (m == 0 || n == 0) { - if (m == 0 && n == 0) { // gcd not defined, as all integers are divisors. - KALDI_ERR << "Undefined GCD since m = 0, n = 0."; - } - return (m == 0 ? (n > 0 ? n : -n) : ( m > 0 ? m : -m)); - // return absolute value of whichever is nonzero - } - // could use compile-time assertion - // but involves messing with complex template stuff. - KALDI_ASSERT(std::numeric_limits::is_integer); - while (1) { - m %= n; - if (m == 0) return (n > 0 ? n : -n); - n %= m; - if (n == 0) return (m > 0 ? m : -m); - } -} - -/// Returns the least common multiple of two integers. Will -/// crash unless the inputs are positive. -template I Lcm(I m, I n) { - KALDI_ASSERT(m > 0 && n > 0); - I gcd = Gcd(m, n); - return gcd * (m/gcd) * (n/gcd); -} - - -template void Factorize(I m, std::vector *factors) { - // Splits a number into its prime factors, in sorted order from - // least to greatest, with duplication. A very inefficient - // algorithm, which is mainly intended for use in the - // mixed-radix FFT computation (where we assume most factors - // are small). - KALDI_ASSERT(factors != NULL); - KALDI_ASSERT(m >= 1); // Doesn't work for zero or negative numbers. - factors->clear(); - I small_factors[10] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 }; - - // First try small factors. - for (I i = 0; i < 10; i++) { - if (m == 1) return; // We're done. - while (m % small_factors[i] == 0) { - m /= small_factors[i]; - factors->push_back(small_factors[i]); - } - } - // Next try all odd numbers starting from 31. - for (I j = 31;; j += 2) { - if (m == 1) return; - while (m % j == 0) { - m /= j; - factors->push_back(j); - } - } -} - -inline double Hypot(double x, double y) { return hypot(x, y); } -inline float Hypot(float x, float y) { return hypotf(x, y); } - - - - -} // namespace kaldi - - -#endif // KALDI_BASE_KALDI_MATH_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-types.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-types.h deleted file mode 100644 index 7ebf4f85386192a65e176d8f0ecde9bb348af4a0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-types.h +++ /dev/null @@ -1,75 +0,0 @@ -// base/kaldi-types.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University; -// Jan Silovsky; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_BASE_KALDI_TYPES_H_ -#define KALDI_BASE_KALDI_TYPES_H_ 1 - -namespace kaldi { -// TYPEDEFS .................................................................. -#if (KALDI_DOUBLEPRECISION != 0) -typedef double BaseFloat; -#else -typedef float BaseFloat; -#endif -} - -#ifdef _MSC_VER -#include -#define ssize_t SSIZE_T -#endif - -// we can do this a different way if some platform -// we find in the future lacks stdint.h -#include - -// for discussion on what to do if you need compile kaldi -// without OpenFST, see the bottom of this this file -#include - -namespace kaldi { - using ::int16; - using ::int32; - using ::int64; - using ::uint16; - using ::uint32; - using ::uint64; - typedef float float32; - typedef double double64; -} // end namespace kaldi - -// In a theoretical case you decide compile Kaldi without the OpenFST -// comment the previous namespace statement and uncomment the following -/* -namespace kaldi { - typedef int8_t int8; - typedef int16_t int16; - typedef int32_t int32; - typedef int64_t int64; - - typedef uint8_t uint8; - typedef uint16_t uint16; - typedef uint32_t uint32; - typedef uint64_t uint64; - typedef float float32; - typedef double double64; -} // end namespace kaldi -*/ - -#endif // KALDI_BASE_KALDI_TYPES_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-utils.h deleted file mode 100644 index bd434d09ed92ec94bc4208f53a4416f941edfdb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/base/kaldi-utils.h +++ /dev/null @@ -1,155 +0,0 @@ -// base/kaldi-utils.h - -// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; -// Saarland University; Karel Vesely; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef KALDI_BASE_KALDI_UTILS_H_ -#define KALDI_BASE_KALDI_UTILS_H_ 1 - -#if defined(_MSC_VER) -# define WIN32_LEAN_AND_MEAN -# define NOMINMAX -# include -#endif - -#ifdef _MSC_VER -#include -#define unlink _unlink -#else -#include -#endif - -#include -#include - -#if defined(_MSC_VER) -#pragma warning(disable: 4244 4056 4305 4800 4267 4996 4756 4661) -#if _MSC_VER < 1400 -#define __restrict__ -#else -#define __restrict__ __restrict -#endif -#endif - -#if defined(_MSC_VER) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = _aligned_malloc(size, align)) -# define KALDI_MEMALIGN_FREE(x) _aligned_free(x) -#elif defined(__CYGWIN__) -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (*(pp_orig) = aligned_alloc(align, size)) -# define KALDI_MEMALIGN_FREE(x) free(x) -#else -# define KALDI_MEMALIGN(align, size, pp_orig) \ - (!posix_memalign(pp_orig, align, size) ? *(pp_orig) : NULL) -# define KALDI_MEMALIGN_FREE(x) free(x) -#endif - -#ifdef __ICC -#pragma warning(disable: 383) // ICPC remark we don't want. -#pragma warning(disable: 810) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. -#pragma warning(disable: 1418) // ICPC remark we don't want. -#pragma warning(disable: 444) // ICPC remark we don't want. -#pragma warning(disable: 869) // ICPC remark we don't want. -#pragma warning(disable: 1287) // ICPC remark we don't want. -#pragma warning(disable: 279) // ICPC remark we don't want. -#pragma warning(disable: 981) // ICPC remark we don't want. -#endif - - -namespace kaldi { - - -// CharToString prints the character in a human-readable form, for debugging. -std::string CharToString(const char &c); - - -inline int MachineIsLittleEndian() { - int check = 1; - return (*reinterpret_cast(&check) != 0); -} - -// This function kaldi::Sleep() provides a portable way -// to sleep for a possibly fractional -// number of seconds. On Windows it's only accurate to microseconds. -void Sleep(float seconds); -} // namespace kaldi - -#define KALDI_SWAP8(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[7];\ - (reinterpret_cast(&a))[7] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[6];\ - (reinterpret_cast(&a))[6] = t;\ - t = (reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=(reinterpret_cast(&a))[5];\ - (reinterpret_cast(&a))[5] = t;\ - t = (reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3]=(reinterpret_cast(&a))[4];\ - (reinterpret_cast(&a))[4] = t;} while (0) -#define KALDI_SWAP4(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[3];\ - (reinterpret_cast(&a))[3] = t;\ - t = (reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1]=(reinterpret_cast(&a))[2];\ - (reinterpret_cast(&a))[2]=t;} while (0) -#define KALDI_SWAP2(a) do { \ - int t = (reinterpret_cast(&a))[0];\ - (reinterpret_cast(&a))[0]=(reinterpret_cast(&a))[1];\ - (reinterpret_cast(&a))[1] = t;} while (0) - - -// Makes copy constructor and operator= private. 
-#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \ - type(const type&); \ - void operator = (const type&) - -template class KaldiCompileTimeAssert { }; -template<> class KaldiCompileTimeAssert { - public: - static inline void Check() { } -}; - -#define KALDI_COMPILE_TIME_ASSERT(b) KaldiCompileTimeAssert<(b)>::Check() - -#define KALDI_ASSERT_IS_INTEGER_TYPE(I) \ - KaldiCompileTimeAssert::is_specialized \ - && std::numeric_limits::is_integer>::Check() - -#define KALDI_ASSERT_IS_FLOATING_TYPE(F) \ - KaldiCompileTimeAssert::is_specialized \ - && !std::numeric_limits::is_integer>::Check() - -#if defined(_MSC_VER) -#define KALDI_STRCASECMP _stricmp -#elif defined(__CYGWIN__) -#include -#define KALDI_STRCASECMP strcasecmp -#else -#define KALDI_STRCASECMP strcasecmp -#endif -#ifdef _MSC_VER -# define KALDI_STRTOLL(cur_cstr, end_cstr) _strtoi64(cur_cstr, end_cstr, 10); -#else -# define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10); -#endif - -#endif // KALDI_BASE_KALDI_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-decoder.cc deleted file mode 100644 index 06f77557fa49a23f6a44d07c327a1b3b081c6dec..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-decoder.cc +++ /dev/null @@ -1,1101 +0,0 @@ -// decoder/lattice-faster-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2018 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "decoder/lattice-faster-decoder.h" -// #include "lat/lattice-functions.h" - -namespace kaldi { - -// instantiate this class once for each thing you have to decode. -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : fst_(&fst), - delete_fst_(false), - config_(config), - num_toks_(0), - context_graph_(context_graph) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. -} - -template -LatticeFasterDecoderTpl::LatticeFasterDecoderTpl( - const LatticeFasterDecoderConfig &config, FST *fst) - : fst_(fst), delete_fst_(true), config_(config), num_toks_(0) { - config.Check(); - toks_.SetSize( - 1000); // just so on the first frame we do something reasonable. 
-} - -template -LatticeFasterDecoderTpl::~LatticeFasterDecoderTpl() { - DeleteElems(toks_.Clear()); - ClearActiveTokens(); - if (delete_fst_) delete fst_; -} - -template -void LatticeFasterDecoderTpl::InitDecoding() { - // clean up from last time: - DeleteElems(toks_.Clear()); - cost_offsets_.clear(); - ClearActiveTokens(); - warned_ = false; - num_toks_ = 0; - decoding_finalized_ = false; - final_costs_.clear(); - StateId start_state = fst_->Start(); - KALDI_ASSERT(start_state != fst::kNoStateId); - active_toks_.resize(1); - Token *start_tok = new Token(0.0, 0.0, NULL, NULL, NULL); - active_toks_[0].toks = start_tok; - toks_.Insert(start_state, start_tok); - num_toks_++; - ProcessNonemitting(config_.beam); -} - -// Returns true if any kind of traceback is available (not necessarily from -// a final state). It should only very rarely return false; this indicates -// an unusual search error. -template -bool LatticeFasterDecoderTpl::Decode( - DecodableInterface *decodable) { - InitDecoding(); - // We use 1-based indexing for frames in this decoder (if you view it in - // terms of features), but note that the decodable object uses zero-based - // numbering, which we have to correct for when we call it. - AdvanceDecoding(decodable); - FinalizeDecoding(); - - // Returns true if we have any kind of traceback available (not necessarily - // to the end state; query ReachedFinal() for that). - return !active_toks_.empty() && active_toks_.back().toks != NULL; -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - Lattice raw_lat; - GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, olat); - return (olat->NumStates() != 0); -} - -// Outputs an FST corresponding to the raw, state-level lattice -template -bool LatticeFasterDecoderTpl::GetRawLattice( - Lattice *ofst, bool use_final_probs) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (decoding_finalized_ ? final_costs_ : final_costs_local); - if (!decoding_finalized_ && use_final_probs) - ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - const int32 bucket_count = num_toks_ / 2 + 3; - unordered_map tok_map(bucket_count); - // First create all states. - std::vector token_list; - for (int32 f = 0; f <= num_frames; f++) { - if (active_toks_[f].toks == NULL) { - KALDI_WARN << "GetRawLattice: no tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - TopSortTokens(active_toks_[f].toks, &token_list); - for (size_t i = 0; i < token_list.size(); i++) - if (token_list[i] != NULL) tok_map[token_list[i]] = ofst->AddState(); - } - // The next statement sets the start state of the output FST. Because we - // topologically sorted the tokens, state zero must be the start-state. 
- ofst->SetStart(0); - - KALDI_VLOG(4) << "init:" << num_toks_ / 2 + 3 - << " buckets:" << tok_map.bucket_count() - << " load:" << tok_map.load_factor() - << " max:" << tok_map.max_load_factor(); - // Now create all arcs. - for (int32 f = 0; f <= num_frames; f++) { - for (Token *tok = active_toks_[f].toks; tok != NULL; tok = tok->next) { - StateId cur_state = tok_map[tok]; - for (ForwardLinkT *l = tok->links; l != NULL; l = l->next) { - typename unordered_map::const_iterator iter = - tok_map.find(l->next_tok); - StateId nextstate = iter->second; - KALDI_ASSERT(iter != tok_map.end()); - BaseFloat cost_offset = 0.0; - if (l->ilabel != 0) { // emitting.. - KALDI_ASSERT(f >= 0 && f < cost_offsets_.size()); - cost_offset = cost_offsets_[f]; - } - - StateId state = cur_state; - if (l->is_start_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->start_tag_id(), Weight(0, 0), tmp); - ofst->AddArc(state, arc); - state = tmp; - } - if (l->is_end_boundary) { - StateId tmp = ofst->AddState(); - Arc arc(0, context_graph_->end_tag_id(), Weight(0, 0), nextstate); - ofst->AddArc(tmp, arc); - nextstate = tmp; - } - - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(state, arc); - } - if (f == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - } - - fst::TopSort(ofst); - return (ofst->NumStates() > 0); -} - -// This function is now deprecated, since now we do determinization from outside -// the LatticeFasterDecoder class. Outputs an FST corresponding to the -// lattice-determinized lattice (one path per word sequence). -template -bool LatticeFasterDecoderTpl::GetLattice( - CompactLattice *ofst, bool use_final_probs) const { - Lattice raw_fst; - GetRawLattice(&raw_fst, use_final_probs); - Invert(&raw_fst); // make it so word labels are on the input. - // (in phase where we get backward-costs). - fst::ILabelCompare ilabel_comp; - ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes - // lattice-determinization more efficient. - - fst::DeterminizeLatticePrunedOptions lat_opts; - lat_opts.max_mem = config_.det_opts.max_mem; - - DeterminizeLatticePruned(raw_fst, config_.lattice_beam, ofst, lat_opts); - raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed. - Connect(ofst); // Remove unreachable states... there might be - // a small number of these, in some cases. - // Note: if something went wrong and the raw lattice was empty, - // we should still get to this point in the code without warnings or failures. - return (ofst->NumStates() != 0); -} - -template -void LatticeFasterDecoderTpl::PossiblyResizeHash(size_t num_toks) { - size_t new_sz = static_cast(static_cast(num_toks) * - config_.hash_ratio); - if (new_sz > toks_.Size()) { - toks_.SetSize(new_sz); - } -} - -/* - A note on the definition of extra_cost. - - extra_cost is used in pruning tokens, to save memory. - - extra_cost can be thought of as a beta (backward) cost assuming - we had set the betas on currently-active tokens to all be the negative - of the alphas for those tokens. (So all currently active tokens would - be on (tied) best paths). - - We can use the extra_cost to accurately prune away tokens that we know will - never appear in the lattice. 
If the extra_cost is greater than the desired - lattice beam, the token would provably never appear in the lattice, so we can - prune away the token. - - (Note: we don't update all the extra_costs every time we update a frame; we - only do it every 'config_.prune_interval' frames). - */ - -// FindOrAddToken either locates a token in hash of toks_, -// or if necessary inserts a new, empty token (i.e. with no forward links) -// for the current frame. [note: it's inserted if necessary into hash toks_ -// and also into the singly linked list of tokens active on this frame -// (whose head is at active_toks_[frame]). -template -inline typename LatticeFasterDecoderTpl::Elem * -LatticeFasterDecoderTpl::FindOrAddToken(StateId state, - int32 frame_plus_one, - BaseFloat tot_cost, - Token *backpointer, - bool *changed) { - // Returns the Token pointer. Sets "changed" (if non-NULL) to true - // if the token was newly created or the cost changed. - KALDI_ASSERT(frame_plus_one < active_toks_.size()); - Token *&toks = active_toks_[frame_plus_one].toks; - Elem *e_found = toks_.Insert(state, NULL); - if (e_found->val == NULL) { // no such token presently. - const BaseFloat extra_cost = 0.0; - // tokens on the currently final frame have zero extra_cost - // as any of them could end up - // on the winning path. - Token *new_tok = new Token(tot_cost, extra_cost, NULL, toks, backpointer); - // NULL: no forward links yet - toks = new_tok; - num_toks_++; - e_found->val = new_tok; - if (changed) *changed = true; - return e_found; - } else { - Token *tok = e_found->val; // There is an existing Token for this state. - if (tok->tot_cost > tot_cost) { // replace old token - tok->tot_cost = tot_cost; - // SetBackpointer() just does tok->backpointer = backpointer in - // the case where Token == BackpointerToken, else nothing. - tok->SetBackpointer(backpointer); - // we don't allocate a new token, the old stays linked in active_toks_ - // we only replace the tot_cost - // in the current frame, there are no forward links (and no extra_cost) - // only in ProcessNonemitting we have to delete forward links - // in case we visit a state for the second time - // those forward links, that lead to this replaced token before: - // they remain and will hopefully be pruned later (PruneForwardLinks...) - if (changed) *changed = true; - } else { - if (changed) *changed = false; - } - return e_found; - } -} - -// prunes outgoing links for all tokens in active_toks_[frame] -// it's called by PruneActiveTokens -// all links, that have link_extra_cost > lattice_beam are pruned -template -void LatticeFasterDecoderTpl::PruneForwardLinks( - int32 frame_plus_one, bool *extra_costs_changed, bool *links_pruned, - BaseFloat delta) { - // delta is the amount by which the extra_costs must change - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - - *extra_costs_changed = false; - *links_pruned = false; - KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size()); - if (active_toks_[frame_plus_one].toks == - NULL) { // empty list; should not happen. - if (!warned_) { - KALDI_WARN << "No tokens alive [doing pruning].. warning first " - "time only for each utterance\n"; - warned_ = true; - } - } - - // We have to iterate until there is no more change, because the links - // are not guaranteed to be in topological order. 
- bool changed = true; // difference new minus old extra cost >= delta ? - while (changed) { - changed = false; - for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL; - tok = tok->next) { - ForwardLinkT *link, *prev_link = NULL; - // will recompute tok_extra_cost for tok. - BaseFloat tok_extra_cost = std::numeric_limits::infinity(); - // tok_extra_cost is the best (min) of link_extra_cost of outgoing links - for (link = tok->links; link != NULL;) { - // See if we need to excise this link... - Token *next_tok = link->next_tok; - BaseFloat link_extra_cost = - next_tok->extra_cost + - ((tok->tot_cost + link->acoustic_cost + link->graph_cost) - - next_tok->tot_cost); // difference in brackets is >= 0 - // link_exta_cost is the difference in score between the best paths - // through link source state and through link destination state - KALDI_ASSERT(link_extra_cost == link_extra_cost); // check for NaN - // the graph_cost contatins the context score - // if it's the score of the backoff arc, it should be removed. - if (link->context_score < 0) { - link_extra_cost += link->context_score; - } - if (link_extra_cost > config_.lattice_beam) { // excise link - ForwardLinkT *next_link = link->next; - if (prev_link != NULL) - prev_link->next = next_link; - else - tok->links = next_link; - delete link; - link = next_link; // advance link but leave prev_link the same. - *links_pruned = true; - } else { // keep the link and update the tok_extra_cost if needed. - if (link_extra_cost < 0.0) { // this is just a precaution. - // if (link_extra_cost < -0.01) - // KALDI_WARN << "Negative extra_cost: " << link_extra_cost; - link_extra_cost = 0.0; - } - if (link_extra_cost < tok_extra_cost) - tok_extra_cost = link_extra_cost; - prev_link = link; // move to next link - link = link->next; - } - } // for all outgoing links - if (fabs(tok_extra_cost - tok->extra_cost) > delta) - changed = true; // difference new minus old is bigger than delta - tok->extra_cost = tok_extra_cost; - // will be +infinity or <= lattice_beam_. - // infinity indicates, that no forward link survived pruning - } // for all Token on active_toks_[frame] - if (changed) *extra_costs_changed = true; - - // Note: it's theoretically possible that aggressive compiler - // optimizations could cause an infinite loop here for small delta and - // high-dynamic-range scores. - } // while changed -} - -// PruneForwardLinksFinal is a version of PruneForwardLinks that we call -// on the final frame. If there are final tokens active, it uses -// the final-probs for pruning, otherwise it treats all tokens as final. -template -void LatticeFasterDecoderTpl::PruneForwardLinksFinal() { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame_plus_one = active_toks_.size() - 1; - - if (active_toks_[frame_plus_one].toks == - NULL) // empty list; should not happen. - KALDI_WARN << "No tokens alive at end of file"; - - typedef typename unordered_map::const_iterator IterType; - ComputeFinalCosts(&final_costs_, &final_relative_cost_, &final_best_cost_); - decoding_finalized_ = true; - // We call DeleteElems() as a nicety, not because it's really necessary; - // otherwise there would be a time, after calling PruneTokensForFrame() on the - // final frame, when toks_.GetList() or toks_.Clear() would contain pointers - // to nonexistent tokens. - DeleteElems(toks_.Clear()); - - // Now go through tokens on this frame, pruning forward links... may have to - // iterate a few times until there is no more change, because the list is not - // in topological order. 
This is a modified version of the code in
-  // PruneForwardLinks, but here we also take account of the final-probs.
-  bool changed = true;
-  BaseFloat delta = 1.0e-05;
-  while (changed) {
-    changed = false;
-    for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL;
-         tok = tok->next) {
-      ForwardLinkT *link, *prev_link = NULL;
-      // will recompute tok_extra_cost. It has a term in it that corresponds
-      // to the "final-prob", so instead of initializing tok_extra_cost to
-      // infinity below we set it to the difference between the
-      // (score+final_prob) of this token, and the best such (score+final_prob).
-      BaseFloat final_cost;
-      if (final_costs_.empty()) {
-        final_cost = 0.0;
-      } else {
-        IterType iter = final_costs_.find(tok);
-        if (iter != final_costs_.end())
-          final_cost = iter->second;
-        else
-          final_cost = std::numeric_limits<BaseFloat>::infinity();
-      }
-      BaseFloat tok_extra_cost = tok->tot_cost + final_cost - final_best_cost_;
-      // tok_extra_cost will be a "min" over either directly being final, or
-      // being indirectly final through other links, and the loop below may
-      // decrease its value:
-      for (link = tok->links; link != NULL;) {
-        // See if we need to excise this link...
-        Token *next_tok = link->next_tok;
-        BaseFloat link_extra_cost =
-            next_tok->extra_cost +
-            ((tok->tot_cost + link->acoustic_cost + link->graph_cost) -
-             next_tok->tot_cost);
-        if (link_extra_cost > config_.lattice_beam) {  // excise link
-          ForwardLinkT *next_link = link->next;
-          if (prev_link != NULL)
-            prev_link->next = next_link;
-          else
-            tok->links = next_link;
-          delete link;
-          link = next_link;  // advance link but leave prev_link the same.
-        } else {  // keep the link and update the tok_extra_cost if needed.
-          if (link_extra_cost < 0.0) {  // this is just a precaution.
-            // if (link_extra_cost < -0.01)
-            //   KALDI_WARN << "Negative extra_cost: " << link_extra_cost;
-            link_extra_cost = 0.0;
-          }
-          if (link_extra_cost < tok_extra_cost)
-            tok_extra_cost = link_extra_cost;
-          prev_link = link;
-          link = link->next;
-        }
-      }
-      // prune away tokens worse than lattice_beam above best path. This step
-      // was not necessary in the non-final case because then, this case
-      // showed up as having no forward links. Here, the tok_extra_cost has
-      // an extra component relating to the final-prob.
-      if (tok_extra_cost > config_.lattice_beam)
-        tok_extra_cost = std::numeric_limits<BaseFloat>::infinity();
-      // to be pruned in PruneTokensForFrame
-
-      if (!ApproxEqual(tok->extra_cost, tok_extra_cost, delta)) changed = true;
-      tok->extra_cost =
-          tok_extra_cost;  // will be +infinity or <= lattice_beam_.
-    }
-  }  // while changed
-}
-
-template <typename FST, typename Token>
-BaseFloat LatticeFasterDecoderTpl<FST, Token>::FinalRelativeCost() const {
-  if (!decoding_finalized_) {
-    BaseFloat relative_cost;
-    ComputeFinalCosts(NULL, &relative_cost, NULL);
-    return relative_cost;
-  } else {
-    // we're not allowed to call that function if FinalizeDecoding() has
-    // been called; return a cached value.
-    return final_relative_cost_;
-  }
-}
-
-// Prune away any tokens on this frame that have no forward links.
-// [we don't do this in PruneForwardLinks because it would give us
-// a problem with dangling pointers].
-// It's called by PruneActiveTokens if any forward links have been pruned
-template <typename FST, typename Token>
-void LatticeFasterDecoderTpl<FST, Token>::PruneTokensForFrame(
-    int32 frame_plus_one) {
-  KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size());
-  Token *&toks = active_toks_[frame_plus_one].toks;
-  if (toks == NULL) KALDI_WARN << "No tokens alive [doing pruning]";
-  Token *tok, *next_tok, *prev_tok = NULL;
-  for (tok = toks; tok != NULL; tok = next_tok) {
-    next_tok = tok->next;
-    if (tok->extra_cost == std::numeric_limits<BaseFloat>::infinity()) {
-      // token is unreachable from end of graph; (no forward links survived)
-      // excise tok from list and delete tok.
-      if (prev_tok != NULL)
-        prev_tok->next = tok->next;
-      else
-        toks = tok->next;
-      delete tok;
-      num_toks_--;
-    } else {  // fetch next Token
-      prev_tok = tok;
-    }
-  }
-}
-
-// Go backwards through still-alive tokens, pruning them, starting not from
-// the current frame (where we want to keep all tokens) but from the frame
-// before that.  We go backwards through the frames and stop when we reach a
-// point where the delta-costs are not changing (and the delta controls when we
-// consider a cost to have "not changed").
-template <typename FST, typename Token>
-void LatticeFasterDecoderTpl<FST, Token>::PruneActiveTokens(BaseFloat delta) {
-  int32 cur_frame_plus_one = NumFramesDecoded();
-  int32 num_toks_begin = num_toks_;
-  // The index "f" below represents a "frame plus one", i.e. you'd have to
-  // subtract one to get the corresponding index for the decodable object.
-  for (int32 f = cur_frame_plus_one - 1; f >= 0; f--) {
-    // Reason why we need to prune forward links in this situation:
-    // (1) we have never pruned them (new TokenList)
-    // (2) we have not yet pruned the forward links to the next f,
-    // after any of those tokens have changed their extra_cost.
- if (active_toks_[f].must_prune_forward_links) { - bool extra_costs_changed = false, links_pruned = false; - PruneForwardLinks(f, &extra_costs_changed, &links_pruned, delta); - if (extra_costs_changed && f > 0) // any token has changed extra_cost - active_toks_[f - 1].must_prune_forward_links = true; - if (links_pruned) // any link was pruned - active_toks_[f].must_prune_tokens = true; - active_toks_[f].must_prune_forward_links = false; // job done - } - if (f + 1 < cur_frame_plus_one && // except for last f (no forward links) - active_toks_[f + 1].must_prune_tokens) { - PruneTokensForFrame(f + 1); - active_toks_[f + 1].must_prune_tokens = false; - } - } - KALDI_VLOG(4) << "PruneActiveTokens: pruned tokens from " << num_toks_begin - << " to " << num_toks_; -} - -template -void LatticeFasterDecoderTpl::ComputeFinalCosts( - unordered_map *final_costs, - BaseFloat *final_relative_cost, BaseFloat *final_best_cost) const { - KALDI_ASSERT(!decoding_finalized_); - if (final_costs != NULL) final_costs->clear(); - const Elem *final_toks = toks_.GetList(); - BaseFloat infinity = std::numeric_limits::infinity(); - BaseFloat best_cost = infinity, best_cost_with_final = infinity; - - while (final_toks != NULL) { - StateId state = final_toks->key; - Token *tok = final_toks->val; - const Elem *next = final_toks->tail; - BaseFloat final_cost = fst_->Final(state).Value(); - BaseFloat cost = tok->tot_cost, cost_with_final = cost + final_cost; - best_cost = std::min(cost, best_cost); - best_cost_with_final = std::min(cost_with_final, best_cost_with_final); - if (final_costs != NULL && final_cost != infinity) - (*final_costs)[tok] = final_cost; - final_toks = next; - } - if (final_relative_cost != NULL) { - if (best_cost == infinity && best_cost_with_final == infinity) { - // Likely this will only happen if there are no tokens surviving. - // This seems the least bad way to handle it. - *final_relative_cost = infinity; - } else { - *final_relative_cost = best_cost_with_final - best_cost; - } - } - if (final_best_cost != NULL) { - if (best_cost_with_final != infinity) { // final-state exists. - *final_best_cost = best_cost_with_final; - } else { // no final-state exists. - *final_best_cost = best_cost; - } - } -} - -template -void LatticeFasterDecoderTpl::AdvanceDecoding( - DecodableInterface *decodable, int32 max_num_frames) { - if (std::is_same >::value) { - // if the type 'FST' is the FST base-class, then see if the FST type of fst_ - // is actually VectorFst or ConstFst. If so, call the AdvanceDecoding() - // function after casting *this to the more specific type. - if (fst_->Type() == "const") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } else if (fst_->Type() == "vector") { - LatticeFasterDecoderTpl, Token> *this_cast = - reinterpret_cast< - LatticeFasterDecoderTpl, Token> *>( - this); - this_cast->AdvanceDecoding(decodable, max_num_frames); - return; - } - } - - KALDI_ASSERT(!active_toks_.empty() && !decoding_finalized_ && - "You must call InitDecoding() before AdvanceDecoding"); - int32 num_frames_ready = decodable->NumFramesReady(); - // num_frames_ready must be >= num_frames_decoded, or else - // the number of frames ready must have decreased (which doesn't - // make sense) or the decodable object changed between calls - // (which isn't allowed). 
- KALDI_ASSERT(num_frames_ready >= NumFramesDecoded()); - int32 target_frames_decoded = num_frames_ready; - if (max_num_frames >= 0) - target_frames_decoded = - std::min(target_frames_decoded, NumFramesDecoded() + max_num_frames); - while (NumFramesDecoded() < target_frames_decoded) { - if (NumFramesDecoded() % config_.prune_interval == 0) { - PruneActiveTokens(config_.lattice_beam * config_.prune_scale); - } - BaseFloat cost_cutoff = ProcessEmitting(decodable); - ProcessNonemitting(cost_cutoff); - } -} - -// FinalizeDecoding() is a version of PruneActiveTokens that we call -// (optionally) on the final frame. Takes into account the final-prob of -// tokens. This function used to be called PruneActiveTokensFinal(). -template -void LatticeFasterDecoderTpl::FinalizeDecoding() { - int32 final_frame_plus_one = NumFramesDecoded(); - int32 num_toks_begin = num_toks_; - // PruneForwardLinksFinal() prunes final frame (with final-probs), and - // sets decoding_finalized_. - PruneForwardLinksFinal(); - for (int32 f = final_frame_plus_one - 1; f >= 0; f--) { - bool b1, b2; // values not used. - BaseFloat dontcare = 0.0; // delta of zero means we must always update - PruneForwardLinks(f, &b1, &b2, dontcare); - PruneTokensForFrame(f + 1); - } - PruneTokensForFrame(0); - KALDI_VLOG(4) << "pruned tokens from " << num_toks_begin << " to " - << num_toks_; -} - -/// Gets the weight cutoff. Also counts the active tokens. -template -BaseFloat LatticeFasterDecoderTpl::GetCutoff( - Elem *list_head, size_t *tok_count, BaseFloat *adaptive_beam, - Elem **best_elem) { - BaseFloat best_weight = std::numeric_limits::infinity(); - // positive == high cost == bad. - size_t count = 0; - if (config_.max_active == std::numeric_limits::max() && - config_.min_active == 0) { - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = static_cast(e->val->tot_cost); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - if (adaptive_beam != NULL) *adaptive_beam = config_.beam; - return best_weight + config_.beam; - } else { - tmp_array_.clear(); - for (Elem *e = list_head; e != NULL; e = e->tail, count++) { - BaseFloat w = e->val->tot_cost; - tmp_array_.push_back(w); - if (w < best_weight) { - best_weight = w; - if (best_elem) *best_elem = e; - } - } - if (tok_count != NULL) *tok_count = count; - - BaseFloat beam_cutoff = best_weight + config_.beam, - min_active_cutoff = std::numeric_limits::infinity(), - max_active_cutoff = std::numeric_limits::infinity(); - - KALDI_VLOG(6) << "Number of tokens active on frame " << NumFramesDecoded() - << " is " << tmp_array_.size(); - - if (tmp_array_.size() > static_cast(config_.max_active)) { - std::nth_element(tmp_array_.begin(), - tmp_array_.begin() + config_.max_active, - tmp_array_.end()); - max_active_cutoff = tmp_array_[config_.max_active]; - } - if (max_active_cutoff < beam_cutoff) { // max_active is tighter than beam. - if (adaptive_beam) - *adaptive_beam = max_active_cutoff - best_weight + config_.beam_delta; - return max_active_cutoff; - } - if (tmp_array_.size() > static_cast(config_.min_active)) { - if (config_.min_active == 0) { - min_active_cutoff = best_weight; - } else { - std::nth_element( - tmp_array_.begin(), tmp_array_.begin() + config_.min_active, - tmp_array_.size() > static_cast(config_.max_active) - ? 
tmp_array_.begin() + config_.max_active - : tmp_array_.end()); - min_active_cutoff = tmp_array_[config_.min_active]; - } - } - if (min_active_cutoff > beam_cutoff) { // min_active is looser than beam. - if (adaptive_beam) - *adaptive_beam = min_active_cutoff - best_weight + config_.beam_delta; - return min_active_cutoff; - } else { - *adaptive_beam = config_.beam; - return beam_cutoff; - } - } -} - -template -BaseFloat LatticeFasterDecoderTpl::ProcessEmitting( - DecodableInterface *decodable) { - KALDI_ASSERT(active_toks_.size() > 0); - int32 frame = - active_toks_.size() - 1; // frame is the frame-index - // (zero-based) used to get likelihoods - // from the decodable object. - active_toks_.resize(active_toks_.size() + 1); - - Elem *final_toks = - toks_.Clear(); // analogous to swapping prev_toks_ / cur_toks_ - // in simple-decoder.h. Removes the Elems from - // being indexed in the hash in toks_. - Elem *best_elem = NULL; - BaseFloat adaptive_beam; - size_t tok_cnt; - BaseFloat cur_cutoff = - GetCutoff(final_toks, &tok_cnt, &adaptive_beam, &best_elem); - KALDI_VLOG(6) << "Adaptive beam on frame " << NumFramesDecoded() << " is " - << adaptive_beam; - - PossiblyResizeHash( - tok_cnt); // This makes sure the hash is always big enough. - - BaseFloat next_cutoff = std::numeric_limits::infinity(); - // pruning "online" before having seen all tokens - - BaseFloat cost_offset = 0.0; // Used to keep probabilities in a good - // dynamic range. - - // First process the best token to get a hopefully - // reasonably tight bound on the next cutoff. The only - // products of the next block are "next_cutoff" and "cost_offset". - if (best_elem) { - StateId state = best_elem->key; - Token *tok = best_elem->val; - cost_offset = -tok->tot_cost; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. - BaseFloat new_weight = arc.weight.Value() + cost_offset - - decodable->LogLikelihood(frame, arc.ilabel) + - tok->tot_cost; - if (state != arc.nextstate) { - new_weight += config_.length_penalty; - } - if (new_weight + adaptive_beam < next_cutoff) - next_cutoff = new_weight + adaptive_beam; - } - } - } - - // Store the offset on the acoustic likelihoods that we're applying. - // Could just do cost_offsets_.push_back(cost_offset), but we - // do it this way as it's more robust to future code changes. - cost_offsets_.resize(frame + 1, 0.0); - cost_offsets_[frame] = cost_offset; - - // the tokens are now owned here, in final_toks, and the hash is empty. - // 'owned' is a complex thing here; the point is we need to call DeleteElem - // on each elem 'e' to let toks_ know we're done with them. - for (Elem *e = final_toks, *e_tail; e != NULL; e = e_tail) { - // loop this way because we delete "e" as we go. - StateId state = e->key; - Token *tok = e->val; - if (tok->tot_cost <= cur_cutoff) { - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { // propagate.. 
- BaseFloat ac_cost = cost_offset - - decodable->LogLikelihood(frame, arc.ilabel), - graph_cost = arc.weight.Value(); - if (state != arc.nextstate) { - graph_cost += config_.length_penalty; - } - BaseFloat cur_cost = tok->tot_cost, - tot_cost = cur_cost + ac_cost + graph_cost; - if (tot_cost >= next_cutoff) - continue; - else if (tot_cost + adaptive_beam < next_cutoff) - next_cutoff = - tot_cost + adaptive_beam; // prune by best current token - // Note: the frame indexes into active_toks_ are one-based, - // hence the + 1. - Elem *e_next = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, NULL); - // NULL: no change indicator needed - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_next->val->context_state = tok->context_state; - } else { - e_next->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - // Add ForwardLink from tok to next_tok (put on head of list - // tok->links) - tok->links = new ForwardLinkT(e_next->val, arc.ilabel, arc.olabel, - graph_cost, ac_cost, is_start_boundary, - is_end_boundary, tok->links); - tok->links->context_score = context_score; - } - } // for all arcs - } - e_tail = e->tail; - toks_.Delete(e); // delete Elem - } - return next_cutoff; -} - -// static inline -template -void LatticeFasterDecoderTpl::DeleteForwardLinks(Token *tok) { - ForwardLinkT *l = tok->links, *m; - while (l != NULL) { - m = l->next; - delete l; - l = m; - } - tok->links = NULL; -} - -template -void LatticeFasterDecoderTpl::ProcessNonemitting(BaseFloat cutoff) { - KALDI_ASSERT(!active_toks_.empty()); - int32 frame = static_cast(active_toks_.size()) - 2; - // Note: "frame" is the time-index we just processed, or -1 if - // we are processing the nonemitting transitions before the - // first frame (called from InitDecoding()). - - // Processes nonemitting arcs for one frame. Propagates within toks_. - // Note-- this queue structure is not very optimal as - // it may cause us to process states unnecessarily (e.g. more than once), - // but in the baseline code, turning this vector into a set to fix this - // problem did not improve overall speed. - - KALDI_ASSERT(queue_.empty()); - - if (toks_.GetList() == NULL) { - if (!warned_) { - KALDI_WARN << "Error, no surviving tokens: frame is " << frame; - warned_ = true; - } - } - - int before = 0, after = 0; - for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail) { - StateId state = e->key; - if (fst_->NumInputEpsilons(state) != 0) queue_.push_back(e); - ++before; - } - - while (!queue_.empty()) { - ++after; - const Elem *e = queue_.back(); - queue_.pop_back(); - - StateId state = e->key; - Token *tok = - e->val; // would segfault if e is a NULL pointer but this can't happen. - BaseFloat cur_cost = tok->tot_cost; - if (cur_cost >= cutoff) // Don't bother processing successors. - continue; - // If "tok" has any existing forward links, delete them, - // because we're about to regenerate them. This is a kind - // of non-optimality (remember, this is the simple decoder), - // but since most states are emitting it's not a huge issue. - DeleteForwardLinks(tok); // necessary when re-visiting - tok->links = NULL; - for (fst::ArcIterator aiter(*fst_, state); !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel == 0) { // propagate nonemitting only... 
- BaseFloat graph_cost = arc.weight.Value(), - tot_cost = cur_cost + graph_cost; - if (tot_cost < cutoff) { - bool changed; - - Elem *e_new = - FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, &changed); - - bool is_start_boundary = false; - bool is_end_boundary = false; - float context_score = 0; - if (context_graph_) { - if (arc.olabel == 0) { - e_new->val->context_state = tok->context_state; - } else { - e_new->val->context_state = context_graph_->GetNextState( - tok->context_state, arc.olabel, &context_score, - &is_start_boundary, &is_end_boundary); - graph_cost -= context_score; - } - } - - tok->links = - new ForwardLinkT(e_new->val, 0, arc.olabel, graph_cost, 0, - is_start_boundary, is_end_boundary, tok->links); - tok->links->context_score = context_score; - - // "changed" tells us whether the new token has a different - // cost from before, or is new [if so, add into queue]. - if (changed && fst_->NumInputEpsilons(arc.nextstate) != 0) - queue_.push_back(e_new); - } - } - } // for all arcs - } // while queue not empty - KALDI_VLOG(3) << "ProcessNonemitting " << before << " " << after; -} - -template -void LatticeFasterDecoderTpl::DeleteElems(Elem *list) { - for (Elem *e = list, *e_tail; e != NULL; e = e_tail) { - e_tail = e->tail; - toks_.Delete(e); - } -} - -template -void LatticeFasterDecoderTpl< - FST, Token>::ClearActiveTokens() { // a cleanup routine, at utt end/begin - for (size_t i = 0; i < active_toks_.size(); i++) { - // Delete all tokens alive on this frame, and any forward - // links they may have. - for (Token *tok = active_toks_[i].toks; tok != NULL;) { - DeleteForwardLinks(tok); - Token *next_tok = tok->next; - delete tok; - num_toks_--; - tok = next_tok; - } - } - active_toks_.clear(); - KALDI_ASSERT(num_toks_ == 0); -} - -// static -template -void LatticeFasterDecoderTpl::TopSortTokens( - Token *tok_list, std::vector *topsorted_list) { - unordered_map token2pos; - using std::unordered_set; - typedef typename unordered_map::iterator IterType; - int32 num_toks = 0; - for (Token *tok = tok_list; tok != NULL; tok = tok->next) num_toks++; - int32 cur_pos = 0; - // We assign the tokens numbers num_toks - 1, ... , 2, 1, 0. - // This is likely to be in closer to topological order than - // if we had given them ascending order, because of the way - // new tokens are put at the front of the list. - for (Token *tok = tok_list; tok != NULL; tok = tok->next) - token2pos[tok] = num_toks - ++cur_pos; - - unordered_set reprocess; - - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) { - Token *tok = iter->first; - int32 pos = iter->second; - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - // We only need to consider epsilon links, since non-epsilon links - // transition between frames and this function only needs to sort a list - // of tokens from a single frame. - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { // another token on this - // frame, so must consider it. - int32 next_pos = following_iter->second; - if (next_pos < pos) { // reassign the position of the next Token. - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - // In case we had previously assigned this token to be reprocessed, we can - // erase it from that set because it's "happy now" (we just processed it). - reprocess.erase(tok); - } - - size_t max_loop = 1000000, - loop_count; // max_loop is to detect epsilon cycles. 
- for (loop_count = 0; !reprocess.empty() && loop_count < max_loop; - ++loop_count) { - std::vector reprocess_vec; - for (typename unordered_set::iterator iter = reprocess.begin(); - iter != reprocess.end(); ++iter) - reprocess_vec.push_back(*iter); - reprocess.clear(); - for (typename std::vector::iterator iter = reprocess_vec.begin(); - iter != reprocess_vec.end(); ++iter) { - Token *tok = *iter; - int32 pos = token2pos[tok]; - // Repeat the processing we did above (for comments, see above). - for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) { - if (link->ilabel == 0) { - IterType following_iter = token2pos.find(link->next_tok); - if (following_iter != token2pos.end()) { - int32 next_pos = following_iter->second; - if (next_pos < pos) { - following_iter->second = cur_pos++; - reprocess.insert(link->next_tok); - } - } - } - } - } - } - KALDI_ASSERT(loop_count < max_loop && - "Epsilon loops exist in your decoding " - "graph (this is not allowed!)"); - - topsorted_list->clear(); - topsorted_list->resize(cur_pos, - NULL); // create a list with NULLs in between. - for (IterType iter = token2pos.begin(); iter != token2pos.end(); ++iter) - (*topsorted_list)[iter->second] = iter->first; -} - -// Instantiate the template for the combination of token types and FST types -// that we'll need. -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; -template class LatticeFasterDecoderTpl, - decoder::StdToken>; - -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -template class LatticeFasterDecoderTpl, - decoder::BackpointerToken>; -// template class LatticeFasterDecoderTpl; template class -// LatticeFasterDecoderTpl; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-decoder.h deleted file mode 100644 index 0152b85447e354b770745b748d266b1ca2d57024..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-decoder.h +++ /dev/null @@ -1,558 +0,0 @@ -// decoder/lattice-faster-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen -// 2021 Binbin Zhang, Zhendong Peng - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef KALDI_DECODER_LATTICE_FASTER_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_DECODER_H_ - -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "decoder/context_graph.h" -#include "fst/fstlib.h" -#include "fstext/fstext-lib.h" -#include "itf/decodable-itf.h" -#include "lat/determinize-lattice-pruned.h" -#include "lat/kaldi-lattice.h" -#include "util/hash-list.h" - -namespace kaldi { - -struct LatticeFasterDecoderConfig { - BaseFloat beam; - int32 max_active; - int32 min_active; - BaseFloat lattice_beam; - int32 prune_interval; - bool determinize_lattice; // not inspected by this class... used in - // command-line program. - BaseFloat beam_delta; - BaseFloat hash_ratio; - // Note: we don't make prune_scale configurable on the command line, it's not - // a very important parameter. It affects the algorithm that prunes the - // tokens as we go. - BaseFloat prune_scale; - BaseFloat length_penalty; // for balancing the del/ins ratio, suggested -3.0 - - // Most of the options inside det_opts are not actually queried by the - // LatticeFasterDecoder class itself, but by the code that calls it, for - // example in the function DecodeUtteranceLatticeFaster. - fst::DeterminizeLatticePhonePrunedOptions det_opts; - - LatticeFasterDecoderConfig() - : beam(16.0), - max_active(std::numeric_limits::max()), - min_active(200), - lattice_beam(10.0), - prune_interval(25), - determinize_lattice(true), - beam_delta(0.5), - hash_ratio(2.0), - prune_scale(0.1), - length_penalty(0.0) {} - void Register(OptionsItf *opts) { - det_opts.Register(opts); - opts->Register("beam", &beam, - "Decoding beam. Larger->slower, more accurate."); - opts->Register("max-active", &max_active, - "Decoder max active states. Larger->slower; " - "more accurate"); - opts->Register("min-active", &min_active, - "Decoder minimum #active states."); - opts->Register("lattice-beam", &lattice_beam, - "Lattice generation beam. Larger->slower, " - "and deeper lattices"); - opts->Register("prune-interval", &prune_interval, - "Interval (in frames) at " - "which to prune tokens"); - opts->Register( - "determinize-lattice", &determinize_lattice, - "If true, " - "determinize the lattice (lattice-determinization, keeping only " - "best pdf-sequence for each word-sequence)."); - opts->Register( - "beam-delta", &beam_delta, - "Increment used in decoding-- this " - "parameter is obscure and relates to a speedup in the way the " - "max-active constraint is applied. Larger is more accurate."); - opts->Register("hash-ratio", &hash_ratio, - "Setting used in decoder to " - "control hash behavior"); - } - void Check() const { - KALDI_ASSERT(beam > 0.0 && max_active > 1 && lattice_beam > 0.0 && - min_active <= max_active && prune_interval > 0 && - beam_delta > 0.0 && hash_ratio >= 1.0 && prune_scale > 0.0 && - prune_scale < 1.0); - } -}; - -namespace decoder { -// We will template the decoder on the token type as well as the FST type; this -// is a mechanism so that we can use the same underlying decoder code for -// versions of the decoder that support quickly getting the best path -// (LatticeFasterOnlineDecoder, see lattice-faster-online-decoder.h) and also -// those that do not (LatticeFasterDecoder). - -// ForwardLinks are the links from a token to a token on the next frame. -// or sometimes on the current frame (for input-epsilon links). 
-template -struct ForwardLink { - using Label = fst::StdArc::Label; - - Token *next_tok; // the next token [or NULL if represents final-state] - Label ilabel; // ilabel on arc - Label olabel; // olabel on arc - BaseFloat graph_cost; // graph cost of traversing arc (contains LM, etc.) - BaseFloat acoustic_cost; // acoustic cost (pre-scaled) of traversing arc - bool is_start_boundary; - bool is_end_boundary; - float context_score; - ForwardLink *next; // next in singly-linked list of forward arcs (arcs - // in the state-level lattice) from a token. - inline ForwardLink(Token *next_tok, Label ilabel, Label olabel, - BaseFloat graph_cost, BaseFloat acoustic_cost, - bool is_start_boundary, bool is_end_boundary, - ForwardLink *next) - : next_tok(next_tok), - ilabel(ilabel), - olabel(olabel), - graph_cost(graph_cost), - acoustic_cost(acoustic_cost), - is_start_boundary(is_start_boundary), - is_end_boundary(is_end_boundary), - context_score(0), - next(next) {} -}; - -struct StdToken { - using ForwardLinkT = ForwardLink; - using Token = StdToken; - - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. After calling PruneForwardLinks, this equals the - // minimum difference between the cost of the best path that this link is a - // part of, and the cost of the absolute best path, under the assumption that - // any of the currently active states at the decoding front may eventually - // succeed (e.g. if you were to take the currently active states one by one - // and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - Token *next; - - // This function does nothing and should be optimized out; it's needed - // so we can share the regular LatticeFasterDecoderTpl code and the code - // for LatticeFasterOnlineDecoder that supports fast traceback. - inline void SetBackpointer(Token *backpointer) {} - - // This constructor just ignores the 'backpointer' argument. That argument is - // needed so that we can use the same decoder code for LatticeFasterDecoderTpl - // and LatticeFasterOnlineDecoderTpl (which needs backpointers to support a - // fast way to obtain the best path). - inline StdToken(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLinkT *links, - Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - context_state(0), - next(next) {} -}; - -struct BackpointerToken { - using ForwardLinkT = ForwardLink; - using Token = BackpointerToken; - - // BackpointerToken is like Token but also - // Standard token type for LatticeFasterDecoder. Each active HCLG - // (decoding-graph) state on each frame has one token. - - // tot_cost is the total (LM + acoustic) cost from the beginning of the - // utterance up to this point. (but see cost_offset_, which is subtracted - // to keep it in a good numerical range). - BaseFloat tot_cost; - - // exta_cost is >= 0. 
After calling PruneForwardLinks, this equals - // the minimum difference between the cost of the best path, and the cost of - // this is on, and the cost of the absolute best path, under the assumption - // that any of the currently active states at the decoding front may - // eventually succeed (e.g. if you were to take the currently active states - // one by one and compute this difference, and then take the minimum). - BaseFloat extra_cost; - - int context_state = 0; - - // 'links' is the head of singly-linked list of ForwardLinks, which is what we - // use for lattice generation. - ForwardLinkT *links; - - // 'next' is the next in the singly-linked list of tokens for this frame. - BackpointerToken *next; - - // Best preceding BackpointerToken (could be a on this frame, connected to - // this via an epsilon transition, or on a previous frame). This is only - // required for an efficient GetBestPath function in - // LatticeFasterOnlineDecoderTpl; it plays no part in the lattice generation - // (the "links" list is what stores the forward links, for that). - Token *backpointer; - - inline void SetBackpointer(Token *backpointer) { - this->backpointer = backpointer; - } - - inline BackpointerToken(BaseFloat tot_cost, BaseFloat extra_cost, - ForwardLinkT *links, Token *next, Token *backpointer) - : tot_cost(tot_cost), - extra_cost(extra_cost), - links(links), - next(next), - backpointer(backpointer), - context_state(0) {} -}; - -} // namespace decoder - -/** This is the "normal" lattice-generating decoder. - See \ref lattices_generation \ref decoders_faster and \ref decoders_simple - for more information. - - The decoder is templated on the FST type and the token type. The token type - will normally be StdToken, but also may be BackpointerToken which is to - support quick lookup of the current best path (see - lattice-faster-online-decoder.h) - - The FST you invoke this decoder which is expected to equal - Fst::Fst, a.k.a. StdFst, or GrammarFst. If you invoke it with - FST == StdFst and it notices that the actual FST type is - fst::VectorFst or fst::ConstFst, the decoder object - will internally cast itself to one that is templated on those more specific - types; this is an optimization for speed. - */ -template -class LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph); - - // This version of the constructor takes ownership of the fst, and will delete - // it when this object is destroyed. - LatticeFasterDecoderTpl(const LatticeFasterDecoderConfig &config, FST *fst); - - void SetOptions(const LatticeFasterDecoderConfig &config) { - config_ = config; - } - - const LatticeFasterDecoderConfig &GetOptions() const { return config_; } - - ~LatticeFasterDecoderTpl(); - - /// Decodes until there are no more frames left in the "decodable" object.. - /// note, this may block waiting for input if the "decodable" object blocks. - /// Returns true if any kind of traceback is available (not necessarily from a - /// final state). - bool Decode(DecodableInterface *decodable); - - /// says whether a final-state was active on the last frame. 
If it was not, - /// the lattice (or traceback) will end with states that are not final-states. - bool ReachedFinal() const { - return FinalRelativeCost() != std::numeric_limits::infinity(); - } - - /// Outputs an FST corresponding to the single best path through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. Note: this just calls - /// GetRawLattice() and figures out the shortest path. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// Outputs an FST corresponding to the raw, state-level - /// tracebacks. Returns true if result is nonempty. - /// If "use_final_probs" is true AND we reached the final-state - /// of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - /// The raw lattice will be topologically sorted. - /// - /// See also GetRawLatticePruned in lattice-faster-online-decoder.h, - /// which also supports a pruning beam, in case for some reason - /// you want it pruned tighter than the regular lattice beam. - /// We could put that here in future needed. - bool GetRawLattice(Lattice *ofst, bool use_final_probs = true) const; - - /// [Deprecated, users should now use GetRawLattice and determinize it - /// themselves, e.g. using DeterminizeLatticePhonePrunedWrapper]. - /// Outputs an FST corresponding to the lattice-determinized - /// lattice (one path per word sequence). Returns true if result is - /// nonempty. If "use_final_probs" is true AND we reached the final-state of - /// the graph then it will include those as final-probs, else it will treat - /// all final-probs as one. - bool GetLattice(CompactLattice *ofst, bool use_final_probs = true) const; - - /// InitDecoding initializes the decoding, and should only be used if you - /// intend to call AdvanceDecoding(). If you call Decode(), you don't need to - /// call this. You can also call InitDecoding if you have already decoded an - /// utterance and want to start with a new utterance. - void InitDecoding(); - - /// This will decode until there are no more frames ready in the decodable - /// object. You can keep calling it each time more frames become available. - /// If max_num_frames is specified, it specifies the maximum number of frames - /// the function will decode before returning. - void AdvanceDecoding(DecodableInterface *decodable, - int32 max_num_frames = -1); - - /// This function may be optionally called after AdvanceDecoding(), when you - /// do not plan to decode any further. It does an extra pruning step that - /// will help to prune the lattices output by GetLattice and (particularly) - /// GetRawLattice more completely, particularly toward the end of the - /// utterance. If you call this, you cannot call AdvanceDecoding again (it - /// will fail), and you cannot call GetLattice() and related functions with - /// use_final_probs = false. Used to be called PruneActiveTokensFinal(). - void FinalizeDecoding(); - - /// FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives - /// more information. It returns the difference between the best (final-cost - /// plus cost) of any token on the final frame, and the best cost of any token - /// on the final frame. If it is infinity it means no final-states were - /// present on the final frame. It will usually be nonnegative. 
If it not - /// too positive (e.g. < 5 is my first guess, but this is not tested) you can - /// take it as a good indication that we reached the final-state with - /// reasonable likelihood. - BaseFloat FinalRelativeCost() const; - - // Returns the number of frames decoded so far. The value returned changes - // whenever we call ProcessEmitting(). - inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; } - - protected: - // we make things protected instead of private, as code in - // LatticeFasterOnlineDecoderTpl, which inherits from this, also uses the - // internals. - - // Deletes the elements of the singly linked list tok->links. - inline static void DeleteForwardLinks(Token *tok); - - // head of per-frame list of Tokens (list is in topological order), - // and something saying whether we ever pruned it using PruneForwardLinks. - struct TokenList { - Token *toks; - bool must_prune_forward_links; - bool must_prune_tokens; - TokenList() - : toks(NULL), must_prune_forward_links(true), must_prune_tokens(true) {} - }; - - using Elem = typename HashList::Elem; - // Equivalent to: - // struct Elem { - // StateId key; - // Token *val; - // Elem *tail; - // }; - - void PossiblyResizeHash(size_t num_toks); - - // FindOrAddToken either locates a token in hash of toks_, or if necessary - // inserts a new, empty token (i.e. with no forward links) for the current - // frame. [note: it's inserted if necessary into hash toks_ and also into the - // singly linked list of tokens active on this frame (whose head is at - // active_toks_[frame]). The frame_plus_one argument is the acoustic frame - // index plus one, which is used to index into the active_toks_ array. - // Returns the Token pointer. Sets "changed" (if non-NULL) to true if the - // token was newly created or the cost changed. - // If Token == StdToken, the 'backpointer' argument has no purpose (and will - // hopefully be optimized out). - inline Elem *FindOrAddToken(StateId state, int32 frame_plus_one, - BaseFloat tot_cost, Token *backpointer, - bool *changed); - - // prunes outgoing links for all tokens in active_toks_[frame] - // it's called by PruneActiveTokens - // all links, that have link_extra_cost > lattice_beam are pruned - // delta is the amount by which the extra_costs must change - // before we set *extra_costs_changed = true. - // If delta is larger, we'll tend to go back less far - // toward the beginning of the file. - // extra_costs_changed is set to true if extra_cost was changed for any token - // links_pruned is set to true if any link in any token was pruned - void PruneForwardLinks(int32 frame_plus_one, bool *extra_costs_changed, - bool *links_pruned, BaseFloat delta); - - // This function computes the final-costs for tokens active on the final - // frame. It outputs to final-costs, if non-NULL, a map from the Token* - // pointer to the final-prob of the corresponding state, for all Tokens - // that correspond to states that have final-probs. This map will be - // empty if there were no final-probs. It outputs to - // final_relative_cost, if non-NULL, the difference between the best - // forward-cost including the final-prob cost, and the best forward-cost - // without including the final-prob cost (this will usually be positive), or - // infinity if there were no final-probs. [c.f. FinalRelativeCost(), which - // outputs this quanitity]. 
It outputs to final_best_cost, if - // non-NULL, the lowest for any token t active on the final frame, of - // forward-cost[t] + final-cost[t], where final-cost[t] is the final-cost in - // the graph of the state corresponding to token t, or the best of - // forward-cost[t] if there were no final-probs active on the final frame. - // You cannot call this after FinalizeDecoding() has been called; in that - // case you should get the answer from class-member variables. - void ComputeFinalCosts(unordered_map *final_costs, - BaseFloat *final_relative_cost, - BaseFloat *final_best_cost) const; - - // PruneForwardLinksFinal is a version of PruneForwardLinks that we call - // on the final frame. If there are final tokens active, it uses - // the final-probs for pruning, otherwise it treats all tokens as final. - void PruneForwardLinksFinal(); - - // Prune away any tokens on this frame that have no forward links. - // [we don't do this in PruneForwardLinks because it would give us - // a problem with dangling pointers]. - // It's called by PruneActiveTokens if any forward links have been pruned - void PruneTokensForFrame(int32 frame_plus_one); - - // Go backwards through still-alive tokens, pruning them if the - // forward+backward cost is more than lat_beam away from the best path. It's - // possible to prove that this is "correct" in the sense that we won't lose - // anything outside of lat_beam, regardless of what happens in the future. - // delta controls when it considers a cost to have changed enough to continue - // going backward and propagating the change. larger delta -> will recurse - // less far. - void PruneActiveTokens(BaseFloat delta); - - /// Gets the weight cutoff. Also counts the active tokens. - BaseFloat GetCutoff(Elem *list_head, size_t *tok_count, - BaseFloat *adaptive_beam, Elem **best_elem); - - /// Processes emitting arcs for one frame. Propagates from prev_toks_ to - /// cur_toks_. Returns the cost cutoff for subsequent ProcessNonemitting() to - /// use. - BaseFloat ProcessEmitting(DecodableInterface *decodable); - - /// Processes nonemitting (epsilon) arcs for one frame. Called after - /// ProcessEmitting() on each frame. The cost cutoff is computed by the - /// preceding ProcessEmitting(). - void ProcessNonemitting(BaseFloat cost_cutoff); - - // HashList defined in ../util/hash-list.h. It actually allows us to maintain - // more than one list (e.g. for current and previous frames), but only one of - // them at a time can be indexed by StateId. It is indexed by frame-index - // plus one, where the frame-index is zero-based, as used in decodable object. - // That is, the emitting probs of frame t are accounted for in tokens at - // toks_[t+1]. The zeroth frame is for nonemitting transition at the start of - // the graph. - HashList toks_; - - std::vector active_toks_; // Lists of tokens, indexed by - // frame (members of TokenList are toks, must_prune_forward_links, - // must_prune_tokens). - std::vector - queue_; // temp variable used in ProcessNonemitting, - std::vector tmp_array_; // used in GetCutoff. - - // fst_ is a pointer to the FST we are decoding from. - const FST *fst_; - // delete_fst_ is true if the pointer fst_ needs to be deleted when this - // object is destroyed. - bool delete_fst_; - - std::vector cost_offsets_; // This contains, for each - // frame, an offset that was added to the acoustic log-likelihoods on that - // frame in order to keep everything in a nice dynamic range i.e. close to - // zero, to reduce roundoff errors. 
- LatticeFasterDecoderConfig config_; - int32 num_toks_; // current total #toks allocated... - bool warned_; - - /// decoding_finalized_ is true if someone called FinalizeDecoding(). [note, - /// calling this is optional]. If true, it's forbidden to decode more. Also, - /// if this is set, then the output of ComputeFinalCosts() is in the next - /// three variables. The reason we need to do this is that after - /// FinalizeDecoding() calls PruneTokensForFrame() for the final frame, some - /// of the tokens on the last frame are freed, so we free the list from toks_ - /// to avoid having dangling pointers hanging around. - bool decoding_finalized_; - /// For the meaning of the next 3 variables, see the comment for - /// decoding_finalized_ above., and ComputeFinalCosts(). - unordered_map final_costs_; - BaseFloat final_relative_cost_; - BaseFloat final_best_cost_; - - std::shared_ptr context_graph_ = nullptr; - - // There are various cleanup tasks... the toks_ structure contains - // singly linked lists of Token pointers, where Elem is the list type. - // It also indexes them in a hash, indexed by state (this hash is only - // maintained for the most recent frame). toks_.Clear() - // deletes them from the hash and returns the list of Elems. The - // function DeleteElems calls toks_.Delete(elem) for each elem in - // the list, which returns ownership of the Elem to the toks_ structure - // for reuse, but does not delete the Token pointer. The Token pointers - // are reference-counted and are ultimately deleted in PruneTokensForFrame, - // but are also linked together on each frame by their own linked-list, - // using the "next" pointer. We delete them manually. - void DeleteElems(Elem *list); - - // This function takes a singly linked list of tokens for a single frame, and - // outputs a list of them in topological order (it will crash if no such order - // can be found, which will typically be due to decoding graphs with epsilon - // cycles, which are not allowed). Note: the output list may contain NULLs, - // which the caller should pass over; it just happens to be more efficient for - // the algorithm to output a list that contains NULLs. - static void TopSortTokens(Token *tok_list, - std::vector *topsorted_list); - - void ClearActiveTokens(); - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterDecoderTpl); -}; - -typedef LatticeFasterDecoderTpl - LatticeFasterDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-online-decoder.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-online-decoder.cc deleted file mode 100644 index 2345b4d129ff905784762e973bad279f2fb55d31..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-online-decoder.cc +++ /dev/null @@ -1,278 +0,0 @@ -// decoder/lattice-faster-online-decoder.cc - -// Copyright 2009-2012 Microsoft Corporation Mirko Hannemann -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2014 IMSL, PKU-HKUST (author: Wei Shi) -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -// see note at the top of lattice-faster-decoder.cc, about how to maintain this -// file in sync with lattice-faster-decoder.cc - -#include -#include -#include -#include - -#include "decoder/lattice-faster-online-decoder.h" - -namespace kaldi { - -template -bool LatticeFasterOnlineDecoderTpl::TestGetBestPath( - bool use_final_probs) const { - Lattice lat1; - { - Lattice raw_lat; - this->GetRawLattice(&raw_lat, use_final_probs); - ShortestPath(raw_lat, &lat1); - } - Lattice lat2; - GetBestPath(&lat2, use_final_probs); - BaseFloat delta = 0.1; - int32 num_paths = 1; - if (!fst::RandEquivalent(lat1, lat2, num_paths, delta, rand())) { - KALDI_WARN << "Best-path test failed"; - return false; - } else { - return true; - } -} - -// Outputs an FST corresponding to the single best path through the lattice. -template -bool LatticeFasterOnlineDecoderTpl::GetBestPath( - Lattice *olat, bool use_final_probs) const { - olat->DeleteStates(); - BaseFloat final_graph_cost; - BestPathIterator iter = BestPathEnd(use_final_probs, &final_graph_cost); - if (iter.Done()) return false; // would have printed warning. - StateId state = olat->AddState(); - olat->SetFinal(state, LatticeWeight(final_graph_cost, 0.0)); - while (!iter.Done()) { - LatticeArc arc; - iter = TraceBackBestPath(iter, &arc); - arc.nextstate = state; - StateId new_state = olat->AddState(); - olat->AddArc(new_state, arc); - state = new_state; - } - olat->SetStart(state); - return true; -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::BestPathEnd( - bool use_final_probs, BaseFloat *final_cost_out) const { - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "BestPathEnd() with use_final_probs == false"; - KALDI_ASSERT(this->NumFramesDecoded() > 0 && - "You cannot call BestPathEnd if no frames were decoded."); - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - // Singly linked list of tokens on last frame (access list through "next" - // pointer). - BaseFloat best_cost = std::numeric_limits::infinity(); - BaseFloat best_final_cost = 0; - Token *best_tok = NULL; - for (Token *tok = this->active_toks_.back().toks; tok != NULL; - tok = tok->next) { - BaseFloat cost = tok->tot_cost, final_cost = 0.0; - if (use_final_probs && !final_costs.empty()) { - // if we are instructed to use final-probs, and any final tokens were - // active on final frame, include the final-prob in the cost of the token. 
- typename unordered_map::const_iterator iter = - final_costs.find(tok); - if (iter != final_costs.end()) { - final_cost = iter->second; - cost += final_cost; - } else { - cost = std::numeric_limits::infinity(); - } - } - if (cost < best_cost) { - best_cost = cost; - best_tok = tok; - best_final_cost = final_cost; - } - } - if (best_tok == - NULL) { // this should not happen, and is likely a code error or - // caused by infinities in likelihoods, but I'm not making - // it a fatal error for now. - KALDI_WARN << "No final token found."; - } - if (final_cost_out) *final_cost_out = best_final_cost; - return BestPathIterator(best_tok, this->NumFramesDecoded() - 1); -} - -template -typename LatticeFasterOnlineDecoderTpl::BestPathIterator -LatticeFasterOnlineDecoderTpl::TraceBackBestPath(BestPathIterator iter, - LatticeArc *oarc) const { - KALDI_ASSERT(!iter.Done() && oarc != NULL); - Token *tok = static_cast(iter.tok); - int32 cur_t = iter.frame, step_t = 0; - if (tok->backpointer != NULL) { - // retrieve the correct forward link(with the best link cost) - BaseFloat best_cost = std::numeric_limits::infinity(); - ForwardLinkT *link; - for (link = tok->backpointer->links; link != NULL; link = link->next) { - if (link->next_tok == tok) { // this is a link to "tok" - BaseFloat graph_cost = link->graph_cost, - acoustic_cost = link->acoustic_cost; - BaseFloat cost = graph_cost + acoustic_cost; - if (cost < best_cost) { - oarc->ilabel = link->ilabel; - oarc->olabel = link->olabel; - if (link->ilabel != 0) { - KALDI_ASSERT(static_cast(cur_t) < - this->cost_offsets_.size()); - acoustic_cost -= this->cost_offsets_[cur_t]; - step_t = -1; - } else { - step_t = 0; - } - oarc->weight = LatticeWeight(graph_cost, acoustic_cost); - best_cost = cost; - } - } - } - if (link == NULL && - best_cost == - std::numeric_limits::infinity()) { // Did not find - // correct link. - KALDI_ERR << "Error tracing best-path back (likely " - << "bug in token-pruning algorithm)"; - } - } else { - oarc->ilabel = 0; - oarc->olabel = 0; - oarc->weight = LatticeWeight::One(); // zero costs. - } - return BestPathIterator(tok->backpointer, cur_t + step_t); -} - -template -bool LatticeFasterOnlineDecoderTpl::GetRawLatticePruned( - Lattice *ofst, bool use_final_probs, BaseFloat beam) const { - typedef LatticeArc Arc; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - typedef Arc::Label Label; - - // Note: you can't use the old interface (Decode()) if you want to - // get the lattice with use_final_probs = false. You'd have to do - // InitDecoding() and then AdvanceDecoding(). - if (this->decoding_finalized_ && !use_final_probs) - KALDI_ERR << "You cannot call FinalizeDecoding() and then call " - << "GetRawLattice() with use_final_probs == false"; - - unordered_map final_costs_local; - - const unordered_map &final_costs = - (this->decoding_finalized_ ? this->final_costs_ : final_costs_local); - if (!this->decoding_finalized_ && use_final_probs) - this->ComputeFinalCosts(&final_costs_local, NULL, NULL); - - ofst->DeleteStates(); - // num-frames plus one (since frames are one-based, and we have - // an extra frame for the start-state). - int32 num_frames = this->active_toks_.size() - 1; - KALDI_ASSERT(num_frames > 0); - for (int32 f = 0; f <= num_frames; f++) { - if (this->active_toks_[f].toks == NULL) { - KALDI_WARN << "No tokens active on frame " << f - << ": not producing lattice.\n"; - return false; - } - } - unordered_map tok_map; - std::queue > tok_queue; - // First initialize the queue and states. 
Put the initial state on the queue; - // this is the last token in the list active_toks_[0].toks. - for (Token *tok = this->active_toks_[0].toks; tok != NULL; tok = tok->next) { - if (tok->next == NULL) { - tok_map[tok] = ofst->AddState(); - ofst->SetStart(tok_map[tok]); - std::pair tok_pair(tok, 0); // #frame = 0 - tok_queue.push(tok_pair); - } - } - - // Next create states for "good" tokens - while (!tok_queue.empty()) { - std::pair cur_tok_pair = tok_queue.front(); - tok_queue.pop(); - Token *cur_tok = cur_tok_pair.first; - int32 cur_frame = cur_tok_pair.second; - KALDI_ASSERT(cur_frame >= 0 && cur_frame <= this->cost_offsets_.size()); - - typename unordered_map::const_iterator iter = - tok_map.find(cur_tok); - KALDI_ASSERT(iter != tok_map.end()); - StateId cur_state = iter->second; - - for (ForwardLinkT *l = cur_tok->links; l != NULL; l = l->next) { - Token *next_tok = l->next_tok; - if (next_tok->extra_cost < beam) { - // so both the current and the next token are good; create the arc - int32 next_frame = l->ilabel == 0 ? cur_frame : cur_frame + 1; - StateId nextstate; - if (tok_map.find(next_tok) == tok_map.end()) { - nextstate = tok_map[next_tok] = ofst->AddState(); - tok_queue.push(std::pair(next_tok, next_frame)); - } else { - nextstate = tok_map[next_tok]; - } - BaseFloat cost_offset = - (l->ilabel != 0 ? this->cost_offsets_[cur_frame] : 0); - Arc arc(l->ilabel, l->olabel, - Weight(l->graph_cost, l->acoustic_cost - cost_offset), - nextstate); - ofst->AddArc(cur_state, arc); - } - } - if (cur_frame == num_frames) { - if (use_final_probs && !final_costs.empty()) { - typename unordered_map::const_iterator iter = - final_costs.find(cur_tok); - if (iter != final_costs.end()) - ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0)); - } else { - ofst->SetFinal(cur_state, LatticeWeight::One()); - } - } - } - return (ofst->NumStates() != 0); -} - -// Instantiate the template for the FST types that we'll need. -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; -template class LatticeFasterOnlineDecoderTpl >; - -} // end namespace kaldi. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-online-decoder.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-online-decoder.h deleted file mode 100644 index dc50cfa73e6574e9625eda9045c47f674fcbc1e3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/decoder/lattice-faster-online-decoder.h +++ /dev/null @@ -1,131 +0,0 @@ -// decoder/lattice-faster-online-decoder.h - -// Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann; -// 2013-2014 Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen -// 2018 Zhehuai Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -// see note at the top of lattice-faster-decoder.h, about how to maintain this -// file in sync with lattice-faster-decoder.h - -#ifndef KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ -#define KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ - -#include "decoder/lattice-faster-decoder.h" - -#include - -namespace kaldi { - -/** LatticeFasterOnlineDecoderTpl is as LatticeFasterDecoderTpl but also - supports an efficient way to get the best path (see the function - BestPathEnd()), which is useful in endpointing and in situations where you - might want to frequently access the best path. - - This is only templated on the FST type, since the Token type is required to - be BackpointerToken. Actually it only makes sense to instantiate - LatticeFasterDecoderTpl with Token == BackpointerToken if you do so - indirectly via this child class. - */ -template -class LatticeFasterOnlineDecoderTpl - : public LatticeFasterDecoderTpl { - public: - using Arc = typename FST::Arc; - using Label = typename Arc::Label; - using StateId = typename Arc::StateId; - using Weight = typename Arc::Weight; - using Token = decoder::BackpointerToken; - using ForwardLinkT = decoder::ForwardLink; - - // Instantiate this class once for each thing you have to decode. - // This version of the constructor does not take ownership of - // 'fst'. - LatticeFasterOnlineDecoderTpl( - const FST &fst, const LatticeFasterDecoderConfig &config, - const std::shared_ptr &context_graph) - : LatticeFasterDecoderTpl(fst, config, context_graph) {} - - // This version of the initializer takes ownership of 'fst', and will delete - // it when this object is destroyed. - LatticeFasterOnlineDecoderTpl(const LatticeFasterDecoderConfig &config, - FST *fst) - : LatticeFasterDecoderTpl(config, fst) {} - - struct BestPathIterator { - void *tok; - int32 frame; - // note, "frame" is the frame-index of the frame you'll get the - // transition-id for next time, if you call TraceBackBestPath on this - // iterator (assuming it's not an epsilon transition). Note that this - // is one less than you might reasonably expect, e.g. it's -1 for - // the nonemitting transitions before the first frame. - BestPathIterator(void *t, int32 f) : tok(t), frame(f) {} - bool Done() const { return tok == NULL; } - }; - - /// Outputs an FST corresponding to the single best path through the lattice. - /// This is quite efficient because it doesn't get the entire raw lattice and - /// find the best path through it; instead, it uses the BestPathEnd and - /// BestPathIterator so it basically traces it back through the lattice. - /// Returns true if result is nonempty (using the return status is deprecated, - /// it will become void). If "use_final_probs" is true AND we reached the - /// final-state of the graph then it will include those as final-probs, else - /// it will treat all final-probs as one. - bool GetBestPath(Lattice *ofst, bool use_final_probs = true) const; - - /// This function does a self-test of GetBestPath(). Returns true on - /// success; returns false and prints a warning on failure. - bool TestGetBestPath(bool use_final_probs = true) const; - - /// This function returns an iterator that can be used to trace back - /// the best path. 
If use_final_probs == true and at least one final state - /// survived till the end, it will use the final-probs in working out the best - /// final Token, and will output the final cost to *final_cost (if non-NULL), - /// else it will use only the forward likelihood, and will put zero in - /// *final_cost (if non-NULL). - /// Requires that NumFramesDecoded() > 0. - BestPathIterator BestPathEnd(bool use_final_probs, - BaseFloat *final_cost = NULL) const; - - /// This function can be used in conjunction with BestPathEnd() to trace back - /// the best path one link at a time (e.g. this can be useful in endpoint - /// detection). By "link" we mean a link in the graph; not all links cross - /// frame boundaries, but each time you see a nonzero ilabel you can interpret - /// that as a frame. The return value is the updated iterator. It outputs - /// the ilabel and olabel, and the (graph and acoustic) weight to the "arc" - /// pointer, while leaving its "nextstate" variable unchanged. - BestPathIterator TraceBackBestPath(BestPathIterator iter, - LatticeArc *arc) const; - - /// Behaves the same as GetRawLattice but only processes tokens whose - /// extra_cost is smaller than the best-cost plus the specified beam. - /// It is only worthwhile to call this function if beam is less than - /// the lattice_beam specified in the config; otherwise, it would - /// return essentially the same thing as GetRawLattice, but more slowly. - bool GetRawLatticePruned(Lattice *ofst, bool use_final_probs, - BaseFloat beam) const; - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterOnlineDecoderTpl); -}; - -typedef LatticeFasterOnlineDecoderTpl LatticeFasterOnlineDecoder; - -} // end namespace kaldi. - -#endif // KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstaddselfloops.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstaddselfloops.cc deleted file mode 100644 index 145bf006f2324136c5fea4a8d0012a7a4126c646..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstaddselfloops.cc +++ /dev/null @@ -1,100 +0,0 @@ -// fstbin/fstaddselfloops.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#include "util/simple-io-funcs.h" - -/* some test examples: - pushd ~/tmpdir - ( echo 3; echo 4) > in.list - ( echo 5; echo 6) > out.list - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstaddselfloops in.list out.list - | fstprint ( echo "0 1 0 1"; echo " 0 2 1 0"; echo "1 0"; echo "2 0"; ) | - fstcompile | fstaddselfloops in.list out.list | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Adds self-loops to states of an FST to propagate disambiguation " - "symbols through it\n" - "They are added on each final state and each state with non-epsilon " - "output symbols\n" - "on at least one arc out of the state. Useful in conjunction with " - "predeterminize\n" - "\n" - "Usage: fstaddselfloops in-disambig-list out-disambig-list [in.fst " - "[out.fst] ]\n" - "E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst\n" - "in.list and out.list are lists of integers, one per line, of the\n" - "same length.\n"; - - ParseOptions po(usage); - po.Read(argc, argv); - - if (po.NumArgs() < 2 || po.NumArgs() > 4) { - po.PrintUsage(); - exit(1); - } - - std::string disambig_in_rxfilename = po.GetArg(1), - disambig_out_rxfilename = po.GetArg(2), - fst_in_filename = po.GetOptArg(3), - fst_out_filename = po.GetOptArg(4); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - std::vector disambig_in; - if (!ReadIntegerVectorSimple(disambig_in_rxfilename, &disambig_in)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_in_rxfilename); - - std::vector disambig_out; - if (!ReadIntegerVectorSimple(disambig_out_rxfilename, &disambig_out)) - KALDI_ERR - << "fstaddselfloops: Could not read disambiguation symbols from " - << kaldi::PrintableRxfilename(disambig_out_rxfilename); - - if (disambig_in.size() != disambig_out.size()) - KALDI_ERR - << "fstaddselfloops: mismatch in size of disambiguation symbols"; - - AddSelfLoops(fst, disambig_in, disambig_out); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstdeterminizestar.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstdeterminizestar.cc deleted file mode 100644 index e818143025c0fd5d389c28c77715d65711fe63f1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstdeterminizestar.cc +++ /dev/null @@ -1,114 +0,0 @@ -// fstbin/fstdeterminizestar.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/parse-options.h" -#if !defined(_MSC_VER) && !defined(__APPLE__) -#include // Comment this line and the call to signal below if -// it causes compilation problems. It is only to enable a debugging procedure -// when determinization does not terminate. We are disabling this code if -// compiling on Windows because signal.h is not available there, and on -// MacOS due to a problem with in the initial release of Sierra. -#endif - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - ( echo "0 0 1 0"; echo "0 1 1 0"; echo "0 0" ) | fstcompile | - fstdeterminizestar | fstprint # this last one fails [correctly]: ( echo "0 0 0 - 1"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint - - cd ~/tmpdir - while true; do - fstrand > 1.fst - fstpredeterminize out.lst 1.fst | fstdeterminizestar | fstrmsymbols out.lst - > 2.fst fstequivalent --random=true 1.fst 2.fst || echo "Test failed" echo -n - "." done - - Test of debugging [with non-determinizable input]: - ( echo " 0 0 1 0 1.0"; echo "0 1 1 0"; echo "1 1 1 0 0"; echo "0 2 2 0"; echo - "2"; echo "1" ) | fstcompile | fstdeterminizestar kill -SIGUSR1 [the process-id - of fstdeterminizestar] # prints out a bunch of debugging output showing the - mess it got itself into. -*/ - -bool debug_location = false; -void signal_handler(int) { debug_location = true; } - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Removes epsilons and determinizes in one step\n" - "\n" - "Usage: fstdeterminizestar [in.fst [out.fst] ]\n" - "\n" - "See also: fstdeterminizelog, lattice-determinize\n"; - - float delta = kDelta; - int max_states = -1; - bool use_log = false; - ParseOptions po(usage); - po.Register("use-log", &use_log, "Determinize in log semiring."); - po.Register("delta", &delta, - "Delta value used to determine equivalence of weights."); - po.Register( - "max-states", &max_states, - "Maximum number of states in determinized FST before it will abort."); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_str = po.GetOptArg(1), fst_out_str = po.GetOptArg(2); - - // This enables us to get traceback info from determinization that is - // not seeming to terminate. -#if !defined(_MSC_VER) && !defined(__APPLE__) - signal(SIGUSR1, signal_handler); -#endif - // Normal case: just files. - VectorFst *fst = ReadFstKaldi(fst_in_str); - - ArcSort(fst, ILabelCompare()); // improves speed. 
- if (use_log) { - DeterminizeStarInLog(fst, delta, &debug_location, max_states); - } else { - VectorFst det_fst; - DeterminizeStar(*fst, &det_fst, delta, &debug_location, max_states); - *fst = det_fst; // will do shallow copy and then det_fst goes - // out of scope anyway. - } - WriteFstKaldi(*fst, fst_out_str); - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstisstochastic.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstisstochastic.cc deleted file mode 100644 index 468ed0daa7d37cb9a25cf25264f86e48e137b975..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstisstochastic.cc +++ /dev/null @@ -1,91 +0,0 @@ -// fstbin/fstisstochastic.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -// e.g. of test: -// echo " 0 0" | fstcompile | fstisstochastic -// should return 0 and print "0 0" [meaning, min and -// max weight are one = exp(0)] -// echo " 0 1" | fstcompile | fstisstochastic -// should return 1, not stochastic, and print 1 1 -// (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic should return 0, stochastic; it prints "0 -// -1.78e-07" for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo -// "1 0" ) | fstcompile | fstisstochastic --test-in-log=false should return 1, -// not stochastic in tropical; it prints "0 0.693147" for me (echo "0 0 0 0 0 "; -// echo "0 1 0 0 0 "; echo "1 0" ) | fstcompile | fstisstochastic -// --test-in-log=false should return 0, stochastic in tropical; it prints "0 0" -// for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) | -// fstcompile | fstisstochastic --test-in-log=false --delta=1 returns 0 even -// though not stochastic because we gave it an absurdly large delta. 
- -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Checks whether an FST is stochastic and exits with success if so.\n" - "Prints out maximum error (in log units).\n" - "\n" - "Usage: fstisstochastic [ in.fst ]\n"; - - float delta = 0.01; - bool test_in_log = true; - - ParseOptions po(usage); - po.Register("delta", &delta, "Maximum error to accept."); - po.Register("test-in-log", &test_in_log, - "Test stochasticity in log semiring."); - po.Read(argc, argv); - - if (po.NumArgs() > 1) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1); - - Fst *fst = ReadFstKaldiGeneric(fst_in_filename); - - bool ans; - StdArc::Weight min, max; - if (test_in_log) - ans = IsStochasticFstInLog(*fst, delta, &min, &max); - else - ans = IsStochasticFst(*fst, delta, &min, &max); - - std::cout << min.Value() << " " << max.Value() << '\n'; - delete fst; - if (ans) - return 0; // success; - else - return 1; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstminimizeencoded.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstminimizeencoded.cc deleted file mode 100644 index ae9ca6d75abe67d9a195572dd6d91ec3c7b44851..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fstminimizeencoded.cc +++ /dev/null @@ -1,74 +0,0 @@ -// fstbin/fstminimizeencoded.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/determinize-star.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -/* some test examples: - ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstminimizeencoded | fstprint - ( echo "0 1 0 0"; echo " 0 2 0 0"; echo "1 0"; echo "2 0"; ) | fstcompile | - fstminimizeencoded | fstprint -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - - const char *usage = - "Minimizes FST after encoding [similar to fstminimize, but no " - "weight-pushing]\n" - "\n" - "Usage: fstminimizeencoded [in.fst [out.fst] ]\n"; - - float delta = kDelta; - ParseOptions po(usage); - po.Register("delta", &delta, - "Delta likelihood used for quantization of weights"); - po.Read(argc, argv); - - if (po.NumArgs() > 2) { - po.PrintUsage(); - exit(1); - } - - std::string fst_in_filename = po.GetOptArg(1), - fst_out_filename = po.GetOptArg(2); - - VectorFst *fst = ReadFstKaldi(fst_in_filename); - - MinimizeEncoded(fst, delta); - - WriteFstKaldi(*fst, fst_out_filename); - - delete fst; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } - return 0; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fsttablecompose.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fsttablecompose.cc deleted file mode 100644 index bdd476da78b8cb8823c60abf33b5278e05bfd92c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstbin/fsttablecompose.cc +++ /dev/null @@ -1,133 +0,0 @@ -// fstbin/fsttablecompose.cc - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "fst/fstlib.h" -#include "fstext/fstext-utils.h" -#include "fstext/kaldi-fst-io.h" -#include "fstext/table-matcher.h" -#include "util/parse-options.h" - -/* - cd ~/tmpdir - while true; do - fstrand | fstarcsort --sort_type=olabel > 1.fst; fstrand | fstarcsort - > 2.fst fstcompose 1.fst 2.fst > 3a.fst fsttablecompose 1.fst 2.fst > 3b.fst - fstequivalent --random=true 3a.fst 3b.fst || echo "Test failed" - echo -n "." - done - -*/ - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; // NOLINT - using namespace fst; // NOLINT - using kaldi::int32; - /* - fsttablecompose should always give equivalent results to compose, - but it is more efficient for certain kinds of inputs. 
- In particular, it is useful when, say, the left FST has states - that typically either have epsilon olabels, or - one transition out for each of the possible symbols (as the - olabel). The same with the input symbols of the right-hand FST - is possible. - */ - - const char *usage = - "Composition algorithm [between two FSTs of standard type, in " - "tropical\n" - "semiring] that is more efficient for certain cases-- in particular,\n" - "where one of the FSTs (the left one, if --match-side=left) has large\n" - "out-degree\n" - "\n" - "Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) " - "(fst2-rxfilename|fst2-rspecifier) [(out-rxfilename|out-rspecifier)]\n"; - - ParseOptions po(usage); - - TableComposeOptions opts; - std::string match_side = "left"; - std::string compose_filter = "sequence"; - - po.Register("connect", &opts.connect, "If true, trim FST before output."); - po.Register("match-side", &match_side, - "Side of composition to do table " - "match, one of: \"left\" or \"right\"."); - po.Register("compose-filter", &compose_filter, - "Composition filter to use, " - "one of: \"alt_sequence\", \"auto\", \"match\", \"sequence\""); - - po.Read(argc, argv); - - if (match_side == "left") { - opts.table_match_type = MATCH_OUTPUT; - } else if (match_side == "right") { - opts.table_match_type = MATCH_INPUT; - } else { - KALDI_ERR << "Invalid match-side option: " << match_side; - } - - if (compose_filter == "alt_sequence") { - opts.filter_type = ALT_SEQUENCE_FILTER; - } else if (compose_filter == "auto") { - opts.filter_type = AUTO_FILTER; - } else if (compose_filter == "match") { - opts.filter_type = MATCH_FILTER; - } else if (compose_filter == "sequence") { - opts.filter_type = SEQUENCE_FILTER; - } else { - KALDI_ERR << "Invalid compose-filter option: " << compose_filter; - } - - if (po.NumArgs() < 2 || po.NumArgs() > 3) { - po.PrintUsage(); - exit(1); - } - - std::string fst1_in_str = po.GetArg(1), fst2_in_str = po.GetArg(2), - fst_out_str = po.GetOptArg(3); - - VectorFst *fst1 = ReadFstKaldi(fst1_in_str); - - VectorFst *fst2 = ReadFstKaldi(fst2_in_str); - - // Checks if is olabel sorted and is ilabel sorted. - if (fst1->Properties(fst::kOLabelSorted, true) == 0) { - KALDI_WARN << "The first FST is not olabel sorted."; - } - if (fst2->Properties(fst::kILabelSorted, true) == 0) { - KALDI_WARN << "The second FST is not ilabel sorted."; - } - - VectorFst composed_fst; - - TableCompose(*fst1, *fst2, &composed_fst, opts); - - delete fst1; - delete fst2; - - WriteFstKaldi(composed_fst, fst_out_str); - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstext/determinize-lattice-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstext/determinize-lattice-inl.h deleted file mode 100644 index 0bfbc8f41c7e439b1fac037f60490e04fdcbdd8b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/fstext/determinize-lattice-inl.h +++ /dev/null @@ -1,1357 +0,0 @@ -// fstext/determinize-lattice-inl.h - -// Copyright 2009-2012 Microsoft Corporation -// 2012-2013 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -#define KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ -// Do not include this file directly. It is included by determinize-lattice.h - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fst { - -// This class maps back and forth from/to integer id's to sequences of strings. -// used in determinization algorithm. It is constructed in such a way that -// finding the string-id of the successor of (string, next-label) has constant -// time. - -// Note: class IntType, typically int32, is the type of the element in the -// string (typically a template argument of the CompactLatticeWeightTpl). - -template -class LatticeStringRepository { - public: - struct Entry { - const Entry *parent; // NULL for empty string. - IntType i; - inline bool operator==(const Entry &other) const { - return (parent == other.parent && i == other.i); - } - Entry() {} - Entry(const Entry &e) : parent(e.parent), i(e.i) {} - }; - // Note: all Entry* pointers returned in function calls are - // owned by the repository itself, not by the caller! - - // Interface guarantees empty string is NULL. - inline const Entry *EmptyString() { return NULL; } - - // Returns string of "parent" with i appended. Pointer - // owned by repository - const Entry *Successor(const Entry *parent, IntType i) { - new_entry_->parent = parent; - new_entry_->i = i; - - std::pair pr = set_.insert(new_entry_); - if (pr.second) { // Was successfully inserted (was not there). We need to - // replace the element we inserted, which resides on the - // stack, with one from the heap. - const Entry *ans = new_entry_; - new_entry_ = new Entry(); - return ans; - } else { // Was not inserted because an equivalent Entry already - // existed. - return *pr.first; - } - } - - const Entry *Concatenate(const Entry *a, const Entry *b) { - if (a == NULL) - return b; - else if (b == NULL) - return a; - std::vector v; - ConvertToVector(b, &v); - const Entry *ans = a; - for (size_t i = 0; i < v.size(); i++) ans = Successor(ans, v[i]); - return ans; - } - const Entry *CommonPrefix(const Entry *a, const Entry *b) { - std::vector a_vec, b_vec; - ConvertToVector(a, &a_vec); - ConvertToVector(b, &b_vec); - const Entry *ans = NULL; - for (size_t i = 0; - i < a_vec.size() && i < b_vec.size() && a_vec[i] == b_vec[i]; i++) - ans = Successor(ans, a_vec[i]); - return ans; - } - - // removes any elements from b that are not part of - // a common prefix with a. - void ReduceToCommonPrefix(const Entry *a, std::vector *b) { - size_t a_size = Size(a), b_size = b->size(); - while (a_size > b_size) { - a = a->parent; - a_size--; - } - if (b_size > a_size) b_size = a_size; - typename std::vector::iterator b_begin = b->begin(); - while (a_size != 0) { - if (a->i != *(b_begin + a_size - 1)) b_size = a_size - 1; - a = a->parent; - a_size--; - } - if (b_size != b->size()) b->resize(b_size); - } - - // removes the first n elements of a. 
- const Entry *RemovePrefix(const Entry *a, size_t n) { - if (n == 0) return a; - std::vector a_vec; - ConvertToVector(a, &a_vec); - assert(a_vec.size() >= n); - const Entry *ans = NULL; - for (size_t i = n; i < a_vec.size(); i++) ans = Successor(ans, a_vec[i]); - return ans; - } - - // Returns true if a is a prefix of b. If a is prefix of b, - // time taken is |b| - |a|. Else, time taken is |b|. - bool IsPrefixOf(const Entry *a, const Entry *b) const { - if (a == NULL) return true; // empty string prefix of all. - if (a == b) return true; - if (b == NULL) return false; - return IsPrefixOf(a, b->parent); - } - - inline size_t Size(const Entry *entry) const { - size_t ans = 0; - while (entry != NULL) { - ans++; - entry = entry->parent; - } - return ans; - } - - void ConvertToVector(const Entry *entry, std::vector *out) const { - size_t length = Size(entry); - out->resize(length); - if (entry != NULL) { - typename std::vector::reverse_iterator iter = out->rbegin(); - while (entry != NULL) { - *iter = entry->i; - entry = entry->parent; - ++iter; - } - } - } - - const Entry *ConvertFromVector(const std::vector &vec) { - const Entry *e = NULL; - for (size_t i = 0; i < vec.size(); i++) e = Successor(e, vec[i]); - return e; - } - - LatticeStringRepository() { new_entry_ = new Entry; } - - void Destroy() { - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) - delete *iter; - SetType tmp; - tmp.swap(set_); - if (new_entry_) { - delete new_entry_; - new_entry_ = NULL; - } - } - - // Rebuild will rebuild this object, guaranteeing only - // to preserve the Entry values that are in the vector pointed - // to (this list does not have to be unique). The point of - // this is to save memory. - void Rebuild(const std::vector &to_keep) { - SetType tmp_set; - for (typename std::vector::const_iterator iter = - to_keep.begin(); - iter != to_keep.end(); ++iter) - RebuildHelper(*iter, &tmp_set); - // Now delete all elems not in tmp_set. - for (typename SetType::iterator iter = set_.begin(); iter != set_.end(); - ++iter) { - if (tmp_set.count(*iter) == 0) - delete (*iter); // delete the Entry; not needed. - } - set_.swap(tmp_set); - } - - ~LatticeStringRepository() { Destroy(); } - int32 MemSize() const { - return set_.size() * sizeof(Entry) * 2; // this is a lower bound - // on the size this structure might take. - } - - private: - class EntryKey { // Hash function object. - public: - inline size_t operator()(const Entry *entry) const { - size_t prime = 49109; - return static_cast(entry->i) + - prime * reinterpret_cast(entry->parent); - } - }; - class EntryEqual { - public: - inline bool operator()(const Entry *e1, const Entry *e2) const { - return (*e1 == *e2); - } - }; - typedef std::unordered_set SetType; - - void RebuildHelper(const Entry *to_add, SetType *tmp_set) { - while (true) { - if (to_add == NULL) return; - typename SetType::iterator iter = tmp_set->find(to_add); - if (iter == tmp_set->end()) { // not in tmp_set. - tmp_set->insert(to_add); - to_add = to_add->parent; // and loop. - } else { - return; - } - } - } - - KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeStringRepository); - Entry *new_entry_; // We always have a pre-allocated Entry ready to use, - // to avoid unnecessary news and deletes. - SetType set_; -}; - -// class LatticeDeterminizer is templated on the same types that -// CompactLatticeWeight is templated on: the base weight (Weight), typically -// LatticeWeightTpl etc. but could also be e.g. 
TropicalWeight, and the -// IntType, typically int32, used for the output symbols in the compact -// representation of strings [note: the output symbols would usually be -// p.d.f. id's in the anticipated use of this code] It has a special requirement -// on the Weight type: that there should be a Compare function on the weights -// such that Compare(w1, w2) returns -1 if w1 < w2, 0 if w1 == w2, and +1 if w1 -// > w2. This requires that there be a total order on the weights. - -template -class LatticeDeterminizer { - public: - // Output to Gallic acceptor (so the strings go on weights, and there is a 1-1 - // correspondence between our states and the states in ofst. If destroy == - // true, release memory as we go (but we cannot output again). - - typedef CompactLatticeWeightTpl CompactWeight; - typedef ArcTpl - CompactArc; // arc in compact, acceptor form of lattice - typedef ArcTpl Arc; // arc in non-compact version of lattice - - // Output to standard FST with CompactWeightTpl as its weight type - // (the weight stores the original output-symbol strings). If destroy == - // true, release memory as we go (but we cannot output again). - void Output(MutableFst *ofst, bool destroy = true) { - assert(determinized_); - typedef typename Arc::StateId StateId; - StateId nStates = static_cast(output_arcs_.size()); - if (destroy) FreeMostMemory(); - ofst->DeleteStates(); - ofst->SetStart(kNoStateId); - if (nStates == 0) { - return; - } - for (StateId s = 0; s < nStates; s++) { - OutputStateId news = ofst->AddState(); - assert(news == s); - } - ofst->SetStart(0); - // now process transitions. - for (StateId this_state = 0; this_state < nStates; this_state++) { - std::vector &this_vec(output_arcs_[this_state]); - typename std::vector::const_iterator iter = this_vec.begin(), - end = this_vec.end(); - - for (; iter != end; ++iter) { - const TempArc &temp_arc(*iter); - CompactArc new_arc; - std::vector is not treated as epsilon, create a common end state for - // all transitions accepting the , since they do not back off. This small - // optimization saves about 2% states in an average grammar. - if (sub_eps_ == 0) { - eos_state_ = fst_->AddState(); - fst_->SetFinal(eos_state_, 0); - } -} - -template -void ArpaLmCompilerImpl::ConsumeNGram(const NGram& ngram, - bool is_highest) { - // Generally, we do the following. Suppose we are adding an n-gram "A B - // C". Then find the node for "A B", add a new node for "A B C", and connect - // them with the arc accepting "C" with the specified weight. Also, add a - // backoff arc from the new "A B C" node to its backoff state "B C". - // - // Two notable exceptions are the highest order n-grams, and final n-grams. - // - // When adding a highest order n-gram (e. g., our "A B C" is in a 3-gram LM), - // the following optimization is performed. There is no point adding a node - // for "A B C" with a "C" arc from "A B", since there will be no other - // arcs ingoing to this node, and an epsilon backoff arc into the backoff - // model "B C", with the weight of \bar{1}. To save a node, create an arc - // accepting "C" directly from "A B" to "B C". This saves as many nodes - // as there are the highest order n-grams, which is typically about half - // the size of a large 3-gram model. - // - // Indeed, this does not apply to n-grams ending in EOS, since they do not - // back off. These are special, as they do not have a back-off state, and - // the node for "(..anything..) " is always final. 
These are handled - // in one of the two possible ways, If symbols and are being - // replaced by epsilons, neither node nor arc is created, and the logprob - // of the n-gram is applied to its source node as final weight. If and - // are preserved, then a special final node for is allocated and - // used as the destination of the "" acceptor arc. - HistKey heads(ngram.words.begin(), ngram.words.end() - 1); - typename HistoryMap::iterator source_it = history_.find(heads); - if (source_it == history_.end()) { - // There was no "A B", therefore the probability of "A B C" is zero. - // Print a warning and discard current n-gram. - if (parent_->ShouldWarn()) - KALDI_WARN << parent_->LineReference() - << " skipped: no parent (n-1)-gram exists"; - return; - } - - StateId source = source_it->second; - StateId dest; - Symbol sym = ngram.words.back(); - float weight = -ngram.logprob; - if (sym == sub_eps_ || sym == 0) { - KALDI_ERR << " or disambiguation symbol " << sym - << "found in the ARPA file. "; - } - if (sym == eos_symbol_) { - if (sub_eps_ == 0) { - // Keep as a real symbol when not substituting. - dest = eos_state_; - } else { - // Treat as if it was epsilon: mark source final, with the weight - // of the n-gram. - fst_->SetFinal(source, weight); - return; - } - } else { - // For the highest order n-gram, this may find an existing state, for - // non-highest, will create one (unless there are duplicate n-grams - // in the grammar, which cannot be reliably detected if highest order, - // so we better do not do that at all). - dest = AddStateWithBackoff( - HistKey(ngram.words.begin() + (is_highest ? 1 : 0), ngram.words.end()), - -ngram.backoff); - } - - if (sym == bos_symbol_) { - weight = 0; // Accepting is always free. - if (sub_eps_ == 0) { - // is as a real symbol, only accepted in the start state. - source = fst_->AddState(); - fst_->SetStart(source); - } else { - // The new state for unigram history *is* the start state. - fst_->SetStart(dest); - return; - } - } - - // Add arc from source to dest, whichever way it was found. - fst_->AddArc(source, fst::StdArc(sym, sym, weight, dest)); - return; -} - -// Find or create a new state for n-gram defined by key, and ensure it has a -// backoff transition. The key is either the current n-gram for all but -// highest orders, or the tails of the n-gram for the highest order. The -// latter arises from the chain-collapsing optimization described above. -template -StateId ArpaLmCompilerImpl::AddStateWithBackoff(HistKey key, - float backoff) { - typename HistoryMap::iterator dest_it = history_.find(key); - if (dest_it != history_.end()) { - // Found an existing state in the history map. Invariant: if the state in - // the map, then its backoff arc is in the FST. We are done. - return dest_it->second; - } - // Otherwise create a new state and its backoff arc, and register in the map. - StateId dest = fst_->AddState(); - history_[key] = dest; - CreateBackoff(key.Tails(), dest, backoff); - return dest; -} - -// Create a backoff arc for a state. Key is a backoff destination that may or -// may not exist. When the destination is not found, naturally fall back to -// the lower order model, and all the way down until one is found (since the -// 0-gram model is always present, the search is guaranteed to terminate). 
-template -inline void ArpaLmCompilerImpl::CreateBackoff(HistKey key, - StateId state, - float weight) { - typename HistoryMap::iterator dest_it = history_.find(key); - while (dest_it == history_.end()) { - key = key.Tails(); - dest_it = history_.find(key); - } - - // The arc should transduce either or #0 to , depending on the - // epsilon substitution mode. This is the only case when input and output - // label may differ. - fst_->AddArc(state, fst::StdArc(sub_eps_, 0, weight, dest_it->second)); -} - -ArpaLmCompiler::~ArpaLmCompiler() { - if (impl_ != NULL) delete impl_; -} - -void ArpaLmCompiler::HeaderAvailable() { - KALDI_ASSERT(impl_ == NULL); - // Use optimized implementation if the grammar is 4-gram or less, and the - // maximum attained symbol id will fit into the optimized range. - int64 max_symbol = 0; - if (Symbols() != NULL) max_symbol = Symbols()->AvailableKey() - 1; - // If augmenting the symbol table, assume the worst case when all words in - // the model being read are novel. - if (Options().oov_handling == ArpaParseOptions::kAddToSymbols) - max_symbol += NgramCounts()[0]; - - if (NgramCounts().size() <= 4 && max_symbol < OptimizedHistKey::kMaxData) { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - } else { - impl_ = new ArpaLmCompilerImpl(this, &fst_, sub_eps_); - KALDI_LOG << "Reverting to slower state tracking because model is large: " - << NgramCounts().size() << "-gram with symbols up to " - << max_symbol; - } -} - -void ArpaLmCompiler::ConsumeNGram(const NGram& ngram) { - // is invalid in tails, in heads of an n-gram. - for (int i = 0; i < ngram.words.size(); ++i) { - if ((i > 0 && ngram.words[i] == Options().bos_symbol) || - (i + 1 < ngram.words.size() && - ngram.words[i] == Options().eos_symbol)) { - if (ShouldWarn()) - KALDI_WARN << LineReference() - << " skipped: n-gram has invalid BOS/EOS placement"; - return; - } - } - - bool is_highest = ngram.words.size() == NgramCounts().size(); - impl_->ConsumeNGram(ngram, is_highest); -} - -void ArpaLmCompiler::RemoveRedundantStates() { - fst::StdArc::Label backoff_symbol = sub_eps_; - if (backoff_symbol == 0) { - // The method of removing redundant states implemented in this function - // leads to slow determinization of L o G when people use the older style of - // usage of arpa2fst where the --disambig-symbol option was not specified. - // The issue seems to be that it creates a non-deterministic FST, while G is - // supposed to be deterministic. By 'return'ing below, we just disable this - // method if people were using an older script. This method isn't really - // that consequential anyway, and people will move to the newer-style - // scripts (see current utils/format_lm.sh), so this isn't much of a - // problem. - return; - } - - fst::StdArc::StateId num_states = fst_.NumStates(); - - // replace the #0 symbols on the input of arcs out of redundant states (states - // that are not final and have only a backoff arc leaving them), with . 
- for (fst::StdArc::StateId state = 0; state < num_states; state++) { - if (fst_.NumArcs(state) == 1 && - fst_.Final(state) == fst::TropicalWeight::Zero()) { - fst::MutableArcIterator iter(&fst_, state); - fst::StdArc arc = iter.Value(); - if (arc.ilabel == backoff_symbol) { - arc.ilabel = 0; - iter.SetValue(arc); - } - } - } - - // we could call fst::RemoveEps, and it would have the same effect in normal - // cases, where backoff_symbol != 0 and there are no epsilons in unexpected - // places, but RemoveEpsLocal is a bit safer in case something weird is going - // on; it guarantees not to blow up the FST. - fst::RemoveEpsLocal(&fst_); - KALDI_LOG << "Reduced num-states from " << num_states << " to " - << fst_.NumStates(); -} - -void ArpaLmCompiler::Check() const { - if (fst_.Start() == fst::kNoStateId) { - KALDI_ERR << "Arpa file did not contain the beginning-of-sentence symbol " - << Symbols()->Find(Options().bos_symbol) << "."; - } -} - -void ArpaLmCompiler::ReadComplete() { - fst_.SetInputSymbols(Symbols()); - fst_.SetOutputSymbols(Symbols()); - RemoveRedundantStates(); - Check(); -} - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/lm/arpa-lm-compiler.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/lm/arpa-lm-compiler.h deleted file mode 100644 index 069c71bd0e6f5acf0b9521ec1ef46796eb31fe4d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/lm/arpa-lm-compiler.h +++ /dev/null @@ -1,63 +0,0 @@ -// lm/arpa-lm-compiler.h - -// Copyright 2009-2011 Gilles Boulianne -// Copyright 2016 Smart Action LLC (kkm) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_LM_ARPA_LM_COMPILER_H_ -#define KALDI_LM_ARPA_LM_COMPILER_H_ - -#include - -#include "lm/arpa-file-parser.h" - -namespace kaldi { - -class ArpaLmCompilerImplInterface; - -class ArpaLmCompiler : public ArpaFileParser { - public: - ArpaLmCompiler(const ArpaParseOptions& options, int sub_eps, - fst::SymbolTable* symbols) - : ArpaFileParser(options, symbols), sub_eps_(sub_eps), impl_(NULL) {} - ~ArpaLmCompiler(); - - const fst::StdVectorFst& Fst() const { return fst_; } - fst::StdVectorFst* MutableFst() { return &fst_; } - - protected: - // ArpaFileParser overrides. - virtual void HeaderAvailable(); - virtual void ConsumeNGram(const NGram& ngram); - virtual void ReadComplete(); - - private: - // this function removes states that only have a backoff arc coming - // out of them. - void RemoveRedundantStates(); - void Check() const; - - int sub_eps_; - ArpaLmCompilerImplInterface* impl_; // Owned. 
- fst::StdVectorFst fst_; - template - friend class ArpaLmCompilerImpl; -}; - -} // namespace kaldi - -#endif // KALDI_LM_ARPA_LM_COMPILER_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/lmbin/arpa2fst.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/lmbin/arpa2fst.cc deleted file mode 100644 index 881a45c5b37810247ea38dae56237f59b5554a9c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/lmbin/arpa2fst.cc +++ /dev/null @@ -1,145 +0,0 @@ -// bin/arpa2fst.cc -// -// Copyright 2009-2011 Gilles Boulianne. -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABILITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "lm/arpa-lm-compiler.h" -#include "util/kaldi-io.h" -#include "util/parse-options.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; // NOLINT - try { - const char *usage = - "Convert an ARPA format language model into an FST\n" - "Usage: arpa2fst [opts] \n" - " e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table=" - "data/lang/words.txt lm/input.arpa G.fst\n\n" - "Note: When called without switches, the output G.fst will contain\n" - "an embedded symbol table. This is compatible with the way a previous\n" - "version of arpa2fst worked.\n"; - - ParseOptions po(usage); - - ArpaParseOptions options; - options.Register(&po); - - // Option flags. - std::string bos_symbol = ""; - std::string eos_symbol = ""; - std::string disambig_symbol; - std::string read_syms_filename; - std::string write_syms_filename; - bool keep_symbols = false; - bool ilabel_sort = true; - - po.Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol"); - po.Register("eos-symbol", &eos_symbol, "End of sentence symbol"); - po.Register("disambig-symbol", &disambig_symbol, - "Disambiguator. If provided (e. g. #0), used on input side of " - "backoff links, and and are replaced with epsilons"); - po.Register("read-symbol-table", &read_syms_filename, - "Use existing symbol table"); - po.Register("write-symbol-table", &write_syms_filename, - "Write generated symbol table to a file"); - po.Register("keep-symbols", &keep_symbols, - "Store symbol table with FST. Symbols always saved to FST if " - "symbol tables are neither read or written (otherwise symbols " - "would be lost entirely)"); - po.Register("ilabel-sort", &ilabel_sort, "Ilabel-sort the output FST"); - - po.Read(argc, argv); - - if (po.NumArgs() != 1 && po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - std::string arpa_rxfilename = po.GetArg(1), - fst_wxfilename = po.GetOptArg(2); - - int64 disambig_symbol_id = 0; - - fst::SymbolTable *symbols; - if (!read_syms_filename.empty()) { - // Use existing symbols. Required symbols must be in the table. 
- kaldi::Input kisym(read_syms_filename); - symbols = fst::SymbolTable::ReadText( - kisym.Stream(), PrintableWxfilename(read_syms_filename)); - if (symbols == NULL) - KALDI_ERR << "Could not read symbol table from file " - << read_syms_filename; - - options.oov_handling = ArpaParseOptions::kSkipNGram; - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->Find(disambig_symbol); - if (disambig_symbol_id == -1) // fst::kNoSymbol - KALDI_ERR << "Symbol table " << read_syms_filename - << " has no symbol for " << disambig_symbol; - } - } else { - // Create a new symbol table and populate it from ARPA file. - symbols = new fst::SymbolTable(PrintableWxfilename(fst_wxfilename)); - options.oov_handling = ArpaParseOptions::kAddToSymbols; - symbols->AddSymbol("", 0); - if (!disambig_symbol.empty()) { - disambig_symbol_id = symbols->AddSymbol(disambig_symbol); - } - } - - // Add or use existing BOS and EOS. - options.bos_symbol = symbols->AddSymbol(bos_symbol); - options.eos_symbol = symbols->AddSymbol(eos_symbol); - - // If producing new (not reading existing) symbols and not saving them, - // need to keep symbols with FST, otherwise they would be lost. - if (read_syms_filename.empty() && write_syms_filename.empty()) - keep_symbols = true; - - // Actually compile LM. - KALDI_ASSERT(symbols != NULL); - ArpaLmCompiler lm_compiler(options, disambig_symbol_id, symbols); - { - Input ki(arpa_rxfilename); - lm_compiler.Read(ki.Stream()); - } - - // Sort the FST in-place if requested by options. - if (ilabel_sort) { - fst::ArcSort(lm_compiler.MutableFst(), fst::StdILabelCompare()); - } - - // Write symbols if requested. - if (!write_syms_filename.empty()) { - kaldi::Output kosym(write_syms_filename, false); - symbols->WriteText(kosym.Stream()); - } - - // Write LM FST. - bool write_binary = true, write_header = false; - kaldi::Output kofst(fst_wxfilename, write_binary, write_header); - fst::FstWriteOptions wopts(PrintableWxfilename(fst_wxfilename)); - wopts.write_isymbols = wopts.write_osymbols = keep_symbols; - lm_compiler.Fst().Write(kofst.Stream(), wopts); - - delete symbols; - } catch (const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/basic-filebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/basic-filebuf.h deleted file mode 100644 index 22ec891064d5955c8b1d255e0d34781a9f505a38..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/basic-filebuf.h +++ /dev/null @@ -1,952 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// This is a modified version of the std::basic_filebuf from libc++ -// Copyright 20XX LLVM -// (http://libcxx.llvm.org/). -// It allows one to create basic_filebuf from an existing FILE* handle or file -// descriptor. -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source License licenses. See LICENSE.TXT for details (included at the -// bottom). 
-/////////////////////////////////////////////////////////////////////////////// -#ifndef KALDI_UTIL_BASIC_FILEBUF_H_ -#define KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include - -/////////////////////////////////////////////////////////////////////////////// -namespace kaldi { -/////////////////////////////////////////////////////////////////////////////// -template > -class basic_filebuf : public std::basic_streambuf { - public: - typedef CharT char_type; - typedef Traits traits_type; - typedef typename traits_type::int_type int_type; - typedef typename traits_type::pos_type pos_type; - typedef typename traits_type::off_type off_type; - typedef typename traits_type::state_type state_type; - - basic_filebuf(); - basic_filebuf(basic_filebuf&& rhs); - virtual ~basic_filebuf(); - - basic_filebuf& operator=(basic_filebuf&& rhs); - void swap(basic_filebuf& rhs); - - bool is_open() const; - basic_filebuf* open(const char* s, std::ios_base::openmode mode); - basic_filebuf* open(const std::string& s, std::ios_base::openmode mode); - basic_filebuf* open(int fd, std::ios_base::openmode mode); - basic_filebuf* open(FILE* f, std::ios_base::openmode mode); - basic_filebuf* close(); - - FILE* file() { return this->_M_file; } - int fd() { return fileno(this->_M_file); } - - protected: - int_type underflow() override; - int_type pbackfail(int_type c = traits_type::eof()) override; - int_type overflow(int_type c = traits_type::eof()) override; - std::basic_streambuf* setbuf( - char_type* s, std::streamsize n) override; - pos_type seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - pos_type seekpos(pos_type sp, - std::ios_base::openmode wch = std::ios_base::in | - std::ios_base::out) override; - int sync() override; - void imbue(const std::locale& loc) override; - - protected: - char* _M_extbuf; - const char* _M_extbufnext; - const char* _M_extbufend; - char _M_extbuf_min[8]; - size_t _M_ebs; - char_type* _M_intbuf; - size_t _M_ibs; - FILE* _M_file; - const std::codecvt* _M_cv; - state_type _M_st; - state_type _M_st_last; - std::ios_base::openmode _M_om; - std::ios_base::openmode _M_cm; - bool _M_owns_eb; - bool _M_owns_ib; - bool _M_always_noconv; - - const char* _M_get_mode(std::ios_base::openmode mode); - bool _M_read_mode(); - void _M_write_mode(); -}; - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf() - : _M_extbuf(nullptr), - _M_extbufnext(nullptr), - _M_extbufend(nullptr), - _M_ebs(0), - _M_intbuf(nullptr), - _M_ibs(0), - _M_file(nullptr), - _M_cv(nullptr), - _M_st(), - _M_st_last(), - _M_om(std::ios_base::openmode(0)), - _M_cm(std::ios_base::openmode(0)), - _M_owns_eb(false), - _M_owns_ib(false), - _M_always_noconv(false) { - if (std::has_facet >( - this->getloc())) { - _M_cv = &std::use_facet >( - this->getloc()); - _M_always_noconv = _M_cv->always_noconv(); - } - setbuf(0, 4096); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::basic_filebuf(basic_filebuf&& rhs) - : std::basic_streambuf(rhs) { - if (rhs._M_extbuf == rhs._M_extbuf_min) { - _M_extbuf = _M_extbuf_min; - _M_extbufnext = _M_extbuf + (rhs._M_extbufnext - rhs._M_extbuf); - _M_extbufend = _M_extbuf + (rhs._M_extbufend - rhs._M_extbuf); - } else { - _M_extbuf = rhs._M_extbuf; - _M_extbufnext = 
rhs._M_extbufnext; - _M_extbufend = rhs._M_extbufend; - } - _M_ebs = rhs._M_ebs; - _M_intbuf = rhs._M_intbuf; - _M_ibs = rhs._M_ibs; - _M_file = rhs._M_file; - _M_cv = rhs._M_cv; - _M_st = rhs._M_st; - _M_st_last = rhs._M_st_last; - _M_om = rhs._M_om; - _M_cm = rhs._M_cm; - _M_owns_eb = rhs._M_owns_eb; - _M_owns_ib = rhs._M_owns_ib; - _M_always_noconv = rhs._M_always_noconv; - if (rhs.pbase()) { - if (rhs.pbase() == rhs._M_intbuf) - this->setp(_M_intbuf, _M_intbuf + (rhs.epptr() - rhs.pbase())); - else - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + - (rhs.epptr() - rhs.pbase())); - this->pbump(rhs.pptr() - rhs.pbase()); - } else if (rhs.eback()) { - if (rhs.eback() == rhs._M_intbuf) - this->setg(_M_intbuf, _M_intbuf + (rhs.gptr() - rhs.eback()), - _M_intbuf + (rhs.egptr() - rhs.eback())); - else - this->setg( - reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (rhs.gptr() - rhs.eback()), - reinterpret_cast(_M_extbuf) + - (rhs.egptr() - rhs.eback())); - } - rhs._M_extbuf = nullptr; - rhs._M_extbufnext = nullptr; - rhs._M_extbufend = nullptr; - rhs._M_ebs = 0; - rhs._M_intbuf = nullptr; - rhs._M_ibs = 0; - rhs._M_file = nullptr; - rhs._M_st = state_type(); - rhs._M_st_last = state_type(); - rhs._M_om = std::ios_base::openmode(0); - rhs._M_cm = std::ios_base::openmode(0); - rhs._M_owns_eb = false; - rhs._M_owns_ib = false; - rhs.setg(0, 0, 0); - rhs.setp(0, 0); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf& basic_filebuf::operator=( - basic_filebuf&& rhs) { - close(); - swap(rhs); - return *this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf::~basic_filebuf() { - // try - // { - // close(); - // } - // catch (...) 
- // { - // } - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::swap(basic_filebuf& rhs) { - std::basic_streambuf::swap(rhs); - if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - std::swap(_M_extbuf, rhs._M_extbuf); - std::swap(_M_extbufnext, rhs._M_extbufnext); - std::swap(_M_extbufend, rhs._M_extbufend); - } else { - ptrdiff_t ln = _M_extbufnext - _M_extbuf; - ptrdiff_t le = _M_extbufend - _M_extbuf; - ptrdiff_t rn = rhs._M_extbufnext - rhs._M_extbuf; - ptrdiff_t re = rhs._M_extbufend - rhs._M_extbuf; - if (_M_extbuf == _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) { - _M_extbuf = rhs._M_extbuf; - rhs._M_extbuf = rhs._M_extbuf_min; - } else if (_M_extbuf != _M_extbuf_min && - rhs._M_extbuf == rhs._M_extbuf_min) { - rhs._M_extbuf = _M_extbuf; - _M_extbuf = _M_extbuf_min; - } - _M_extbufnext = _M_extbuf + rn; - _M_extbufend = _M_extbuf + re; - rhs._M_extbufnext = rhs._M_extbuf + ln; - rhs._M_extbufend = rhs._M_extbuf + le; - } - std::swap(_M_ebs, rhs._M_ebs); - std::swap(_M_intbuf, rhs._M_intbuf); - std::swap(_M_ibs, rhs._M_ibs); - std::swap(_M_file, rhs._M_file); - std::swap(_M_cv, rhs._M_cv); - std::swap(_M_st, rhs._M_st); - std::swap(_M_st_last, rhs._M_st_last); - std::swap(_M_om, rhs._M_om); - std::swap(_M_cm, rhs._M_cm); - std::swap(_M_owns_eb, rhs._M_owns_eb); - std::swap(_M_owns_ib, rhs._M_owns_ib); - std::swap(_M_always_noconv, rhs._M_always_noconv); - if (this->eback() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->gptr() - this->eback(); - ptrdiff_t e = this->egptr() - this->eback(); - this->setg(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + n, - reinterpret_cast(_M_extbuf_min) + e); - } else if (this->pbase() == reinterpret_cast(rhs._M_extbuf_min)) { - ptrdiff_t n = this->pptr() - this->pbase(); - ptrdiff_t e = this->epptr() - this->pbase(); - this->setp(reinterpret_cast(_M_extbuf_min), - reinterpret_cast(_M_extbuf_min) + e); - this->pbump(n); - } - if (rhs.eback() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.gptr() - rhs.eback(); - ptrdiff_t e = rhs.egptr() - rhs.eback(); - rhs.setg(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + n, - reinterpret_cast(rhs._M_extbuf_min) + e); - } else if (rhs.pbase() == reinterpret_cast(_M_extbuf_min)) { - ptrdiff_t n = rhs.pptr() - rhs.pbase(); - ptrdiff_t e = rhs.epptr() - rhs.pbase(); - rhs.setp(reinterpret_cast(rhs._M_extbuf_min), - reinterpret_cast(rhs._M_extbuf_min) + e); - rhs.pbump(n); - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline void swap(basic_filebuf& x, - basic_filebuf& y) { - x.swap(y); -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline bool basic_filebuf::is_open() const { - return _M_file != nullptr; -} - -/////////////////////////////////////////////////////////////////////////////// -template -const char* basic_filebuf::_M_get_mode( - std::ios_base::openmode mode) { - switch ((mode & ~std::ios_base::ate) | 0) { - case std::ios_base::out: - case std::ios_base::out | std::ios_base::trunc: - return "w"; - case std::ios_base::out | std::ios_base::app: - case std::ios_base::app: - return "a"; - break; - case std::ios_base::in: - return "r"; - case std::ios_base::in | std::ios_base::out: - return "r+"; - case std::ios_base::in | std::ios_base::out | 
std::ios_base::trunc: - return "w+"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app: - case std::ios_base::in | std::ios_base::app: - return "a+"; - case std::ios_base::out | std::ios_base::binary: - case std::ios_base::out | std::ios_base::trunc | std::ios_base::binary: - return "wb"; - case std::ios_base::out | std::ios_base::app | std::ios_base::binary: - case std::ios_base::app | std::ios_base::binary: - return "ab"; - case std::ios_base::in | std::ios_base::binary: - return "rb"; - case std::ios_base::in | std::ios_base::out | std::ios_base::binary: - return "r+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::trunc | - std::ios_base::binary: - return "w+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app | - std::ios_base::binary: - case std::ios_base::in | std::ios_base::app | std::ios_base::binary: - return "a+b"; - default: - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - const char* s, std::ios_base::openmode mode) { - basic_filebuf* rt = nullptr; - if (_M_file == nullptr) { - const char* md = _M_get_mode(mode); - if (md) { - _M_file = fopen(s, md); - if (_M_file) { - rt = this; - _M_om = mode; - if (mode & std::ios_base::ate) { - if (fseek(_M_file, 0, SEEK_END)) { - fclose(_M_file); - _M_file = nullptr; - rt = nullptr; - } - } - } - } - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -inline basic_filebuf* basic_filebuf::open( - const std::string& s, std::ios_base::openmode mode) { - return open(s.c_str(), mode); -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - int fd, std::ios_base::openmode mode) { - const char* md = this->_M_get_mode(mode); - if (md) { - this->_M_file = fdopen(fd, md); - this->_M_om = mode; - return this; - } else { - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::open( - FILE* f, std::ios_base::openmode mode) { - this->_M_file = f; - this->_M_om = mode; - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -basic_filebuf* basic_filebuf::close() { - basic_filebuf* rt = nullptr; - if (_M_file) { - rt = this; - std::unique_ptr h(_M_file, fclose); - if (sync()) rt = nullptr; - if (fclose(h.release()) == 0) - _M_file = nullptr; - else - rt = nullptr; - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::underflow() { - if (_M_file == nullptr) return traits_type::eof(); - bool initial = _M_read_mode(); - char_type buf; - if (this->gptr() == nullptr) this->setg(&buf, &buf + 1, &buf + 1); - const size_t unget_sz = - initial ? 
0 : std::min((this->egptr() - this->eback()) / 2, 4); - int_type c = traits_type::eof(); - if (this->gptr() == this->egptr()) { - memmove(this->eback(), this->egptr() - unget_sz, - unget_sz * sizeof(char_type)); - if (_M_always_noconv) { - size_t nmemb = - static_cast(this->egptr() - this->eback() - unget_sz); - nmemb = fread(this->eback() + unget_sz, 1, nmemb, _M_file); - if (nmemb != 0) { - this->setg(this->eback(), this->eback() + unget_sz, - this->eback() + unget_sz + nmemb); - c = traits_type::to_int_type(*this->gptr()); - } - } else { - memmove(_M_extbuf, _M_extbufnext, _M_extbufend - _M_extbufnext); - _M_extbufnext = _M_extbuf + (_M_extbufend - _M_extbufnext); - _M_extbufend = - _M_extbuf + - (_M_extbuf == _M_extbuf_min ? sizeof(_M_extbuf_min) : _M_ebs); - size_t nmemb = - std::min(static_cast(_M_ibs - unget_sz), - static_cast(_M_extbufend - _M_extbufnext)); - std::codecvt_base::result r; - _M_st_last = _M_st; - size_t nr = - fread(reinterpret_cast(const_cast(_M_extbufnext)), - 1, nmemb, _M_file); - if (nr != 0) { - if (!_M_cv) throw std::bad_cast(); - _M_extbufend = _M_extbufnext + nr; - char_type* inext; - r = _M_cv->in(_M_st, _M_extbuf, _M_extbufend, _M_extbufnext, - this->eback() + unget_sz, this->eback() + _M_ibs, inext); - if (r == std::codecvt_base::noconv) { - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf), - const_cast(_M_extbufend)); - c = traits_type::to_int_type(*this->gptr()); - } else if (inext != this->eback() + unget_sz) { - this->setg(this->eback(), this->eback() + unget_sz, inext); - c = traits_type::to_int_type(*this->gptr()); - } - } - } - } else { - c = traits_type::to_int_type(*this->gptr()); - } - if (this->eback() == &buf) this->setg(0, 0, 0); - return c; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::pbackfail(int_type c) { - if (_M_file && this->eback() < this->gptr()) { - if (traits_type::eq_int_type(c, traits_type::eof())) { - this->gbump(-1); - return traits_type::not_eof(c); - } - if ((_M_om & std::ios_base::out) || - traits_type::eq(traits_type::to_char_type(c), this->gptr()[-1])) { - this->gbump(-1); - *this->gptr() = traits_type::to_char_type(c); - return c; - } - } - return traits_type::eof(); -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::int_type -basic_filebuf::overflow(int_type c) { - if (_M_file == nullptr) return traits_type::eof(); - _M_write_mode(); - char_type buf; - char_type* pb_save = this->pbase(); - char_type* epb_save = this->epptr(); - if (!traits_type::eq_int_type(c, traits_type::eof())) { - if (this->pptr() == nullptr) this->setp(&buf, &buf + 1); - *this->pptr() = traits_type::to_char_type(c); - this->pbump(1); - } - if (this->pptr() != this->pbase()) { - if (_M_always_noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), sizeof(char_type), nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else { - char* extbe = _M_extbuf; - std::codecvt_base::result r; - do { - if (!_M_cv) throw std::bad_cast(); - const char_type* e; - r = _M_cv->out(_M_st, this->pbase(), this->pptr(), e, _M_extbuf, - _M_extbuf + _M_ebs, extbe); - if (e == this->pbase()) return traits_type::eof(); - if (r == std::codecvt_base::noconv) { - size_t nmemb = static_cast(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - } else if (r == std::codecvt_base::ok 
|| - r == std::codecvt_base::partial) { - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - if (r == std::codecvt_base::partial) { - this->setp(const_cast(e), this->pptr()); - this->pbump(this->epptr() - this->pbase()); - } - } else { - return traits_type::eof(); - } - } while (r == std::codecvt_base::partial); - } - this->setp(pb_save, epb_save); - } - return traits_type::not_eof(c); -} - -/////////////////////////////////////////////////////////////////////////////// -template -std::basic_streambuf* basic_filebuf::setbuf( - char_type* s, std::streamsize n) { - this->setg(0, 0, 0); - this->setp(0, 0); - if (_M_owns_eb) delete[] _M_extbuf; - if (_M_owns_ib) delete[] _M_intbuf; - _M_ebs = n; - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv && s) { - _M_extbuf = reinterpret_cast(s); - _M_owns_eb = false; - } else { - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } - } else { - _M_extbuf = _M_extbuf_min; - _M_ebs = sizeof(_M_extbuf_min); - _M_owns_eb = false; - } - if (!_M_always_noconv) { - _M_ibs = std::max(n, sizeof(_M_extbuf_min)); - if (s && _M_ibs >= sizeof(_M_extbuf_min)) { - _M_intbuf = s; - _M_owns_ib = false; - } else { - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } else { - _M_ibs = 0; - _M_intbuf = 0; - _M_owns_ib = false; - } - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode) { - if (!_M_cv) throw std::bad_cast(); - int width = _M_cv->encoding(); - if (_M_file == nullptr || (width <= 0 && off != 0) || sync()) - return pos_type(off_type(-1)); - // width > 0 || off == 0 - int whence; - switch (way) { - case std::ios_base::beg: - whence = SEEK_SET; - break; - case std::ios_base::cur: - whence = SEEK_CUR; - break; - case std::ios_base::end: - whence = SEEK_END; - break; - default: - return pos_type(off_type(-1)); - } -#if _WIN32 - if (fseek(_M_file, width > 0 ? width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftell(_M_file); -#else - if (fseeko(_M_file, width > 0 ? 
width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftello(_M_file); -#endif - r.state(_M_st); - return r; -} - -/////////////////////////////////////////////////////////////////////////////// -template -typename basic_filebuf::pos_type -basic_filebuf::seekpos(pos_type sp, std::ios_base::openmode) { - if (_M_file == nullptr || sync()) return pos_type(off_type(-1)); -#if _WIN32 - if (fseek(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#else - if (fseeko(_M_file, sp, SEEK_SET)) return pos_type(off_type(-1)); -#endif - _M_st = sp.state(); - return sp; -} - -/////////////////////////////////////////////////////////////////////////////// -template -int basic_filebuf::sync() { - if (_M_file == nullptr) return 0; - if (!_M_cv) throw std::bad_cast(); - if (_M_cm & std::ios_base::out) { - if (this->pptr() != this->pbase()) - if (overflow() == traits_type::eof()) return -1; - std::codecvt_base::result r; - do { - char* extbe; - r = _M_cv->unshift(_M_st, _M_extbuf, _M_extbuf + _M_ebs, extbe); - size_t nmemb = static_cast(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) return -1; - } while (r == std::codecvt_base::partial); - if (r == std::codecvt_base::error) return -1; - if (fflush(_M_file)) return -1; - } else if (_M_cm & std::ios_base::in) { - off_type c; - state_type state = _M_st_last; - bool update_st = false; - if (_M_always_noconv) { - c = this->egptr() - this->gptr(); - } else { - int width = _M_cv->encoding(); - c = _M_extbufend - _M_extbufnext; - if (width > 0) { - c += width * (this->egptr() - this->gptr()); - } else { - if (this->gptr() != this->egptr()) { - const int off = _M_cv->length(state, _M_extbuf, _M_extbufnext, - this->gptr() - this->eback()); - c += _M_extbufnext - _M_extbuf - off; - update_st = true; - } - } - } -#if _WIN32 - if (fseek(_M_file_, -c, SEEK_CUR)) return -1; -#else - if (fseeko(_M_file, -c, SEEK_CUR)) return -1; -#endif - if (update_st) _M_st = state; - _M_extbufnext = _M_extbufend = _M_extbuf; - this->setg(0, 0, 0); - _M_cm = std::ios_base::openmode(0); - } - return 0; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::imbue(const std::locale& loc) { - sync(); - _M_cv = &std::use_facet >(loc); - bool old_anc = _M_always_noconv; - _M_always_noconv = _M_cv->always_noconv(); - if (old_anc != _M_always_noconv) { - this->setg(0, 0, 0); - this->setp(0, 0); - // invariant, char_type is char, else we couldn't get here - // need to dump _M_intbuf - if (_M_always_noconv) { - if (_M_owns_eb) delete[] _M_extbuf; - _M_owns_eb = _M_owns_ib; - _M_ebs = _M_ibs; - _M_extbuf = reinterpret_cast(_M_intbuf); - _M_ibs = 0; - _M_intbuf = nullptr; - _M_owns_ib = false; - } else { // need to obtain an _M_intbuf. 
- // If _M_extbuf is user-supplied, use it, else new _M_intbuf - if (!_M_owns_eb && _M_extbuf != _M_extbuf_min) { - _M_ibs = _M_ebs; - _M_intbuf = reinterpret_cast(_M_extbuf); - _M_owns_ib = false; - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } else { - _M_ibs = _M_ebs; - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } - } -} - -/////////////////////////////////////////////////////////////////////////////// -template -bool basic_filebuf::_M_read_mode() { - if (!(_M_cm & std::ios_base::in)) { - this->setp(0, 0); - if (_M_always_noconv) - this->setg(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + _M_ebs, - reinterpret_cast(_M_extbuf) + _M_ebs); - else - this->setg(_M_intbuf, _M_intbuf + _M_ibs, _M_intbuf + _M_ibs); - _M_cm = std::ios_base::in; - return true; - } - return false; -} - -/////////////////////////////////////////////////////////////////////////////// -template -void basic_filebuf::_M_write_mode() { - if (!(_M_cm & std::ios_base::out)) { - this->setg(0, 0, 0); - if (_M_ebs > sizeof(_M_extbuf_min)) { - if (_M_always_noconv) - this->setp(reinterpret_cast(_M_extbuf), - reinterpret_cast(_M_extbuf) + (_M_ebs - 1)); - else - this->setp(_M_intbuf, _M_intbuf + (_M_ibs - 1)); - } else { - this->setp(0, 0); - } - _M_cm = std::ios_base::out; - } -} - -/////////////////////////////////////////////////////////////////////////////// -} // namespace kaldi - -/////////////////////////////////////////////////////////////////////////////// -#endif // KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// - -/* - * ============================================================================ - * libc++ License - * ============================================================================ - * - * The libc++ library is dual licensed under both the University of Illinois - * "BSD-Like" license and the MIT license. As a user of this code you may - * choose to use it under either license. As a contributor, you agree to allow - * your code to be used under both. - * - * Full text of the relevant licenses is included below. - * - * ============================================================================ - * - * University of Illinois/NCSA - * Open Source License - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * All rights reserved. - * - * Developed by: - * - * LLVM Team - * - * University of Illinois at Urbana-Champaign - * - * http://llvm.org - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * with the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimers in the - * documentation and/or other materials provided with the distribution. 
- * - * * Neither the names of the LLVM Team, University of Illinois at - * Urbana-Champaign, nor the names of its contributors may be used to - * endorse or promote products derived from this Software without specific - * prior written permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH - * THE SOFTWARE. - * - * ============================================================================== - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included - * below) - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * ============================================================================== - * - * This file is a partial list of people who have contributed to the LLVM/libc++ - * project. If you have contributed a patch or made some other contribution to - * LLVM/libc++, please submit a patch to this file to add yourself, and it will - * be done! - * - * The list is sorted by surname and formatted to allow easy grepping and - * beautification by scripts. The fields are: name (N), email (E), web-address - * (W), PGP key ID and fingerprint (P), description (D), and snail-mail address - * (S). - * - * N: Saleem Abdulrasool - * E: compnerd@compnerd.org - * D: Minor patches and Linux fixes. - * - * N: Dimitry Andric - * E: dimitry@andric.com - * D: Visibility fixes, minor FreeBSD portability patches. - * - * N: Holger Arnold - * E: holgerar@gmail.com - * D: Minor fix. - * - * N: Ruben Van Boxem - * E: vanboxem dot ruben at gmail dot com - * D: Initial Windows patches. - * - * N: David Chisnall - * E: theraven at theravensnest dot org - * D: FreeBSD and Solaris ports, libcxxrt support, some atomics work. - * - * N: Marshall Clow - * E: mclow.lists@gmail.com - * E: marshall@idio.com - * D: C++14 support, patches and bug fixes. - * - * N: Bill Fisher - * E: william.w.fisher@gmail.com - * D: Regex bug fixes. - * - * N: Matthew Dempsky - * E: matthew@dempsky.org - * D: Minor patches and bug fixes. - * - * N: Google Inc. 
- * D: Copyright owner and contributor of the CityHash algorithm - * - * N: Howard Hinnant - * E: hhinnant@apple.com - * D: Architect and primary author of libc++ - * - * N: Hyeon-bin Jeong - * E: tuhertz@gmail.com - * D: Minor patches and bug fixes. - * - * N: Argyrios Kyrtzidis - * E: kyrtzidis@apple.com - * D: Bug fixes. - * - * N: Bruce Mitchener, Jr. - * E: bruce.mitchener@gmail.com - * D: Emscripten-related changes. - * - * N: Michel Morin - * E: mimomorin@gmail.com - * D: Minor patches to is_convertible. - * - * N: Andrew Morrow - * E: andrew.c.morrow@gmail.com - * D: Minor patches and Linux fixes. - * - * N: Arvid Picciani - * E: aep at exys dot org - * D: Minor patches and musl port. - * - * N: Bjorn Reese - * E: breese@users.sourceforge.net - * D: Initial regex prototype - * - * N: Nico Rieck - * E: nico.rieck@gmail.com - * D: Windows fixes - * - * N: Jonathan Sauer - * D: Minor patches, mostly related to constexpr - * - * N: Craig Silverstein - * E: csilvers@google.com - * D: Implemented Cityhash as the string hash function on 64-bit machines - * - * N: Richard Smith - * D: Minor patches. - * - * N: Joerg Sonnenberger - * E: joerg@NetBSD.org - * D: NetBSD port. - * - * N: Stephan Tolksdorf - * E: st@quanttec.com - * D: Minor fix - * - * N: Michael van der Westhuizen - * E: r1mikey at gmail dot com - * - * N: Klaas de Vries - * E: klaas at klaasgaaf dot nl - * D: Minor bug fix. - * - * N: Zhang Xiongpang - * E: zhangxiongpang@gmail.com - * D: Minor patches and bug fixes. - * - * N: Xing Xue - * E: xingxue@ca.ibm.com - * D: AIX port - * - * N: Zhihao Yuan - * E: lichray@gmail.com - * D: Standard compatibility fixes. - * - * N: Jeffrey Yasskin - * E: jyasskin@gmail.com - * E: jyasskin@google.com - * D: Linux fixes. - */ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/const-integer-set-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/const-integer-set-inl.h deleted file mode 100644 index b93846148a3e4595774507f638396ce13393ac0e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/const-integer-set-inl.h +++ /dev/null @@ -1,87 +0,0 @@ -// util/const-integer-set-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_INL_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_INL_H_ - -// Do not include this file directly. It is included by const-integer-set.h - -namespace kaldi { - -template -void ConstIntegerSet::InitInternal() { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - quick_set_.clear(); // just in case we previously had data. 
- if (slow_set_.size() == 0) { - lowest_member_ = (I)1; - highest_member_ = (I)0; - contiguous_ = false; - quick_ = false; - } else { - lowest_member_ = slow_set_.front(); - highest_member_ = slow_set_.back(); - size_t range = highest_member_ + 1 - lowest_member_; - if (range == slow_set_.size()) { - contiguous_ = true; - quick_ = false; - } else { - contiguous_ = false; - // If it would be more compact to store as bool - if (range < slow_set_.size() * 8 * sizeof(I)) { - // (assuming 1 bit per element)... - quick_set_.resize(range, false); - for (size_t i = 0; i < slow_set_.size(); i++) - quick_set_[slow_set_[i] - lowest_member_] = true; - quick_ = true; - } else { - quick_ = false; - } - } - } -} - -template -int ConstIntegerSet::count(I i) const { - if (i < lowest_member_ || i > highest_member_) { - return 0; - } else { - if (contiguous_) return true; - if (quick_) { - return (quick_set_[i - lowest_member_] ? 1 : 0); - } else { - bool ans = std::binary_search(slow_set_.begin(), slow_set_.end(), i); - return (ans ? 1 : 0); - } - } -} - -template -void ConstIntegerSet::Write(std::ostream &os, bool binary) const { - WriteIntegerVector(os, binary, slow_set_); -} - -template -void ConstIntegerSet::Read(std::istream &is, bool binary) { - ReadIntegerVector(is, binary, &slow_set_); - InitInternal(); -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_CONST_INTEGER_SET_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/const-integer-set.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/const-integer-set.h deleted file mode 100644 index 809a56a7c83804bfaa4badb5e28059734bfcad1e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/const-integer-set.h +++ /dev/null @@ -1,96 +0,0 @@ -// util/const-integer-set.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_H_ -#include -#include -#include -#include -#include -#include "util/stl-utils.h" - -/* ConstIntegerSet is a way to efficiently test whether something is in a - supplied set of integers. It can be initialized from a vector or set, but - never changed after that. It either uses a sorted vector or an array of - bool, depending on the input. It behaves like a const version of an STL set, - with only a subset of the functionality, except all the member functions are - upper-case. - - Note that we could get rid of the member slow_set_, but we'd have to - do more work to implement an iterator type. This would save memory. 
-*/ - -namespace kaldi { - -template -class ConstIntegerSet { - public: - ConstIntegerSet() : lowest_member_(1), highest_member_(0) {} - - void Init(const std::vector &input) { - slow_set_ = input; - SortAndUniq(&slow_set_); - InitInternal(); - } - - void Init(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - - explicit ConstIntegerSet(const std::vector &input) : slow_set_(input) { - SortAndUniq(&slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const std::set &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const ConstIntegerSet &other) - : slow_set_(other.slow_set_) { - InitInternal(); - } - - int count(I i) const; // returns 1 or 0. - - typedef typename std::vector::const_iterator iterator; - iterator begin() const { return slow_set_.begin(); } - iterator end() const { return slow_set_.end(); } - size_t size() const { return slow_set_.size(); } - bool empty() const { return slow_set_.empty(); } - - void Write(std::ostream &os, bool binary) const; - void Read(std::istream &is, bool binary); - - private: - I lowest_member_; - I highest_member_; - bool contiguous_; - bool quick_; - std::vector quick_set_; - std::vector slow_set_; - void InitInternal(); -}; - -} // end namespace kaldi - -#include "util/const-integer-set-inl.h" - -#endif // KALDI_UTIL_CONST_INTEGER_SET_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/hash-list-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/hash-list-inl.h deleted file mode 100644 index 063fa7131ec618f0aae9dc30f4edd26c9dcce7fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/hash-list-inl.h +++ /dev/null @@ -1,193 +0,0 @@ -// util/hash-list-inl.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_INL_H_ -#define KALDI_UTIL_HASH_LIST_INL_H_ - -// Do not include this file directly. It is included by fast-hash.h - -namespace kaldi { - -template -HashList::HashList() { - list_head_ = NULL; - bucket_list_tail_ = static_cast(-1); // invalid. - hash_size_ = 0; - freed_head_ = NULL; -} - -template -void HashList::SetSize(size_t size) { - hash_size_ = size; - KALDI_ASSERT(list_head_ == NULL && - bucket_list_tail_ == - static_cast(-1)); // make sure empty. - if (size > buckets_.size()) buckets_.resize(size, HashBucket(0, NULL)); -} - -template -typename HashList::Elem *HashList::Clear() { - // Clears the hashtable and gives ownership of the currently contained list - // to the user. 
- for (size_t cur_bucket = bucket_list_tail_; - cur_bucket != static_cast(-1); - cur_bucket = buckets_[cur_bucket].prev_bucket) { - buckets_[cur_bucket].last_elem = NULL; // this is how we indicate "empty". - } - bucket_list_tail_ = static_cast(-1); - Elem *ans = list_head_; - list_head_ = NULL; - return ans; -} - -template -const typename HashList::Elem *HashList::GetList() const { - return list_head_; -} - -template -inline void HashList::Delete(Elem *e) { - e->tail = freed_head_; - freed_head_ = e; -} - -template -inline typename HashList::Elem *HashList::Find(I key) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - if (bucket.last_elem == NULL) { - return NULL; // empty bucket. - } else { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - return NULL; // Not found. - } -} - -template -inline typename HashList::Elem *HashList::New() { - if (freed_head_) { - Elem *ans = freed_head_; - freed_head_ = freed_head_->tail; - return ans; - } else { - Elem *tmp = new Elem[allocate_block_size_]; - for (size_t i = 0; i + 1 < allocate_block_size_; i++) - tmp[i].tail = tmp + i + 1; - tmp[allocate_block_size_ - 1].tail = NULL; - freed_head_ = tmp; - allocated_.push_back(tmp); - return this->New(); - } -} - -template -HashList::~HashList() { - // First test whether we had any memory leak within the - // HashList, i.e. things for which the user did not call Delete(). - size_t num_in_list = 0, num_allocated = 0; - for (Elem *e = freed_head_; e != NULL; e = e->tail) num_in_list++; - for (size_t i = 0; i < allocated_.size(); i++) { - num_allocated += allocate_block_size_; - delete[] allocated_[i]; - } - if (num_in_list != num_allocated) { - KALDI_WARN << "Possible memory leak: " << num_in_list - << " != " << num_allocated - << ": you might have forgotten to call Delete on " - << "some Elems"; - } -} - -template -inline typename HashList::Elem *HashList::Insert(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - // Check the element is existing or not. - if (bucket.last_elem != NULL) { - Elem *head = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - } - - // This is a new element. Insert it. - Elem *elem = New(); - elem->key = key; - elem->val = val; - if (bucket.last_elem == NULL) { // Unoccupied bucket. Insert at - // head of bucket list (which is tail of regular list, they go in - // opposite directions). - if (bucket_list_tail_ == static_cast(-1)) { - // list was empty so this is the first elem. - KALDI_ASSERT(list_head_ == NULL); - list_head_ = elem; - } else { - // link in to the chain of Elems - buckets_[bucket_list_tail_].last_elem->tail = elem; - } - elem->tail = NULL; - bucket.last_elem = elem; - bucket.prev_bucket = bucket_list_tail_; - bucket_list_tail_ = index; - } else { - // Already-occupied bucket. Insert at tail of list of elements within - // the bucket. 
- elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - } - return elem; -} - -template -void HashList::InsertMore(I key, T val) { - size_t index = (static_cast(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - Elem *elem = New(); - elem->key = key; - elem->val = val; - - KALDI_ASSERT(bucket.last_elem != NULL); // assume one element is already here - if (bucket.last_elem->key == key) { // standard behavior: add as last element - elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - return; - } - Elem *e = (bucket.prev_bucket == static_cast(-1) - ? list_head_ - : buckets_[bucket.prev_bucket].last_elem->tail); - // find place to insert in linked list - while (e != bucket.last_elem->tail && e->key != key) e = e->tail; - KALDI_ASSERT(e->key == key); // not found? - should not happen - elem->tail = e->tail; - e->tail = elem; -} - -} // end namespace kaldi - -#endif // KALDI_UTIL_HASH_LIST_INL_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/hash-list.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/hash-list.h deleted file mode 100644 index 31cc9bdc4870773475f8c5139539e320746bf5fe..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/hash-list.h +++ /dev/null @@ -1,146 +0,0 @@ -// util/hash-list.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_HASH_LIST_H_ -#define KALDI_UTIL_HASH_LIST_H_ - -#include -#include -#include -#include -#include - -#include "base/kaldi-error.h" - -/* This header provides utilities for a structure that's used in a decoder (but - is quite generic in nature so we implement and test it separately). - Basically it's a singly-linked list, but implemented in such a way that we - can quickly search for elements in the list. We give it a slightly richer - interface than just a hash and a list. The idea is that we want to separate - the hash part and the list part: basically, in the decoder, we want to have a - single hash for the current frame and the next frame, because by the time we - need to access the hash for the next frame we no longer need the hash for the - previous frame. So we have an operation that clears the hash but leaves the - list structure intact. We also control memory management inside this object, - to avoid repeated new's/deletes. - - See hash-list-test.cc for an example of how to use this object. -*/ - -namespace kaldi { - -template -class HashList { - public: - struct Elem { - I key; - T val; - Elem *tail; - }; - - /// Constructor takes no arguments. - /// Call SetSize to inform it of the likely size. 
- HashList(); - - /// Clears the hash and gives the head of the current list to the user; - /// ownership is transferred to the user (the user must call Delete() - /// for each element in the list, at his/her leisure). - Elem *Clear(); - - /// Gives the head of the current list to the user. Ownership retained in the - /// class. Caution: in December 2013 the return type was changed to const - /// Elem* and this function was made const. You may need to change some types - /// of local Elem* variables to const if this produces compilation errors. - const Elem *GetList() const; - - /// Think of this like delete(). It is to be called for each Elem in turn - /// after you "obtained ownership" by doing Clear(). This is not the opposite - /// of. Insert, it is the opposite of New. It's really a memory operation. - inline void Delete(Elem *e); - - /// This should probably not be needed to be called directly by the user. - /// Think of it as opposite - /// to Delete(); - inline Elem *New(); - - /// Find tries to find this element in the current list using the hashtable. - /// It returns NULL if not present. The Elem it returns is not owned by the - /// user, it is part of the internal list owned by this object, but the user - /// is free to modify the "val" element. - inline Elem *Find(I key); - - /// Insert inserts a new element into the hashtable/stored list. - /// Because element keys in a hashtable are unique, this operation checks - /// whether each inserted element has a key equivalent to the one of an - /// element already in the hashtable. If so, the element is not inserted, - /// returning an pointer to this existing element. - inline Elem *Insert(I key, T val); - - /// Insert inserts another element with same key into the hashtable/ - /// stored list. - /// By calling this, the user asserts that one element with that key is - /// already present. - /// We insert it that way, that all elements with the same key - /// follow each other. - /// Find() will return the first one of the elements with the same key. - inline void InsertMore(I key, T val); - - /// SetSize tells the object how many hash buckets to allocate (should - /// typically be at least twice the number of objects we expect to go in the - /// structure, for fastest performance). It must be called while the hash - /// is empty (e.g. after Clear() or after initializing the object, but before - /// adding anything to the hash. - void SetSize(size_t sz); - - /// Returns current number of hash buckets. - inline size_t Size() { return hash_size_; } - - ~HashList(); - - private: - struct HashBucket { - size_t prev_bucket; // index to next bucket (-1 if list tail). Note: - // list of buckets goes in opposite direction to list of Elems. - Elem *last_elem; // pointer to last element in this bucket (NULL if empty) - inline HashBucket(size_t i, Elem *e) : prev_bucket(i), last_elem(e) {} - }; - - Elem *list_head_; // head of currently stored list. - size_t bucket_list_tail_; // tail of list of active hash buckets. - - size_t hash_size_; // number of hash buckets. - - std::vector buckets_; - - Elem *freed_head_; // head of list of currently freed elements. [ready for - // allocation] - - std::vector allocated_; // list of allocated blocks. - - static const size_t allocate_block_size_ = 1024; // Number of Elements to - // allocate in one block. Must be largish so storing allocated_ doesn't - // become a problem. 
-};
-
-}  // end namespace kaldi
-
-#include "util/hash-list-inl.h"
-
-#endif  // KALDI_UTIL_HASH_LIST_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-io-inl.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-io-inl.h
deleted file mode 100644
index 8b0c92131c4af2113eb33da6f3cfa9dc4dee83e1..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-io-inl.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// util/kaldi-io-inl.h
-
-// Copyright 2009-2011 Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-// http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-#ifndef KALDI_UTIL_KALDI_IO_INL_H_
-#define KALDI_UTIL_KALDI_IO_INL_H_
-
-#include <string>
-
-namespace kaldi {
-
-bool Input::Open(const std::string &rxfilename, bool *binary) {
-  return OpenInternal(rxfilename, true, binary);
-}
-
-bool Input::OpenTextMode(const std::string &rxfilename) {
-  return OpenInternal(rxfilename, false, NULL);
-}
-
-bool Input::IsOpen() { return impl_ != NULL; }
-
-bool Output::IsOpen() { return impl_ != NULL; }
-
-}  // end namespace kaldi.
-
-#endif  // KALDI_UTIL_KALDI_IO_INL_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-io.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-io.cc
deleted file mode 100644
index 5f8ec4870138df32f6aca9c12383cf3885411741..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-io.cc
+++ /dev/null
@@ -1,898 +0,0 @@
-// util/kaldi-io.cc
-
-// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky
-//                2016 Xiaohui Zhang
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-// http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-#include "util/kaldi-io.h" - -#include -#include -#include - -#include - -#include "base/io-funcs.h" -#include "base/kaldi-math.h" -#include "util/kaldi-pipebuf.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -#ifdef KALDI_CYGWIN_COMPAT -#include "util/kaldi-cygwin-io-inl.h" -#define MapOsPath(x) MapCygwinPath(x) -#else // KALDI_CYGWIN_COMPAT -#define MapOsPath(x) x -#endif // KALDI_CYGWIN_COMPAT - -#if defined(_MSC_VER) -static FILE *popen(const char *command, const char *mode) { -#ifdef KALDI_CYGWIN_COMPAT - return kaldi::CygwinCompatPopen(command, mode); -#else // KALDI_CYGWIN_COMPAT - return _popen(command, mode); -#endif // KALDI_CYGWIN_COMPAT -} -#endif // _MSC_VER - -namespace kaldi { - -#ifndef _MSC_VER // on VS, we don't need this type. -// could replace basic_pipebuf with stdio_filebuf on some platforms. -// Would mean we could use less of our own code. -typedef basic_pipebuf PipebufType; -#endif -} // namespace kaldi - -namespace kaldi { - -std::string PrintableRxfilename(const std::string &rxfilename) { - if (rxfilename == "" || rxfilename == "-") { - return "standard input"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return rxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(rxfilename); - } -} - -std::string PrintableWxfilename(const std::string &wxfilename) { - if (wxfilename == "" || wxfilename == "-") { - return "standard output"; - } else { - // If this call to Escape later causes compilation issues, - // just replace it with "return wxfilename"; it's only a - // pretty-printing issue. - return ParseOptions::Escape(wxfilename); - } -} - -OutputType ClassifyWxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardOutput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardOutput; - } else if (first_char == '|') { - return kPipeOutput; // An output pipe like "|blah". - } else if (isspace(first_char) || isspace(last_char) || last_char == '|') { - return kNoOutput; // Leading or trailing space: can't interpret this. - // Final '|' would represent an input pipe, not an - // output pipe. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoOutput; - } else if (isdigit(last_char)) { - // This could be a file, but we have to see if it's an offset into a file - // (like foo.ark:4314328), which is not allowed for writing (but is - // allowed for reaching). 
This eliminates some things which would be - // valid UNIX filenames but are not allowed by Kaldi. (Even if we allowed - // such filenames for writing, we woudln't be able to correctly read them). - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') return kNoOutput; - // else it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but we - // check for internal '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify wxfilename with pipe symbol in the" - " wrong place (pipe without | at the beginning?): " - << filename; - return kNoOutput; - } - return kFileOutput; // It matched no other pattern: assume it's a filename. -} - -InputType ClassifyRxfilename(const std::string &filename) { - const char *c = filename.c_str(); - size_t length = filename.length(); - char first_char = c[0], - last_char = (length == 0 ? '\0' : c[filename.length() - 1]); - - // if 'filename' is "" or "-", return kStandardInput. - if (length == 0 || (length == 1 && first_char == '-')) { - return kStandardInput; - } else if (first_char == '|') { - return kNoInput; // An output pipe like "|blah": not - // valid for input. - } else if (last_char == '|') { - return kPipeInput; - } else if (isspace(first_char) || isspace(last_char)) { - return kNoInput; // We don't allow leading or trailing space in a filename. - // } else if ((first_char == 'a' || first_char == 's') && - // strchr(c, ':') != NULL && - // (ClassifyWspecifier(filename, NULL, NULL, NULL) != - // kNoWspecifier || - // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) { - // // e.g. ark:something or scp:something... this is almost certainly a - // // scripting error, so call it an error rather than treating it as a - // file. - // // In practice in modern kaldi scripts all (r,w)filenames begin with - // "ark" - // // or "scp", even though technically speaking options like "b", "t", - // "s" or - // // "cs" can appear before the ark or scp, like "b,ark". For - // efficiency, - // // and because this code is really just a nicety to catch errors - // earlier - // // than they would otherwise be caught, we only call those extra - // functions - // // for filenames beginning with 'a' or 's'. - // return kNoInput; - } else if (isdigit(last_char)) { - const char *d = c + length - 1; - while (isdigit(*d) && d > c) d--; - if (*d == ':') - return kOffsetFileInput; // Filename is like - // some_file:12345 - // otherwise it could still be a filename; continue to the next check. - } - - // At this point it matched no other pattern so we assume a filename, but - // we check for '|' as it's a common source of errors to have pipe - // commands without the pipe in the right place. Say that it can't be - // classified in this case. - if (strchr(c, '|') != NULL) { - KALDI_WARN << "Trying to classify rxfilename with pipe symbol in the" - " wrong place (pipe without | at the end?): " - << filename; - return kNoInput; - } - return kFileInput; // It matched no other pattern: assume it's a filename. -} - -class OutputImplBase { - public: - // Open will open it as a file (no header), and return true - // on success. It cannot be called on an already open stream. 
- virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::ostream &Stream() = 0; - virtual bool Close() = 0; - virtual ~OutputImplBase() {} -}; - -class FileOutputImpl : public OutputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (os_.is_open()) - KALDI_ERR << "FileOutputImpl::Open(), " - << "open called on already open file."; - filename_ = filename; - os_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out); - return os_.is_open(); - } - - virtual std::ostream &Stream() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return os_; - } - - virtual bool Close() { - if (!os_.is_open()) - KALDI_ERR << "FileOutputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - os_.close(); - return !(os_.fail()); - } - virtual ~FileOutputImpl() { - if (os_.is_open()) { - os_.close(); - if (os_.fail()) KALDI_ERR << "Error closing output file " << filename_; - } - } - - private: - std::string filename_; - std::ofstream os_; -}; - -class StandardOutputImpl : public OutputImplBase { - public: - StandardOutputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardOutputImpl::Open(), " - "open called on already open file."; -#ifdef _MSC_VER - _setmode(_fileno(stdout), binary ? _O_BINARY : _O_TEXT); -#endif - is_open_ = std::cout.good(); - return is_open_; - } - - virtual std::ostream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return std::cout; - } - - virtual bool Close() { - if (!is_open_) - KALDI_ERR << "StandardOutputImpl::Close(), file is not open."; - is_open_ = false; - std::cout << std::flush; - return !(std::cout.fail()); - } - virtual ~StandardOutputImpl() { - if (is_open_) { - std::cout << std::flush; - if (std::cout.fail()) KALDI_ERR << "Error writing to standard output"; - } - } - - private: - bool is_open_; -}; - -class PipeOutputImpl : public OutputImplBase { - public: - PipeOutputImpl() : f_(NULL), os_(NULL) {} - - virtual bool Open(const std::string &wxfilename, bool binary) { - filename_ = wxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(wxfilename.length() != 0 && wxfilename[0] == '|'); // should - // start with '|' - std::string cmd_name(wxfilename, 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "wb" : "w")); -#else - f_ = popen(cmd_name.c_str(), "w"); -#endif - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for writing, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't make the - // destructor try to close the stream when - // we're done. - (binary ? std::ios_base::out | std::ios_base::binary - : std::ios_base::out)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - os_ = new std::ostream(fb_); -#else - os_ = new std::ofstream(f_); -#endif - return os_->good(); - } - } - - virtual std::ostream &Stream() { - if (os_ == NULL) - KALDI_ERR << "PipeOutputImpl::Stream()," - " object not initialized."; - // I believe this error can only arise from coding error. 
- return *os_; - } - - virtual bool Close() { - if (os_ == NULL) KALDI_ERR << "PipeOutputImpl::Close(), file is not open."; - bool ok = true; - os_->flush(); - if (os_->fail()) ok = false; - delete os_; - os_ = NULL; - int status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return ok; - } - virtual ~PipeOutputImpl() { - if (os_) { - if (!Close()) - KALDI_ERR << "Error writing to pipe " << PrintableWxfilename(filename_); - } - } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::ostream *os_; -}; - -class InputImplBase { - public: - // Open will open it as a file, and return true on success. - // May be called twice only for kOffsetFileInput (otherwise, - // if called twice, we just create a new Input object, to avoid - // having to deal with the extra hassle of reopening with the - // same object. - // Note that we will to call Open with true (binary) for - // for text-mode Kaldi files; the only actual text-mode input - // is for non-Kaldi files. - virtual bool Open(const std::string &filename, bool binary) = 0; - virtual std::istream &Stream() = 0; - virtual int32 Close() = 0; // We only need to check failure in the case of - // kPipeInput. - // on close for input streams. - virtual InputType MyType() = 0; // Because if it's kOffsetFileInput, we may - // call Open twice - // (has efficiency benefits). - - virtual ~InputImplBase() {} -}; - -class FileInputImpl : public InputImplBase { - public: - virtual bool Open(const std::string &filename, bool binary) { - if (is_.is_open()) - KALDI_ERR << "FileInputImpl::Open(), " - << "open called on already open file."; - is_.open( - MapOsPath(filename).c_str(), - binary ? std::ios_base::in | std::ios_base::binary : std::ios_base::in); - return is_.is_open(); - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kFileInput; } - - virtual ~FileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::ifstream is_; -}; - -class StandardInputImpl : public InputImplBase { - public: - StandardInputImpl() : is_open_(false) {} - - virtual bool Open(const std::string &filename, bool binary) { - if (is_open_) - KALDI_ERR << "StandardInputImpl::Open(), " - "open called on already open file."; - is_open_ = true; -#ifdef _MSC_VER - _setmode(_fileno(stdin), binary ? _O_BINARY : _O_TEXT); -#endif - return true; // Don't check good() because would be false if - // eof, which may be valid input. - } - - virtual std::istream &Stream() { - if (!is_open_) - KALDI_ERR << "StandardInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. 
- return std::cin; - } - - virtual InputType MyType() { return kStandardInput; } - - virtual int32 Close() { - if (!is_open_) KALDI_ERR << "StandardInputImpl::Close(), file is not open."; - is_open_ = false; - return 0; - } - virtual ~StandardInputImpl() {} - - private: - bool is_open_; -}; - -class PipeInputImpl : public InputImplBase { - public: - PipeInputImpl() : f_(NULL), is_(NULL) {} - - virtual bool Open(const std::string &rxfilename, bool binary) { - filename_ = rxfilename; - KALDI_ASSERT(f_ == NULL); // Make sure closed. - KALDI_ASSERT(rxfilename.length() != 0 && - rxfilename[rxfilename.length() - 1] == - '|'); // should end with '|' - std::string cmd_name(rxfilename, 0, rxfilename.length() - 1); -#if defined(_MSC_VER) || defined(__CYGWIN__) - f_ = popen(cmd_name.c_str(), (binary ? "rb" : "r")); -#else - f_ = popen(cmd_name.c_str(), "r"); -#endif - - if (!f_) { // Failure. - KALDI_WARN << "Failed opening pipe for reading, command is: " << cmd_name - << ", errno is " << strerror(errno); - return false; - } else { -#ifndef _MSC_VER - fb_ = new PipebufType(f_, // Using this constructor won't lead the - // destructor to close the stream. - (binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in)); - KALDI_ASSERT(fb_ != NULL); // or would be alloc error. - is_ = new std::istream(fb_); -#else - is_ = new std::ifstream(f_); -#endif - if (is_->fail() || is_->bad()) return false; - if (is_->eof()) { - KALDI_WARN << "Pipe opened with command " - << PrintableRxfilename(rxfilename) << " is empty."; - // don't return false: empty may be valid. - } - return true; - } - } - - virtual std::istream &Stream() { - if (is_ == NULL) - KALDI_ERR << "PipeInputImpl::Stream(), object not initialized."; - // I believe this error can only arise from coding error. - return *is_; - } - - virtual int32 Close() { - if (is_ == NULL) KALDI_ERR << "PipeInputImpl::Close(), file is not open."; - delete is_; - is_ = NULL; - int32 status; -#ifdef _MSC_VER - status = _pclose(f_); -#else - status = pclose(f_); -#endif - if (status) - KALDI_WARN << "Pipe " << filename_ << " had nonzero return status " - << status; - f_ = NULL; -#ifndef _MSC_VER - delete fb_; - fb_ = NULL; -#endif - return status; - } - virtual ~PipeInputImpl() { - if (is_) Close(); - } - virtual InputType MyType() { return kPipeInput; } - - private: - std::string filename_; - FILE *f_; -#ifndef _MSC_VER - PipebufType *fb_; -#endif - std::istream *is_; -}; - -/* -#else - -// Just have an empty implementation of the pipe input that crashes if -// called. -class PipeInputImpl: public InputImplBase { - public: - PipeInputImpl() { KALDI_ASSERT(0 && "Pipe input not yet supported on this - platform."); } - virtual bool Open(const std::string, bool) { return 0; } - virtual std::istream &Stream() const { return NULL; } - virtual void Close() {} - virtual InputType MyType() { return kPipeInput; } -}; - -#endif -*/ - -class OffsetFileInputImpl : public InputImplBase { - // This class is a bit more complicated than the - - public: - // splits a filename like /my/file:123 into /my/file and the - // number 123. Crashes if not this format. - static void SplitFilename(const std::string &rxfilename, - std::string *filename, size_t *offset) { - size_t pos = rxfilename.find_last_of(':'); - KALDI_ASSERT(pos != std::string::npos); // would indicate error in calling - // code, as the filename is supposed to be of the correct form at this - // point. 
- *filename = std::string(rxfilename, 0, pos); - std::string number(rxfilename, pos + 1); - bool ans = ConvertStringToInteger(number, offset); - if (!ans) - KALDI_ERR << "Cannot get offset from filename " << rxfilename - << " (possibly you compiled in 32-bit and have a >32-bit" - << " byte offset into a file; you'll have to compile 64-bit."; - } - - bool Seek(size_t offset) { - size_t cur_pos = is_.tellg(); - if (cur_pos == offset) { - return true; - } else if (cur_pos < offset && cur_pos + 100 > offset) { - // We're close enough that it may be faster to just - // read that data, rather than seek. - for (size_t i = cur_pos; i < offset; i++) is_.get(); - return (is_.tellg() == std::streampos(offset)); - } - // Try to actually seek. - is_.seekg(offset, std::ios_base::beg); - if (is_.fail()) { // failbit or badbit is set [error happened] - is_.close(); - return false; // failure. - } else { - is_.clear(); // Clear any failure bits (e.g. eof). - return true; // success. - } - } - - // This Open routine is unusual in that it is designed to work even - // if it was already open. This for efficiency when seeking multiple - // times. - virtual bool Open(const std::string &rxfilename, bool binary) { - if (is_.is_open()) { - // We are opening when we have an already-open file. - // We may have to seek within this file, or else close it and - // open a different one. - std::string tmp_filename; - size_t offset; - SplitFilename(rxfilename, &tmp_filename, &offset); - if (tmp_filename == filename_ && binary == binary_) { // Just seek - is_.clear(); // clear fail bit, etc. - return Seek(offset); - } else { - is_.close(); // don't bother checking error status of is_. - filename_ = tmp_filename; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } else { - size_t offset; - SplitFilename(rxfilename, &filename_, &offset); - binary_ = binary; - is_.open(MapOsPath(filename_).c_str(), - binary ? std::ios_base::in | std::ios_base::binary - : std::ios_base::in); - if (!is_.is_open()) - return false; - else - return Seek(offset); - } - } - - virtual std::istream &Stream() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Stream(), file is not open."; - // I believe this error can only arise from coding error. - return is_; - } - - virtual int32 Close() { - if (!is_.is_open()) - KALDI_ERR << "FileInputImpl::Close(), file is not open."; - // I believe this error can only arise from coding error. - is_.close(); - // Don't check status. - return 0; - } - - virtual InputType MyType() { return kOffsetFileInput; } - - virtual ~OffsetFileInputImpl() { - // Stream will automatically be closed, and we don't care about - // whether it fails. - } - - private: - std::string filename_; // the actual filename - bool binary_; // true if was opened in binary mode. - std::ifstream is_; -}; - -Output::Output(const std::string &wxfilename, bool binary, bool write_header) - : impl_(NULL) { - if (!Open(wxfilename, binary, write_header)) { - if (impl_) { - delete impl_; - impl_ = NULL; - } - KALDI_ERR << "Error opening output stream " - << PrintableWxfilename(wxfilename); - } -} - -bool Output::Close() { - if (!impl_) { - return false; // error to call Close if not open. 
- } else { - bool ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } -} - -Output::~Output() { - if (impl_) { - bool ok = impl_->Close(); - delete impl_; - impl_ = NULL; - if (!ok) - KALDI_ERR << "Error closing output file " - << PrintableWxfilename(filename_) - << (ClassifyWxfilename(filename_) == kFileOutput - ? " (disk full?)" - : ""); - } -} - -std::ostream &Output::Stream() { // will throw if not open; else returns - // stream. - if (!impl_) KALDI_ERR << "Output::Stream() called but not open."; - return impl_->Stream(); -} - -bool Output::Open(const std::string &wxfn, bool binary, bool header) { - if (IsOpen()) { - if (!Close()) { // Throw here rather than return status, as it's an error - // about something else: if the user wanted to avoid the exception he/she - // could have called Close(). - KALDI_ERR << "Output::Open(), failed to close output stream: " - << PrintableWxfilename(filename_); - } - } - - filename_ = wxfn; - - OutputType type = ClassifyWxfilename(wxfn); - KALDI_ASSERT(impl_ == NULL); - - if (type == kFileOutput) { - impl_ = new FileOutputImpl(); - } else if (type == kStandardOutput) { - impl_ = new StandardOutputImpl(); - } else if (type == kPipeOutput) { - impl_ = new PipeOutputImpl(); - } else { // type == kNoOutput - KALDI_WARN << "Invalid output filename format " - << PrintableWxfilename(wxfn); - return false; - } - if (!impl_->Open(wxfn, binary)) { - delete impl_; - impl_ = NULL; - return false; // failed to open. - } else { // successfully opened it. - if (header) { - InitKaldiOutputStream(impl_->Stream(), binary); - bool ok = impl_->Stream().good(); // still OK? - if (!ok) { - delete impl_; - impl_ = NULL; - return false; - } - return true; - } else { - return true; - } - } -} - -Input::Input(const std::string &rxfilename, bool *binary) : impl_(NULL) { - if (!Open(rxfilename, binary)) { - KALDI_ERR << "Error opening input stream " - << PrintableRxfilename(rxfilename); - } -} - -int32 Input::Close() { - if (impl_) { - int32 ans = impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; - } else { - return 0; - } -} - -bool Input::OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary) { - InputType type = ClassifyRxfilename(rxfilename); - if (IsOpen()) { - // May have to close the stream first. - if (type == kOffsetFileInput && impl_->MyType() == kOffsetFileInput) { - // We want to use the same object to Open... this is in case - // the files are the same, so we can just seek. - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always open in binary. - delete impl_; - impl_ = NULL; - return false; - } - // read the binary header, if requested. - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; - } else { - Close(); - // and fall through to code below which actually opens the file. - } - } - if (type == kFileInput) { - impl_ = new FileInputImpl(); - } else if (type == kStandardInput) { - impl_ = new StandardInputImpl(); - } else if (type == kPipeInput) { - impl_ = new PipeInputImpl(); - } else if (type == kOffsetFileInput) { - impl_ = new OffsetFileInputImpl(); - } else { // type == kNoInput - KALDI_WARN << "Invalid input filename format " - << PrintableRxfilename(rxfilename); - return false; - } - if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode-- - // always read in binary. 
- delete impl_; - impl_ = NULL; - return false; - } - if (contents_binary != NULL) - return InitKaldiInputStream(impl_->Stream(), contents_binary); - else - return true; -} - -Input::~Input() { - if (impl_) Close(); -} - -std::istream &Input::Stream() { - if (!IsOpen()) KALDI_ERR << "Input::Stream(), not open."; - return impl_->Stream(); -} - -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m) { -// if (!filename.empty() && filename[filename.size() - 1] == ']') { -// // This filename seems to have a 'range'... like foo.ark:4312423[20:30]. -// // (the bit in square brackets is the range). -// std::string rxfilename, range; -// if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) { -// KALDI_ERR << "Could not make sense of possible range specifier in -// filename " -// << "while reading matrix: " << filename; -// } -// Matrix temp; -// bool binary_in; -// Input ki(rxfilename, &binary_in); -// temp.Read(ki.Stream(), binary_in); -// if (!ExtractObjectRange(temp, range, m)) { -// KALDI_ERR << "Error extracting range of object: " << filename; -// } -// } else { -// // The normal case, there is no range. -// bool binary_in; -// Input ki(filename, &binary_in); -// m->Read(ki.Stream(), binary_in); -// } -// } - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-io.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-io.h deleted file mode 100644 index 2175ca8f89ed5f3e3bade26528e924208df692c6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-io.h +++ /dev/null @@ -1,266 +0,0 @@ -// util/kaldi-io.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky -// 2016 Xiaohui Zhang - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
-#ifndef KALDI_UTIL_KALDI_IO_H_ -#define KALDI_UTIL_KALDI_IO_H_ - -#ifdef _MSC_VER -#include -#include -#endif -#include // For isspace. -#include -#include -#include "base/kaldi-common.h" -// #include "matrix/kaldi-matrix.h" - -namespace kaldi { - -class OutputImplBase; // Forward decl; defined in a .cc file -class InputImplBase; // Forward decl; defined in a .cc file - -/// \addtogroup io_group -/// @{ - -// The Output and Input classes handle stream-opening for "extended" filenames -// that include actual files, standard-input/standard-output, pipes, and -// offsets into actual files. They also handle reading and writing the -// binary-mode headers for Kaldi files, where applicable. The classes have -// versions of the Open routines that throw and do not throw, depending whether -// the calling code wants to catch the errors or not; there are also versions -// that write (or do not write) the Kaldi binary-mode header that says if it's -// binary mode. Generally files that contain Kaldi objects will have the header -// on, so we know upon reading them whether they have the header. So you would -// use the OpenWithHeader routines for these (or the constructor); but other -// types of objects (e.g. FSTs) would have files without a header so you would -// use OpenNoHeader. - -// We now document the types of extended filenames that we use. -// -// A "wxfilename" is an extended filename for writing. It can take three forms: -// (1) Filename: e.g. "/some/filename", "./a/b/c", "c:\Users\dpovey\My -// Documents\\boo" -// (whatever the actual file-system interprets) -// (2) Standard output: "" or "-" -// (3) A pipe: e.g. "| gzip -c > /tmp/abc.gz" -// -// -// A "rxfilename" is an extended filename for reading. It can take four forms: -// (1) An actual filename, whatever the file-system can read, e.g. "/my/file". -// (2) Standard input: "" or "-" -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) An offset into a file, e.g.: "/mnt/blah/data/1.ark:24871" -// [these are created by the Table and TableWriter classes; I may also write -// a program that creates them for arbitrary files] -// - -// Typical usage: -// ... -// bool binary; -// MyObject.Write(Output(some_filename, binary).Stream(), binary); -// -// ... more extensive example: -// { -// Output ko(some_filename, binary); -// MyObject1.Write(ko.Stream(), binary); -// MyObject2.Write(ko.Stream(), binary); -// } - -enum OutputType { kNoOutput, kFileOutput, kStandardOutput, kPipeOutput }; - -/// ClassifyWxfilename interprets filenames as follows: -/// - kNoOutput: invalid filenames (leading or trailing space, things that look -/// like wspecifiers and rspecifiers or like pipes to read from with leading -/// |. -/// - kFileOutput: Normal filenames -/// - kStandardOutput: The empty string or "-", interpreted as standard output -/// - kPipeOutput: pipes, e.g. "| gzip -c > /tmp/abc.gz" -OutputType ClassifyWxfilename(const std::string &wxfilename); - -enum InputType { - kNoInput, - kFileInput, - kStandardInput, - kOffsetFileInput, - kPipeInput -}; - -/// ClassifyRxfilenames interprets filenames for reading as follows: -/// - kNoInput: invalid filenames (leading or trailing space, things that -/// look like wspecifiers and rspecifiers or pipes to write to -/// with trailing |. -/// - kFileInput: normal filenames -/// - kStandardInput: the empty string or "-" -/// - kPipeInput: e.g. "gunzip -c /tmp/abc.gz |" -/// - kOffsetFileInput: offsets into files, e.g. 
/some/filename:12970 -InputType ClassifyRxfilename(const std::string &rxfilename); - -class Output { - public: - // The normal constructor, provided for convenience. - // Equivalent to calling with default constructor then Open() - // with these arguments. - Output(const std::string &filename, bool binary, bool write_header = true); - - Output() : impl_(NULL) {} - - /// This opens the stream, with the given mode (binary or text). It returns - /// true on success and false on failure. However, it will throw if something - /// was already open and could not be closed (to avoid this, call Close() - /// first. if write_header == true and binary == true, it writes the Kaldi - /// binary-mode header ('\0' then 'B'). You may call Open even if it is - /// already open; it will close the existing stream and reopen (however if - /// closing the old stream failed it will throw). - bool Open(const std::string &wxfilename, bool binary, bool write_header); - - inline bool IsOpen(); // return true if we have an open stream. Does not - // imply stream is good for writing. - - std::ostream &Stream(); // will throw if not open; else returns stream. - - // Close closes the stream. Calling Close is never necessary unless you - // want to avoid exceptions being thrown. There are times when calling - // Close will hurt efficiency (basically, when using offsets into files, - // and using the same Input object), - // but most of the time the user won't be doing this directly, it will - // be done in kaldi-table.{h, cc}, so you don't have to worry about it. - bool Close(); - - // This will throw if stream could not be closed (to check error status, - // call Close()). - ~Output(); - - private: - OutputImplBase *impl_; // non-NULL if open. - std::string filename_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Output); -}; - -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject.Read(ki.Stream(), binary_in); -// -// ... more extensive example: -// -// { -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject1.Read(ki.Stream(), &binary_in); -// MyObject2.Write(ki.Stream(), &binary_in); -// } -// Note that to catch errors you need to use try.. catch. -// Input communicates errors by throwing exceptions. - -// Input interprets four kinds of filenames: -// (1) Normal filenames -// (2) The empty string or "-", interpreted as standard output -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// (4) Offsets into [real] files, e.g. "/my/filename:12049" -// The last one has no correspondence in Output. - -class Input { - public: - /// The normal constructor. Opens the stream in binary mode. - /// Equivalent to calling the default constructor followed by Open(); then, if - /// binary != NULL, it calls ReadHeader(), putting the output in "binary"; it - /// throws on error. - explicit Input(const std::string &rxfilename, bool *contents_binary = NULL); - - Input() : impl_(NULL) {} - - // Open opens the stream for reading (the mode, where relevant, is binary; use - // OpenTextMode for text-mode, we made this a separate function rather than a - // boolean argument, to avoid confusion with Kaldi's text/binary distinction, - // since reading in the file system's text mode is unusual.) If - // contents_binary != NULL, it reads the binary-mode header and puts it in the - // "binary" variable. Returns true on success. If it returns false it will - // not be open. 
You may call Open even if it is already open; it will close - // the existing stream and reopen (however if closing the old stream failed it - // will throw). - inline bool Open(const std::string &rxfilename, bool *contents_binary = NULL); - - // As Open but (if the file system has text/binary modes) opens in text mode; - // you shouldn't ever have to use this as in Kaldi we read even text files in - // binary mode (and ignore the \r). - inline bool OpenTextMode(const std::string &rxfilename); - - // Return true if currently open for reading and Stream() will - // succeed. Does not guarantee that the stream is good. - inline bool IsOpen(); - - // It is never necessary or helpful to call Close, except if - // you are concerned about to many filehandles being open. - // Close does not throw. It returns the exit code as int32 - // in the case of a pipe [kPipeInput], and always zero otherwise. - int32 Close(); - - // Returns the underlying stream. Throws if !IsOpen() - std::istream &Stream(); - - // Destructor does not throw: input streams may legitimately fail so we - // don't worry about the status when we close them. - ~Input(); - - private: - bool OpenInternal(const std::string &rxfilename, bool file_binary, - bool *contents_binary); - InputImplBase *impl_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Input); -}; - -template -void ReadKaldiObject(const std::string &filename, C *c) { - bool binary_in; - Input ki(filename, &binary_in); - c->Read(ki.Stream(), binary_in); -} - -// Specialize the template for reading matrices, because we want to be able to -// support reading 'ranges' (row and column ranges), like foo.mat[10:20]. -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); -// -// -// template <> void ReadKaldiObject(const std::string &filename, -// Matrix *m); - -template -inline void WriteKaldiObject(const C &c, const std::string &filename, - bool binary) { - Output ko(filename, binary); - c.Write(ko.Stream(), binary); -} - -/// PrintableRxfilename turns the rxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard input". -std::string PrintableRxfilename(const std::string &rxfilename); - -/// PrintableWxfilename turns the wxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard output". -std::string PrintableWxfilename(const std::string &wxfilename); - -/// @} - -} // end namespace kaldi. - -#include "util/kaldi-io-inl.h" - -#endif // KALDI_UTIL_KALDI_IO_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-pipebuf.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-pipebuf.h deleted file mode 100644 index bcee80ccb1a6fa8ce3195483ac144c5ff66d2f89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/kaldi-pipebuf.h +++ /dev/null @@ -1,86 +0,0 @@ -// util/kaldi-pipebuf.h - -// Copyright 2009-2011 Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -/** @file kaldi-pipebuf.h - * This is an Kaldi C++ Library header. - */ - -#ifndef KALDI_UTIL_KALDI_PIPEBUF_H_ -#define KALDI_UTIL_KALDI_PIPEBUF_H_ - -#include -#if !defined(_LIBCPP_VERSION) // libc++ -#include -#else -#include "util/basic-filebuf.h" -#endif - -namespace kaldi { -// This class provides a way to initialize a filebuf with a FILE* pointer -// directly; it will not close the file pointer when it is deleted. -// The C++ standard does not allow implementations of C++ to provide -// this constructor within basic_filebuf, which makes it hard to deal -// with pipes using completely native C++. This is a workaround - -#ifdef _MSC_VER -#elif defined(_LIBCPP_VERSION) // libc++ -template > -class basic_pipebuf : public basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : basic_filebuf() { - this->open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - } -}; // class basic_pipebuf -#else -template > -class basic_pipebuf : public std::basic_filebuf { - public: - typedef basic_pipebuf ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : std::basic_filebuf() { - this->_M_file.sys_open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - this->_M_mode = mode; - this->_M_buf_size = BUFSIZ; - this->_M_allocate_internal_buffer(); - this->_M_reading = false; - this->_M_writing = false; - this->_M_set_buffer(-1); - } -}; // class basic_pipebuf -#endif // _MSC_VER - -} // namespace kaldi - -#endif // KALDI_UTIL_KALDI_PIPEBUF_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/parse-options.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/parse-options.cc deleted file mode 100644 index 1f2ef844d28d67ed58d2e0c9d7c7b674e8209df8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/parse-options.cc +++ /dev/null @@ -1,636 +0,0 @@ -// util/parse-options.cc - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey); -// Frantisek Skala; Arnab Ghoshal -// Copyright 2013 Tanel Alumae -// -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" -#include "util/parse-options.h" -#include "util/text-utils.h" - -namespace kaldi { - -ParseOptions::ParseOptions(const std::string &prefix, OptionsItf *other) - : print_args_(false), help_(false), usage_(""), argc_(0), argv_(NULL) { - ParseOptions *po = dynamic_cast(other); - if (po != NULL && po->other_parser_ != NULL) { - // we get here if this constructor is used twice, recursively. - other_parser_ = po->other_parser_; - } else { - other_parser_ = other; - } - if (po != NULL && po->prefix_ != "") { - prefix_ = po->prefix_ + std::string(".") + prefix; - } else { - prefix_ = prefix; - } -} - -void ParseOptions::Register(const std::string &name, bool *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, int32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, uint32 *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, float *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, double *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -void ParseOptions::Register(const std::string &name, std::string *ptr, - const std::string &doc) { - RegisterTmpl(name, ptr, doc); -} - -// old-style, used for registering application-specific parameters -template -void ParseOptions::RegisterTmpl(const std::string &name, T *ptr, - const std::string &doc) { - if (other_parser_ == NULL) { - this->RegisterCommon(name, ptr, doc, false); - } else { - KALDI_ASSERT(prefix_ != "" && - "Cannot use empty prefix when registering with prefix."); - std::string new_name = prefix_ + '.' + name; // name becomes prefix.name - other_parser_->Register(new_name, ptr, doc); - } -} - -// does the common part of the job of registering a parameter -template -void ParseOptions::RegisterCommon(const std::string &name, T *ptr, - const std::string &doc, bool is_standard) { - KALDI_ASSERT(ptr != NULL); - std::string idx = name; - NormalizeArgName(&idx); - if (doc_map_.find(idx) != doc_map_.end()) - KALDI_WARN << "Registering option twice, ignoring second time: " << name; - this->RegisterSpecific(name, idx, ptr, doc, is_standard); -} - -// used to register standard parameters (those that are present in all of the -// applications) -template -void ParseOptions::RegisterStandard(const std::string &name, T *ptr, - const std::string &doc) { - this->RegisterCommon(name, ptr, doc, true); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, bool *b, - const std::string &doc, bool is_standard) { - bool_map_[idx] = b; - doc_map_[idx] = - DocInfo(name, doc + " (bool, default = " + ((*b) ? 
"true)" : "false)"), - is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, int32 *i, - const std::string &doc, bool is_standard) { - int_map_[idx] = i; - std::ostringstream ss; - ss << doc << " (int, default = " << *i << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, uint32 *u, - const std::string &doc, bool is_standard) { - uint_map_[idx] = u; - std::ostringstream ss; - ss << doc << " (uint, default = " << *u << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, float *f, - const std::string &doc, bool is_standard) { - float_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (float, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, double *f, - const std::string &doc, bool is_standard) { - double_map_[idx] = f; - std::ostringstream ss; - ss << doc << " (double, default = " << *f << ")"; - doc_map_[idx] = DocInfo(name, ss.str(), is_standard); -} - -void ParseOptions::RegisterSpecific(const std::string &name, - const std::string &idx, std::string *s, - const std::string &doc, bool is_standard) { - string_map_[idx] = s; - doc_map_[idx] = - DocInfo(name, doc + " (string, default = \"" + *s + "\")", is_standard); -} -void ParseOptions::DisableOption(const std::string &name) { - if (argv_ != NULL) - KALDI_ERR << "DisableOption must not be called after calling Read()."; - if (doc_map_.erase(name) == 0) - KALDI_ERR << "Option " << name - << " was not registered so cannot be disabled: "; - bool_map_.erase(name); - int_map_.erase(name); - uint_map_.erase(name); - float_map_.erase(name); - double_map_.erase(name); - string_map_.erase(name); -} - -int ParseOptions::NumArgs() const { return positional_args_.size(); } - -std::string ParseOptions::GetArg(int i) const { - // use KALDI_ERR if code error - if (i < 1 || i > static_cast(positional_args_.size())) - KALDI_ERR << "ParseOptions::GetArg, invalid index " << i; - return positional_args_[i - 1]; -} - -// We currently do not support any other options. -enum ShellType { kBash = 0 }; - -// This can be changed in the code if it ever does need to be changed (as it's -// unlikely that one compilation of this tool-set would use both shells). -static ShellType kShellType = kBash; - -// Returns true if we need to escape a string before putting it into -// a shell (mainly thinking of bash shell, but should work for others) -// This is for the convenience of the user so command-lines that are -// printed out by ParseOptions::Read (with --print-args=true) are -// paste-able into the shell and will run. If you use a different type of -// shell, it might be necessary to change this function. -// But it's mostly a cosmetic issue as it basically affects how -// the program echoes its command-line arguments to the screen. -static bool MustBeQuoted(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - const char *c = str.c_str(); - if (*c == '\0') { - return true; // Must quote empty string - } else { - const char *ok_chars[2]; - - // These seem not to be interpreted as long as there are no other "bad" - // characters involved (e.g. 
"," would be interpreted as part of something - // like a{b,c}, but not on its own. - ok_chars[kBash] = "[]~#^_-+=:.,/"; - - // Just want to make sure that a space character doesn't get automatically - // inserted here via an automated style-checking script, like it did before. - KALDI_ASSERT(!strchr(ok_chars[kBash], ' ')); - - for (; *c != '\0'; c++) { - // For non-alphanumeric characters we have a list of characters which - // are OK. All others are forbidden (this is easier since the shell - // interprets most non-alphanumeric characters). - if (!isalnum(*c)) { - const char *d; - for (d = ok_chars[st]; *d != '\0'; d++) - if (*c == *d) break; - // If not alphanumeric or one of the "ok_chars", it must be escaped. - if (*d == '\0') return true; - } - } - return false; // The string was OK. No quoting or escaping. - } -} - -// Returns a quoted and escaped version of "str" -// which has previously been determined to need escaping. -// Our aim is to print out the command line in such a way that if it's -// pasted into a shell of ShellType "st" (only bash for now), it -// will get passed to the program in the same way. -static std::string QuoteAndEscape(const std::string &str, ShellType st) { - // Only Bash is supported (for the moment). - KALDI_ASSERT(st == kBash && "Invalid shell type."); - - // For now we use the following rules: - // In the normal case, we quote with single-quote "'", and to escape - // a single-quote we use the string: '\'' (interpreted as closing the - // single-quote, putting an escaped single-quote from the shell, and - // then reopening the single quote). - char quote_char = '\''; - const char *escape_str = "'\\''"; // e.g. echo 'a'\''b' returns a'b - - // If the string contains single-quotes that would need escaping this - // way, and we determine that the string could be safely double-quoted - // without requiring any escaping, then we double-quote the string. - // This is the case if the characters "`$\ do not appear in the string. - // e.g. see http://www.redhat.com/mirrors/LDP/LDP/abs/html/quotingvar.html - const char *c_str = str.c_str(); - if (strchr(c_str, '\'') && !strpbrk(c_str, "\"`$\\")) { - quote_char = '"'; - escape_str = "\\\""; // should never be accessed. - } - - char buf[2]; - buf[1] = '\0'; - - buf[0] = quote_char; - std::string ans = buf; - const char *c = str.c_str(); - for (; *c != '\0'; c++) { - if (*c == quote_char) { - ans += escape_str; - } else { - buf[0] = *c; - ans += buf; - } - } - buf[0] = quote_char; - ans += buf; - return ans; -} - -// static function -std::string ParseOptions::Escape(const std::string &str) { - return MustBeQuoted(str, kShellType) ? QuoteAndEscape(str, kShellType) : str; -} - -int ParseOptions::Read(int argc, const char *const argv[]) { - argc_ = argc; - argv_ = argv; - std::string key, value; - int i; - if (argc > 0) { - // set global "const char*" g_program_name (name of the program) - // so it can be printed out in error messages; - // it's useful because often the stderr of different programs will - // be mixed together in the same log file. -#ifdef _MSC_VER - const char *c = strrchr(argv[0], '\\'); -#else - const char *c = strrchr(argv[0], '/'); -#endif - SetProgramName(c == NULL ? 
argv[0] : c + 1); - } - // first pass: look for config parameter, look for priority - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // a lone "--" marks the end of named options - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (key.compare("config") == 0) { - ReadConfigFile(value); - } - if (key.compare("help") == 0) { - PrintUsage(); - exit(0); - } - } - } - bool double_dash_seen = false; - // second pass: add the command line options - for (i = 1; i < argc; i++) { - if (std::strncmp(argv[i], "--", 2) == 0) { - if (std::strcmp(argv[i], "--") == 0) { - // A lone "--" marks the end of named options. - // Skip that option and break the processing of named options - i += 1; - double_dash_seen = true; - break; - } - bool has_equal_sign; - SplitLongArg(argv[i], &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << argv[i]; - } - } else { - break; - } - } - - // process remaining arguments as positional - for (; i < argc; i++) { - if ((std::strcmp(argv[i], "--") == 0) && !double_dash_seen) { - double_dash_seen = true; - } else { - positional_args_.push_back(std::string(argv[i])); - } - } - - // if the user did not suppress this with --print-args = false.... - if (print_args_) { - std::ostringstream strm; - for (int j = 0; j < argc; j++) strm << Escape(argv[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } - return i; -} - -void ParseOptions::PrintUsage(bool print_command_line) { - std::cerr << '\n' << usage_ << '\n'; - DocMapType::iterator it; - // first we print application-specific options - bool app_specific_header_printed = false; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == false) { // application-specific option - if (app_specific_header_printed == false) { // header was not yet printed - std::cerr << "Options:" << '\n'; - app_specific_header_printed = true; - } - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - if (app_specific_header_printed == true) { - std::cerr << '\n'; - } - - // then the standard options - std::cerr << "Standard options:" << '\n'; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - if (it->second.is_standard_ == true) { // we have standard option - std::cerr << " --" << std::setw(25) << std::left << it->second.name_ - << " : " << it->second.use_msg_ << '\n'; - } - } - std::cerr << '\n'; - if (print_command_line) { - std::ostringstream strm; - strm << "Command line was: "; - for (int j = 0; j < argc_; j++) strm << Escape(argv_[j]) << " "; - strm << '\n'; - std::cerr << strm.str() << std::flush; - } -} - -void ParseOptions::PrintConfig(std::ostream &os) { - os << '\n' << "[[ Configuration of UI-Registered options ]]" << '\n'; - std::string key; - DocMapType::iterator it; - for (it = doc_map_.begin(); it != doc_map_.end(); ++it) { - key = it->first; - os << it->second.name_ << " = "; - if (bool_map_.end() != bool_map_.find(key)) { - os << (*bool_map_[key] ? 
"true" : "false"); - } else if (int_map_.end() != int_map_.find(key)) { - os << (*int_map_[key]); - } else if (uint_map_.end() != uint_map_.find(key)) { - os << (*uint_map_[key]); - } else if (float_map_.end() != float_map_.find(key)) { - os << (*float_map_[key]); - } else if (double_map_.end() != double_map_.find(key)) { - os << (*double_map_[key]); - } else if (string_map_.end() != string_map_.find(key)) { - os << "'" << *string_map_[key] << "'"; - } else { - KALDI_ERR << "PrintConfig: unrecognized option " << key << "[code error]"; - } - os << '\n'; - } - os << '\n'; -} - -void ParseOptions::ReadConfigFile(const std::string &filename) { - std::ifstream is(filename.c_str(), std::ifstream::in); - if (!is.good()) { - KALDI_ERR << "Cannot open config file: " << filename; - } - - std::string line, key, value; - int32 line_number = 0; - while (std::getline(is, line)) { - line_number++; - // trim out the comments - size_t pos; - if ((pos = line.find_first_of('#')) != std::string::npos) { - line.erase(pos); - } - // skip empty lines - Trim(&line); - if (line.length() == 0) continue; - - if (line.substr(0, 2) != "--") { - KALDI_ERR << "Reading config file " << filename << ": line " - << line_number << " does not look like a line " - << "from a Kaldi command-line program's config file: should " - << "be of the form --x=y. Note: config files intended to " - << "be sourced by shell scripts lack the '--'."; - } - - // parse option - bool has_equal_sign; - SplitLongArg(line, &key, &value, &has_equal_sign); - NormalizeArgName(&key); - Trim(&value); - if (!SetOption(key, value, has_equal_sign)) { - PrintUsage(true); - KALDI_ERR << "Invalid option " << line << " in config file " << filename; - } - } -} - -void ParseOptions::SplitLongArg(const std::string &in, std::string *key, - std::string *value, bool *has_equal_sign) { - KALDI_ASSERT(in.substr(0, 2) == "--"); // precondition. - size_t pos = in.find_first_of('=', 0); - if (pos == std::string::npos) { // we allow --option for bools - // defaults to empty. We handle this differently in different cases. - *key = in.substr(2, in.size() - 2); // 2 because starts with --. - *value = ""; - *has_equal_sign = false; - } else if (pos == 2) { // we also don't allow empty keys: --=value - PrintUsage(true); - KALDI_ERR << "Invalid option (no key): " << in; - } else { // normal case: --option=value - *key = in.substr(2, pos - 2); // 2 because starts with --. 
- *value = in.substr(pos + 1); - *has_equal_sign = true; - } -} - -void ParseOptions::NormalizeArgName(std::string *str) { - std::string out; - std::string::iterator it; - - for (it = str->begin(); it != str->end(); ++it) { - if (*it == '_') - out += '-'; // convert _ to - - else - out += std::tolower(*it); - } - *str = out; - - KALDI_ASSERT(str->length() > 0); -} - -bool ParseOptions::SetOption(const std::string &key, const std::string &value, - bool has_equal_sign) { - if (bool_map_.end() != bool_map_.find(key)) { - if (has_equal_sign && value == "") - KALDI_ERR << "Invalid option --" << key << "="; - *(bool_map_[key]) = ToBool(value); - } else if (int_map_.end() != int_map_.find(key)) { - *(int_map_[key]) = ToInt(value); - } else if (uint_map_.end() != uint_map_.find(key)) { - *(uint_map_[key]) = ToUint(value); - } else if (float_map_.end() != float_map_.find(key)) { - *(float_map_[key]) = ToFloat(value); - } else if (double_map_.end() != double_map_.find(key)) { - *(double_map_[key]) = ToDouble(value); - } else if (string_map_.end() != string_map_.find(key)) { - if (!has_equal_sign) - KALDI_ERR << "Invalid option --" << key << " (option format is --x=y)."; - *(string_map_[key]) = value; - } else { - return false; - } - return true; -} - -bool ParseOptions::ToBool(std::string str) { - std::transform(str.begin(), str.end(), str.begin(), ::tolower); - - // allow "" as a valid option for "true", so that --x is the same as --x=true - if ((str.compare("true") == 0) || (str.compare("t") == 0) || - (str.compare("1") == 0) || (str.compare("") == 0)) { - return true; - } - if ((str.compare("false") == 0) || (str.compare("f") == 0) || - (str.compare("0") == 0)) { - return false; - } - // if it is neither true nor false: - PrintUsage(true); - KALDI_ERR << "Invalid format for boolean argument [expected true or false]: " - << str; - return false; // never reached -} - -int32 ParseOptions::ToInt(const std::string &str) { - int32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -uint32 ParseOptions::ToUint(const std::string &str) { - uint32 ret; - if (!ConvertStringToInteger(str, &ret)) - KALDI_ERR << "Invalid integer option \"" << str << "\""; - return ret; -} - -float ParseOptions::ToFloat(const std::string &str) { - float ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -double ParseOptions::ToDouble(const std::string &str) { - double ret; - if (!ConvertStringToReal(str, &ret)) - KALDI_ERR << "Invalid floating-point option \"" << str << "\""; - return ret; -} - -// instantiate templates -template void ParseOptions::RegisterTmpl(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, int32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, float *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, double *ptr, - const std::string &doc); -template void ParseOptions::RegisterTmpl(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterStandard(const std::string &name, bool *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - int32 *ptr, - const std::string &doc); 
-template void ParseOptions::RegisterStandard(const std::string &name, - uint32 *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - float *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - double *ptr, - const std::string &doc); -template void ParseOptions::RegisterStandard(const std::string &name, - std::string *ptr, - const std::string &doc); - -template void ParseOptions::RegisterCommon(const std::string &name, bool *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, int32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, uint32 *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, float *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, double *ptr, - const std::string &doc, - bool is_standard); -template void ParseOptions::RegisterCommon(const std::string &name, - std::string *ptr, - const std::string &doc, - bool is_standard); - -} // namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/parse-options.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/parse-options.h deleted file mode 100644 index 93a060f4a411dfd63298a91bb313e0b66d337a75..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/parse-options.h +++ /dev/null @@ -1,265 +0,0 @@ -// util/parse-options.h - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Frantisek Skala; Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_PARSE_OPTIONS_H_ -#define KALDI_UTIL_PARSE_OPTIONS_H_ - -#include -#include -#include - -#include "base/kaldi-common.h" -#include "itf/options-itf.h" - -namespace kaldi { - -/// The class ParseOptions is for parsing command-line options; see -/// \ref parse_options for more documentation. -class ParseOptions : public OptionsItf { - public: - explicit ParseOptions(const char *usage) - : print_args_(true), - help_(false), - usage_(usage), - argc_(0), - argv_(NULL), - prefix_(""), - other_parser_(NULL) { -#if !defined(_MSC_VER) && \ - !defined(__CYGWIN__) // This is just a convenient place to set the stderr - // to line - setlinebuf(stderr); // buffering mode, since it's called at program start. -#endif // This helps ensure different programs' output is not mixed up. 
- RegisterStandard("config", &config_, - "Configuration file to read (this " - "option may be repeated)"); - RegisterStandard("print-args", &print_args_, - "Print the command line arguments (to stderr)"); - RegisterStandard("help", &help_, "Print out usage message"); - RegisterStandard("verbose", &g_kaldi_verbose_level, - "Verbose level (higher->more logging)"); - } - - /** - This is a constructor for the special case where some options are - registered with a prefix to avoid conflicts. The object thus created will - only be used temporarily to register an options class with the original - options parser (which is passed as the *other pointer) using the given - prefix. It should not be used for any other purpose, and the prefix must - not be the empty string. It seems to be the least bad way of implementing - options with prefixes at this point. - Example of usage is: - ParseOptions po; // original ParseOptions object - ParseOptions po_mfcc("mfcc", &po); // object with prefix. - MfccOptions mfcc_opts; - mfcc_opts.Register(&po_mfcc); - The options will now get registered as, e.g., --mfcc.frame-shift=10.0 - instead of just --frame-shift=10.0 - */ - ParseOptions(const std::string &prefix, OptionsItf *other); - - ~ParseOptions() {} - - // Methods from the interface - void Register(const std::string &name, bool *ptr, const std::string &doc); - void Register(const std::string &name, int32 *ptr, const std::string &doc); - void Register(const std::string &name, uint32 *ptr, const std::string &doc); - void Register(const std::string &name, float *ptr, const std::string &doc); - void Register(const std::string &name, double *ptr, const std::string &doc); - void Register(const std::string &name, std::string *ptr, - const std::string &doc); - - /// If called after registering an option and before calling - /// Read(), disables that option from being used. Will crash - /// at runtime if that option had not been registered. - void DisableOption(const std::string &name); - - /// This one is used for registering standard parameters of all the programs - template - void RegisterStandard(const std::string &name, T *ptr, - const std::string &doc); - - /** - Parses the command line options and fills the ParseOptions-registered - variables. This must be called after all the variables were registered!!! - - Initially the variables have implicit values, - then the config file values are set-up, - finally the command line values given. - Returns the first position in argv that was not used. - [typically not useful: use NumParams() and GetParam(). ] - */ - int Read(int argc, const char *const *argv); - - /// Prints the usage documentation [provided in the constructor]. - void PrintUsage(bool print_command_line = false); - /// Prints the actual configuration of all the registered variables - void PrintConfig(std::ostream &os); - - /// Reads the options values from a config file. Must be called after - /// registering all options. This is usually used internally after the - /// standard --config option is used, but it may also be called from a - /// program. - void ReadConfigFile(const std::string &filename); - - /// Number of positional parameters (c.f. argc-1). - int NumArgs() const; - - /// Returns one of the positional parameters; 1-based indexing for argc/argv - /// compatibility. Will crash if param is not >=1 and <=NumArgs(). - std::string GetArg(int param) const; - - std::string GetOptArg(int param) const { - return (param <= NumArgs() ? 
GetArg(param) : ""); - } - - /// The following function will return a possibly quoted and escaped - /// version of "str", according to the current shell. Currently - /// this is just hardwired to bash. It's useful for debug output. - static std::string Escape(const std::string &str); - - private: - /// Template to register various variable types, - /// used for program-specific parameters - template - void RegisterTmpl(const std::string &name, T *ptr, const std::string &doc); - - // Following functions do just the datatype-specific part of the job - /// Register boolean variable - void RegisterSpecific(const std::string &name, const std::string &idx, - bool *b, const std::string &doc, bool is_standard); - /// Register int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - int32 *i, const std::string &doc, bool is_standard); - /// Register unsinged int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - uint32 *u, const std::string &doc, bool is_standard); - /// Register float variable - void RegisterSpecific(const std::string &name, const std::string &idx, - float *f, const std::string &doc, bool is_standard); - /// Register double variable [useful as we change BaseFloat type]. - void RegisterSpecific(const std::string &name, const std::string &idx, - double *f, const std::string &doc, bool is_standard); - /// Register string variable - void RegisterSpecific(const std::string &name, const std::string &idx, - std::string *s, const std::string &doc, - bool is_standard); - - /// Does the actual job for both kinds of parameters - /// Does the common part of the job for all datatypes, - /// then calls RegisterSpecific - template - void RegisterCommon(const std::string &name, T *ptr, const std::string &doc, - bool is_standard); - - /// Set option with name "key" to "value"; will crash if can't do it. - /// "has_equal_sign" is used to allow --x for a boolean option x, - /// and --y=, for a string option y. - bool SetOption(const std::string &key, const std::string &value, - bool has_equal_sign); - - bool ToBool(std::string str); - int32 ToInt(const std::string &str); - uint32 ToUint(const std::string &str); - float ToFloat(const std::string &str); - double ToDouble(const std::string &str); - - // maps for option variables - std::map bool_map_; - std::map int_map_; - std::map uint_map_; - std::map float_map_; - std::map double_map_; - std::map string_map_; - - /** - Structure for options' documentation - */ - struct DocInfo { - DocInfo() {} - DocInfo(const std::string &name, const std::string &usemsg) - : name_(name), use_msg_(usemsg), is_standard_(false) {} - DocInfo(const std::string &name, const std::string &usemsg, - bool is_standard) - : name_(name), use_msg_(usemsg), is_standard_(is_standard) {} - - std::string name_; - std::string use_msg_; - bool is_standard_; - }; - typedef std::map DocMapType; - DocMapType doc_map_; ///< map for the documentation - - bool print_args_; ///< variable for the implicit --print-args parameter - bool help_; ///< variable for the implicit --help parameter - std::string config_; ///< variable for the implicit --config parameter - std::vector positional_args_; - const char *usage_; - int argc_; - const char *const *argv_; - - /// These members are not normally used. 
They are only used when the object - /// is constructed with a prefix - std::string prefix_; - OptionsItf *other_parser_; - - protected: - /// SplitLongArg parses an argument of the form --a=b, --a=, or --a, - /// and sets "has_equal_sign" to true if an equals-sign was parsed.. - /// this is needed in order to correctly allow --x for a boolean option - /// x, and --y= for a string option y, and to disallow --x= and --y. - void SplitLongArg(const std::string &in, std::string *key, std::string *value, - bool *has_equal_sign); - - void NormalizeArgName(std::string *str); -}; - -/// This template is provided for convenience in reading config classes from -/// files; this is not the standard way to read configuration options, but may -/// occasionally be needed. This function assumes the config has a function -/// "void Register(OptionsItf *opts)" which it can call to register the -/// ParseOptions object. -template -void ReadConfigFromFile(const std::string &config_filename, C *c) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << config_filename << "'"; - ParseOptions po(usage_str.str().c_str()); - c->Register(&po); - po.ReadConfigFile(config_filename); -} - -/// This variant of the template ReadConfigFromFile is for if you need to read -/// two config classes from the same file. -template -void ReadConfigsFromFile(const std::string &conf, C1 *c1, C2 *c2) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << conf << "'"; - ParseOptions po(usage_str.str().c_str()); - c1->Register(&po); - c2->Register(&po); - po.ReadConfigFile(conf); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_PARSE_OPTIONS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/simple-io-funcs.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/simple-io-funcs.cc deleted file mode 100644 index 5ace601b6a2bb186dec78b0b25cb5a3227c48bc9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/simple-io-funcs.cc +++ /dev/null @@ -1,80 +0,0 @@ -// util/simple-io-funcs.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#include "util/simple-io-funcs.h" -#include "util/text-utils.h" - -namespace kaldi { - -bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. 
- if (!ko.Open(wxfilename, false, false)) return false; - for (size_t i = 0; i < list.size(); i++) ko.Stream() << list[i] << '\n'; - return ko.Close(); -} - -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - int32 i; - list->clear(); - while (!(is >> i).fail()) list->push_back(i); - is >> std::ws; - return is.eof(); // should be eof, or junk at end of file. -} - -bool WriteIntegerVectorVectorSimple( - const std::string &wxfilename, - const std::vector > &list) { - kaldi::Output ko; - // false, false is: text-mode, no Kaldi header. - if (!ko.Open(wxfilename, false, false)) return false; - std::ostream &os = ko.Stream(); - for (size_t i = 0; i < list.size(); i++) { - for (size_t j = 0; j < list[i].size(); j++) { - os << list[i][j]; - if (j + 1 < list[i].size()) os << ' '; - } - os << '\n'; - } - return ko.Close(); -} - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *list) { - kaldi::Input ki; - if (!ki.OpenTextMode(rxfilename)) return false; - std::istream &is = ki.Stream(); - list->clear(); - std::string line; - while (std::getline(is, line)) { - std::vector v; - if (!SplitStringToIntegers(line, " \t\r", true, &v)) { - list->clear(); - return false; - } - list->push_back(v); - } - return is.eof(); // if we're not at EOF, something weird happened. -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/simple-io-funcs.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/simple-io-funcs.h deleted file mode 100644 index 1ead12790ba9bd6a44ccdff855918270191b8ebd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/simple-io-funcs.h +++ /dev/null @@ -1,61 +0,0 @@ -// util/simple-io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_SIMPLE_IO_FUNCS_H_ -#define KALDI_UTIL_SIMPLE_IO_FUNCS_H_ - -#include -#include -#include "util/kaldi-io.h" - -// This header contains some utilities for reading some common, simple text -// formats:integers in files, one per line, and integers in files, possibly -// multiple per line. these are not really fully native Kaldi formats; they are -// mostly for small files that might be generated by scripts, and can be read -// all at one time. for longer files of this type, we would probably use the -// Table code. - -namespace kaldi { - -/// WriteToList attempts to write this list of integers, one per line, -/// to the given file, in text format. -/// returns true if succeeded. 
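For context on this removal: the simple-io-funcs helpers only wrap plain text files with one integer per line. A brief usage sketch of the API as it existed before deletion (the file name and values are made up for illustration):

```cpp
// Usage sketch only: WriteIntegerVectorSimple / ReadIntegerVectorSimple write
// and read plain text files with one integer per line. "ids.txt" and the
// values are made up for illustration.
#include <iostream>
#include <string>
#include <vector>

#include "util/simple-io-funcs.h"

int main() {
  std::vector<kaldi::int32> ids = {7, 8, 9};
  if (!kaldi::WriteIntegerVectorSimple("ids.txt", ids))
    std::cerr << "write failed" << std::endl;

  std::vector<kaldi::int32> read_back;
  if (kaldi::ReadIntegerVectorSimple("ids.txt", &read_back))
    std::cout << "read " << read_back.size() << " integers" << std::endl;  // 3
  return 0;
}
```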
-bool WriteIntegerVectorSimple(const std::string &wxfilename, - const std::vector &v); - -/// ReadFromList attempts to read this list of integers, one per line, -/// from the given file, in text format. -/// returns true if succeeded. -bool ReadIntegerVectorSimple(const std::string &rxfilename, - std::vector *v); - -// This is a file format like: -// 1 2 -// 3 -// -// 4 5 6 -// etc. -bool WriteIntegerVectorVectorSimple(const std::string &wxfilename, - const std::vector > &v); - -bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, - std::vector > *v); - -} // end namespace kaldi. - -#endif // KALDI_UTIL_SIMPLE_IO_FUNCS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/stl-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/stl-utils.h deleted file mode 100644 index 8a29cd582c77b3078277aa9713b8676032bbc5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/stl-utils.h +++ /dev/null @@ -1,310 +0,0 @@ -// util/stl-utils.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_STL_UTILS_H_ -#define KALDI_UTIL_STL_UTILS_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -using std::unordered_map; -using std::unordered_set; - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Sorts and uniq's (removes duplicates) from a vector. -template -inline void SortAndUniq(std::vector *vec) { - std::sort(vec->begin(), vec->end()); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Returns true if the vector is sorted. -template -inline bool IsSorted(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter < *iter) return false; - iter = next_iter; - } -} - -/// Returns true if the vector is sorted and contains each element -/// only once. -template -inline bool IsSortedAndUniq(const std::vector &vec) { - typename std::vector::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter <= *iter) return false; - iter = next_iter; - } -} - -/// Removes duplicate elements from a sorted list. -template -inline void Uniq(std::vector *vec) { // must be already sorted. 
- KALDI_PARANOID_ASSERT(IsSorted(*vec)); - KALDI_ASSERT(vec); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Copies the elements of a set to a vector. -template -void CopySetToVector(const std::set &s, std::vector *v) { - // copies members of s into v, in sorted order from lowest to highest - // (because the set was in sorted order). - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename std::set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -template -void CopySetToVector(const unordered_set &s, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename unordered_set::const_iterator siter = s.begin(), send = s.end(); - typename std::vector::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -/// Copies the (key, value) pairs in a map to a vector of pairs. -template -void CopyMapToVector(const std::map &m, - std::vector > *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector >::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = std::make_pair(miter->first, miter->second); - // do it like this because of const casting. - } -} - -/// Copies the keys in a map to a vector. -template -void CopyMapKeysToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->first; - } -} - -/// Copies the values in a map to a vector. -template -void CopyMapValuesToVector(const std::map &m, std::vector *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->second; - } -} - -/// Copies the keys in a map to a set. -template -void CopyMapKeysToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) { - s->insert(s->end(), miter->first); - } -} - -/// Copies the values in a map to a set. -template -void CopyMapValuesToSet(const std::map &m, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) s->insert(s->end(), miter->second); -} - -/// Copies the contents of a vector to a set. -template -void CopyVectorToSet(const std::vector &v, std::set *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) s->insert(s->end(), *iter); - // s->end() is a hint in case v was sorted. will work regardless. -} - -/// Deletes any non-NULL pointers in the vector v, and sets -/// the corresponding entries of v to NULL -template -void DeletePointers(std::vector *v) { - KALDI_ASSERT(v != NULL); - typename std::vector::iterator iter = v->begin(), end = v->end(); - for (; iter != end; ++iter) { - if (*iter != NULL) { - delete *iter; - *iter = NULL; // set to NULL for extra safety. - } - } -} - -/// Returns true if the vector of pointers contains NULL pointers. 
-template -bool ContainsNullPointers(const std::vector &v) { - typename std::vector::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) - if (*iter == static_cast(NULL)) return true; - return false; -} - -/// Copies the contents a vector of one type to a vector -/// of another type. -template -void CopyVectorToVector(const std::vector &vec_in, std::vector *vec_out) { - KALDI_ASSERT(vec_out != NULL); - vec_out->resize(vec_in.size()); - for (size_t i = 0; i < vec_in.size(); i++) - (*vec_out)[i] = static_cast(vec_in[i]); -} - -/// A hashing function-object for vectors. -template -struct VectorHasher { // hashing function for vector. - size_t operator()(const std::vector &x) const noexcept { - size_t ans = 0; - typename std::vector::const_iterator iter = x.begin(), end = x.end(); - for (; iter != end; ++iter) { - ans *= kPrime; - ans += *iter; - } - return ans; - } - VectorHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - } - - private: - static const int kPrime = 7853; -}; - -/// A hashing function-object for pairs of ints -template -struct PairHasher { // hashing function for pair - size_t operator()(const std::pair &x) const noexcept { - // 7853 was chosen at random from a list of primes. - return x.first + x.second * 7853; - } - PairHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int1); - KALDI_ASSERT_IS_INTEGER_TYPE(Int2); - } -}; - -/// A hashing function object for strings. -struct StringHasher { // hashing function for std::string - size_t operator()(const std::string &str) const noexcept { - size_t ans = 0, len = str.length(); - const char *c = str.c_str(), *end = c + len; - for (; c != end; c++) { - ans *= kPrime; - ans += *c; - } - return ans; - } - - private: - static const int kPrime = 7853; -}; - -/// Reverses the contents of a vector. -template -inline void ReverseVector(std::vector *vec) { - KALDI_ASSERT(vec != NULL); - size_t sz = vec->size(); - for (size_t i = 0; i < sz / 2; i++) std::swap((*vec)[i], (*vec)[sz - 1 - i]); -} - -/// Comparator object for pairs that compares only the first pair. -template -struct CompareFirstMemberOfPair { - inline bool operator()(const std::pair &p1, const std::pair &p2) { - return p1.first < p2.first; - } -}; - -/// For a vector of pair where I is an integer and F a floating-point or -/// integer type, this function sorts a vector of type vector > on -/// the I value and then merges elements with equal I values, summing these over -/// the F component and then removing any F component with zero value. This -/// is for where the vector of pairs represents a map from the integer to float -/// component, with an "adding" type of semantics for combining the elements. -template -inline void MergePairVectorSumming(std::vector > *vec) { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - CompareFirstMemberOfPair c; - std::sort(vec->begin(), vec->end(), c); // sort on 1st element. - typename std::vector >::iterator out = vec->begin(), - in = vec->begin(), - end = vec->end(); - // special case: while there is nothing to be changed, skip over - // initial input (avoids unnecessary copying). - while (in + 1 < end && in[0].first != in[1].first && in[0].second != 0.0) { - in++; - out++; - } - while (in < end) { - // We reach this point only at the first element of - // each stretch of identical .first elements. - *out = *in; - ++in; - while (in < end && in->first == out->first) { - out->second += in->second; // this is the merge operation. 
- ++in; - } - if (out->second != static_cast(0)) // Don't keep zero elements. - out++; - } - vec->erase(out, end); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_STL_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/text-utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/text-utils.cc deleted file mode 100644 index fd70889644f6b4e14793ddd4f5b0d71a66768699..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/text-utils.cc +++ /dev/null @@ -1,580 +0,0 @@ -// util/text-utils.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "util/text-utils.h" - -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out) { - KALDI_ASSERT(out != NULL); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - F f = 0; - if (!ConvertStringToReal(split[i], &f)) return false; - (*out)[i] = f; - } - return true; -} - -// Instantiate the template above for float and double. 
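The stl-utils header removed above is a set of small STL conveniences; a brief usage sketch of a few of them, with made-up data and expected results that follow from the definitions above:

```cpp
// Usage sketch only: a few of the removed stl-utils helpers. The data are
// made up; the expected results follow from the definitions above.
#include <iostream>
#include <utility>
#include <vector>

#include "util/stl-utils.h"

int main() {
  std::vector<kaldi::int32> v = {3, 1, 3, 2, 1};
  kaldi::SortAndUniq(&v);                               // v == {1, 2, 3}
  std::cout << kaldi::IsSortedAndUniq(v) << std::endl;  // 1

  // Sum the weights of pairs sharing an index, dropping zero sums.
  std::vector<std::pair<kaldi::int32, float> > pairs = {
      {2, 1.0f}, {1, 0.5f}, {2, -1.0f}};
  kaldi::MergePairVectorSumming(&pairs);
  // pairs == {{1, 0.5f}}: the two index-2 weights cancel and are removed.
  std::cout << pairs.size() << std::endl;  // 1
  return 0;
}
```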
-template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); -template bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out) { - std::string tmp_str; - for (size_t i = 0; i < vec_in.size(); i++) { - if (!omit_empty_strings || !vec_in[i].empty()) { - tmp_str.append(vec_in[i]); - if (i < vec_in.size() - 1) - if (!omit_empty_strings || !vec_in[i + 1].empty()) - tmp_str.append(delim); - } - } - str_out->swap(tmp_str); -} - -void Trim(std::string *str) { - const char *white_chars = " \t\n\r\f\v"; - - std::string::size_type pos = str->find_last_not_of(white_chars); - if (pos != std::string::npos) { - str->erase(pos + 1); - pos = str->find_first_not_of(white_chars); - if (pos != std::string::npos) str->erase(0, pos); - } else { - str->erase(str->begin(), str->end()); - } -} - -bool IsToken(const std::string &token) { - size_t l = token.length(); - if (l == 0) return false; - for (size_t i = 0; i < l; i++) { - unsigned char c = token[i]; - if ((!isprint(c) || isspace(c)) && (isascii(c) || c == (unsigned char)255)) - return false; - // The "&& (isascii(c) || c == 255)" was added so that we won't reject - // non-ASCII characters such as French characters with accents [except for - // 255 which is "nbsp", a form of space]. - } - return true; -} - -void SplitStringOnFirstSpace(const std::string &str, std::string *first, - std::string *rest) { - const char *white_chars = " \t\n\r\f\v"; - typedef std::string::size_type I; - const I npos = std::string::npos; - I first_nonwhite = str.find_first_not_of(white_chars); - if (first_nonwhite == npos) { - first->clear(); - rest->clear(); - return; - } - // next_white is first whitespace after first nonwhitespace. - I next_white = str.find_first_of(white_chars, first_nonwhite); - - if (next_white == npos) { // no more whitespace... - *first = std::string(str, first_nonwhite); - rest->clear(); - return; - } - I next_nonwhite = str.find_first_not_of(white_chars, next_white); - if (next_nonwhite == npos) { - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - rest->clear(); - return; - } - - I last_nonwhite = str.find_last_not_of(white_chars); - KALDI_ASSERT(last_nonwhite != npos); // or coding error. 
- - *first = std::string(str, first_nonwhite, next_white - first_nonwhite); - *rest = std::string(str, next_nonwhite, last_nonwhite + 1 - next_nonwhite); -} - -bool IsLine(const std::string &line) { - if (line.find('\n') != std::string::npos) return false; - if (line.empty()) return true; - if (isspace(*(line.begin()))) return false; - if (isspace(*(line.rbegin()))) return false; - std::string::const_iterator iter = line.begin(), end = line.end(); - for (; iter != end; iter++) - if (!isprint(*iter)) return false; - return true; -} - -template -class NumberIstream { - public: - explicit NumberIstream(std::istream &i) : in_(i) {} - - NumberIstream &operator>>(T &x) { - if (!in_.good()) return *this; - in_ >> x; - if (!in_.fail() && RemainderIsOnlySpaces()) return *this; - return ParseOnFail(&x); - } - - private: - std::istream &in_; - - bool RemainderIsOnlySpaces() { - if (in_.tellg() != std::istream::pos_type(-1)) { - std::string rem; - in_ >> rem; - - if (rem.find_first_not_of(' ') != std::string::npos) { - // there is not only spaces - return false; - } - } - - in_.clear(); - return true; - } - - NumberIstream &ParseOnFail(T *x) { - std::string str; - in_.clear(); - in_.seekg(0); - // If the stream is broken even before trying - // to read from it or if there are many tokens, - // it's pointless to try. - if (!(in_ >> str) || !RemainderIsOnlySpaces()) { - in_.setstate(std::ios_base::failbit); - return *this; - } - - std::map inf_nan_map; - // we'll keep just uppercase values. - inf_nan_map["INF"] = std::numeric_limits::infinity(); - inf_nan_map["+INF"] = std::numeric_limits::infinity(); - inf_nan_map["-INF"] = -std::numeric_limits::infinity(); - inf_nan_map["INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["+INFINITY"] = std::numeric_limits::infinity(); - inf_nan_map["-INFINITY"] = -std::numeric_limits::infinity(); - inf_nan_map["NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["+NAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-NAN"] = -std::numeric_limits::quiet_NaN(); - // MSVC - inf_nan_map["1.#INF"] = std::numeric_limits::infinity(); - inf_nan_map["-1.#INF"] = -std::numeric_limits::infinity(); - inf_nan_map["1.#QNAN"] = std::numeric_limits::quiet_NaN(); - inf_nan_map["-1.#QNAN"] = -std::numeric_limits::quiet_NaN(); - - std::transform(str.begin(), str.end(), str.begin(), ::toupper); - - if (inf_nan_map.find(str) != inf_nan_map.end()) { - *x = inf_nan_map[str]; - } else { - in_.setstate(std::ios_base::failbit); - } - - return *this; - } -}; - -template -bool ConvertStringToReal(const std::string &str, T *out) { - std::istringstream iss(str); - - NumberIstream i(iss); - - i >> *out; - - if (iss.fail()) { - // Number conversion failed. - return false; - } - - return true; -} - -template bool ConvertStringToReal(const std::string &str, float *out); -template bool ConvertStringToReal(const std::string &str, double *out); - -/* - This function is a helper function of StringsApproxEqual. It should be - thought of as a recursive function-- it was designed that way-- but rather - than actually recursing (which would cause problems with stack overflow), we - just set the args and return to the start. - - The 'decimal_places_tolerance' argument is just passed in from outside, - see the documentation for StringsApproxEqual in text-utils.h to see an - explanation. The argument 'places_into_number' provides some information - about the strings 'a' and 'b' that precedes the current pointers. 
- For purposes of this comment, let's define the 'decimal' of a number - as the part that comes after the decimal point, e.g. in '99.123', - '123' would be the decimal. If 'places_into_number' is -1, it means - we're not currently inside some place like that (i.e. it's not the - case that we're pointing to the '1' or the '2' or the '3'). - If it's 0, then we'd be pointing to the first place after the decimal, - '1' in this case. Note if one of the numbers is shorter than the - other, like '99.123' versus '99.1234' and 'a' points to the first '3' - while 'b' points to the second '4', 'places_into_number' referes to the - shorter of the two, i.e. it would be 2 in this example. - - - */ -bool StringsApproxEqualInternal(const char *a, const char *b, - int32 decimal_places_tolerance, - int32 places_into_number) { -start: - char ca = *a, cb = *b; - if (ca == cb) { - if (ca == '\0') { - return true; - } else { - if (places_into_number >= 0) { - if (isdigit(ca)) { - places_into_number++; - } else { - places_into_number = -1; - } - } else { - if (ca == '.') { - places_into_number = 0; - } - } - a++; - b++; - goto start; - } - } else { - if (places_into_number >= decimal_places_tolerance && - (isdigit(ca) || isdigit(cb))) { - // we're potentially willing to accept this difference between the - // strings. - if (isdigit(ca)) a++; - if (isdigit(cb)) b++; - // we'll have advanced at least one of the two strings. - goto start; - } else if (places_into_number >= 0 && - ((ca == '0' && !isdigit(cb)) || (cb == '0' && !isdigit(ca)))) { - // this clause is designed to ensure that, for example, - // "0.1" would count the same as "0.100001". - if (ca == '0') - a++; - else - b++; - places_into_number++; - goto start; - } else { - return false; - } - } -} - -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_tolerance) { - return StringsApproxEqualInternal(a.c_str(), b.c_str(), - decimal_places_tolerance, -1); -} - -bool ConfigLine::ParseLine(const std::string &line) { - data_.clear(); - whole_line_ = line; - if (line.size() == 0) return false; // Empty line - size_t pos = 0, size = line.size(); - while (isspace(line[pos]) && pos < size) pos++; - if (pos == size) return false; // whitespace-only line - size_t first_token_start_pos = pos; - // first get first_token_. - while (!isspace(line[pos]) && pos < size) { - if (line[pos] == '=') { - // If the first block of non-whitespace looks like "foo-bar=...", - // then we ignore it: there is no initial token, and FirstToken() - // is empty. - pos = first_token_start_pos; - break; - } - pos++; - } - first_token_ = - std::string(line, first_token_start_pos, pos - first_token_start_pos); - // first_token_ is expected to be either empty or something like - // "component-node", which actually is a slightly more restrictive set of - // strings than IsValidName() checks for this is a convenient way to check it. - if (!first_token_.empty() && !IsValidName(first_token_)) return false; - - while (pos < size) { - if (isspace(line[pos])) { - pos++; - continue; - } - - // OK, at this point we know that we are pointing at nonspace. - size_t next_equals_sign = line.find_first_of("=", pos); - if (next_equals_sign == pos || next_equals_sign == std::string::npos) { - // we're looking for something like 'key=value'. If there is no equals - // sign, or it's not preceded by something, it's a parsing failure. - return false; - } - std::string key(line, pos, next_equals_sign - pos); - if (!IsValidName(key)) return false; - - // handle any quotes. 
we support key='blah blah' or key="foo bar". - // no escaping is supported. - if (line[next_equals_sign + 1] == '\'' || - line[next_equals_sign + 1] == '"') { - char my_quote = line[next_equals_sign + 1]; - size_t next_quote = line.find_first_of(my_quote, next_equals_sign + 2); - if (next_quote == std::string::npos) { // no matching quote was found. - KALDI_WARN << "No matching quote for " << my_quote - << " in config line '" << line << "'"; - return false; - } else { - std::string value(line, next_equals_sign + 2, - next_quote - next_equals_sign - 2); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = next_quote + 1; - continue; - } - } else { - // we want to be able to parse something like "... input=Offset(a, -1) - // foo=bar": in general, config values with spaces in them, even without - // quoting. - - size_t next_next_equals_sign = - line.find_first_of("=", next_equals_sign + 1), - terminating_space = size; - - if (next_next_equals_sign != - std::string::npos) { // found a later equals sign. - size_t preceding_space = - line.find_last_of(" \t", next_next_equals_sign); - if (preceding_space != std::string::npos && - preceding_space > next_equals_sign) - terminating_space = preceding_space; - } - while (isspace(line[terminating_space - 1]) && terminating_space > 0) - terminating_space--; - - std::string value(line, next_equals_sign + 1, - terminating_space - (next_equals_sign + 1)); - data_.insert(std::make_pair(key, std::make_pair(value, false))); - pos = terminating_space; - } - } - return true; -} - -bool ConfigLine::GetValue(const std::string &key, std::string *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - *value = (it->second).first; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, BaseFloat *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToReal((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, int32 *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!ConvertStringToInteger((it->second).first, value)) return false; - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, std::vector *value) { - KALDI_ASSERT(value != NULL); - value->clear(); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if (!SplitStringToIntegers((it->second).first, ":,", true, value)) { - // KALDI_WARN << "Bad option " << (it->second).first; - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool ConfigLine::GetValue(const std::string &key, bool *value) { - KALDI_ASSERT(value != NULL); - std::map >::iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (it->first == key) { - if ((it->second).first.size() == 0) return false; - switch (((it->second).first)[0]) { - case 'F': - case 'f': - *value = false; - break; - case 'T': - case 't': - *value = true; - break; - default: - return false; - } - (it->second).second = true; - return true; - } - } - return false; -} - -bool 
ConfigLine::HasUnusedValues() const { - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) return true; - } - return false; -} - -std::string ConfigLine::UnusedValues() const { - std::string unused_str; - std::map >::const_iterator it = - data_.begin(); - for (; it != data_.end(); ++it) { - if (!(it->second).second) { - if (unused_str == "") - unused_str = it->first + "=" + (it->second).first; - else - unused_str += " " + it->first + "=" + (it->second).first; - } - } - return unused_str; -} - -// This is like ExpectToken but for two tokens, and it -// will either accept token1 and then token2, or just token2. -// This is useful in Read functions where the first token -// may already have been consumed. -// void ExpectOneOrTwoTokens(std::istream &is, bool binary, -// const std::string &token1, -// const std::string &token2) { -// KALDI_ASSERT(token1 != token2); -// std::string temp; -// ReadToken(is, binary, &temp); -// if (temp == token1) { -// ExpectToken(is, binary, token2); -// } else { -// if (temp != token2) { -// KALDI_ERR << "Expecting token " << token1 << " or " << token2 -// << " but got " << temp; -// } -// } -// } - -bool IsValidName(const std::string &name) { - if (name.size() == 0) return false; - for (size_t i = 0; i < name.size(); i++) { - if (i == 0 && !isalpha(name[i]) && name[i] != '_') return false; - if (!isalnum(name[i]) && name[i] != '_' && name[i] != '-' && name[i] != '.') - return false; - } - return true; -} - -void ReadConfigLines(std::istream &is, std::vector *lines) { - KALDI_ASSERT(lines != NULL); - std::string line; - while (std::getline(is, line)) { - if (line.size() == 0) continue; - size_t start = line.find_first_not_of(" \t"); - size_t end = line.find_first_of('#'); - if (start == std::string::npos || start == end) continue; - end = line.find_last_not_of(" \t", end - 1); - KALDI_ASSERT(end >= start); - lines->push_back(line.substr(start, end - start + 1)); - } -} - -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines) { - config_lines->resize(lines.size()); - for (size_t i = 0; i < lines.size(); i++) { - bool ret = (*config_lines)[i].ParseLine(lines[i]); - if (!ret) { - KALDI_ERR << "Error parsing config line: " << lines[i]; - } - } -} - -} // end namespace kaldi diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/text-utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/text-utils.h deleted file mode 100644 index bc7763c4aff38214d97cbeda3b29c8717dd65318..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/kaldi/util/text-utils.h +++ /dev/null @@ -1,264 +0,0 @@ -// util/text-utils.h - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_TEXT_UTILS_H_ -#define KALDI_UTIL_TEXT_UTILS_H_ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Split a string using any of the single character delimiters. -/// If omit_empty_strings == true, the output will contain any -/// nonempty strings after splitting on any of the -/// characters in the delimiter. If omit_empty_strings == false, -/// the output will contain n+1 strings if there are n characters -/// in the set "delim" within the input string. In this case -/// the empty string is split to a single empty string. -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector *out); - -/// Joins the elements of a vector of strings into a single string using -/// "delim" as the delimiter. If omit_empty_strings == true, any empty strings -/// in the vector are skipped. A vector of empty strings results in an empty -/// string on the output. -void JoinVectorToString(const std::vector &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out); - -/** - \brief Split a string (e.g. 1:2:3) into a vector of integers. - - \param [in] delim String containing a list of characters, any of which - is allowed as a delimiter. - \param [in] omit_empty_strings If true, empty strings between delimiters are - allowed and will not produce an output integer; if false, - instances of characters in 'delim' that are consecutive or - at the start or end of the string would be an error. - You'll normally want this to be true if 'delim' consists - of spaces, and false otherwise. - \param [out] out The output list of integers. -*/ -template -bool SplitStringToIntegers(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false [but - // should probably be true - // if "delim" is spaces]. - std::vector *out) { - KALDI_ASSERT(out != NULL); - KALDI_ASSERT_IS_INTEGER_TYPE(I); - if (*(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - const char *this_str = split[i].c_str(); - char *end = NULL; - int64 j = 0; - j = KALDI_STRTOLL(this_str, &end); - if (end == this_str || *end != '\0') { - out->clear(); - return false; - } else { - I jI = static_cast(j); - if (static_cast(jI) != j) { - // output type cannot fit this integer. - out->clear(); - return false; - } - (*out)[i] = jI; - } - } - return true; -} - -// This is defined for F = float and double. -template -bool SplitStringToFloats(const std::string &full, const char *delim, - bool omit_empty_strings, // typically false - std::vector *out); - -/// Converts a string into an integer via strtoll and returns false if there was -/// any kind of problem (i.e. the string was not an integer or contained extra -/// non-whitespace junk, or the integer was too large to fit into the type it is -/// being converted into). Only sets *out if everything was OK and it returns -/// true. 
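A short usage sketch of the SplitStringToIntegers template defined above (the input string is made up):

```cpp
// Usage sketch only: splitting a colon-separated id string with the
// SplitStringToIntegers template defined above. The input is made up.
#include <iostream>
#include <string>
#include <vector>

#include "util/text-utils.h"

int main() {
  std::vector<kaldi::int32> ids;
  if (kaldi::SplitStringToIntegers("1:2:3", ":", false, &ids)) {
    for (kaldi::int32 id : ids) std::cout << id << " ";  // 1 2 3
    std::cout << std::endl;
  }
  return 0;
}
```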
-template -bool ConvertStringToInteger(const std::string &str, Int *out) { - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - const char *this_str = str.c_str(); - char *end = NULL; - errno = 0; - int64 i = KALDI_STRTOLL(this_str, &end); - if (end != this_str) - while (isspace(*end)) end++; - if (end == this_str || *end != '\0' || errno != 0) return false; - Int iInt = static_cast(i); - if (static_cast(iInt) != i || - (i < 0 && !std::numeric_limits::is_signed)) { - return false; - } - *out = iInt; - return true; -} - -/// ConvertStringToReal converts a string into either float or double -/// and returns false if there was any kind of problem (i.e. the string -/// was not a floating point number or contained extra non-whitespace junk). -/// Be careful- this function will successfully read inf's or nan's. -template -bool ConvertStringToReal(const std::string &str, T *out); - -/// Removes the beginning and trailing whitespaces from a string -void Trim(std::string *str); - -/// Removes leading and trailing white space from the string, then splits on the -/// first section of whitespace found (if present), putting the part before the -/// whitespace in "first" and the rest in "rest". If there is no such space, -/// everything that remains after removing leading and trailing whitespace goes -/// in "first". -void SplitStringOnFirstSpace(const std::string &line, std::string *first, - std::string *rest); - -/// Returns true if "token" is nonempty, and all characters are -/// printable and whitespace-free. -bool IsToken(const std::string &token); - -/// Returns true if "line" is free of \n characters and unprintable -/// characters, and does not contain leading or trailing whitespace. -bool IsLine(const std::string &line); - -/** - This function returns true when two text strings are approximately equal, and - false when they are not. The definition of 'equal' is normal string - equality, except that two substrings like "0.31134" and "0.311341" would be - considered equal. 'decimal_places_tolerance' controls how many digits after - the '.' have to match up. - E.g. StringsApproxEqual("hello 0.23 there", "hello 0.24 there", 2) would - return false because there is a difference in the 2nd decimal, but with - an argument of 1 it would return true. - */ -bool StringsApproxEqual(const std::string &a, const std::string &b, - int32 decimal_places_check = 2); - -/** - This class is responsible for parsing input like - hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' - baz="a b c d='a b' e" and giving you access to the fields, in this case - - FirstToken() == "hi-there", and key->value pairs: - - xx->yyy, a->"b c", empty->"", f-oo->"Append(bar, sss)", ba_z->"123", - bing->"a b c", baz->"a b c d='a b' e" - - The first token is optional, if the line started with a key-value pair then - FirstValue() will be empty. - - Note: it can parse value fields with space inside them only if they are free - of the '=' character. If values are going to contain the '=' character, you - need to quote them with either single or double quotes. - - Key values may contain -_a-zA-Z0-9, but must begin with a-zA-Z_. - */ -class ConfigLine { - public: - // Tries to parse the line as a config-file line. Returns false - // if it could not for some reason, e.g. parsing failure. In most cases - // prints no warnings; the user should do this. Does not expect comments. - bool ParseLine(const std::string &line); - - // the GetValue functions are overloaded for various types. 
They return true - // if the key exists with value that can be converted to that type, and false - // otherwise. They also mark the key-value pair as having been read. It is - // not an error to read values twice. - bool GetValue(const std::string &key, std::string *value); - bool GetValue(const std::string &key, BaseFloat *value); - bool GetValue(const std::string &key, int32 *value); - // Values may be separated by ":" or by ",". - bool GetValue(const std::string &key, std::vector *value); - bool GetValue(const std::string &key, bool *value); - - bool HasUnusedValues() const; - /// returns e.g. foo=bar xxx=yyy if foo and xxx were not consumed by one - /// of the GetValue() functions. - std::string UnusedValues() const; - - const std::string &FirstToken() const { return first_token_; } - - const std::string WholeLine() { return whole_line_; } - // use default assignment operator and copy constructor. - private: - std::string whole_line_; - // the first token of the line, e.g. if line is - // foo-bar baz=bing - // then first_token_ would be "foo-bar". - std::string first_token_; - - // data_ maps from key to (value, is-this-value-consumed?). - std::map > data_; -}; - -/// This function is like ExpectToken but for two tokens, and it will either -/// accept token1 and then token2, or just token2. This is useful in Read -/// functions where the first token may already have been consumed. -void ExpectOneOrTwoTokens(std::istream &is, bool binary, - const std::string &token1, const std::string &token2); - -/** - This function reads in a config file and *appends* its contents to a vector - of lines; it is responsible for removing comments (anything after '#') and - stripping out any lines that contain only whitespace after comment removal. - */ -void ReadConfigLines(std::istream &is, std::vector *lines); - -/** - This function converts config-lines from a simple sequence of strings - as output by ReadConfigLines(), into a sequence of first-tokens and - name-value pairs. The general format is: - "command-type bar=baz xx=yyy" - etc., although there are subtleties as to what exactly is allowed, see - documentation for class ConfigLine for details. - This function will die if there was a parsing failure. - */ -void ParseConfigLines(const std::vector &lines, - std::vector *config_lines); - -/// Returns true if 'name' would be a valid name for a component or node in a -/// nnet3Nnet. This is a nonempty string beginning with A-Za-z_, and containing -/// only -/// '-', '_', '.', A-Z, a-z, or 0-9. 
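A usage sketch of the ConfigLine interface declared above; the config line content is made up, and the behavior follows the implementation removed from text-utils.cc in this same patch:

```cpp
// Usage sketch only: driving the ConfigLine class declared above. The config
// line content is made up; behavior follows the implementation removed from
// text-utils.cc in this same patch.
#include <iostream>
#include <string>

#include "util/text-utils.h"

int main() {
  kaldi::ConfigLine cl;
  if (!cl.ParseLine("affine-layer dim=1024 name='layer one'")) {
    std::cerr << "parse failure" << std::endl;
    return 1;
  }
  std::cout << cl.FirstToken() << std::endl;  // affine-layer

  kaldi::int32 dim = 0;
  std::string name;
  if (cl.GetValue("dim", &dim) && cl.GetValue("name", &name))
    std::cout << dim << " / " << name << std::endl;  // 1024 / layer one

  if (cl.HasUnusedValues())  // false here: both keys were consumed
    std::cerr << "unused: " << cl.UnusedValues() << std::endl;
  return 0;
}
```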
-bool IsValidName(const std::string &name); - -} // namespace kaldi - -#endif // KALDI_UTIL_TEXT_UTILS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/CPPLINT.cfg b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/CPPLINT.cfg deleted file mode 100644 index 51ff339c18435a6c3a3be03131080d7b8ab8de86..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/CPPLINT.cfg +++ /dev/null @@ -1 +0,0 @@ -exclude_files=.* diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/CMakeLists.txt deleted file mode 100644 index 04051ef5ae46c04a40c1ffccc98c37fa594ad13e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ - -#-DHAVE_CONFIG_H -I./../include -fno-exceptions -funsigned-char -std=c++11 -MT symbol-table.lo -MD -MP -MF .deps/symbol-table.Tpo -c symbol-table.cc -fno-common -DPIC -o .libs/symbol-table.o - -include_directories(./include/) -install(DIRECTORY include/ DESTINATION include/ - FILES_MATCHING PATTERN "*.h") - -add_subdirectory(lib) - -if(HAVE_SCRIPT) - add_subdirectory(script) -endif(HAVE_SCRIPT) - -if(HAVE_BIN) - add_subdirectory(bin) -endif(HAVE_BIN) - -add_subdirectory(extensions) - -if(BUILD_TESTING) - enable_testing() - add_subdirectory(test) -endif(BUILD_TESTING) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/extensions/special/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/extensions/special/CMakeLists.txt deleted file mode 100644 index 9c71b750a72ffe3c2dafde657273361c3dbae409..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/extensions/special/CMakeLists.txt +++ /dev/null @@ -1,67 +0,0 @@ -file(GLOB HEADER_FILES ../../include/fst/extensions/special/*.h) -message(STATUS "${HEADER_FILES}") - -if(HAVE_BIN) - add_executable(fstspecial-bin - ../../bin/fstconvert.cc - ../../bin/fstconvert-main.cc - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ) - - set_target_properties(fstspecial-bin PROPERTIES - FOLDER special/bin - OUTPUT_NAME fstspecial - ) - - target_link_libraries(fstspecial-bin - fstscript - fst - ${CMAKE_DL_LIBS} - ) -endif(HAVE_BIN) - - -add_library(fstspecial - phi-fst.cc - rho-fst.cc - sigma-fst.cc - ${HEADER_FILES} -) - -set_target_properties(fstspecial PROPERTIES - SOVERSION "${SOVERSION}" - FOLDER special -) -target_link_libraries(fstspecial - fst -) - -set(FST_SPECIAL_INSTALL_TARGETS fstspecial) -if(HAVE_BIN) - list(APPEND FST_SPECIAL_INSTALL_TARGETS fstspecial-bin) -endif() - -install(TARGETS ${FST_SPECIAL_INSTALL_TARGETS} - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib -) - -function (add_module _name) - add_library(${ARGV}) - if (TARGET ${_name}) - target_link_libraries(${_name} fst) - set_target_properties(${_name} - PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true - FOLDER special/modules - ) - endif() - - install(TARGETS ${_name} LIBRARY DESTINATION lib/fst) -endfunction() - -add_module(phi-fst MODULE phi-fst.cc) -add_module(rho-fst MODULE rho-fst.cc) -add_module(sigma-fst MODULE sigma-fst.cc) diff --git 
a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/include/fst/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/include/fst/flags.h deleted file mode 100644 index b5ec8ff7416774a0612ae0fe7e008a630b289dd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/include/fst/flags.h +++ /dev/null @@ -1,228 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style flag handling declarations and inline definitions. - -#ifndef FST_LIB_FLAGS_H_ -#define FST_LIB_FLAGS_H_ - -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include "gflags/gflags.h" -#include "glog/logging.h" - -using std::string; - -// FLAGS USAGE: -// -// Definition example: -// -// DEFINE_int32(length, 0, "length"); -// -// This defines variable FLAGS_length, initialized to 0. -// -// Declaration example: -// -// DECLARE_int32(length); -// -// SET_FLAGS() can be used to set flags from the command line -// using, for example, '--length=2'. -// -// ShowUsage() can be used to print out command and flag usage. - -// #define DECLARE_bool(name) extern bool FLAGS_ ## name -// #define DECLARE_string(name) extern string FLAGS_ ## name -// #define DECLARE_int32(name) extern int32 FLAGS_ ## name -// #define DECLARE_int64(name) extern int64 FLAGS_ ## name -// #define DECLARE_double(name) extern double FLAGS_ ## name - -template -struct FlagDescription { - FlagDescription(T *addr, const char *doc, const char *type, - const char *file, const T val) - : address(addr), - doc_string(doc), - type_name(type), - file_name(file), - default_value(val) {} - - T *address; - const char *doc_string; - const char *type_name; - const char *file_name; - const T default_value; -}; - -template -class FlagRegister { - public: - static FlagRegister *GetRegister() { - static auto reg = new FlagRegister; - return reg; - } - - const FlagDescription &GetFlagDescription(const string &name) const { - fst::MutexLock l(&flag_lock_); - auto it = flag_table_.find(name); - return it != flag_table_.end() ? 
it->second : 0; - } - - void SetDescription(const string &name, - const FlagDescription &desc) { - fst::MutexLock l(&flag_lock_); - flag_table_.insert(make_pair(name, desc)); - } - - bool SetFlag(const string &val, bool *address) const { - if (val == "true" || val == "1" || val.empty()) { - *address = true; - return true; - } else if (val == "false" || val == "0") { - *address = false; - return true; - } - else { - return false; - } - } - - bool SetFlag(const string &val, string *address) const { - *address = val; - return true; - } - - bool SetFlag(const string &val, int32 *address) const { - char *p = 0; - *address = strtol(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, int64 *address) const { - char *p = 0; - *address = strtoll(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &val, double *address) const { - char *p = 0; - *address = strtod(val.c_str(), &p); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &arg, const string &val) const { - for (typename std::map< string, FlagDescription >::const_iterator it = - flag_table_.begin(); - it != flag_table_.end(); - ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - if (arg == name) - return SetFlag(val, desc.address); - } - return false; - } - - void GetUsage(std::set> *usage_set) const { - for (auto it = flag_table_.begin(); it != flag_table_.end(); ++it) { - const string &name = it->first; - const FlagDescription &desc = it->second; - string usage = " --" + name; - usage += ": type = "; - usage += desc.type_name; - usage += ", default = "; - usage += GetDefault(desc.default_value) + "\n "; - usage += desc.doc_string; - usage_set->insert(make_pair(desc.file_name, usage)); - } - } - - private: - string GetDefault(bool default_value) const { - return default_value ? "true" : "false"; - } - - string GetDefault(const string &default_value) const { - return "\"" + default_value + "\""; - } - - template - string GetDefault(const V &default_value) const { - std::ostringstream strm; - strm << default_value; - return strm.str(); - } - - mutable fst::Mutex flag_lock_; // Multithreading lock. - std::map> flag_table_; -}; - -template -class FlagRegisterer { - public: - FlagRegisterer(const string &name, const FlagDescription &desc) { - auto registr = FlagRegister::GetRegister(); - registr->SetDescription(name, desc); - } - - private: - FlagRegisterer(const FlagRegisterer &) = delete; - FlagRegisterer &operator=(const FlagRegisterer &) = delete; -}; - - -#define DEFINE_VAR(type, name, value, doc) \ - type FLAGS_ ## name = value; \ - static FlagRegisterer \ - name ## _flags_registerer(#name, FlagDescription(&FLAGS_ ## name, \ - doc, \ - #type, \ - __FILE__, \ - value)) - -// #define DEFINE_bool(name, value, doc) DEFINE_VAR(bool, name, value, doc) -// #define DEFINE_string(name, value, doc) \ -// DEFINE_VAR(string, name, value, doc) -// #define DEFINE_int32(name, value, doc) DEFINE_VAR(int32, name, value, doc) -// #define DEFINE_int64(name, value, doc) DEFINE_VAR(int64, name, value, doc) -// #define DEFINE_double(name, value, doc) DEFINE_VAR(double, name, value, doc) - - -// Temporary directory. 
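Tying the FLAGS USAGE comment above together, a usage sketch assuming the gflags-backed macros this patched header pulls in (the flag name is made up; this is a sketch, not a guaranteed build recipe):

```cpp
// Usage sketch only, assuming the gflags-backed macros this patched header
// relies on (DEFINE_int32 comes from gflags/gflags.h, and SET_FLAGS forwards
// to gflags::ParseCommandLineFlags). The flag name is made up.
#include <iostream>

#include <fst/flags.h>

DEFINE_int32(length, 0, "length");

int main(int argc, char **argv) {
  SET_FLAGS(argv[0], &argc, &argv, true);  // accepts e.g. --length=2
  std::cout << "length = " << FLAGS_length << std::endl;
  return 0;
}
```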
-DECLARE_string(tmpdir); - -void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags, - const char *src = ""); - -#define SET_FLAGS(usage, argc, argv, rmflags) \ -gflags::ParseCommandLineFlags(argc, argv, true) -// SetFlags(usage, argc, argv, rmflags, __FILE__) - -// Deprecated; for backward compatibility. -inline void InitFst(const char *usage, int *argc, char ***argv, bool rmflags) { - return SetFlags(usage, argc, argv, rmflags); -} - -void ShowUsage(bool long_usage = true); - -#endif // FST_LIB_FLAGS_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/include/fst/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/include/fst/log.h deleted file mode 100644 index bf041c58ebfab73d03bb14adf28c7c7916a2217d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/patch/openfst/src/include/fst/log.h +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// See www.openfst.org for extensive documentation on this weighted -// finite-state transducer library. -// -// Google-style logging declarations and inline definitions. - -#ifndef FST_LIB_LOG_H_ -#define FST_LIB_LOG_H_ - -#include -#include -#include - -#include -#include - -using std::string; - -DECLARE_int32(v); - -class LogMessage { - public: - LogMessage(const string &type) : fatal_(type == "FATAL") { - std::cerr << type << ": "; - } - ~LogMessage() { - std::cerr << std::endl; - if(fatal_) - exit(1); - } - std::ostream &stream() { return std::cerr; } - - private: - bool fatal_; -}; - -// #define LOG(type) LogMessage(#type).stream() -// #define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO) - -// Checks -inline void FstCheck(bool x, const char* expr, - const char *file, int line) { - if (!x) { - LOG(FATAL) << "Check failed: \"" << expr - << "\" file: " << file - << " line: " << line; - } -} - -// #define CHECK(x) FstCheck(static_cast(x), #x, __FILE__, __LINE__) -// #define CHECK_EQ(x, y) CHECK((x) == (y)) -// #define CHECK_LT(x, y) CHECK((x) < (y)) -// #define CHECK_GT(x, y) CHECK((x) > (y)) -// #define CHECK_LE(x, y) CHECK((x) <= (y)) -// #define CHECK_GE(x, y) CHECK((x) >= (y)) -// #define CHECK_NE(x, y) CHECK((x) != (y)) - -// Debug checks -// #define DCHECK(x) assert(x) -// #define DCHECK_EQ(x, y) DCHECK((x) == (y)) -// #define DCHECK_LT(x, y) DCHECK((x) < (y)) -// #define DCHECK_GT(x, y) DCHECK((x) > (y)) -// #define DCHECK_LE(x, y) DCHECK((x) <= (y)) -// #define DCHECK_GE(x, y) DCHECK((x) >= (y)) -// #define DCHECK_NE(x, y) DCHECK((x) != (y)) - - -// Ports -#define ATTRIBUTE_DEPRECATED __attribute__((deprecated)) - -#endif // FST_LIB_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/post_processor/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/post_processor/CMakeLists.txt deleted file mode 100644 index 
6113bbc26eb8fe35e4e17ffd1cab382f0fb0f1f8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/post_processor/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_library(post_processor STATIC - post_processor.cc -) -target_link_libraries(post_processor PUBLIC utils) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/post_processor/post_processor.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/post_processor/post_processor.cc deleted file mode 100644 index 315f62d34cbc441ecbaf7c07667eb35ee61c2c8d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/post_processor/post_processor.cc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#include "post_processor/post_processor.h" - -#include -#include - -#include "utils/string.h" - -namespace wenet { - -std::string PostProcessor::ProcessSpace(const std::string& str) { - std::string result = str; - // 1. remove ' ' if needed - // only spaces between mandarin words need to be removed, please note that - // if str contains '_', we assume that the decoding type must be - // `CtcPrefixBeamSearch` and this branch will do nothing since str must be - // obtained via "".join() (in function `AsrDecoder::UpdateResult()`) - if (opts_.language_type == kMandarinEnglish && !str.empty()) { - result.clear(); - // split str by ' ' - std::vector words; - std::stringstream ss(str); - std::string tmp; - while (ss >> tmp) { - words.push_back(tmp); - } - // check english word - bool is_englishword_prev = false; - bool is_englishword_now = false; - for (std::string& w : words) { - is_englishword_now = CheckEnglishWord(w); - if (is_englishword_prev && is_englishword_now) { - result += (' ' + w); - } else { - result += (w); - } - is_englishword_prev = is_englishword_now; - } - } - // 2. 
replace '_' with ' ' - // this should be done for all cases (both kMandarinEnglish and kIndoEuropean) - result = ProcessBlank(result, opts_.lowercase); - return result; -} - -std::string PostProcessor::Process(const std::string& str, bool finish) { - std::string result; - result = ProcessSpace(str); - // TODO(xcsong): do itn/punctuation if finish == true - return result; -} - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/post_processor/post_processor.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/post_processor/post_processor.h deleted file mode 100644 index 54597845ebc88ad22e1244d2e693e2088cff6d21..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/post_processor/post_processor.h +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -#ifndef POST_PROCESSOR_POST_PROCESSOR_H_ -#define POST_PROCESSOR_POST_PROCESSOR_H_ - -#include -#include -#include - -#include "utils/utils.h" - -namespace wenet { - -enum LanguageType { - // spaces between **mandarin words** should be removed. - // cases of processing spaces with mandarin-only, english-only - // and mandarin-english code-switch can be found in post_processor_test.cc - kMandarinEnglish = 0x00, - // spaces should be kept for most of the - // Indo-European languages (i.e., deutsch or english-deutsch code-switch). - // cases of those languages can be found in post_processor_test.cc - kIndoEuropean = 0x01 -}; - -struct PostProcessOptions { - // space options - // The decoded result may contain spaces (' ' or '_'), - // we will process those spaces according to language_type. 
More details can - // be found in - // https://github.com/wenet-e2e/wenet/issues/583#issuecomment-907994058 - LanguageType language_type = kMandarinEnglish; - // whether lowercase letters are required - bool lowercase = true; -}; - -// TODO(xcsong): add itn/punctuation related resource -struct PostProcessResource {}; - -// Post Processor -class PostProcessor { - public: - explicit PostProcessor(PostProcessOptions&& opts) : opts_(std::move(opts)) {} - explicit PostProcessor(const PostProcessOptions& opts) : opts_(opts) {} - // call other functions to do post processing - std::string Process(const std::string& str, bool finish); - // process spaces according to configurations - std::string ProcessSpace(const std::string& str); - // TODO(xcsong): add itn/punctuation - // void InverseTN(const std::string& str); - // void Punctuate(const std::string& str); - - private: - const PostProcessOptions opts_; - - public: - WENET_DISALLOW_COPY_AND_ASSIGN(PostProcessor); -}; - -} // namespace wenet - -#endif // POST_PROCESSOR_POST_PROCESSOR_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/CMakeLists.txt deleted file mode 100644 index 145654105350e91a5f9121b47197f5fc60663f5c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -link_libraries(gtest_main gmock) - -add_executable(utils_test utils_test.cc) -target_link_libraries(utils_test PUBLIC utils) -add_test(UTILS_TEST utils_test) - -add_executable(ctc_prefix_beam_search_test ctc_prefix_beam_search_test.cc) -target_link_libraries(ctc_prefix_beam_search_test PUBLIC decoder) -add_test(CTC_PREFIX_BEAM_SEARCH_TEST ctc_prefix_beam_search_test) - -add_executable(post_processor_test post_processor_test.cc) -target_link_libraries(post_processor_test PUBLIC post_processor) -add_test(POST_PROCESSOR_TEST post_processor_test) - - -add_executable(feature_pipeline_test feature_pipeline_test.cc) -target_link_libraries(feature_pipeline_test PUBLIC frontend) -add_test(FEATURE_PIPELINE_TEST feature_pipeline_test) \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/ctc_prefix_beam_search_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/ctc_prefix_beam_search_test.cc deleted file mode 100644 index d8f3b65693b934beb33f3a770795f0b6e7ce3456..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/ctc_prefix_beam_search_test.cc +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "decoder/ctc_prefix_beam_search.h" - -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include "utils/utils.h" - -TEST(CtcPrefixBeamSearchTest, CtcPrefixBeamSearchLogicTest) { - using ::testing::ElementsAre; - // See https://robin1001.github.io/2020/12/11/ctc-search for the - // graph demonstration of the data - std::vector> data = { - {0.25, 0.40, 0.35}, {0.40, 0.35, 0.25}, {0.10, 0.50, 0.40}}; - // Apply log - for (int i = 0; i < data.size(); i++) { - for (int j = 0; j < data[i].size(); j++) { - data[i][j] = std::log(data[i][j]); - } - } - wenet::CtcPrefixBeamSearchOptions option; - option.first_beam_size = 3; - option.second_beam_size = 3; - wenet::CtcPrefixBeamSearch prefix_beam_search(option); - prefix_beam_search.Search(data); - /* Test case info - | top k | result index | prefix score | viterbi score | timestamp | - |-------|--------------|--------------|---------------|-----------| - | top 1 | [2, 1] | 0.2185 | 0.07 | [0, 2] | - | top 2 | [1, 2] | 0.1550 | 0.064 | [0, 2] | - | top 3 | [1] | 0.1525 | 0.07 | [2] | - */ - const std::vector>& result = prefix_beam_search.Outputs(); - EXPECT_EQ(result.size(), 3); - ASSERT_THAT(result[0], ElementsAre(2, 1)); - ASSERT_THAT(result[1], ElementsAre(1, 2)); - ASSERT_THAT(result[2], ElementsAre(1)); - - const std::vector& likelihood = prefix_beam_search.Likelihood(); - EXPECT_EQ(likelihood.size(), 3); - EXPECT_FLOAT_EQ(std::exp(likelihood[0]), 0.2185); - EXPECT_FLOAT_EQ(std::exp(likelihood[1]), 0.1550); - EXPECT_FLOAT_EQ(std::exp(likelihood[2]), 0.1525); - - const std::vector& viterbi_likelihood = - prefix_beam_search.viterbi_likelihood(); - EXPECT_EQ(viterbi_likelihood.size(), 3); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[0]), 0.07); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[1]), 0.064); - EXPECT_FLOAT_EQ(std::exp(viterbi_likelihood[2]), 0.07); - - const std::vector>& times = prefix_beam_search.Times(); - EXPECT_EQ(times.size(), 3); - ASSERT_THAT(times[0], ElementsAre(0, 2)); - ASSERT_THAT(times[1], ElementsAre(0, 2)); - ASSERT_THAT(times[2], ElementsAre(2)); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/feature_pipeline_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/feature_pipeline_test.cc deleted file mode 100644 index 244ec0735b6086211b476e8d97569e1ee5959bc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/feature_pipeline_test.cc +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) 2022 Roney -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include - -#include "frontend/feature_pipeline.h" -#include "utils/blocking_queue.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -void pushQueue(const std::shared_ptr>& que, - std::vector vec) { - que->Push(vec); -} - -void popQueue(const std::shared_ptr>& que, int num, - int back_data) { - auto pop_data = que->Pop(num); - ASSERT_EQ(pop_data[num - 1], back_data); -} - -TEST(FeaturePipelineTest, BlockingQueueTest) { - auto capacity_queue = std::make_shared>(2); - std::vector test_data{1, 2, 3, 4, 5}; - std::thread push_thread(&pushQueue, capacity_queue, test_data); - ASSERT_EQ(capacity_queue->Pop(), 1); - ASSERT_LE(capacity_queue->Size(), 2); // capacity_queue: 2 or 2,3 - auto pop_data = capacity_queue->Pop(3); // 2,3,4 num > capacity - ASSERT_EQ(pop_data.size(), 3); - ASSERT_EQ(pop_data[2], 4); - push_thread.join(); - ASSERT_EQ(capacity_queue->Size(), 1); // capacity_queue:5 - - std::thread pop_thread(&popQueue, capacity_queue, 3, 0); // num > capacity - capacity_queue->Push(9); // capacity_queue:5,9 - capacity_queue->Push(0); // capacity_queue:5,9,0 - pop_thread.join(); // capacity_queue: - ASSERT_EQ(capacity_queue->Size(), 0); - - pop_data = capacity_queue->Pop(0); - ASSERT_TRUE(pop_data.empty()); -} - -TEST(FeaturePipelineTest, PipelineTest) { - wenet::FeaturePipelineConfig config(80, 8000); - wenet::FeaturePipeline feature_pipeline(config); - int audio_len = 8 * 55; // audio len 55ms,4 frames - std::vector pcm(audio_len, 0); - feature_pipeline.AcceptWaveform(pcm.data(), audio_len); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 4); - - std::vector> out_feats; - auto b = feature_pipeline.Read(2, &out_feats); - ASSERT_TRUE(b); - ASSERT_EQ(out_feats.size(), 2); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 2); - - std::vector out_feat; - b = feature_pipeline.ReadOne(&out_feat); - ASSERT_TRUE(b); - ASSERT_FALSE(out_feat.empty()); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 1); - - feature_pipeline.set_input_finished(); - b = feature_pipeline.Read(2, &out_feats); - ASSERT_FALSE(b); - ASSERT_EQ(out_feats.size(), 1); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 0); - - feature_pipeline.AcceptWaveform(pcm.data(), audio_len); - feature_pipeline.Read(2, &out_feats); - feature_pipeline.Reset(); - feature_pipeline.set_input_finished(); - b = feature_pipeline.Read(2, &out_feats); - ASSERT_FALSE(b); - ASSERT_EQ(out_feats.size(), 0); - ASSERT_EQ(feature_pipeline.NumQueuedFrames(), 0); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/post_processor_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/post_processor_test.cc deleted file mode 100644 index fa11fa29231032d62389a93fd00b0ec782bf8a3b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/post_processor_test.cc +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2021 Xingchen Song sxc19@mails.tsinghua.edu.cn -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License - -#include "post_processor/post_processor.h" - -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include "utils/utils.h" - -TEST(PostProcessorTest, ProcessSpacekMandarinEnglishTest) { - wenet::PostProcessOptions opts_lowercase; - wenet::PostProcessor post_processor_lowercase(opts_lowercase); - - wenet::PostProcessOptions opts_uppercase; - opts_uppercase.lowercase = false; - wenet::PostProcessor post_processor_uppercase(opts_uppercase); - - std::vector input = { - // modeling unit: mandarin character - // decode type: CtcPrefixBeamSearch, "".join() - "震东好帅", - // modeling unit: mandarin word - // decode type: CtcWfstBeamSearch, " ".join() - " 吴迪 也 好帅", - // modeling unit: english wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "▁binbin▁is▁also▁handsome", - // modeling unit: english word - // decode type: CtcWfstBeamSearch, " ".join() - " life is short i use wenet", - // modeling unit: mandarin character + english wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "超哥▁is▁the▁most▁handsome", - // modeling unit: mandarin word + english word - // decode type: CtcWfstBeamSearch, " ".join() - " 人生 苦短 i use wenet", - }; - - std::vector result_lowercase = { - "震东好帅", - "吴迪也好帅", - "binbin is also handsome", - "life is short i use wenet", - "超哥 is the most handsome", - "人生苦短i use wenet", - }; - - std::vector result_uppercase = { - "震东好帅", - "吴迪也好帅", - "BINBIN IS ALSO HANDSOME", - "LIFE IS SHORT I USE WENET", - "超哥 IS THE MOST HANDSOME", - "人生苦短I USE WENET", - }; - - for (size_t i = 0; i < input.size(); ++i) { - EXPECT_EQ(post_processor_lowercase.ProcessSpace(input[i]), - result_lowercase[i]); - EXPECT_EQ(post_processor_uppercase.ProcessSpace(input[i]), - result_uppercase[i]); - } -} - -TEST(PostProcessorTest, ProcessSpacekIndoEuropeanTest) { - wenet::PostProcessOptions opts_lowercase; - opts_lowercase.language_type = wenet::kIndoEuropean; - wenet::PostProcessor post_processor_lowercase(opts_lowercase); - - wenet::PostProcessOptions opts_uppercase; - opts_uppercase.language_type = wenet::kIndoEuropean; - opts_uppercase.lowercase = false; - wenet::PostProcessor post_processor_uppercase(opts_uppercase); - - std::vector input = { - // modeling unit: wordpiece - // decode type: CtcPrefixBeamSearch, "".join() - "▁zhendong▁ist▁so▁schön", - // modeling unit: word - // decode type: CtcWfstBeamSearch, " ".join() - " zhendong ist so schön"}; - - std::vector result_lowercase = {"zhendong ist so schön", - "zhendong ist so schön"}; - - std::vector result_uppercase = {"ZHENDONG IST SO SCHÖN", - "ZHENDONG IST SO SCHÖN"}; - - for (size_t i = 0; i < input.size(); ++i) { - EXPECT_EQ(post_processor_lowercase.ProcessSpace(input[i]), - result_lowercase[i]); - EXPECT_EQ(post_processor_uppercase.ProcessSpace(input[i]), - result_uppercase[i]); - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/utils_test.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/utils_test.cc deleted file mode 100644 index 6b2bbac25e000ce854d5e55a50cb51109d62d758..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/test/utils_test.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the 
License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "utils/utils.h" - -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -TEST(UtilsTest, TopKTest) { - using ::testing::ElementsAre; - using ::testing::FloatNear; - using ::testing::Pointwise; - std::vector data = {1, 3, 5, 7, 9, 2, 4, 6, 8, 10}; - std::vector values; - std::vector indices; - wenet::TopK(data, 3, &values, &indices); - EXPECT_THAT(values, Pointwise(FloatNear(1e-8), {10, 9, 8})); - ASSERT_THAT(indices, ElementsAre(9, 4, 8)); -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/toolchains/aarch64-linux-gnu.toolchain.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/toolchains/aarch64-linux-gnu.toolchain.cmake deleted file mode 100644 index 9ad37cba9eb6fa58aa194ece96cf9a5da472a76d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/toolchains/aarch64-linux-gnu.toolchain.cmake +++ /dev/null @@ -1,5 +0,0 @@ -set(CMAKE_SYSTEM_NAME Linux) -SET (CMAKE_SYSTEM_PROCESSOR aarch64) - -set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc) -set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/toolchains/ios.toolchain.cmake b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/toolchains/ios.toolchain.cmake deleted file mode 100644 index 2bcb0adf7b07c0c5fd5bf16d1b687050579ba673..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/toolchains/ios.toolchain.cmake +++ /dev/null @@ -1,1014 +0,0 @@ -# This file is part of the ios-cmake project. It was retrieved from -# https://github.com/leetal/ios-cmake.git, which is a fork of -# https://github.com/gerstrong/ios-cmake.git, which is a fork of -# https://github.com/cristeab/ios-cmake.git, which is a fork of -# https://code.google.com/p/ios-cmake/. Which in turn is based off of -# the Platform/Darwin.cmake and Platform/UnixPaths.cmake files which -# are included with CMake 2.8.4 -# -# The ios-cmake project is licensed under the new BSD license. -# -# Copyright (c) 2014, Bogdan Cristea and LTE Engineering Software, -# Kitware, Inc., Insight Software Consortium. All rights reserved. -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# -# This file is based off of the Platform/Darwin.cmake and -# Platform/UnixPaths.cmake files which are included with CMake 2.8.4 -# It has been altered for iOS development. -# -# Updated by Alex Stewart (alexs.mac@gmail.com) -# -# ***************************************************************************** -# Now maintained by Alexander Widerberg (widerbergaren [at] gmail.com) -# under the BSD-3-Clause license -# https://github.com/leetal/ios-cmake -# ***************************************************************************** -# -# INFORMATION / HELP -# -############################################################################### -# OPTIONS # -############################################################################### -# -# PLATFORM: (default "OS64") -# OS = Build for iPhoneOS. -# OS64 = Build for arm64 iphoneOS. -# OS64COMBINED = Build for arm64 x86_64 iphoneOS + iphoneOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) -# SIMULATOR = Build for x86 i386 iphoneOS Simulator. -# SIMULATOR64 = Build for x86_64 iphoneOS Simulator. -# SIMULATORARM64 = Build for arm64 iphoneOS Simulator. -# TVOS = Build for arm64 tvOS. -# TVOSCOMBINED = Build for arm64 x86_64 tvOS + tvOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) -# SIMULATOR_TVOS = Build for x86_64 tvOS Simulator. -# WATCHOS = Build for armv7k arm64_32 for watchOS. -# WATCHOSCOMBINED = Build for armv7k arm64_32 x86_64 watchOS + watchOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) -# SIMULATOR_WATCHOS = Build for x86_64 for watchOS Simulator. -# MAC = Build for x86_64 macOS. -# MAC_ARM64 = Build for Apple Silicon macOS. -# MAC_CATALYST = Build for x86_64 macOS with Catalyst support (iOS toolchain on macOS). -# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS -# MAC_CATALYST_ARM64 = Build for Apple Silicon macOS with Catalyst support (iOS toolchain on macOS). -# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS -# -# CMAKE_OSX_SYSROOT: Path to the SDK to use. By default this is -# automatically determined from PLATFORM and xcodebuild, but -# can also be manually specified (although this should not be required). -# -# CMAKE_DEVELOPER_ROOT: Path to the Developer directory for the platform -# being compiled for. 
By default this is automatically determined from -# CMAKE_OSX_SYSROOT, but can also be manually specified (although this should -# not be required). -# -# DEPLOYMENT_TARGET: Minimum SDK version to target. Default 2.0 on watchOS and 9.0 on tvOS+iOS -# -# NAMED_LANGUAGE_SUPPORT: -# ON (default) = Will require "enable_language(OBJC) and/or enable_language(OBJCXX)" for full OBJC|OBJCXX support -# OFF = Will embed the OBJC and OBJCXX flags into the CMAKE_C_FLAGS and CMAKE_CXX_FLAGS (legacy behaviour, CMake version < 3.16) -# -# ENABLE_BITCODE: (ON|OFF) Enables or disables bitcode support. Default ON -# -# ENABLE_ARC: (ON|OFF) Enables or disables ARC support. Default ON (ARC enabled by default) -# -# ENABLE_VISIBILITY: (ON|OFF) Enables or disables symbol visibility support. Default OFF (visibility hidden by default) -# -# ENABLE_STRICT_TRY_COMPILE: (ON|OFF) Enables or disables strict try_compile() on all Check* directives (will run linker -# to actually check if linking is possible). Default OFF (will set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY) -# -# ARCHS: (armv7 armv7s armv7k arm64 arm64_32 i386 x86_64) If specified, will override the default architectures for the given PLATFORM -# OS = armv7 armv7s arm64 (if applicable) -# OS64 = arm64 (if applicable) -# SIMULATOR = i386 -# SIMULATOR64 = x86_64 -# SIMULATORARM64 = arm64 -# TVOS = arm64 -# SIMULATOR_TVOS = x86_64 (i386 has since long been deprecated) -# WATCHOS = armv7k arm64_32 (if applicable) -# SIMULATOR_WATCHOS = x86_64 (i386 has since long been deprecated) -# MAC = x86_64 -# MAC_ARM64 = arm64 -# MAC_CATALYST = x86_64 -# MAC_CATALYST_ARM64 = arm64 -# -# NOTE: When manually specifying ARCHS, put a semi-colon between the entries. E.g., -DARCHS="armv7;arm64" -# -############################################################################### -# END OPTIONS # -############################################################################### -# -# This toolchain defines the following properties (available via get_property()) for use externally: -# -# PLATFORM: The currently targeted platform. -# XCODE_VERSION: Version number (not including Build version) of Xcode detected. -# SDK_VERSION: Version of SDK being used. -# OSX_ARCHITECTURES: Architectures being compiled for (generated from PLATFORM). -# APPLE_TARGET_TRIPLE: Used by autoconf build systems. NOTE: If "ARCHS" are overridden, this will *NOT* be set! -# -# This toolchain defines the following macros for use externally: -# -# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE XCODE_VARIANT) -# A convenience macro for setting xcode specific properties on targets. -# Available variants are: All, Release, RelWithDebInfo, Debug, MinSizeRel -# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1" "all"). -# -# find_host_package (PROGRAM ARGS) -# A macro used to find executable programs on the host system, not within the -# environment. Thanks to the android-cmake project for providing the -# command. -# - -cmake_minimum_required(VERSION 3.8.0) - -# CMake invokes the toolchain file twice during the first build, but only once during subsequent rebuilds. 
-if(DEFINED ENV{_IOS_TOOLCHAIN_HAS_RUN}) - return() -endif() -set(ENV{_IOS_TOOLCHAIN_HAS_RUN} true) - -# List of supported platform values -list(APPEND _supported_platforms - "OS" "OS64" "OS64COMBINED" "SIMULATOR" "SIMULATOR64" "SIMULATORARM64" - "TVOS" "TVOSCOMBINED" "SIMULATOR_TVOS" - "WATCHOS" "WATCHOSCOMBINED" "SIMULATOR_WATCHOS" - "MAC" "MAC_ARM64" - "MAC_CATALYST" "MAC_CATALYST_ARM64") - -# Cache what generator is used -set(USED_CMAKE_GENERATOR "${CMAKE_GENERATOR}") - -# Check if using a CMake version capable of building combined FAT builds (simulator and target slices combined in one static lib) -if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14") - set(MODERN_CMAKE YES) -endif() - -# Get the Xcode version being used. -# Problem: CMake runs toolchain files multiple times, but can't read cache variables on some runs. -# Workaround: On first run (in which cache variables are always accessible), set an intermediary environment variable. -# -# NOTE: This pattern is used i many places in this toolchain to speed up checks of all sorts -if(DEFINED XCODE_VERSION_INT) - # Environment variables are always preserved. - set(ENV{_XCODE_VERSION_INT} "${XCODE_VERSION_INT}") -elseif(DEFINED ENV{_XCODE_VERSION_INT}) - set(XCODE_VERSION_INT "$ENV{_XCODE_VERSION_INT}") -elseif(NOT DEFINED XCODE_VERSION_INT) - find_program(XCODEBUILD_EXECUTABLE xcodebuild) - if(NOT XCODEBUILD_EXECUTABLE) - message(FATAL_ERROR "xcodebuild not found. Please install either the standalone commandline tools or Xcode.") - endif() - execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version - OUTPUT_VARIABLE XCODE_VERSION_INT - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION_INT "${XCODE_VERSION_INT}") - string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION_INT "${XCODE_VERSION_INT}") - set(XCODE_VERSION_INT "${XCODE_VERSION_INT}" CACHE INTERNAL "") -endif() - -# Assuming that xcode 12.0 is installed you most probably have ios sdk 14.0 or later installed (tested on Big Sur) -# if you don't set a deployment target it will be set the way you only get 64-bit builds -if(NOT DEFINED DEPLOYMENT_TARGET AND XCODE_VERSION_INT VERSION_GREATER 12.0) - # Temporarily fix the arm64 issues in CMake install-combined by excluding arm64 for simulator builds (needed for Apple Silicon...) - set(CMAKE_XCODE_ATTRIBUTE_EXCLUDED_ARCHS[sdk=iphonesimulator*] "arm64") -endif() - -# Check if the platform variable is set -if(DEFINED PLATFORM) - # Environment variables are always preserved. - set(ENV{_PLATFORM} "${PLATFORM}") -elseif(DEFINED ENV{_PLATFORM}) - set(PLATFORM "$ENV{_PLATFORM}") -elseif(NOT DEFINED PLATFORM) - message(FATAL_ERROR "PLATFORM argument not set. Bailing configure since I don't know what target you want to build for!") -endif () - -if(PLATFORM MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode") - message(FATAL_ERROR "The combined builds support requires Xcode to be used as generator via '-G Xcode' command-line argument in CMake") -endif() - -# Safeguard that the platform value is set and is one of the supported values -list(FIND _supported_platforms ${PLATFORM} contains_PLATFORM) -if("${contains_PLATFORM}" EQUAL "-1") - string(REPLACE ";" "\n * " _supported_platforms_formatted "${_supported_platforms}") - message(FATAL_ERROR " Invalid PLATFORM specified! 
Current value: ${PLATFORM}.\n" - " Supported PLATFORM values: \n * ${_supported_platforms_formatted}") -endif() - -# Check if Apple Silicon is supported -if(PLATFORM MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$" AND ${CMAKE_VERSION} VERSION_LESS "3.19.5") - message(FATAL_ERROR "Apple Silicon builds requires a minimum of CMake 3.19.5") -endif() - -# Touch toolchain variable to suppress "unused variable" warning. -# This happens if CMake is invoked with the same command line the second time. -if(CMAKE_TOOLCHAIN_FILE) -endif() - -# Fix for PThread library not in path -set(CMAKE_THREAD_LIBS_INIT "-lpthread") -set(CMAKE_HAVE_THREADS_LIBRARY 1) -set(CMAKE_USE_WIN32_THREADS_INIT 0) -set(CMAKE_USE_PTHREADS_INIT 1) - -# Specify named language support defaults. -if(NOT DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16") - set(NAMED_LANGUAGE_SUPPORT ON) - message(STATUS "[DEFAULTS] Using explicit named language support! E.g., enable_language(CXX) is needed in the project files.") -elseif(NOT DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_LESS "3.16") - set(NAMED_LANGUAGE_SUPPORT OFF) - message(STATUS "[DEFAULTS] Disabling explicit named language support. Falling back to legacy behaviour.") -elseif(DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_LESS "3.16") - message(FATAL_ERROR "CMake named language support for OBJC and OBJCXX was added in CMake 3.16.") -endif() -set(NAMED_LANGUAGE_SUPPORT_INT ${NAMED_LANGUAGE_SUPPORT} CACHE BOOL - "Whether or not to enable explicit named language support" FORCE) - -# Specify minimum version of deployment target. -if(NOT DEFINED DEPLOYMENT_TARGET) - if (PLATFORM MATCHES "WATCHOS") - # Unless specified, SDK version 4.0 is used by default as minimum target version (watchOS). - set(DEPLOYMENT_TARGET "4.0") - elseif(PLATFORM STREQUAL "MAC") - # Unless specified, SDK version 10.13 (High sierra) is used by default as minimum target version (macos). - set(DEPLOYMENT_TARGET "10.13") - elseif(PLATFORM STREQUAL "MAC_ARM64") - # Unless specified, SDK version 11.0 (Big Sur) is used by default as minimum target version (macos on arm). - set(DEPLOYMENT_TARGET "11.0") - elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64") - # Unless specified, SDK version 13.0 is used by default as minimum target version (mac catalyst minimum requirement). - set(DEPLOYMENT_TARGET "13.1") - else() - # Unless specified, SDK version 11.0 is used by default as minimum target version (iOS, tvOS). - set(DEPLOYMENT_TARGET "11.0") - endif() - message(STATUS "[DEFAULTS] Using the default min-version since DEPLOYMENT_TARGET not provided!") -elseif(DEFINED DEPLOYMENT_TARGET AND PLATFORM MATCHES "^MAC_CATALYST" AND ${DEPLOYMENT_TARGET} VERSION_LESS "13.1") - message(FATAL_ERROR "Mac Catalyst builds requires a minimum deployment target of 13.1!") -endif() - -# Store the DEPLOYMENT_TARGET in the cache -set(DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}" CACHE INTERNAL "") - -# Handle the case where we are targeting iOS and a version above 10.3.4 (32-bit support dropped officially) -if(PLATFORM STREQUAL "OS" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) - set(PLATFORM "OS64") - message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.") -elseif(PLATFORM STREQUAL "SIMULATOR" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) - set(PLATFORM "SIMULATOR64") - message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. 
Dropping 32-bit support.") -endif() - -set(PLATFORM_INT "${PLATFORM}") - -if(DEFINED ARCHS) - string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") -endif() - -# Determine the platform name and architectures for use in xcodebuild commands -# from the specified PLATFORM_INT name. -if(PLATFORM_INT STREQUAL "OS") - set(SDK_NAME iphoneos) - if(NOT ARCHS) - set(ARCHS armv7 armv7s arm64) - set(APPLE_TARGET_TRIPLE_INT arm-apple-ios${DEPLOYMENT_TARGET}) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) - endif() -elseif(PLATFORM_INT STREQUAL "OS64") - set(SDK_NAME iphoneos) - if(NOT ARCHS) - if (XCODE_VERSION_INT VERSION_GREATER 10.0) - set(ARCHS arm64) # FIXME: Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missung bitcode markers for example - else() - set(ARCHS arm64) - endif() - set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios${DEPLOYMENT_TARGET}) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) - endif() -elseif(PLATFORM_INT STREQUAL "OS64COMBINED") - set(SDK_NAME iphoneos) - if(MODERN_CMAKE) - if(NOT ARCHS) - if (XCODE_VERSION_INT VERSION_GREATER 10.0) - set(ARCHS arm64 x86_64) # FIXME: Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missung bitcode markers for example - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") - else() - set(ARCHS arm64 x86_64) - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") - endif() - set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-ios${DEPLOYMENT_TARGET}) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) - endif() - else() - message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the OS64COMBINED setting work") - endif() -elseif(PLATFORM_INT STREQUAL "SIMULATOR") - set(SDK_NAME iphonesimulator) - if(NOT ARCHS) - set(ARCHS i386) - set(APPLE_TARGET_TRIPLE_INT i386-apple-ios${DEPLOYMENT_TARGET}-simulator) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) - endif() - message(DEPRECATION "SIMULATOR IS DEPRECATED. 
Consider using SIMULATOR64 instead.") -elseif(PLATFORM_INT STREQUAL "SIMULATOR64") - set(SDK_NAME iphonesimulator) - if(NOT ARCHS) - set(ARCHS x86_64) - set(APPLE_TARGET_TRIPLE_INT x86_64-apple-ios${DEPLOYMENT_TARGET}-simulator) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) - endif() -elseif(PLATFORM_INT STREQUAL "SIMULATORARM64") - set(SDK_NAME iphonesimulator) - if(NOT ARCHS) - set(ARCHS arm64) - set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios${DEPLOYMENT_TARGET}-simulator) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) - endif() -elseif(PLATFORM_INT STREQUAL "TVOS") - set(SDK_NAME appletvos) - if(NOT ARCHS) - set(ARCHS arm64) - set(APPLE_TARGET_TRIPLE_INT aarch64-apple-tvos${DEPLOYMENT_TARGET}) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}) - endif() -elseif (PLATFORM_INT STREQUAL "TVOSCOMBINED") - set(SDK_NAME appletvos) - if(MODERN_CMAKE) - if(NOT ARCHS) - set(ARCHS arm64 x86_64) - set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-tvos${DEPLOYMENT_TARGET}) - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvsimulator*] "x86_64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvos*] "arm64") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvsimulator*] "x86_64") - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}) - endif() - else() - message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the TVOSCOMBINED setting work") - endif() -elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") - set(SDK_NAME appletvsimulator) - if(NOT ARCHS) - set(ARCHS x86_64) - set(APPLE_TARGET_TRIPLE_INT x86_64-apple-tvos${DEPLOYMENT_TARGET}-simulator) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}-simulator) - endif() -elseif(PLATFORM_INT STREQUAL "WATCHOS") - set(SDK_NAME watchos) - if(NOT ARCHS) - if (XCODE_VERSION_INT VERSION_GREATER 10.0) - set(ARCHS armv7k arm64_32) - set(APPLE_TARGET_TRIPLE_INT aarch64_32-apple-watchos${DEPLOYMENT_TARGET}) - else() - set(ARCHS armv7k) - set(APPLE_TARGET_TRIPLE_INT arm-apple-watchos${DEPLOYMENT_TARGET}) - endif() - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}) - endif() -elseif(PLATFORM_INT STREQUAL "WATCHOSCOMBINED") - set(SDK_NAME watchos) - if(MODERN_CMAKE) - if(NOT ARCHS) - if (XCODE_VERSION_INT VERSION_GREATER 10.0) - set(ARCHS armv7k arm64_32 i386) - set(APPLE_TARGET_TRIPLE_INT aarch64_32-i386-apple-watchos${DEPLOYMENT_TARGET}) - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k arm64_32") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k arm64_32") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") - else() - set(ARCHS armv7k i386) - set(APPLE_TARGET_TRIPLE_INT arm-i386-apple-watchos${DEPLOYMENT_TARGET}) - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") - endif() - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}) - endif() - else() - message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the WATCHOSCOMBINED setting work") - endif() -elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") - set(SDK_NAME 
watchsimulator) - if(NOT ARCHS) - set(ARCHS i386) - set(APPLE_TARGET_TRIPLE_INT i386-apple-watchos${DEPLOYMENT_TARGET}-simulator) - else() - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}-simulator) - endif() -elseif(PLATFORM_INT STREQUAL "MAC" OR PLATFORM_INT STREQUAL "MAC_CATALYST") - set(SDK_NAME macosx) - if(NOT ARCHS) - set(ARCHS x86_64) - endif() - string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") - if(PLATFORM_INT STREQUAL "MAC") - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) - elseif(PLATFORM_INT STREQUAL "MAC_CATALYST") - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) - endif() -elseif(PLATFORM_INT MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$") - set(SDK_NAME macosx) - if(NOT ARCHS) - set(ARCHS arm64) - endif() - string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") - if(PLATFORM_INT STREQUAL "MAC_ARM64") - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) - elseif(PLATFORM_INT STREQUAL "MAC_CATALYST_ARM64") - set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) - endif() -else() - message(FATAL_ERROR "Invalid PLATFORM: ${PLATFORM_INT}") -endif() - -string(REPLACE ";" " " ARCHS_SPACED "${ARCHS}") - -if(MODERN_CMAKE AND PLATFORM_INT MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode") - message(FATAL_ERROR "The COMBINED options only work with Xcode generator, -G Xcode") -endif() - -if(CMAKE_GENERATOR MATCHES "Xcode" AND PLATFORM_INT MATCHES "^MAC_CATALYST") - set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") - set(CMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS "macosx") - set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-maccatalyst") - if(NOT DEFINED MACOSX_DEPLOYMENT_TARGET) - set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "10.15") - else() - set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "${MACOSX_DEPLOYMENT_TARGET}") - endif() -elseif(CMAKE_GENERATOR MATCHES "Xcode") - set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") - set(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}") - if(NOT PLATFORM_INT MATCHES ".*COMBINED") - set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}") - set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}") - endif() -endif() - -# If user did not specify the SDK root to use, then query xcodebuild for it. -if(DEFINED CMAKE_OSX_SYSROOT_INT) - # Environment variables are always preserved. - set(ENV{_CMAKE_OSX_SYSROOT_INT} "${CMAKE_OSX_SYSROOT_INT}") -elseif(DEFINED ENV{_CMAKE_OSX_SYSROOT_INT}) - set(CMAKE_OSX_SYSROOT_INT "$ENV{_CMAKE_OSX_SYSROOT_INT}") -elseif(NOT DEFINED CMAKE_OSX_SYSROOT_INT) - execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version -sdk ${SDK_NAME} Path - OUTPUT_VARIABLE CMAKE_OSX_SYSROOT_INT - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - -if (NOT DEFINED CMAKE_OSX_SYSROOT_INT AND NOT DEFINED CMAKE_OSX_SYSROOT) - message(SEND_ERROR "Please make sure that Xcode is installed and that the toolchain" - "is pointing to the correct path. Please run:" - "sudo xcode-select -s /Applications/Xcode.app/Contents/Developer" - "and see if that fixes the problem for you.") - message(FATAL_ERROR "Invalid CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT} " - "does not exist.") -elseif(DEFINED CMAKE_OSX_SYSROOT_INT) - set(CMAKE_OSX_SYSROOT_INT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") - # Specify the location or name of the platform SDK to be used in CMAKE_OSX_SYSROOT. 
- set(CMAKE_OSX_SYSROOT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") -endif() - -# Use bitcode or not -if(NOT DEFINED ENABLE_BITCODE AND NOT ARCHS MATCHES "((^|;|, )(i386|x86_64))+") - # Unless specified, enable bitcode support by default - message(STATUS "[DEFAULTS] Enabling bitcode support by default. ENABLE_BITCODE not provided!") - set(ENABLE_BITCODE ON) -elseif(NOT DEFINED ENABLE_BITCODE) - message(STATUS "[DEFAULTS] Disabling bitcode support by default on simulators. ENABLE_BITCODE not provided for override!") - set(ENABLE_BITCODE OFF) -endif() -set(ENABLE_BITCODE_INT ${ENABLE_BITCODE} CACHE BOOL - "Whether or not to enable bitcode" FORCE) -# Use ARC or not -if(NOT DEFINED ENABLE_ARC) - # Unless specified, enable ARC support by default - set(ENABLE_ARC ON) - message(STATUS "[DEFAULTS] Enabling ARC support by default. ENABLE_ARC not provided!") -endif() -set(ENABLE_ARC_INT ${ENABLE_ARC} CACHE BOOL "Whether or not to enable ARC" FORCE) -# Use hidden visibility or not -if(NOT DEFINED ENABLE_VISIBILITY) - # Unless specified, disable symbols visibility by default - set(ENABLE_VISIBILITY OFF) - message(STATUS "[DEFAULTS] Hiding symbols visibility by default. ENABLE_VISIBILITY not provided!") -endif() -set(ENABLE_VISIBILITY_INT ${ENABLE_VISIBILITY} CACHE BOOL "Whether or not to hide symbols from the dynamic linker (-fvisibility=hidden)" FORCE) -# Set strict compiler checks or not -if(NOT DEFINED ENABLE_STRICT_TRY_COMPILE) - # Unless specified, disable strict try_compile() - set(ENABLE_STRICT_TRY_COMPILE OFF) - message(STATUS "[DEFAULTS] Using NON-strict compiler checks by default. ENABLE_STRICT_TRY_COMPILE not provided!") -endif() -set(ENABLE_STRICT_TRY_COMPILE_INT ${ENABLE_STRICT_TRY_COMPILE} CACHE BOOL - "Whether or not to use strict compiler checks" FORCE) - -# Get the SDK version information. -if(DEFINED SDK_VERSION) - # Environment variables are always preserved. - set(ENV{_SDK_VERSION} "${SDK_VERSION}") -elseif(DEFINED ENV{_SDK_VERSION}) - set(SDK_VERSION "$ENV{_SDK_VERSION}") -elseif(NOT DEFINED SDK_VERSION) - execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -sdk ${CMAKE_OSX_SYSROOT_INT} -version SDKVersion - OUTPUT_VARIABLE SDK_VERSION - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - -# Find the Developer root for the specific iOS platform being compiled for -# from CMAKE_OSX_SYSROOT. Should be ../../ from SDK specified in -# CMAKE_OSX_SYSROOT. There does not appear to be a direct way to obtain -# this information from xcrun or xcodebuild. -if (NOT DEFINED CMAKE_DEVELOPER_ROOT AND NOT CMAKE_GENERATOR MATCHES "Xcode") - get_filename_component(PLATFORM_SDK_DIR ${CMAKE_OSX_SYSROOT_INT} PATH) - get_filename_component(CMAKE_DEVELOPER_ROOT ${PLATFORM_SDK_DIR} PATH) - if (NOT EXISTS "${CMAKE_DEVELOPER_ROOT}") - message(FATAL_ERROR "Invalid CMAKE_DEVELOPER_ROOT: ${CMAKE_DEVELOPER_ROOT} does not exist.") - endif() -endif() - -# Find the C & C++ compilers for the specified SDK. -if(DEFINED CMAKE_C_COMPILER) - # Environment variables are always preserved. - set(ENV{_CMAKE_C_COMPILER} "${CMAKE_C_COMPILER}") -elseif(DEFINED ENV{_CMAKE_C_COMPILER}) - set(CMAKE_C_COMPILER "$ENV{_CMAKE_C_COMPILER}") - set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) -elseif(NOT DEFINED CMAKE_C_COMPILER) - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang - OUTPUT_VARIABLE CMAKE_C_COMPILER - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) -endif() -if(DEFINED CMAKE_CXX_COMPILER) - # Environment variables are always preserved. 
- set(ENV{_CMAKE_CXX_COMPILER} "${CMAKE_CXX_COMPILER}") -elseif(DEFINED ENV{_CMAKE_CXX_COMPILER}) - set(CMAKE_CXX_COMPILER "$ENV{_CMAKE_CXX_COMPILER}") -elseif(NOT DEFINED CMAKE_CXX_COMPILER) - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang++ - OUTPUT_VARIABLE CMAKE_CXX_COMPILER - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() -# Find (Apple's) libtool. -if(DEFINED BUILD_LIBTOOL) - # Environment variables are always preserved. - set(ENV{_BUILD_LIBTOOL} "${BUILD_LIBTOOL}") -elseif(DEFINED ENV{_BUILD_LIBTOOL}) - set(BUILD_LIBTOOL "$ENV{_BUILD_LIBTOOL}") -elseif(NOT DEFINED BUILD_LIBTOOL) - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find libtool - OUTPUT_VARIABLE BUILD_LIBTOOL - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() -# Find the toolchain's provided install_name_tool if none is found on the host -if(DEFINED CMAKE_INSTALL_NAME_TOOL) - # Environment variables are always preserved. - set(ENV{_CMAKE_INSTALL_NAME_TOOL} "${CMAKE_INSTALL_NAME_TOOL}") -elseif(DEFINED ENV{_CMAKE_INSTALL_NAME_TOOL}) - set(CMAKE_INSTALL_NAME_TOOL "$ENV{_CMAKE_INSTALL_NAME_TOOL}") -elseif(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find install_name_tool - OUTPUT_VARIABLE CMAKE_INSTALL_NAME_TOOL_INT - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - set(CMAKE_INSTALL_NAME_TOOL ${CMAKE_INSTALL_NAME_TOOL_INT} CACHE INTERNAL "") -endif() - -# Configure libtool to be used instead of ar + ranlib to build static libraries. -# This is required on Xcode 7+, but should also work on previous versions of -# Xcode. -get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) -foreach(lang ${languages}) - set(CMAKE_${lang}_CREATE_STATIC_LIBRARY "${BUILD_LIBTOOL} -static -o " CACHE INTERNAL "") -endforeach() - -# CMake 3.14+ support building for iOS, watchOS and tvOS out of the box. -if(MODERN_CMAKE) - if(SDK_NAME MATCHES "iphone") - set(CMAKE_SYSTEM_NAME iOS) - elseif(SDK_NAME MATCHES "macosx") - set(CMAKE_SYSTEM_NAME Darwin) - elseif(SDK_NAME MATCHES "appletv") - set(CMAKE_SYSTEM_NAME tvOS) - elseif(SDK_NAME MATCHES "watch") - set(CMAKE_SYSTEM_NAME watchOS) - endif() - # Provide flags for a combined FAT library build on newer CMake versions - if(PLATFORM_INT MATCHES ".*COMBINED") - set(CMAKE_XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO") - set(CMAKE_IOS_INSTALL_COMBINED YES) - endif() -elseif(NOT DEFINED CMAKE_SYSTEM_NAME AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.10") - # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified - set(CMAKE_SYSTEM_NAME iOS) -elseif(NOT DEFINED CMAKE_SYSTEM_NAME) - # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified - set(CMAKE_SYSTEM_NAME Darwin) -endif() -# Standard settings. -set(CMAKE_SYSTEM_VERSION ${SDK_VERSION} CACHE INTERNAL "") -set(UNIX ON CACHE BOOL "") -set(APPLE ON CACHE BOOL "") -if(PLATFORM STREQUAL "MAC" OR PLATFORM STREQUAL "MAC_ARM64") - set(IOS OFF CACHE BOOL "") - set(MACOS ON CACHE BOOL "") -elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64") - set(IOS ON CACHE BOOL "") - set(MACOS ON CACHE BOOL "") -else() - set(IOS ON CACHE BOOL "") -endif() -set(CMAKE_AR ar CACHE FILEPATH "" FORCE) -set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE) -set(CMAKE_STRIP strip CACHE FILEPATH "" FORCE) -# Set the architectures for which to build. 
-set(CMAKE_OSX_ARCHITECTURES ${ARCHS} CACHE INTERNAL "") -# Change the type of target generated for try_compile() so it'll work when cross-compiling, weak compiler checks -if(NOT ENABLE_STRICT_TRY_COMPILE_INT) - set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) -endif() -# All iOS/Darwin specific settings - some may be redundant. -set(CMAKE_MACOSX_BUNDLE YES) -set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "NO") -set(CMAKE_SHARED_LIBRARY_PREFIX "lib") -set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") -set(CMAKE_SHARED_MODULE_PREFIX "lib") -set(CMAKE_SHARED_MODULE_SUFFIX ".so") -set(CMAKE_C_COMPILER_ABI ELF) -set(CMAKE_CXX_COMPILER_ABI ELF) -set(CMAKE_C_HAS_ISYSROOT 1) -set(CMAKE_CXX_HAS_ISYSROOT 1) -set(CMAKE_MODULE_EXISTS 1) -set(CMAKE_DL_LIBS "") -set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") -set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") -set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") -set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") - -if(ARCHS MATCHES "((^|;|, )(arm64|arm64e|x86_64))+") - set(CMAKE_C_SIZEOF_DATA_PTR 8) - set(CMAKE_CXX_SIZEOF_DATA_PTR 8) - if(ARCHS MATCHES "((^|;|, )(arm64|arm64e))+") - set(CMAKE_SYSTEM_PROCESSOR "aarch64") - else() - set(CMAKE_SYSTEM_PROCESSOR "x86_64") - endif() -else() - set(CMAKE_C_SIZEOF_DATA_PTR 4) - set(CMAKE_CXX_SIZEOF_DATA_PTR 4) - set(CMAKE_SYSTEM_PROCESSOR "arm") -endif() - -# Note that only Xcode 7+ supports the newer more specific: -# -m${SDK_NAME}-version-min flags, older versions of Xcode use: -# -m(ios/ios-simulator)-version-min instead. -if(${CMAKE_VERSION} VERSION_LESS "3.11") - if(PLATFORM_INT STREQUAL "OS" OR PLATFORM_INT STREQUAL "OS64") - if(XCODE_VERSION_INT VERSION_LESS 7.0) - set(SDK_NAME_VERSION_FLAGS - "-mios-version-min=${DEPLOYMENT_TARGET}") - else() - # Xcode 7.0+ uses flags we can build directly from SDK_NAME. - set(SDK_NAME_VERSION_FLAGS - "-m${SDK_NAME}-version-min=${DEPLOYMENT_TARGET}") - endif() - elseif(PLATFORM_INT STREQUAL "TVOS") - set(SDK_NAME_VERSION_FLAGS - "-mtvos-version-min=${DEPLOYMENT_TARGET}") - elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") - set(SDK_NAME_VERSION_FLAGS - "-mtvos-simulator-version-min=${DEPLOYMENT_TARGET}") - elseif(PLATFORM_INT STREQUAL "WATCHOS") - set(SDK_NAME_VERSION_FLAGS - "-mwatchos-version-min=${DEPLOYMENT_TARGET}") - elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") - set(SDK_NAME_VERSION_FLAGS - "-mwatchos-simulator-version-min=${DEPLOYMENT_TARGET}") - elseif(PLATFORM_INT STREQUAL "MAC") - set(SDK_NAME_VERSION_FLAGS - "-mmacosx-version-min=${DEPLOYMENT_TARGET}") - else() - # SIMULATOR or SIMULATOR64 both use -mios-simulator-version-min. 
- set(SDK_NAME_VERSION_FLAGS - "-mios-simulator-version-min=${DEPLOYMENT_TARGET}") - endif() -elseif(NOT PLATFORM_INT MATCHES "^MAC_CATALYST") - # Newer versions of CMake sets the version min flags correctly, skip this for Mac Catalyst targets - set(CMAKE_OSX_DEPLOYMENT_TARGET ${DEPLOYMENT_TARGET}) -endif() - -if(DEFINED APPLE_TARGET_TRIPLE_INT) - set(APPLE_TARGET_TRIPLE ${APPLE_TARGET_TRIPLE_INT} CACHE INTERNAL "") - set(CMAKE_C_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) - set(CMAKE_CXX_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) - set(CMAKE_ASM_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) -endif() - -if(PLATFORM_INT MATCHES "^MAC_CATALYST") - set(C_TARGET_FLAGS "-isystem ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/usr/include -iframework ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks") -endif() - -if(ENABLE_BITCODE_INT) - set(BITCODE "-fembed-bitcode") - set(CMAKE_XCODE_ATTRIBUTE_BITCODE_GENERATION_MODE "bitcode") - set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "YES") -else() - set(BITCODE "") - set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "NO") -endif() - -if(ENABLE_ARC_INT) - set(FOBJC_ARC "-fobjc-arc") - set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES") -else() - set(FOBJC_ARC "-fno-objc-arc") - set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "NO") -endif() - -if(NAMED_LANGUAGE_SUPPORT_INT) - set(OBJC_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0") - set(OBJC_LEGACY_VARS "") -else() - set(OBJC_VARS "") - set(OBJC_LEGACY_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0") -endif() - -if(NOT ENABLE_VISIBILITY_INT) - foreach(lang ${languages}) - set(CMAKE_${lang}_VISIBILITY_PRESET "hidden" CACHE INTERNAL "") - endforeach() - set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "YES") - set(VISIBILITY "-fvisibility=hidden -fvisibility-inlines-hidden") -else() - foreach(lang ${languages}) - set(CMAKE_${lang}_VISIBILITY_PRESET "default" CACHE INTERNAL "") - endforeach() - set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "NO") - set(VISIBILITY "-fvisibility=default") -endif() - -if(DEFINED APPLE_TARGET_TRIPLE) - set(APPLE_TARGET_TRIPLE_FLAG "-target ${APPLE_TARGET_TRIPLE}") -endif() - -#Check if Xcode generator is used, since that will handle these flags automagically -if(CMAKE_GENERATOR MATCHES "Xcode") - message(STATUS "Not setting any manual command-line buildflags, since Xcode is selected as generator. 
Modifying the Xcode build-settings directly instead.") -else() - set(CMAKE_C_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${OBJC_LEGACY_VARS} ${BITCODE} ${VISIBILITY} ${CMAKE_C_FLAGS}") - set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_C_FLAGS_DEBUG}") - set(CMAKE_C_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_C_FLAGS_MINSIZEREL}") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_C_FLAGS_RELWITHDEBINFO}") - set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_C_FLAGS_RELEASE}") - set(CMAKE_CXX_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${OBJC_LEGACY_VARS} ${BITCODE} ${VISIBILITY} ${CMAKE_CXX_FLAGS}") - set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_CXX_FLAGS_DEBUG}") - set(CMAKE_CXX_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_CXX_FLAGS_MINSIZEREL}") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") - set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_CXX_FLAGS_RELEASE}") - if(NAMED_LANGUAGE_SUPPORT_INT) - set(CMAKE_OBJC_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} ${FOBJC_ARC} ${OBJC_VARS} ${CMAKE_OBJC_FLAGS}") - set(CMAKE_OBJC_FLAGS_DEBUG "-O0 -g ${CMAKE_OBJC_FLAGS_DEBUG}") - set(CMAKE_OBJC_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_OBJC_FLAGS_MINSIZEREL}") - set(CMAKE_OBJC_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_OBJC_FLAGS_RELWITHDEBINFO}") - set(CMAKE_OBJC_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_OBJC_FLAGS_RELEASE}") - set(CMAKE_OBJCXX_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} ${FOBJC_ARC} ${OBJC_VARS} ${CMAKE_OBJCXX_FLAGS}") - set(CMAKE_OBJCXX_FLAGS_DEBUG "-O0 -g ${CMAKE_OBJCXX_FLAGS_DEBUG}") - set(CMAKE_OBJCXX_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_OBJCXX_FLAGS_MINSIZEREL}") - set(CMAKE_OBJCXX_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_OBJCXX_FLAGS_RELWITHDEBINFO}") - set(CMAKE_OBJCXX_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_OBJCXX_FLAGS_RELEASE}") - endif() - set(CMAKE_C_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") - set(CMAKE_CXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") - if(NAMED_LANGUAGE_SUPPORT_INT) - set(CMAKE_OBJC_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_OBJC_LINK_FLAGS}") - set(CMAKE_OBJCXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_OBJCXX_LINK_FLAGS}") - endif() - set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -x assembler-with-cpp -arch ${CMAKE_OSX_ARCHITECTURES} ${APPLE_TARGET_TRIPLE_FLAG}") -endif() - -## Print status messages to inform of the current state -message(STATUS "Configuring ${SDK_NAME} build for platform: ${PLATFORM_INT}, architecture(s): ${ARCHS}") -message(STATUS "Using SDK: ${CMAKE_OSX_SYSROOT_INT}") -message(STATUS "Using C compiler: ${CMAKE_C_COMPILER}") -message(STATUS "Using CXX compiler: ${CMAKE_CXX_COMPILER}") -message(STATUS "Using libtool: ${BUILD_LIBTOOL}") -message(STATUS "Using install name tool: ${CMAKE_INSTALL_NAME_TOOL}") -if(DEFINED APPLE_TARGET_TRIPLE) - message(STATUS "Autoconf target triple: ${APPLE_TARGET_TRIPLE}") -endif() -message(STATUS "Using minimum deployment version: ${DEPLOYMENT_TARGET}" - " (SDK version: ${SDK_VERSION})") -if(MODERN_CMAKE) - message(STATUS "Merging integrated CMake 3.14+ iOS,tvOS,watchOS,macOS toolchain(s) with this toolchain!") - if(PLATFORM_INT MATCHES ".*COMBINED") - message(STATUS "Will combine built 
(static) artifacts into FAT lib...") - endif() -endif() -if(CMAKE_GENERATOR MATCHES "Xcode") - message(STATUS "Using Xcode version: ${XCODE_VERSION_INT}") -endif() -message(STATUS "CMake version: ${CMAKE_VERSION}") -if(DEFINED SDK_NAME_VERSION_FLAGS) - message(STATUS "Using version flags: ${SDK_NAME_VERSION_FLAGS}") -endif() -message(STATUS "Using a data_ptr size of: ${CMAKE_CXX_SIZEOF_DATA_PTR}") -if(ENABLE_BITCODE_INT) - message(STATUS "Bitcode: Enabled") -else() - message(STATUS "Bitcode: Disabled") -endif() - -if(ENABLE_ARC_INT) - message(STATUS "ARC: Enabled") -else() - message(STATUS "ARC: Disabled") -endif() - -if(ENABLE_VISIBILITY_INT) - message(STATUS "Hiding symbols: Disabled") -else() - message(STATUS "Hiding symbols: Enabled") -endif() - -# Set global properties -set_property(GLOBAL PROPERTY PLATFORM "${PLATFORM}") -set_property(GLOBAL PROPERTY APPLE_TARGET_TRIPLE "${APPLE_TARGET_TRIPLE_INT}") -set_property(GLOBAL PROPERTY SDK_VERSION "${SDK_VERSION}") -set_property(GLOBAL PROPERTY XCODE_VERSION "${XCODE_VERSION_INT}") -set_property(GLOBAL PROPERTY OSX_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}") - -# Export configurable variables for the try_compile() command. -set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES - PLATFORM - XCODE_VERSION_INT - SDK_VERSION - NAMED_LANGUAGE_SUPPORT - DEPLOYMENT_TARGET - CMAKE_DEVELOPER_ROOT - CMAKE_OSX_SYSROOT_INT - ENABLE_BITCODE - ENABLE_ARC - CMAKE_ASM_COMPILER - CMAKE_C_COMPILER - CMAKE_C_COMPILER_TARGET - CMAKE_CXX_COMPILER - CMAKE_CXX_COMPILER_TARGET - BUILD_LIBTOOL - CMAKE_INSTALL_NAME_TOOL - CMAKE_C_FLAGS - CMAKE_C_DEBUG - CMAKE_C_MINSIZEREL - CMAKE_C_RELWITHDEBINFO - CMAKE_C_RELEASE - CMAKE_CXX_FLAGS - CMAKE_CXX_FLAGS_DEBUG - CMAKE_CXX_FLAGS_MINSIZEREL - CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_CXX_FLAGS_RELEASE - CMAKE_C_LINK_FLAGS - CMAKE_CXX_LINK_FLAGS - CMAKE_ASM_FLAGS -) - -if(NAMED_LANGUAGE_SUPPORT_INT) - list(APPEND CMAKE_TRY_COMPILE_PLATFORM_VARIABLES - CMAKE_OBJC_FLAGS - CMAKE_OBJC_DEBUG - CMAKE_OBJC_MINSIZEREL - CMAKE_OBJC_RELWITHDEBINFO - CMAKE_OBJC_RELEASE - CMAKE_OBJCXX_FLAGS - CMAKE_OBJCXX_DEBUG - CMAKE_OBJCXX_MINSIZEREL - CMAKE_OBJCXX_RELWITHDEBINFO - CMAKE_OBJCXX_RELEASE - CMAKE_OBJC_LINK_FLAGS - CMAKE_OBJCXX_LINK_FLAGS - ) -endif() - -set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) -set(CMAKE_SHARED_LINKER_FLAGS "-rpath @executable_path/Frameworks -rpath @loader_path/Frameworks") -set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -Wl,-headerpad_max_install_names") -set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -Wl,-headerpad_max_install_names") -set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") -set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") -set(CMAKE_FIND_LIBRARY_SUFFIXES ".tbd" ".dylib" ".so" ".a") -set(CMAKE_SHARED_LIBRARY_SONAME_C_FLAG "-install_name") - -# Set the find root to the SDK developer roots. -# Note: CMAKE_FIND_ROOT_PATH is only useful when cross-compiling. Thus, do not set on macOS builds. -if(NOT PLATFORM_INT MATCHES "^MAC.*$") - list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") - set(CMAKE_IGNORE_PATH "/System/Library/Frameworks;/usr/local/lib" CACHE INTERNAL "") -endif() - -# Default to searching for frameworks first. -set(CMAKE_FIND_FRAMEWORK FIRST) - -# Set up the default search directories for frameworks. 
-if(PLATFORM_INT MATCHES "^MAC_CATALYST") - set(CMAKE_FRAMEWORK_PATH - ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks - ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks - ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks - ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "") -else() - set(CMAKE_FRAMEWORK_PATH - ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks - ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks - ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "") -endif() - -# By default, search both the specified iOS SDK and the remainder of the host filesystem. -if(NOT CMAKE_FIND_ROOT_PATH_MODE_PROGRAM) - set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH CACHE INTERNAL "") -endif() -if(NOT CMAKE_FIND_ROOT_PATH_MODE_LIBRARY) - set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH CACHE INTERNAL "") -endif() -if(NOT CMAKE_FIND_ROOT_PATH_MODE_INCLUDE) - set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH CACHE INTERNAL "") -endif() -if(NOT CMAKE_FIND_ROOT_PATH_MODE_PACKAGE) - set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH CACHE INTERNAL "") -endif() - -# -# Some helper-macros below to simplify and beautify the CMakeFile -# - -# This little macro lets you set any Xcode specific property. -macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE XCODE_RELVERSION) - set(XCODE_RELVERSION_I "${XCODE_RELVERSION}") - if(XCODE_RELVERSION_I STREQUAL "All") - set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} "${XCODE_VALUE}") - else() - set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY}[variant=${XCODE_RELVERSION_I}] "${XCODE_VALUE}") - endif() -endmacro(set_xcode_property) - -# This macro lets you find executable programs on the host system. -macro(find_host_package) - set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) - set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) - set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) - set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE NEVER) - set(_TOOLCHAIN_IOS ${IOS}) - set(IOS OFF) - find_package(${ARGN}) - set(IOS ${_TOOLCHAIN_IOS}) - set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) - set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) - set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) - set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH) -endmacro(find_host_package) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/CMakeLists.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/CMakeLists.txt deleted file mode 100644 index 686362688c050d48224ca0a01e0d24b03d94758a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_library(utils STATIC - string.cc - utils.cc -) - -if(NOT ANDROID) - if(MSVC) - target_link_libraries(utils PUBLIC fst) - else() - target_link_libraries(utils PUBLIC fst dl) - endif() -endif() \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/blocking_queue.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/blocking_queue.h deleted file mode 100644 index 9bf0127d9298fbfae2eeebb9431c680fc5dd7647..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/blocking_queue.h +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef UTILS_BLOCKING_QUEUE_H_
-#define UTILS_BLOCKING_QUEUE_H_
-
-#include <condition_variable>
-#include <limits>
-#include <mutex>
-#include <queue>
-#include <utility>
-#include <vector>
-
-#include "utils/utils.h"
-
-namespace wenet {
-
-template <typename T>
-class BlockingQueue {
- public:
-  explicit BlockingQueue(size_t capacity = std::numeric_limits<int>::max())
-      : capacity_(capacity) {}
-
-  void Push(const T& value) {
-    {
-      std::unique_lock<std::mutex> lock(mutex_);
-      while (queue_.size() >= capacity_) {
-        not_full_condition_.wait(lock);
-      }
-      queue_.push(value);
-    }
-    not_empty_condition_.notify_one();
-  }
-
-  void Push(T&& value) {
-    {
-      std::unique_lock<std::mutex> lock(mutex_);
-      while (queue_.size() >= capacity_) {
-        not_full_condition_.wait(lock);
-      }
-      queue_.push(std::move(value));
-    }
-    not_empty_condition_.notify_one();
-  }
-
-  void Push(const std::vector<T>& values) {
-    {
-      std::unique_lock<std::mutex> lock(mutex_);
-      for (auto& value : values) {
-        while (queue_.size() >= capacity_) {
-          not_empty_condition_.notify_one();
-          not_full_condition_.wait(lock);
-        }
-        queue_.push(value);
-      }
-    }
-    not_empty_condition_.notify_one();
-  }
-
-  void Push(std::vector<T>&& values) {
-    std::unique_lock<std::mutex> lock(mutex_);
-    for (auto& value : values) {
-      while (queue_.size() >= capacity_) {
-        not_empty_condition_.notify_one();
-        not_full_condition_.wait(lock);
-      }
-      queue_.push(std::move(value));
-    }
-    not_empty_condition_.notify_one();
-  }
-
-  T Pop() {
-    std::unique_lock<std::mutex> lock(mutex_);
-    while (queue_.empty()) {
-      not_empty_condition_.wait(lock);
-    }
-    T t(std::move(queue_.front()));
-    queue_.pop();
-    not_full_condition_.notify_one();
-    return t;
-  }
-
-  // num can be greater than capacity,but it needs to be used with care
-  std::vector<T> Pop(size_t num) {
-    std::unique_lock<std::mutex> lock(mutex_);
-    std::vector<T> block_data;
-    while (block_data.size() < num) {
-      while (queue_.empty()) {
-        not_full_condition_.notify_one();
-        not_empty_condition_.wait(lock);
-      }
-      block_data.push_back(std::move(queue_.front()));
-      queue_.pop();
-    }
-    not_full_condition_.notify_one();
-    return block_data;
-  }
-
-  bool Empty() const {
-    std::lock_guard<std::mutex> lock(mutex_);
-    return queue_.empty();
-  }
-
-  size_t Size() const {
-    std::lock_guard<std::mutex> lock(mutex_);
-    return queue_.size();
-  }
-
-  void Clear() {
-    while (!Empty()) {
-      Pop();
-    }
-  }
-
- private:
-  size_t capacity_;
-  mutable std::mutex mutex_;
-  std::condition_variable not_full_condition_;
-  std::condition_variable not_empty_condition_;
-  std::queue<T> queue_;
-
- public:
-  WENET_DISALLOW_COPY_AND_ASSIGN(BlockingQueue);
-};
-
-}  // namespace wenet
-
-#endif  // UTILS_BLOCKING_QUEUE_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/file.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/file.h
deleted file mode 100644
index 83ad9c8c52fecd334b3549285bf39cd4f59b9f2b..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/file.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright (c) 2022 Binbin Zhang (binbzha@qq.com)
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef UTILS_FILE_H_
-#define UTILS_FILE_H_
-
-#include <fstream>
-#include <string>
-
-namespace wenet {
-
-inline bool FileExists(const std::string& path) {
-  std::ifstream f(path.c_str());
-  return f.good();
-}
-
-}  // namespace wenet
-
-#endif  // UTILS_FILE_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/flags.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/flags.h
deleted file mode 100644
index 3432aa78847322edec8d6d2aec59ed7ca5352fcd..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/flags.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang)
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef UTILS_FLAGS_H_
-#define UTILS_FLAGS_H_
-
-// Because openfst is a dynamic library compiled with gflags/glog, we must use
-// the gflags/glog from openfst to avoid them linked both statically and
-// dynamically into the executable.
-#include "fst/flags.h"
-
-#endif  // UTILS_FLAGS_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/json.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/json.h
deleted file mode 100644
index bf8d94a3e42504139b10daa39b8f8e7a8b2d93cc..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/json.h
+++ /dev/null
@@ -1,754 +0,0 @@
-// Copyright (c) From https://github.com/nbsdx/SimpleJSON
-//               2022 Binbin Zhang (binbzha@qq.com)
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
- -#ifndef UTILS_JSON_H_ -#define UTILS_JSON_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace json { - -using std::deque; -using std::enable_if; -using std::initializer_list; -using std::is_convertible; -using std::is_floating_point; -using std::is_integral; -using std::is_same; -using std::map; -using std::string; - -namespace { // NOLINT -string json_escape(const string& str) { - string output; - for (unsigned i = 0; i < str.length(); ++i) switch (str[i]) { - case '\"': - output += "\\\""; - break; - case '\\': - output += "\\\\"; - break; - case '\b': - output += "\\b"; - break; - case '\f': - output += "\\f"; - break; - case '\n': - output += "\\n"; - break; - case '\r': - output += "\\r"; - break; - case '\t': - output += "\\t"; - break; - default: - output += str[i]; - break; - } - return std::move(output); -} -} // namespace - -class JSON { - union BackingData { - BackingData(double d) : Float(d) {} - BackingData(int l) : Int(l) {} - BackingData(bool b) : Bool(b) {} - BackingData(string s) : String(new string(s)) {} - BackingData() : Int(0) {} - - deque* List; - map* Map; - string* String; - double Float; - int Int; - bool Bool; - } Internal; - - public: - enum class Class { Null, Object, Array, String, Floating, Integral, Boolean }; - - template - class JSONWrapper { - Container* object; - - public: - explicit JSONWrapper(Container* val) : object(val) {} - explicit JSONWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::iterator begin() { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::iterator end() { - return object ? object->end() : typename Container::iterator(); - } - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::iterator(); - } - typename Container::const_iterator end() const { - return object ? object->end() : typename Container::iterator(); - } - }; - - template - class JSONConstWrapper { - const Container* object; - - public: - explicit JSONConstWrapper(const Container* val) : object(val) {} - explicit JSONConstWrapper(std::nullptr_t) : object(nullptr) {} - - typename Container::const_iterator begin() const { - return object ? object->begin() : typename Container::const_iterator(); - } - typename Container::const_iterator end() const { - return object ? 
object->end() : typename Container::const_iterator(); - } - }; - - JSON() : Internal(), Type(Class::Null) {} - - explicit JSON(initializer_list list) : JSON() { - SetType(Class::Object); - for (auto i = list.begin(), e = list.end(); i != e; ++i, ++i) - operator[](i->ToString()) = *std::next(i); - } - - JSON(JSON&& other) : Internal(other.Internal), Type(other.Type) { - other.Type = Class::Null; - other.Internal.Map = nullptr; - } - - JSON& operator=(JSON&& other) { - ClearInternal(); - Internal = other.Internal; - Type = other.Type; - other.Internal.Map = nullptr; - other.Type = Class::Null; - return *this; - } - - JSON(const JSON& other) { - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - } - - JSON& operator=(const JSON& other) { - ClearInternal(); - switch (other.Type) { - case Class::Object: - Internal.Map = new map(other.Internal.Map->begin(), - other.Internal.Map->end()); - break; - case Class::Array: - Internal.List = new deque(other.Internal.List->begin(), - other.Internal.List->end()); - break; - case Class::String: - Internal.String = new string(*other.Internal.String); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - return *this; - } - - ~JSON() { - switch (Type) { - case Class::Array: - delete Internal.List; - break; - case Class::Object: - delete Internal.Map; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - template - explicit JSON(T b, typename enable_if::value>::type* = 0) - : Internal(b), Type(Class::Boolean) {} - - template - explicit JSON(T i, typename enable_if::value && - !is_same::value>::type* = 0) - : Internal(static_cast(i)), Type(Class::Integral) {} - - template - explicit JSON(T f, typename enable_if::value>::type* = 0) - : Internal(static_cast(f)), Type(Class::Floating) {} - - template - explicit JSON(T s, - typename enable_if::value>::type* = 0) - : Internal(string(s)), Type(Class::String) {} - - explicit JSON(std::nullptr_t) : Internal(), Type(Class::Null) {} - - static JSON Make(Class type) { - JSON ret; - ret.SetType(type); - return ret; - } - - static JSON Load(const string&); - - template - void append(T arg) { - SetType(Class::Array); - Internal.List->emplace_back(arg); - } - - template - void append(T arg, U... 
args) { - append(arg); - append(args...); - } - - template - typename enable_if::value, JSON&>::type operator=(T b) { - SetType(Class::Boolean); - Internal.Bool = b; - return *this; - } - - template - typename enable_if::value && !is_same::value, - JSON&>::type - operator=(T i) { - SetType(Class::Integral); - Internal.Int = i; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=(T f) { - SetType(Class::Floating); - Internal.Float = f; - return *this; - } - - template - typename enable_if::value, JSON&>::type operator=( - T s) { - SetType(Class::String); - *Internal.String = string(s); - return *this; - } - - JSON& operator[](const string& key) { - SetType(Class::Object); - return Internal.Map->operator[](key); - } - - JSON& operator[](unsigned index) { - SetType(Class::Array); - if (index >= Internal.List->size()) Internal.List->resize(index + 1); - return Internal.List->operator[](index); - } - - JSON& at(const string& key) { return operator[](key); } - - const JSON& at(const string& key) const { return Internal.Map->at(key); } - - JSON& at(unsigned index) { return operator[](index); } - - const JSON& at(unsigned index) const { return Internal.List->at(index); } - - int length() const { - if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - bool hasKey(const string& key) const { - if (Type == Class::Object) - return Internal.Map->find(key) != Internal.Map->end(); - return false; - } - - int size() const { - if (Type == Class::Object) - return Internal.Map->size(); - else if (Type == Class::Array) - return Internal.List->size(); - else - return -1; - } - - Class JSONType() const { return Type; } - - /// Functions for getting primitives from the JSON object. - bool IsNull() const { return Type == Class::Null; } - - string ToString() const { - bool b; - return std::move(ToString(&b)); - } - string ToString(bool* ok) const { - *ok = (Type == Class::String); - return *ok ? std::move(json_escape(*Internal.String)) : string(""); - } - - double ToFloat() const { - bool b; - return ToFloat(&b); - } - double ToFloat(bool* ok) const { - *ok = (Type == Class::Floating); - return *ok ? Internal.Float : 0.0; - } - - int ToInt() const { - bool b; - return ToInt(&b); - } - int ToInt(bool* ok) const { - *ok = (Type == Class::Integral); - return *ok ? Internal.Int : 0; - } - - bool ToBool() const { - bool b; - return ToBool(&b); - } - bool ToBool(bool* ok) const { - *ok = (Type == Class::Boolean); - return *ok ? 
Internal.Bool : false; - } - - JSONWrapper> ObjectRange() { - if (Type == Class::Object) - return JSONWrapper>(Internal.Map); - return JSONWrapper>(nullptr); - } - - JSONWrapper> ArrayRange() { - if (Type == Class::Array) return JSONWrapper>(Internal.List); - return JSONWrapper>(nullptr); - } - - JSONConstWrapper> ObjectRange() const { - if (Type == Class::Object) - return JSONConstWrapper>(Internal.Map); - return JSONConstWrapper>(nullptr); - } - - JSONConstWrapper> ArrayRange() const { - if (Type == Class::Array) - return JSONConstWrapper>(Internal.List); - return JSONConstWrapper>(nullptr); - } - - string dump(int depth = 1, string tab = " ") const { - string pad = ""; - for (int i = 0; i < depth; ++i, pad += tab) { - } - - switch (Type) { - case Class::Null: - return "null"; - case Class::Object: { - string s = "{\n"; - bool skip = true; - for (auto& p : *Internal.Map) { - if (!skip) s += ",\n"; - s += (pad + "\"" + p.first + "\" : " + p.second.dump(depth + 1, tab)); - skip = false; - } - s += ("\n" + pad.erase(0, 2) + "}"); - return s; - } - case Class::Array: { - string s = "["; - bool skip = true; - for (auto& p : *Internal.List) { - if (!skip) s += ", "; - s += p.dump(depth + 1, tab); - skip = false; - } - s += "]"; - return s; - } - case Class::String: - return "\"" + json_escape(*Internal.String) + "\""; - case Class::Floating: - return std::to_string(Internal.Float); - case Class::Integral: - return std::to_string(Internal.Int); - case Class::Boolean: - return Internal.Bool ? "true" : "false"; - default: - return ""; - } - return ""; - } - - friend std::ostream& operator<<(std::ostream&, const JSON&); - - private: - void SetType(Class type) { - if (type == Type) return; - - ClearInternal(); - - switch (type) { - case Class::Null: - Internal.Map = nullptr; - break; - case Class::Object: - Internal.Map = new map(); - break; - case Class::Array: - Internal.List = new deque(); - break; - case Class::String: - Internal.String = new string(); - break; - case Class::Floating: - Internal.Float = 0.0; - break; - case Class::Integral: - Internal.Int = 0; - break; - case Class::Boolean: - Internal.Bool = false; - break; - } - - Type = type; - } - - private: - /* beware: only call if YOU know that Internal is allocated. No checks - performed here. This function should be called in a constructed JSON just - before you are going to overwrite Internal... -*/ - void ClearInternal() { - switch (Type) { - case Class::Object: - delete Internal.Map; - break; - case Class::Array: - delete Internal.List; - break; - case Class::String: - delete Internal.String; - break; - default: { - }; - } - } - - private: - Class Type = Class::Null; -}; - -JSON Array() { return std::move(JSON::Make(JSON::Class::Array)); } - -template -JSON Array(T... 
args) { - JSON arr = JSON::Make(JSON::Class::Array); - arr.append(args...); - return std::move(arr); -} - -JSON Object() { return std::move(JSON::Make(JSON::Class::Object)); } - -std::ostream& operator<<(std::ostream& os, const JSON& json) { - os << json.dump(); - return os; -} - -namespace { // NOLINT -JSON parse_next(const string&, size_t&); - -void consume_ws(const string& str, size_t& offset) { // NOLINT - while (isspace(str[offset])) ++offset; -} - -JSON parse_object(const string& str, size_t& offset) { // NOLINT - JSON Object = JSON::Make(JSON::Class::Object); - - ++offset; - consume_ws(str, offset); - if (str[offset] == '}') { - ++offset; - return std::move(Object); - } - - while (true) { - JSON Key = parse_next(str, offset); - consume_ws(str, offset); - if (str[offset] != ':') { - std::cerr << "Error: Object: Expected colon, found '" << str[offset] - << "'\n"; - break; - } - consume_ws(str, ++offset); - JSON Value = parse_next(str, offset); - Object[Key.ToString()] = Value; - - consume_ws(str, offset); - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == '}') { - ++offset; - break; - } else { - std::cerr << "ERROR: Object: Expected comma, found '" << str[offset] - << "'\n"; - break; - } - } - - return std::move(Object); -} - -JSON parse_array(const string& str, size_t& offset) { // NOLINT - JSON Array = JSON::Make(JSON::Class::Array); - unsigned index = 0; - - ++offset; - consume_ws(str, offset); - if (str[offset] == ']') { - ++offset; - return std::move(Array); - } - - while (true) { - Array[index++] = parse_next(str, offset); - consume_ws(str, offset); - - if (str[offset] == ',') { - ++offset; - continue; - } else if (str[offset] == ']') { - ++offset; - break; - } else { - std::cerr << "ERROR: Array: Expected ',' or ']', found '" << str[offset] - << "'\n"; - return std::move(JSON::Make(JSON::Class::Array)); - } - } - - return std::move(Array); -} - -JSON parse_string(const string& str, size_t& offset) { // NOLINT - JSON String; - string val; - for (char c = str[++offset]; c != '\"'; c = str[++offset]) { - if (c == '\\') { - switch (str[++offset]) { - case '\"': - val += '\"'; - break; - case '\\': - val += '\\'; - break; - case '/': - val += '/'; - break; - case 'b': - val += '\b'; - break; - case 'f': - val += '\f'; - break; - case 'n': - val += '\n'; - break; - case 'r': - val += '\r'; - break; - case 't': - val += '\t'; - break; - case 'u': { - val += "\\u"; - for (unsigned i = 1; i <= 4; ++i) { - c = str[offset + i]; - if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || - (c >= 'A' && c <= 'F')) { - val += c; - } else { - std::cerr << "ERROR: String: Expected hex character in unicode " - "escape, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::String)); - } - } - offset += 4; - } break; - default: - val += '\\'; - break; - } - } else { - val += c; - } - } - ++offset; - String = val; - return std::move(String); -} - -JSON parse_number(const string& str, size_t& offset) { // NOLINT - JSON Number; - string val, exp_str; - char c; - bool isDouble = false; - int exp = 0; - while (true) { - c = str[offset++]; - if ((c == '-') || (c >= '0' && c <= '9')) { - val += c; - } else if (c == '.') { - val += c; - isDouble = true; - } else { - break; - } - } - if (c == 'E' || c == 'e') { - c = str[offset++]; - if (c == '-') { - ++offset; - exp_str += '-'; - } - while (true) { - c = str[offset++]; - if (c >= '0' && c <= '9') { - exp_str += c; - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: 
Expected a number for exponent, found '" - << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } else { - break; - } - } - exp = std::stol(exp_str); - } else if (!isspace(c) && c != ',' && c != ']' && c != '}') { - std::cerr << "ERROR: Number: unexpected character '" << c << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - --offset; - - if (isDouble) { - Number = std::stod(val) * std::pow(10, exp); - } else { - if (!exp_str.empty()) - Number = std::stol(val) * std::pow(10, exp); - else - Number = std::stol(val); - } - return std::move(Number); -} - -JSON parse_bool(const string& str, size_t& offset) { // NOLINT - JSON Bool; - if (str.substr(offset, 4) == "true") { - Bool = true; - } else if (str.substr(offset, 5) == "false") { - Bool = false; - } else { - std::cerr << "ERROR: Bool: Expected 'true' or 'false', found '" - << str.substr(offset, 5) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += (Bool.ToBool() ? 4 : 5); - return std::move(Bool); -} - -JSON parse_null(const string& str, size_t& offset) { // NOLINT - JSON Null; - if (str.substr(offset, 4) != "null") { - std::cerr << "ERROR: Null: Expected 'null', found '" - << str.substr(offset, 4) << "'\n"; - return std::move(JSON::Make(JSON::Class::Null)); - } - offset += 4; - return std::move(Null); -} - -JSON parse_next(const string& str, size_t& offset) { // NOLINT - char value; - consume_ws(str, offset); - value = str[offset]; - switch (value) { - case '[': - return std::move(parse_array(str, offset)); - case '{': - return std::move(parse_object(str, offset)); - case '\"': - return std::move(parse_string(str, offset)); - case 't': - case 'f': - return std::move(parse_bool(str, offset)); - case 'n': - return std::move(parse_null(str, offset)); - default: - if ((value <= '9' && value >= '0') || value == '-') - return std::move(parse_number(str, offset)); - } - std::cerr << "ERROR: Parse: Unknown starting character '" << value << "'\n"; - return JSON(); -} -} // namespace - -JSON JSON::Load(const string& str) { - size_t offset = 0; - return std::move(parse_next(str, offset)); -} - -} // namespace json - -#endif // UTILS_JSON_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/log.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/log.h deleted file mode 100644 index c2bf03f261a8711f74da819d80d68e8eb9fb124a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/log.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_LOG_H_ -#define UTILS_LOG_H_ - -// Because openfst is a dynamic library compiled with gflags/glog, we must use -// the gflags/glog from openfst to avoid them linked both statically and -// dynamically into the executable. 
-#include "fst/log.h" - -#endif // UTILS_LOG_H_ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/string.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/string.cc deleted file mode 100644 index 1ab93adf3cac1bc5a42c0b8c6cadbde399678fef..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/string.cc +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "utils/string.h" - -#include -#include -#include - -#include "utils/log.h" -#include "utils/utils.h" - -namespace wenet { - -void SplitString(const std::string& str, std::vector* strs) { - SplitStringToVector(Trim(str), " \t", true, strs); -} - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out) { - size_t start = 0, found = 0, end = full.size(); - out->clear(); - while (found != std::string::npos) { - found = full.find_first_of(delim, start); - // start != end condition is for when the delimiter is at the end - if (!omit_empty_strings || (found != start && start != end)) - out->push_back(full.substr(start, found - start)); - start = found + 1; - } -} - -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars) { - chars->clear(); - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - assert((str[i] & 0xF8) <= 0xF0); - if ((str[i] & 0x80) == 0x00) { - // The first 128 characters (US-ASCII) in UTF-8 format only need one byte. - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - // The next 1,920 characters need two bytes to encode, - // which covers the remainder of almost all Latin-script alphabets. - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - // Three bytes are needed for characters in the rest of - // the Basic Multilingual Plane, which contains virtually all characters - // in common use, including most Chinese, Japanese and Korean characters. - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - // Four bytes are needed for characters in the other planes of Unicode, - // which include less common CJK characters, various historic scripts, - // mathematical symbols, and emoji (pictographic symbols). 
- bytes = 4; - } - chars->push_back(str.substr(i, bytes)); - } -} - -int UTF8StringLength(const std::string& str) { - int len = 0; - int bytes = 1; - for (size_t i = 0; i < str.length(); i += bytes) { - if ((str[i] & 0x80) == 0x00) { - bytes = 1; - } else if ((str[i] & 0xE0) == 0xC0) { - bytes = 2; - } else if ((str[i] & 0xF0) == 0xE0) { - bytes = 3; - } else if ((str[i] & 0xF8) == 0xF0) { - bytes = 4; - } - ++len; - } - return len; -} - -bool CheckEnglishChar(const std::string& ch) { - // all english characters should be encoded in one byte - if (ch.size() != 1) return false; - // english words may contain apostrophe, i.e., "He's" - return isalpha(ch[0]) || ch[0] == '\''; -} - -bool CheckEnglishWord(const std::string& word) { - std::vector chars; - SplitUTF8StringToChars(word, &chars); - for (size_t k = 0; k < chars.size(); k++) { - if (!CheckEnglishChar(chars[k])) { - return false; - } - } - return true; -} - -std::string JoinString(const std::string& c, - const std::vector& strs) { - std::string result; - if (strs.size() > 0) { - for (int i = 0; i < strs.size() - 1; i++) { - result += (strs[i] + c); - } - result += strs.back(); - } - return result; -} - -bool IsAlpha(const std::string& str) { - for (size_t i = 0; i < str.size(); i++) { - if (!isalpha(str[i])) { - return false; - } - } - return true; -} - -std::string ProcessBlank(const std::string& str, bool lowercase) { - std::string result; - if (!str.empty()) { - std::vector chars; - SplitUTF8StringToChars(Trim(str), &chars); - - for (std::string& ch : chars) { - if (ch != kSpaceSymbol) { - result.append(ch); - } else { - // Ignore consecutive space or located in head - if (!result.empty() && result.back() != ' ') { - result.push_back(' '); - } - } - } - // Ignore tailing space - if (!result.empty() && result.back() == ' ') { - result.pop_back(); - } - // NOTE: convert string to wstring - // see issue 745: https://github.com/wenet-e2e/wenet/issues/745 - std::locale loc(""); - std::wstring_convert, wchar_t> converter; - std::wstring wsresult = converter.from_bytes(result); - for (auto& c : wsresult) { - c = lowercase ? tolower(c, loc) : toupper(c, loc); - } - result = converter.to_bytes(wsresult); - } - return result; -} - -std::string Ltrim(const std::string& str) { - size_t start = str.find_first_not_of(WHITESPACE); - return (start == std::string::npos) ? "" : str.substr(start); -} - -std::string Rtrim(const std::string& str) { - size_t end = str.find_last_not_of(WHITESPACE); - return (end == std::string::npos) ? 
"" : str.substr(0, end + 1); -} - -std::string Trim(const std::string& str) { return Rtrim(Ltrim(str)); } - -std::string JoinPath(const std::string& left, const std::string& right) { - std::string path(left); - if (path.size() && path.back() != '/') { - path.push_back('/'); - } - path.append(right); - return path; -} - -#ifdef _MSC_VER -std::wstring ToWString(const std::string& str) { - unsigned len = str.size() * 2; - setlocale(LC_CTYPE, ""); - wchar_t* p = new wchar_t[len]; - mbstowcs(p, str.c_str(), len); - std::wstring wstr(p); - delete[] p; - return wstr; -} -#endif - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/string.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/string.h deleted file mode 100644 index bf7a52ae09bce45ab7e34a5277652d7ae91bae1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/string.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef UTILS_STRING_H_ -#define UTILS_STRING_H_ - -#include -#include -#include -#include -#include - -#include "fst/symbol-table.h" - -namespace wenet { - -const char WHITESPACE[] = " \n\r\t\f\v"; - -// Split the string with space or tab. -void SplitString(const std::string& str, std::vector* strs); - -void SplitStringToVector(const std::string& full, const char* delim, - bool omit_empty_strings, - std::vector* out); - -// NOTE(Xingchen Song): we add this function to make it possible to -// support multilingual recipe in the future, in which characters of -// different languages are all encoded in UTF-8 format. -// UTF-8 REF: https://en.wikipedia.org/wiki/UTF-8#Encoding -// Split the UTF-8 string into chars. -void SplitUTF8StringToChars(const std::string& str, - std::vector* chars); - -int UTF8StringLength(const std::string& str); - -// Check whether the UTF-8 char is alphabet or '. -bool CheckEnglishChar(const std::string& ch); - -// Check whether the UTF-8 word is only contains alphabet or '. -bool CheckEnglishWord(const std::string& word); - -std::string JoinString(const std::string& c, - const std::vector& strs); - -bool IsAlpha(const std::string& str); - -// Split the UTF-8 string into words by symbol table. -// Return whether not contains oov. -bool SplitUTF8StringToWords( - const std::string& str, - const std::shared_ptr& symbol_table, - std::vector* words); - -// Replace ▁ with space, then remove head, tail and consecutive space. 
-std::string ProcessBlank(const std::string& str, bool lowercase);
-
-std::string Ltrim(const std::string& str);
-
-std::string Rtrim(const std::string& str);
-
-std::string Trim(const std::string& str);
-
-std::string JoinPath(const std::string& left, const std::string& right);
-
-#ifdef _MSC_VER
-std::wstring ToWString(const std::string& str);
-#endif
-
-}  // namespace wenet
-
-#endif  // UTILS_STRING_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/thread_pool.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/thread_pool.h
deleted file mode 100644
index a78162995d90bf079ad091cf14cb9f2cd4476d05..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/thread_pool.h
+++ /dev/null
@@ -1,113 +0,0 @@
-// Copyright (c) 2012 Jakob Progsch, Václav Zeman
-
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-
-// 1. The origin of this software must not be misrepresented; you must not
-// claim that you wrote the original software. If you use this software
-// in a product, an acknowledgment in the product documentation would be
-// appreciated but is not required.
-
-// 2. Altered source versions must be plainly marked as such, and must not be
-// misrepresented as being the original software.
-
-// 3. This notice may not be removed or altered from any source
-// distribution.
-
-#ifndef UTILS_THREAD_POOL_H_
-#define UTILS_THREAD_POOL_H_
-
-#include <condition_variable>
-#include <functional>
-#include <future>
-#include <memory>
-#include <mutex>
-#include <queue>
-#include <stdexcept>
-#include <thread>
-#include <utility>
-#include <vector>
-
-class ThreadPool {
- public:
-  explicit ThreadPool(size_t);
-  template <class F, class... Args>
-  auto enqueue(F&& f, Args&&... args)
-      -> std::future<typename std::result_of<F(Args...)>::type>;
-  ~ThreadPool();
-
- private:
-  // need to keep track of threads so we can join them
-  std::vector<std::thread> workers;
-  // the task queue
-  std::queue<std::function<void()> > tasks;
-
-  // synchronization
-  std::mutex queue_mutex;
-  std::condition_variable condition;
-  bool stop;
-};
-
-// the constructor just launches some amount of workers
-inline ThreadPool::ThreadPool(size_t threads) : stop(false) {
-  for (size_t i = 0; i < threads; ++i)
-    workers.emplace_back([this] {
-      for (;;) {
-        std::function<void()> task;
-
-        {
-          std::unique_lock<std::mutex> lock(this->queue_mutex);
-          this->condition.wait(
-              lock, [this] { return this->stop || !this->tasks.empty(); });
-          if (this->stop && this->tasks.empty()) return;
-          task = std::move(this->tasks.front());
-          this->tasks.pop();
-        }
-
-        task();
-      }
-    });
-}
-
-// add new work item to the pool
-template <class F, class... Args>
-auto ThreadPool::enqueue(F&& f, Args&&... args)
-    -> std::future<typename std::result_of<F(Args...)>::type> {
-  using return_type = typename std::result_of<F(Args...)>::type;
-
-  auto task = std::make_shared<std::packaged_task<return_type()> >(
-      std::bind(std::forward<F>(f), std::forward<Args>(args)...));
-
-  std::future<return_type> res = task->get_future();
-  {
-    std::unique_lock<std::mutex> lock(queue_mutex);
-
-    // don't allow enqueueing after stopping the pool
-    if (stop) {
-      throw std::runtime_error("enqueue on stopped ThreadPool");
-    }
-
-    tasks.emplace([task]() { (*task)(); });
-  }
-  condition.notify_one();
-  return res;
-}
-
-// the destructor joins all threads
-inline ThreadPool::~ThreadPool() {
-  {
-    std::unique_lock<std::mutex> lock(queue_mutex);
-    stop = true;
-  }
-  condition.notify_all();
-  for (std::thread& worker : workers) {
-    worker.join();
-  }
-}
-
-#endif  // UTILS_THREAD_POOL_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/timer.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/timer.h
deleted file mode 100644
index 068519f98d140ba0eef68babe2ad2fdcb798c074..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/timer.h
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (c) 2021 Mobvoi Inc (Binbin Zhang)
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef UTILS_TIMER_H_
-#define UTILS_TIMER_H_
-
-#include <chrono>
-
-namespace wenet {
-
-class Timer {
- public:
-  Timer() : time_start_(std::chrono::steady_clock::now()) {}
-  void Reset() { time_start_ = std::chrono::steady_clock::now(); }
-  // return int in milliseconds
-  int Elapsed() const {
-    auto time_now = std::chrono::steady_clock::now();
-    return std::chrono::duration_cast<std::chrono::milliseconds>(time_now -
-                                                                 time_start_)
-        .count();
-  }
-
- private:
-  std::chrono::time_point<std::chrono::steady_clock> time_start_;
-};
-}  // namespace wenet
-
-#endif  // UTILS_TIMER_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/utils.cc b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/utils.cc
deleted file mode 100644
index c37e36c6e9f629e0a4b11cf21a791aefd58b659f..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/utils.cc
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng)
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
- -#include "utils/utils.h" - -#include -#include -#include -#include -#include -#include - -#include "utils/log.h" - -namespace wenet { - -float LogAdd(float x, float y) { - static float num_min = -std::numeric_limits::max(); - if (x <= num_min) return y; - if (y <= num_min) return x; - float xmax = std::max(x, y); - return std::log(std::exp(x - xmax) + std::exp(y - xmax)) + xmax; -} - -template -struct ValueComp { - bool operator()(const std::pair& lhs, - const std::pair& rhs) const { - return lhs.first > rhs.first || - (lhs.first == rhs.first && lhs.second < rhs.second); - } -}; - -// We refer the pytorch topk implementation -// https://github.com/pytorch/pytorch/blob/master/caffe2/operators/top_k.cc -template -void TopK(const std::vector& data, int32_t k, std::vector* values, - std::vector* indices) { - std::vector> heap_data; - int n = data.size(); - for (int32_t i = 0; i < k && i < n; ++i) { - heap_data.emplace_back(data[i], i); - } - std::priority_queue, std::vector>, - ValueComp> - pq(ValueComp(), std::move(heap_data)); - for (int32_t i = k; i < n; ++i) { - if (pq.top().first < data[i]) { - pq.pop(); - pq.emplace(data[i], i); - } - } - - values->resize(std::min(k, n)); - indices->resize(std::min(k, n)); - int32_t cur = values->size() - 1; - while (!pq.empty()) { - const auto& item = pq.top(); - (*values)[cur] = item.first; - (*indices)[cur] = item.second; - pq.pop(); - cur -= 1; - } -} - -template void TopK(const std::vector& data, int32_t k, - std::vector* values, - std::vector* indices); - -} // namespace wenet diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/utils.h b/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/utils.h deleted file mode 100644 index f9957c0b6e8ae27d9260e75cf55e786055827801..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/raspberrypi/utils/utils.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-
-#ifndef UTILS_UTILS_H_
-#define UTILS_UTILS_H_
-
-#include <cstdint>
-#include <limits>
-#include <vector>
-
-namespace wenet {
-
-#define WENET_DISALLOW_COPY_AND_ASSIGN(Type) \
-  Type(const Type&) = delete;                \
-  Type& operator=(const Type&) = delete;
-
-const float kFloatMax = std::numeric_limits<float>::max();
-// kSpaceSymbol in UTF-8 is: ▁
-const char kSpaceSymbol[] = "\xe2\x96\x81";
-
-// Return the sum of two probabilities in log scale
-float LogAdd(float x, float y);
-
-template <typename T>
-void TopK(const std::vector<T>& data, int32_t k, std::vector<T>* values,
-          std::vector<int32_t>* indices);
-
-}  // namespace wenet
-
-#endif  // UTILS_UTILS_H_
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/web/README.md b/models/audio/speech_recognition/conformer/igie/wenet/runtime/web/README.md
deleted file mode 100644
index 08da0ad2a4a67a4c197c7e29b48de6a652294952..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/web/README.md
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-pip3 install transformers datasets h5py==3.1.0 tqdm argparse -U
-
-DIR_NAME='test_models'
-
-if [[ ! -d ${DIR_NAME} ]]; then
-    mkdir -p ${DIR_NAME}
-fi
-
-if [[ ! -f "${DIR_NAME}/vocab.txt" ]]; then
-    wget 'http://10.113.3.3/data/Model/bert_squad/vocab.txt' -P ${DIR_NAME}
-fi
-
-if [[ ! -f "${DIR_NAME}/train.json" ]]; then
-    wget 'http://10.113.3.3/data/Model/bert_squad/train.json' -P ${DIR_NAME}
-fi
-
-if [[ ! -f "${DIR_NAME}/dev.json" ]]; then
-    wget 'http://10.113.3.3/data/Model/bert_squad/dev.json' -P ${DIR_NAME}
-fi
-
-
-if [[ ! -f "${DIR_NAME}/tokenizer_config.json" ]]; then
-    wget 'http://10.113.3.3/data/Model/bert_squad/tokenizer_config.json' -P ${DIR_NAME}
-fi
-
-if [[ ! -f "${DIR_NAME}/config.json" ]]; then
-    wget 'http://10.113.3.3/data/Model/bert_squad/config.json' -P ${DIR_NAME}
-fi
-
-# model_name="base"
-
-# if [[ ${model_name} == 'base' ]]; then
-# if [[ ! -f "${DIR_NAME}/bert_base_quant.hdf5" ]]; then
-# wget 'http://10.113.3.3/data/Model/bert_squad/bert_base_quant.hdf5' -P ${DIR_NAME}
-# wget 'http://10.113.3.3/data/Model/bert_squad/bert_base_quant.hdf5.md5' -P ${DIR_NAME}
-# fi
-# fi
-
-# model_name="large"
-
-# if [[ ${model_name} == 'large' ]]; then
-# if [[ !
-f "${DIR_NAME}/bert_large_quant.hdf5" ]]; then -# wget 'http://10.113.3.3/data/Model/bert_squad/bert_large_quant.hdf5' -P ${DIR_NAME} -# wget 'http://10.113.3.3/data/Model/bert_squad/bert_large_quant.hdf5.md5' -P ${DIR_NAME} -# fi -# fi \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/web/app.py b/models/audio/speech_recognition/conformer/igie/wenet/runtime/web/app.py deleted file mode 100644 index 85b63efcf5c11a7410e847d14c3cb1c8db2995b5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/web/app.py +++ /dev/null @@ -1,22 +0,0 @@ -import json -import gradio as gr -import numpy as np -import torch -import wenetruntime as wenet - -torch.manual_seed(777) # for lint - -wenet.set_log_level(2) -decoder = wenet.Decoder(lang='chs') - -def recognition(audio): - sr, y = audio - assert sr in [48000, 16000] - if sr == 48000: # Optional resample to 16000 - y = (y / max(np.max(y), 1) * 32767)[::3].astype("int16") - ans = decoder.decode(y.tobytes(), True) - return json.loads(ans) - -text = "Speech Recognition in WeNet | 基于 WeNet 的语音识别" -gr.Interface(recognition, inputs="mic", outputs="json", - description=text).launch() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/runtime/web/requirements.txt b/models/audio/speech_recognition/conformer/igie/wenet/runtime/web/requirements.txt deleted file mode 100644 index ef9d340d3c798affa06d885d7f59ada58cd37e93..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/runtime/web/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -wenetruntime -gradio diff --git a/models/audio/speech_recognition/conformer/igie/wenet/test/resources/aishell2.words.txt b/models/audio/speech_recognition/conformer/igie/wenet/test/resources/aishell2.words.txt deleted file mode 100644 index 5478d9ad9ee70bc7e1f98a6f003b7f7260b9f1ef..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/test/resources/aishell2.words.txt +++ /dev/null @@ -1,5235 +0,0 @@ - 0 - 1 -' 2 -( 3 -) 4 -A 5 -𫖯 6 -a 7 -A 8 -b 9 -B 10 -c 11 -C 12 -d 13 -D 14 -e 15 -E 16 -f 17 -F 18 -g 19 -G 20 -h 21 -H 22 -i 23 -I 24 -j 25 -J 26 -k 27 -K 28 -l 29 -L 30 -m 31 -M 32 -n 33 -N 34 -o 35 -O 36 -p 37 -P 38 -q 39 -Q 40 -r 41 -R 42 -s 43 -S 44 - 45 -t 46 -T 47 -u 48 -U 49 -v 50 -V 51 -w 52 -W 53 -x 54 -X 55 -y 56 -Y 57 -z 58 -Z 59 -一 60 -丁 61 -七 62 -万 63 -丈 64 -三 65 -上 66 -下 67 -不 68 -与 69 -丐 70 -丑 71 -专 72 -且 73 -丕 74 -世 75 -丘 76 -丙 77 -业 78 -丛 79 -东 80 -丝 81 -丞 82 -丢 83 -两 84 -严 85 -丧 86 -个 87 -丫 88 -中 89 -丰 90 -串 91 -临 92 -丸 93 -丹 94 -为 95 -主 96 -丽 97 -举 98 -乃 99 -久 100 -么 101 -义 102 -之 103 -乌 104 -乍 105 -乎 106 -乏 107 -乐 108 -乒 109 -乓 110 -乔 111 -乖 112 -乘 113 -乙 114 -九 115 -乞 116 -也 117 -习 118 -乡 119 -书 120 -买 121 -乱 122 -乳 123 -乾 124 -了 125 -予 126 -争 127 -事 128 -二 129 -于 130 -亏 131 -云 132 -互 133 -五 134 -井 135 -亘 136 -亚 137 -些 138 -亟 139 -亡 140 -亢 141 -交 142 -亥 143 -亦 144 -产 145 -亨 146 -亩 147 -享 148 -京 149 -亭 150 -亮 151 -亲 152 -亳 153 -亵 154 -人 155 -亿 156 -什 157 -仁 158 -仄 159 -仅 160 -仆 161 -仇 162 -今 163 -介 164 -仍 165 -从 166 -仑 167 -仓 168 -仔 169 -仕 170 -他 171 -仗 172 -付 173 -仙 174 -仞 175 -仟 176 -仡 177 -代 178 -令 179 -以 180 -仨 181 -仪 182 -们 183 -仰 184 -仲 185 -件 186 -价 187 -任 188 -份 189 -仿 190 -企 191 -伉 192 -伊 193 -伍 194 -伎 195 -伏 196 -伐 197 -休 198 -众 199 -优 200 -伙 201 -会 202 -伞 203 -伟 204 -传 205 -伢 206 -伤 207 -伦 208 -伪 209 -伫 210 -伯 211 -估 212 -伴 213 -伶 214 -伸 215 -伺 216 -似 217 -伽 218 -佃 219 -但 220 -位 221 -低 222 -住 223 -佐 
224 -佑 225 -体 226 -何 227 -佗 228 -佘 229 -余 230 -佚 231 -佛 232 -作 233 -佝 234 -佟 235 -你 236 -佣 237 -佩 238 -佬 239 -佯 240 -佰 241 -佳 242 -佶 243 -佻 244 -佼 245 -使 246 -侃 247 -侄 248 -侈 249 -例 250 -侍 251 -侏 252 -侑 253 -侗 254 -供 255 -依 256 -侠 257 -侣 258 -侥 259 -侦 260 -侧 261 -侨 262 -侬 263 -侮 264 -侯 265 -侵 266 -便 267 -促 268 -俄 269 -俊 270 -俎 271 -俏 272 -俐 273 -俑 274 -俗 275 -俘 276 -俚 277 -保 278 -俞 279 -俟 280 -信 281 -俨 282 -俩 283 -俪 284 -俭 285 -修 286 -俯 287 -俱 288 -俸 289 -俺 290 -俾 291 -倌 292 -倍 293 -倒 294 -倔 295 -倘 296 -候 297 -倚 298 -倜 299 -借 300 -倡 301 -倦 302 -倩 303 -倪 304 -倭 305 -债 306 -值 307 -倾 308 -偃 309 -假 310 -偈 311 -偌 312 -偎 313 -偏 314 -偓 315 -偕 316 -做 317 -停 318 -健 319 -偶 320 -偷 321 -偻 322 -偿 323 -傀 324 -傅 325 -傍 326 -傣 327 -傥 328 -储 329 -催 330 -傲 331 -傻 332 -像 333 -僚 334 -僧 335 -僮 336 -僵 337 -僻 338 -儋 339 -儒 340 -儡 341 -儿 342 -兀 343 -允 344 -元 345 -兄 346 -充 347 -兆 348 -先 349 -光 350 -克 351 -免 352 -兑 353 -兔 354 -兖 355 -党 356 -兜 357 -兢 358 -入 359 -全 360 -八 361 -公 362 -六 363 -兮 364 -兰 365 -共 366 -关 367 -兴 368 -兵 369 -其 370 -具 371 -典 372 -兹 373 -养 374 -兼 375 -兽 376 -冀 377 -内 378 -冈 379 -冉 380 -册 381 -再 382 -冒 383 -冕 384 -冗 385 -写 386 -军 387 -农 388 -冠 389 -冢 390 -冤 391 -冥 392 -冬 393 -冯 394 -冰 395 -冲 396 -决 397 -况 398 -冶 399 -冷 400 -冻 401 -冼 402 -冽 403 -净 404 -凄 405 -准 406 -凇 407 -凉 408 -凋 409 -凌 410 -减 411 -凑 412 -凛 413 -凝 414 -几 415 -凡 416 -凤 417 -凭 418 -凯 419 -凰 420 -凳 421 -凶 422 -凸 423 -凹 424 -出 425 -击 426 -函 427 -凿 428 -刀 429 -刁 430 -刃 431 -分 432 -切 433 -刊 434 -刍 435 -刎 436 -刑 437 -划 438 -列 439 -刘 440 -则 441 -刚 442 -创 443 -初 444 -删 445 -判 446 -刨 447 -利 448 -别 449 -刮 450 -到 451 -制 452 -刷 453 -券 454 -刹 455 -刺 456 -刻 457 -刽 458 -剁 459 -剂 460 -剃 461 -削 462 -剌 463 -前 464 -剐 465 -剑 466 -剔 467 -剖 468 -剜 469 -剥 470 -剧 471 -剩 472 -剪 473 -副 474 -割 475 -剽 476 -剿 477 -劈 478 -力 479 -劝 480 -办 481 -功 482 -加 483 -务 484 -劣 485 -动 486 -助 487 -努 488 -劫 489 -劭 490 -励 491 -劲 492 -劳 493 -劵 494 -劾 495 -势 496 -勃 497 -勇 498 -勉 499 -勋 500 -勐 501 -勒 502 -勘 503 -募 504 -勤 505 -勺 506 -勾 507 -勿 508 -匀 509 -包 510 -匆 511 -匈 512 -匏 513 -匕 514 -化 515 -北 516 -匙 517 -匝 518 -匠 519 -匡 520 -匣 521 -匪 522 -匮 523 -匹 524 -区 525 -医 526 -匾 527 -匿 528 -十 529 -千 530 -升 531 -午 532 -卉 533 -半 534 -华 535 -协 536 -卑 537 -卒 538 -卓 539 -单 540 -卖 541 -南 542 -博 543 -卜 544 -卞 545 -占 546 -卡 547 -卢 548 -卤 549 -卦 550 -卧 551 -卫 552 -卯 553 -印 554 -危 555 -卲 556 -即 557 -却 558 -卵 559 -卷 560 -卸 561 -卿 562 -厂 563 -厄 564 -厅 565 -历 566 -厉 567 -压 568 -厌 569 -厕 570 -厘 571 -厚 572 -厝 573 -原 574 -厢 575 -厥 576 -厦 577 -厨 578 -厩 579 -厮 580 -去 581 -县 582 -叁 583 -参 584 -又 585 -叉 586 -及 587 -友 588 -双 589 -反 590 -发 591 -叔 592 -取 593 -受 594 -变 595 -叙 596 -叛 597 -叠 598 -口 599 -古 600 -句 601 -另 602 -叨 603 -叩 604 -只 605 -叫 606 -召 607 -叭 608 -叮 609 -可 610 -台 611 -叱 612 -史 613 -右 614 -叵 615 -叶 616 -号 617 -司 618 -叹 619 -叼 620 -叽 621 -吁 622 -吃 623 -各 624 -吆 625 -合 626 -吉 627 -吊 628 -吋 629 -同 630 -名 631 -后 632 -吏 633 -吐 634 -向 635 -吒 636 -吓 637 -吕 638 -吖 639 -吗 640 -君 641 -吝 642 -吞 643 -吟 644 -吠 645 -否 646 -吧 647 -吨 648 -吩 649 -含 650 -听 651 -吭 652 -吮 653 -启 654 -吱 655 -吴 656 -吵 657 -吸 658 -吹 659 -吻 660 -吼 661 -吾 662 -呀 663 -呃 664 -呆 665 -呈 666 -告 667 -呐 668 -呕 669 -呗 670 -员 671 -呛 672 -呜 673 -呢 674 -呦 675 -周 676 -呱 677 -呲 678 -味 679 -呵 680 -呷 681 -呸 682 -呻 683 -呼 684 -命 685 -咀 686 -咂 687 -咄 688 -咆 689 -咋 690 -和 691 -咎 692 -咏 693 -咐 694 -咒 695 -咔 696 -咕 697 -咖 698 -咘 699 -咙 700 -咚 701 -咝 702 -咣 703 -咤 704 -咦 705 -咧 706 -咨 707 -咩 708 -咪 709 -咫 710 -咬 711 -咭 712 -咯 713 -咱 714 -咳 715 -咸 716 -咻 717 -咽 718 -哀 719 -品 720 -哂 721 -哄 722 -哆 723 -哇 724 -哈 725 -哉 726 -响 727 -哎 728 -哐 729 -哑 730 -哒 731 
-哔 732 -哕 733 -哗 734 -哟 735 -哥 736 -哦 737 -哨 738 -哩 739 -哪 740 -哭 741 -哮 742 -哲 743 -哺 744 -哼 745 -哽 746 -唁 747 -唆 748 -唇 749 -唉 750 -唏 751 -唐 752 -唑 753 -唛 754 -唠 755 -唢 756 -唤 757 -唧 758 -唬 759 -售 760 -唯 761 -唰 762 -唱 763 -唳 764 -唷 765 -唾 766 -啃 767 -啄 768 -商 769 -啊 770 -啕 771 -啖 772 -啜 773 -啡 774 -啤 775 -啥 776 -啦 777 -啧 778 -啪 779 -啬 780 -啰 781 -啲 782 -啵 783 -啶 784 -啸 785 -啼 786 -啾 787 -喀 788 -喁 789 -喂 790 -喃 791 -善 792 -喆 793 -喇 794 -喉 795 -喊 796 -喋 797 -喔 798 -喘 799 -喜 800 -喝 801 -喟 802 -喧 803 -喱 804 -喳 805 -喵 806 -喷 807 -喻 808 -喽 809 -嗄 810 -嗅 811 -嗑 812 -嗒 813 -嗓 814 -嗔 815 -嗖 816 -嗜 817 -嗝 818 -嗡 819 -嗣 820 -嗤 821 -嗦 822 -嗨 823 -嗪 824 -嗫 825 -嗬 826 -嗯 827 -嗲 828 -嗷 829 -嗽 830 -嘀 831 -嘈 832 -嘉 833 -嘎 834 -嘏 835 -嘘 836 -嘛 837 -嘞 838 -嘟 839 -嘣 840 -嘭 841 -嘱 842 -嘲 843 -嘴 844 -嘶 845 -嘹 846 -嘻 847 -嘿 848 -噌 849 -噎 850 -噗 851 -噘 852 -噙 853 -噜 854 -噢 855 -噤 856 -器 857 -噩 858 -噪 859 -噬 860 -噱 861 -噶 862 -噻 863 -噼 864 -嚎 865 -嚏 866 -嚓 867 -嚣 868 -嚷 869 -嚼 870 -囊 871 -囍 872 -囔 873 -囗 874 -囚 875 -四 876 -回 877 -因 878 -团 879 -囤 880 -囧 881 -囫 882 -园 883 -囯 884 -困 885 -囱 886 -围 887 -囵 888 -囹 889 -固 890 -国 891 -图 892 -圃 893 -圄 894 -圆 895 -圈 896 -土 897 -圣 898 -在 899 -圩 900 -圪 901 -圭 902 -地 903 -圳 904 -圹 905 -场 906 -圻 907 -圾 908 -址 909 -坂 910 -均 911 -坊 912 -坍 913 -坎 914 -坏 915 -坐 916 -坑 917 -块 918 -坚 919 -坛 920 -坝 921 -坞 922 -坟 923 -坠 924 -坡 925 -坤 926 -坦 927 -坨 928 -坩 929 -坪 930 -坭 931 -坯 932 -坳 933 -坷 934 -坻 935 -垂 936 -垃 937 -垄 938 -垅 939 -型 940 -垌 941 -垒 942 -垚 943 -垛 944 -垡 945 -垢 946 -垣 947 -垤 948 -垦 949 -垩 950 -垫 951 -垭 952 -垮 953 -埂 954 -埃 955 -埇 956 -埋 957 -城 958 -埔 959 -埕 960 -埚 961 -埝 962 -域 963 -埠 964 -埭 965 -埸 966 -培 967 -基 968 -堀 969 -堂 970 -堃 971 -堆 972 -堇 973 -堕 974 -堡 975 -堤 976 -堪 977 -堰 978 -堵 979 -堺 980 -塌 981 -塍 982 -塑 983 -塔 984 -塘 985 -塞 986 -填 987 -塬 988 -塾 989 -境 990 -墅 991 -墉 992 -墓 993 -増 994 -墙 995 -增 996 -墟 997 -墨 998 -墩 999 -壁 1000 -壑 1001 -壕 1002 -壤 1003 -士 1004 -壬 1005 -壮 1006 -声 1007 -壳 1008 -壶 1009 -壹 1010 -处 1011 -备 1012 -复 1013 -夏 1014 -夔 1015 -夕 1016 -外 1017 -夙 1018 -多 1019 -夜 1020 -够 1021 -大 1022 -天 1023 -太 1024 -夫 1025 -夭 1026 -央 1027 -夯 1028 -失 1029 -头 1030 -夷 1031 -夸 1032 -夹 1033 -夺 1034 -奁 1035 -奂 1036 -奄 1037 -奇 1038 -奈 1039 -奉 1040 -奋 1041 -奎 1042 -奏 1043 -契 1044 -奔 1045 -奕 1046 -奖 1047 -套 1048 -奘 1049 -奚 1050 -奠 1051 -奢 1052 -奥 1053 -女 1054 -奴 1055 -奶 1056 -奸 1057 -她 1058 -好 1059 -如 1060 -妃 1061 -妄 1062 -妆 1063 -妇 1064 -妈 1065 -妊 1066 -妍 1067 -妒 1068 -妓 1069 -妖 1070 -妙 1071 -妞 1072 -妤 1073 -妥 1074 -妨 1075 -妩 1076 -妪 1077 -妫 1078 -妮 1079 -妯 1080 -妲 1081 -妹 1082 -妻 1083 -妾 1084 -姆 1085 -姊 1086 -始 1087 -姐 1088 -姑 1089 -姓 1090 -委 1091 -姗 1092 -姚 1093 -姜 1094 -姝 1095 -姣 1096 -姥 1097 -姨 1098 -姬 1099 -姻 1100 -姿 1101 -威 1102 -娃 1103 -娄 1104 -娅 1105 -娆 1106 -娇 1107 -娈 1108 -娉 1109 -娌 1110 -娓 1111 -娘 1112 -娜 1113 -娟 1114 -娠 1115 -娣 1116 -娥 1117 -娩 1118 -娱 1119 -娲 1120 -娴 1121 -娶 1122 -娼 1123 -婀 1124 -婆 1125 -婉 1126 -婊 1127 -婕 1128 -婚 1129 -婢 1130 -婧 1131 -婪 1132 -婴 1133 -婵 1134 -婶 1135 -婷 1136 -婺 1137 -婿 1138 -媒 1139 -媚 1140 -媛 1141 -媞 1142 -媲 1143 -媳 1144 -嫁 1145 -嫂 1146 -嫉 1147 -嫌 1148 -嫒 1149 -嫔 1150 -嫖 1151 -嫚 1152 -嫡 1153 -嫣 1154 -嫦 1155 -嫩 1156 -嫫 1157 -嬅 1158 -嬉 1159 -嬗 1160 -嬛 1161 -嬴 1162 -嬷 1163 -孀 1164 -子 1165 -孑 1166 -孔 1167 -孕 1168 -字 1169 -存 1170 -孙 1171 -孚 1172 -孛 1173 -孜 1174 -孝 1175 -孟 1176 -孢 1177 -季 1178 -孤 1179 -学 1180 -孩 1181 -孪 1182 -孬 1183 -孰 1184 -孱 1185 -孳 1186 -孵 1187 -孺 1188 -孽 1189 -宁 1190 -它 1191 -宅 1192 -宇 1193 -守 1194 -安 1195 -宋 1196 -完 1197 -宏 1198 -宓 1199 -宕 1200 -宗 1201 -官 1202 -宙 1203 -定 1204 -宛 1205 -宜 1206 -宝 1207 -实 1208 -宠 
1209 -审 1210 -客 1211 -宣 1212 -室 1213 -宥 1214 -宦 1215 -宪 1216 -宫 1217 -宰 1218 -害 1219 -宴 1220 -宵 1221 -家 1222 -宸 1223 -容 1224 -宽 1225 -宾 1226 -宿 1227 -寂 1228 -寄 1229 -寅 1230 -密 1231 -寇 1232 -富 1233 -寐 1234 -寒 1235 -寓 1236 -寝 1237 -寞 1238 -察 1239 -寡 1240 -寥 1241 -寨 1242 -寮 1243 -寰 1244 -寸 1245 -对 1246 -寺 1247 -寻 1248 -导 1249 -寿 1250 -封 1251 -射 1252 -尅 1253 -将 1254 -尉 1255 -尊 1256 -小 1257 -少 1258 -尔 1259 -尕 1260 -尖 1261 -尘 1262 -尚 1263 -尝 1264 -尤 1265 -尧 1266 -尬 1267 -就 1268 -尴 1269 -尸 1270 -尹 1271 -尺 1272 -尼 1273 -尽 1274 -尾 1275 -尿 1276 -局 1277 -屁 1278 -层 1279 -居 1280 -屈 1281 -屉 1282 -届 1283 -屋 1284 -屌 1285 -屎 1286 -屏 1287 -屐 1288 -屑 1289 -展 1290 -属 1291 -屠 1292 -屡 1293 -履 1294 -屯 1295 -山 1296 -屹 1297 -屿 1298 -岁 1299 -岂 1300 -岌 1301 -岐 1302 -岑 1303 -岔 1304 -岖 1305 -岗 1306 -岚 1307 -岛 1308 -岩 1309 -岬 1310 -岭 1311 -岱 1312 -岳 1313 -岷 1314 -岸 1315 -峁 1316 -峋 1317 -峒 1318 -峙 1319 -峡 1320 -峥 1321 -峦 1322 -峨 1323 -峪 1324 -峭 1325 -峰 1326 -峻 1327 -崂 1328 -崃 1329 -崆 1330 -崇 1331 -崎 1332 -崔 1333 -崖 1334 -崛 1335 -崧 1336 -崩 1337 -崭 1338 -崮 1339 -崴 1340 -崽 1341 -嵇 1342 -嵊 1343 -嵋 1344 -嵌 1345 -嵘 1346 -嵛 1347 -嵩 1348 -嵬 1349 -嶂 1350 -嶙 1351 -嶝 1352 -巅 1353 -巍 1354 -川 1355 -州 1356 -巡 1357 -巢 1358 -工 1359 -左 1360 -巧 1361 -巨 1362 -巩 1363 -巫 1364 -差 1365 -己 1366 -已 1367 -巳 1368 -巴 1369 -巷 1370 -巾 1371 -币 1372 -市 1373 -布 1374 -帅 1375 -帆 1376 -师 1377 -希 1378 -帐 1379 -帕 1380 -帖 1381 -帘 1382 -帚 1383 -帛 1384 -帜 1385 -帝 1386 -带 1387 -帧 1388 -席 1389 -帮 1390 -帷 1391 -常 1392 -帼 1393 -帽 1394 -幂 1395 -幄 1396 -幅 1397 -幌 1398 -幔 1399 -幕 1400 -幡 1401 -幢 1402 -干 1403 -平 1404 -年 1405 -并 1406 -幸 1407 -幺 1408 -幻 1409 -幼 1410 -幽 1411 -广 1412 -庄 1413 -庆 1414 -庇 1415 -床 1416 -序 1417 -庐 1418 -库 1419 -应 1420 -底 1421 -庖 1422 -店 1423 -庙 1424 -庚 1425 -府 1426 -庞 1427 -废 1428 -度 1429 -座 1430 -庭 1431 -庵 1432 -庶 1433 -康 1434 -庸 1435 -庹 1436 -庾 1437 -廉 1438 -廊 1439 -廓 1440 -廖 1441 -延 1442 -廷 1443 -建 1444 -开 1445 -异 1446 -弃 1447 -弄 1448 -弈 1449 -弊 1450 -弋 1451 -式 1452 -弑 1453 -弓 1454 -引 1455 -弗 1456 -弘 1457 -弛 1458 -弟 1459 -张 1460 -弥 1461 -弦 1462 -弧 1463 -弩 1464 -弭 1465 -弯 1466 -弱 1467 -弹 1468 -强 1469 -弼 1470 -归 1471 -当 1472 -录 1473 -彗 1474 -彝 1475 -形 1476 -彤 1477 -彦 1478 -彩 1479 -彪 1480 -彬 1481 -彭 1482 -彰 1483 -影 1484 -彷 1485 -役 1486 -彻 1487 -彼 1488 -往 1489 -征 1490 -径 1491 -待 1492 -徇 1493 -很 1494 -徉 1495 -徊 1496 -律 1497 -徐 1498 -徒 1499 -得 1500 -徘 1501 -徙 1502 -徜 1503 -御 1504 -徨 1505 -循 1506 -微 1507 -德 1508 -徽 1509 -心 1510 -必 1511 -忆 1512 -忌 1513 -忍 1514 -忏 1515 -忐 1516 -忑 1517 -忒 1518 -忖 1519 -志 1520 -忘 1521 -忙 1522 -忠 1523 -忡 1524 -忤 1525 -忧 1526 -忪 1527 -快 1528 -忱 1529 -念 1530 -忻 1531 -忽 1532 -忿 1533 -怀 1534 -态 1535 -怂 1536 -怄 1537 -怅 1538 -怆 1539 -怎 1540 -怒 1541 -怕 1542 -怖 1543 -怜 1544 -思 1545 -怠 1546 -怡 1547 -急 1548 -怦 1549 -性 1550 -怨 1551 -怪 1552 -怫 1553 -怯 1554 -怵 1555 -总 1556 -怼 1557 -怿 1558 -恁 1559 -恃 1560 -恋 1561 -恍 1562 -恐 1563 -恒 1564 -恕 1565 -恙 1566 -恢 1567 -恣 1568 -恤 1569 -恨 1570 -恩 1571 -恪 1572 -恬 1573 -恭 1574 -息 1575 -恰 1576 -恳 1577 -恶 1578 -恸 1579 -恺 1580 -恻 1581 -恼 1582 -恿 1583 -悄 1584 -悉 1585 -悌 1586 -悍 1587 -悔 1588 -悖 1589 -悚 1590 -悟 1591 -悠 1592 -患 1593 -悦 1594 -您 1595 -悬 1596 -悭 1597 -悯 1598 -悱 1599 -悲 1600 -悴 1601 -悸 1602 -悻 1603 -悼 1604 -情 1605 -惆 1606 -惊 1607 -惋 1608 -惑 1609 -惕 1610 -惚 1611 -惜 1612 -惟 1613 -惠 1614 -惦 1615 -惧 1616 -惨 1617 -惩 1618 -惫 1619 -惬 1620 -惭 1621 -惮 1622 -惯 1623 -惰 1624 -想 1625 -惶 1626 -惹 1627 -惺 1628 -愁 1629 -愈 1630 -愉 1631 -意 1632 -愕 1633 -愚 1634 -感 1635 -愣 1636 -愤 1637 -愧 1638 -愫 1639 -愿 1640 -慈 1641 -慌 1642 -慎 1643 -慑 1644 -慕 1645 -慢 1646 -慧 1647 -慨 1648 -慰 1649 -慵 1650 -慷 1651 -憋 1652 -憎 
1653 -憔 1654 -憧 1655 -憨 1656 -憩 1657 -憬 1658 -憷 1659 -憾 1660 -懂 1661 -懈 1662 -懊 1663 -懋 1664 -懑 1665 -懒 1666 -懦 1667 -懵 1668 -懿 1669 -戈 1670 -戊 1671 -戌 1672 -戍 1673 -戎 1674 -戏 1675 -成 1676 -我 1677 -戒 1678 -或 1679 -戗 1680 -战 1681 -戚 1682 -戛 1683 -戟 1684 -截 1685 -戬 1686 -戮 1687 -戳 1688 -戴 1689 -户 1690 -戾 1691 -房 1692 -所 1693 -扁 1694 -扇 1695 -扈 1696 -扉 1697 -手 1698 -才 1699 -扎 1700 -扑 1701 -扒 1702 -打 1703 -扔 1704 -托 1705 -扛 1706 -扞 1707 -扣 1708 -扦 1709 -执 1710 -扩 1711 -扪 1712 -扫 1713 -扬 1714 -扭 1715 -扮 1716 -扯 1717 -扰 1718 -扳 1719 -扶 1720 -批 1721 -扼 1722 -找 1723 -承 1724 -技 1725 -抄 1726 -抉 1727 -把 1728 -抑 1729 -抒 1730 -抓 1731 -投 1732 -抖 1733 -抗 1734 -折 1735 -抚 1736 -抛 1737 -抠 1738 -抡 1739 -抢 1740 -护 1741 -报 1742 -抨 1743 -披 1744 -抬 1745 -抱 1746 -抵 1747 -抹 1748 -押 1749 -抽 1750 -抿 1751 -拂 1752 -拄 1753 -担 1754 -拆 1755 -拇 1756 -拈 1757 -拉 1758 -拌 1759 -拍 1760 -拎 1761 -拐 1762 -拒 1763 -拓 1764 -拔 1765 -拖 1766 -拗 1767 -拘 1768 -拙 1769 -拚 1770 -招 1771 -拜 1772 -拟 1773 -拢 1774 -拣 1775 -拥 1776 -拦 1777 -拧 1778 -拨 1779 -择 1780 -括 1781 -拭 1782 -拮 1783 -拯 1784 -拱 1785 -拳 1786 -拴 1787 -拷 1788 -拼 1789 -拽 1790 -拾 1791 -拿 1792 -持 1793 -挂 1794 -指 1795 -按 1796 -挎 1797 -挑 1798 -挖 1799 -挚 1800 -挛 1801 -挝 1802 -挞 1803 -挟 1804 -挠 1805 -挡 1806 -挣 1807 -挤 1808 -挥 1809 -挨 1810 -挪 1811 -挫 1812 -振 1813 -挺 1814 -挽 1815 -捂 1816 -捅 1817 -捆 1818 -捉 1819 -捋 1820 -捌 1821 -捍 1822 -捎 1823 -捏 1824 -捐 1825 -捕 1826 -捞 1827 -损 1828 -捡 1829 -换 1830 -捣 1831 -捧 1832 -据 1833 -捶 1834 -捷 1835 -捺 1836 -捻 1837 -掀 1838 -掂 1839 -掇 1840 -授 1841 -掉 1842 -掌 1843 -掏 1844 -掐 1845 -排 1846 -掖 1847 -掘 1848 -掠 1849 -探 1850 -掣 1851 -接 1852 -控 1853 -推 1854 -掩 1855 -措 1856 -掬 1857 -掮 1858 -掰 1859 -掳 1860 -掴 1861 -掷 1862 -掸 1863 -掺 1864 -揄 1865 -揉 1866 -揍 1867 -描 1868 -提 1869 -插 1870 -握 1871 -揣 1872 -揩 1873 -揪 1874 -揭 1875 -援 1876 -揶 1877 -揽 1878 -搀 1879 -搁 1880 -搂 1881 -搅 1882 -搏 1883 -搐 1884 -搓 1885 -搔 1886 -搜 1887 -搞 1888 -搡 1889 -搧 1890 -搪 1891 -搬 1892 -搭 1893 -携 1894 -搽 1895 -摁 1896 -摄 1897 -摆 1898 -摇 1899 -摈 1900 -摊 1901 -摒 1902 -摔 1903 -摘 1904 -摞 1905 -摧 1906 -摩 1907 -摸 1908 -摹 1909 -撂 1910 -撅 1911 -撇 1912 -撑 1913 -撒 1914 -撕 1915 -撞 1916 -撤 1917 -撩 1918 -撬 1919 -播 1920 -撮 1921 -撰 1922 -撵 1923 -撸 1924 -撺 1925 -撼 1926 -擀 1927 -擂 1928 -擅 1929 -操 1930 -擎 1931 -擒 1932 -擘 1933 -擞 1934 -擢 1935 -擦 1936 -攀 1937 -攒 1938 -攘 1939 -攥 1940 -攫 1941 -支 1942 -收 1943 -攸 1944 -改 1945 -攻 1946 -放 1947 -政 1948 -故 1949 -效 1950 -敌 1951 -敏 1952 -救 1953 -敕 1954 -敖 1955 -教 1956 -敛 1957 -敝 1958 -敞 1959 -敢 1960 -散 1961 -敦 1962 -敬 1963 -数 1964 -敲 1965 -整 1966 -敷 1967 -文 1968 -斋 1969 -斌 1970 -斐 1971 -斑 1972 -斓 1973 -斗 1974 -料 1975 -斛 1976 -斜 1977 -斟 1978 -斡 1979 -斤 1980 -斥 1981 -斧 1982 -斩 1983 -断 1984 -斯 1985 -新 1986 -方 1987 -施 1988 -旁 1989 -旅 1990 -旋 1991 -旌 1992 -族 1993 -旖 1994 -旗 1995 -无 1996 -既 1997 -日 1998 -旦 1999 -旧 2000 -旨 2001 -早 2002 -旬 2003 -旭 2004 -旮 2005 -旯 2006 -旱 2007 -时 2008 -旷 2009 -旺 2010 -旻 2011 -昀 2012 -昂 2013 -昆 2014 -昊 2015 -昌 2016 -明 2017 -昏 2018 -易 2019 -昔 2020 -昕 2021 -昙 2022 -昝 2023 -星 2024 -映 2025 -春 2026 -昧 2027 -昨 2028 -昭 2029 -是 2030 -昱 2031 -昴 2032 -昵 2033 -昶 2034 -昼 2035 -显 2036 -晃 2037 -晋 2038 -晌 2039 -晏 2040 -晒 2041 -晓 2042 -晔 2043 -晕 2044 -晖 2045 -晗 2046 -晚 2047 -晞 2048 -晟 2049 -晤 2050 -晦 2051 -晨 2052 -普 2053 -景 2054 -晰 2055 -晴 2056 -晶 2057 -晷 2058 -智 2059 -晾 2060 -暂 2061 -暄 2062 -暇 2063 -暌 2064 -暑 2065 -暖 2066 -暗 2067 -暧 2068 -暨 2069 -暮 2070 -暴 2071 -暹 2072 -暾 2073 -曈 2074 -曙 2075 -曜 2076 -曝 2077 -曦 2078 -曰 2079 -曲 2080 -曳 2081 -更 2082 -曹 2083 -曼 2084 -曾 2085 -替 2086 -最 2087 -月 2088 -有 2089 -朋 2090 -服 2091 -朐 2092 -朔 2093 -朕 2094 -朗 2095 -望 2096 -朝 
2097 -期 2098 -朦 2099 -木 2100 -未 2101 -末 2102 -本 2103 -札 2104 -术 2105 -朱 2106 -朴 2107 -朵 2108 -机 2109 -朽 2110 -杀 2111 -杂 2112 -权 2113 -杆 2114 -杈 2115 -杉 2116 -李 2117 -杏 2118 -材 2119 -村 2120 -杓 2121 -杖 2122 -杜 2123 -杞 2124 -束 2125 -杠 2126 -条 2127 -来 2128 -杨 2129 -杭 2130 -杯 2131 -杰 2132 -杳 2133 -杵 2134 -杷 2135 -松 2136 -板 2137 -极 2138 -构 2139 -枇 2140 -枉 2141 -枋 2142 -析 2143 -枕 2144 -林 2145 -枚 2146 -果 2147 -枝 2148 -枞 2149 -枢 2150 -枣 2151 -枥 2152 -枪 2153 -枫 2154 -枭 2155 -枯 2156 -枰 2157 -枳 2158 -架 2159 -枷 2160 -枸 2161 -柃 2162 -柄 2163 -柏 2164 -某 2165 -柑 2166 -柒 2167 -染 2168 -柔 2169 -柘 2170 -柚 2171 -柜 2172 -柞 2173 -柠 2174 -查 2175 -柩 2176 -柬 2177 -柯 2178 -柱 2179 -柳 2180 -柴 2181 -柿 2182 -栀 2183 -栅 2184 -标 2185 -栈 2186 -栋 2187 -栌 2188 -栎 2189 -栏 2190 -树 2191 -栓 2192 -栖 2193 -栗 2194 -校 2195 -栩 2196 -株 2197 -样 2198 -核 2199 -根 2200 -格 2201 -栽 2202 -栾 2203 -桁 2204 -桂 2205 -桃 2206 -框 2207 -案 2208 -桉 2209 -桌 2210 -桎 2211 -桐 2212 -桑 2213 -桓 2214 -桔 2215 -桠 2216 -桢 2217 -档 2218 -桥 2219 -桦 2220 -桨 2221 -桩 2222 -桴 2223 -桶 2224 -桷 2225 -梁 2226 -梅 2227 -梆 2228 -梏 2229 -梓 2230 -梗 2231 -梢 2232 -梦 2233 -梧 2234 -梨 2235 -梭 2236 -梯 2237 -械 2238 -梳 2239 -梵 2240 -检 2241 -棂 2242 -棉 2243 -棋 2244 -棍 2245 -棒 2246 -棕 2247 -棘 2248 -棚 2249 -棠 2250 -棣 2251 -森 2252 -棱 2253 -棵 2254 -棺 2255 -椁 2256 -椅 2257 -椋 2258 -植 2259 -椎 2260 -椒 2261 -椟 2262 -椤 2263 -椭 2264 -椰 2265 -椴 2266 -椹 2267 -椿 2268 -楂 2269 -楔 2270 -楚 2271 -楞 2272 -楠 2273 -楣 2274 -楷 2275 -楸 2276 -楼 2277 -概 2278 -榄 2279 -榆 2280 -榈 2281 -榉 2282 -榔 2283 -榕 2284 -榛 2285 -榜 2286 -榨 2287 -榫 2288 -榭 2289 -榴 2290 -榷 2291 -榻 2292 -槃 2293 -槌 2294 -槎 2295 -槐 2296 -槛 2297 -槟 2298 -槭 2299 -槽 2300 -槿 2301 -樊 2302 -樟 2303 -模 2304 -樨 2305 -横 2306 -樯 2307 -樱 2308 -樵 2309 -樽 2310 -樾 2311 -橄 2312 -橇 2313 -橐 2314 -橘 2315 -橙 2316 -橡 2317 -橱 2318 -檀 2319 -檐 2320 -檗 2321 -檬 2322 -欠 2323 -次 2324 -欢 2325 -欣 2326 -欧 2327 -欲 2328 -欸 2329 -欺 2330 -款 2331 -歆 2332 -歇 2333 -歉 2334 -歌 2335 -歙 2336 -止 2337 -正 2338 -此 2339 -步 2340 -武 2341 -歧 2342 -歩 2343 -歪 2344 -歹 2345 -死 2346 -歼 2347 -殁 2348 -殃 2349 -殆 2350 -殇 2351 -殉 2352 -殊 2353 -残 2354 -殒 2355 -殓 2356 -殖 2357 -殚 2358 -殡 2359 -殴 2360 -段 2361 -殷 2362 -殿 2363 -毁 2364 -毂 2365 -毅 2366 -毋 2367 -母 2368 -每 2369 -毒 2370 -毓 2371 -比 2372 -毕 2373 -毗 2374 -毙 2375 -毛 2376 -毡 2377 -毫 2378 -毯 2379 -毽 2380 -氏 2381 -民 2382 -氓 2383 -气 2384 -氚 2385 -氛 2386 -氟 2387 -氢 2388 -氤 2389 -氦 2390 -氧 2391 -氨 2392 -氪 2393 -氮 2394 -氯 2395 -氰 2396 -氲 2397 -水 2398 -永 2399 -汀 2400 -汁 2401 -求 2402 -汇 2403 -汉 2404 -汊 2405 -汐 2406 -汕 2407 -汗 2408 -汛 2409 -汝 2410 -汞 2411 -江 2412 -池 2413 -污 2414 -汤 2415 -汨 2416 -汩 2417 -汪 2418 -汰 2419 -汲 2420 -汴 2421 -汶 2422 -汹 2423 -汽 2424 -汾 2425 -沁 2426 -沂 2427 -沃 2428 -沅 2429 -沈 2430 -沉 2431 -沌 2432 -沏 2433 -沐 2434 -沓 2435 -沙 2436 -沛 2437 -沟 2438 -没 2439 -沢 2440 -沣 2441 -沥 2442 -沦 2443 -沧 2444 -沪 2445 -沫 2446 -沭 2447 -沮 2448 -沱 2449 -河 2450 -沸 2451 -油 2452 -治 2453 -沼 2454 -沽 2455 -沾 2456 -沿 2457 -泄 2458 -泉 2459 -泊 2460 -泌 2461 -泓 2462 -泔 2463 -法 2464 -泖 2465 -泗 2466 -泛 2467 -泞 2468 -泠 2469 -泡 2470 -波 2471 -泣 2472 -泥 2473 -注 2474 -泪 2475 -泫 2476 -泮 2477 -泯 2478 -泰 2479 -泱 2480 -泳 2481 -泵 2482 -泷 2483 -泸 2484 -泺 2485 -泻 2486 -泼 2487 -泽 2488 -泾 2489 -洁 2490 -洋 2491 -洒 2492 -洗 2493 -洙 2494 -洛 2495 -洞 2496 -津 2497 -洪 2498 -洮 2499 -洱 2500 -洲 2501 -洵 2502 -洹 2503 -洺 2504 -活 2505 -洼 2506 -洽 2507 -派 2508 -流 2509 -浃 2510 -浅 2511 -浆 2512 -浇 2513 -浈 2514 -浊 2515 -测 2516 -济 2517 -浏 2518 -浐 2519 -浑 2520 -浒 2521 -浓 2522 -浔 2523 -浙 2524 -浚 2525 -浜 2526 -浠 2527 -浣 2528 -浦 2529 -浩 2530 -浪 2531 -浮 2532 -浴 2533 -海 2534 -浸 2535 -涂 2536 -涅 2537 -消 2538 -涉 2539 -涌 2540 -涎 
2541 -涑 2542 -涓 2543 -涕 2544 -涛 2545 -涝 2546 -涞 2547 -涟 2548 -涠 2549 -涡 2550 -涣 2551 -涤 2552 -润 2553 -涧 2554 -涨 2555 -涩 2556 -涪 2557 -涮 2558 -涯 2559 -液 2560 -涵 2561 -涸 2562 -涿 2563 -淀 2564 -淄 2565 -淅 2566 -淆 2567 -淇 2568 -淋 2569 -淌 2570 -淑 2571 -淖 2572 -淘 2573 -淝 2574 -淞 2575 -淡 2576 -淤 2577 -淦 2578 -淫 2579 -淬 2580 -淮 2581 -深 2582 -淳 2583 -混 2584 -淹 2585 -添 2586 -淼 2587 -清 2588 -渊 2589 -渌 2590 -渍 2591 -渎 2592 -渐 2593 -渑 2594 -渔 2595 -渗 2596 -渚 2597 -渝 2598 -渠 2599 -渡 2600 -渣 2601 -渤 2602 -渥 2603 -温 2604 -渭 2605 -港 2606 -渲 2607 -渴 2608 -游 2609 -渺 2610 -湃 2611 -湄 2612 -湉 2613 -湍 2614 -湎 2615 -湖 2616 -湘 2617 -湛 2618 -湫 2619 -湾 2620 -湿 2621 -溃 2622 -溅 2623 -溆 2624 -溉 2625 -溏 2626 -源 2627 -溜 2628 -溟 2629 -溢 2630 -溥 2631 -溧 2632 -溪 2633 -溯 2634 -溶 2635 -溺 2636 -滁 2637 -滇 2638 -滋 2639 -滑 2640 -滔 2641 -滕 2642 -滘 2643 -滚 2644 -滞 2645 -满 2646 -滢 2647 -滤 2648 -滥 2649 -滦 2650 -滨 2651 -滩 2652 -滴 2653 -滹 2654 -漂 2655 -漆 2656 -漉 2657 -漏 2658 -漓 2659 -演 2660 -漕 2661 -漠 2662 -漩 2663 -漪 2664 -漫 2665 -漭 2666 -漯 2667 -漱 2668 -漳 2669 -漾 2670 -潆 2671 -潇 2672 -潋 2673 -潍 2674 -潘 2675 -潜 2676 -潞 2677 -潢 2678 -潦 2679 -潭 2680 -潮 2681 -潸 2682 -潺 2683 -潼 2684 -澄 2685 -澈 2686 -澍 2687 -澎 2688 -澜 2689 -澡 2690 -澧 2691 -澳 2692 -澶 2693 -激 2694 -濂 2695 -濑 2696 -濒 2697 -濠 2698 -濡 2699 -濮 2700 -濯 2701 -瀑 2702 -瀚 2703 -瀛 2704 -灌 2705 -灏 2706 -灞 2707 -火 2708 -灭 2709 -灯 2710 -灰 2711 -灵 2712 -灶 2713 -灸 2714 -灼 2715 -灾 2716 -灿 2717 -炀 2718 -炅 2719 -炉 2720 -炊 2721 -炎 2722 -炒 2723 -炔 2724 -炕 2725 -炖 2726 -炙 2727 -炜 2728 -炫 2729 -炬 2730 -炭 2731 -炮 2732 -炯 2733 -炳 2734 -炷 2735 -炸 2736 -点 2737 -炼 2738 -炽 2739 -烀 2740 -烁 2741 -烂 2742 -烃 2743 -烈 2744 -烊 2745 -烘 2746 -烙 2747 -烛 2748 -烟 2749 -烤 2750 -烦 2751 -烧 2752 -烨 2753 -烩 2754 -烫 2755 -烬 2756 -热 2757 -烯 2758 -烷 2759 -烹 2760 -烽 2761 -焉 2762 -焊 2763 -焓 2764 -焕 2765 -焖 2766 -焗 2767 -焘 2768 -焙 2769 -焚 2770 -焦 2771 -焯 2772 -焰 2773 -焱 2774 -然 2775 -煊 2776 -煌 2777 -煎 2778 -煜 2779 -煞 2780 -煤 2781 -煦 2782 -照 2783 -煨 2784 -煮 2785 -煲 2786 -煳 2787 -煽 2788 -熄 2789 -熊 2790 -熏 2791 -熔 2792 -熙 2793 -熟 2794 -熠 2795 -熨 2796 -熬 2797 -熵 2798 -熹 2799 -燃 2800 -燊 2801 -燎 2802 -燕 2803 -燥 2804 -燮 2805 -爆 2806 -爪 2807 -爬 2808 -爱 2809 -爵 2810 -父 2811 -爷 2812 -爸 2813 -爹 2814 -爽 2815 -片 2816 -版 2817 -牌 2818 -牍 2819 -牒 2820 -牙 2821 -牛 2822 -牟 2823 -牠 2824 -牡 2825 -牢 2826 -牧 2827 -物 2828 -牲 2829 -牵 2830 -特 2831 -牺 2832 -牾 2833 -犀 2834 -犁 2835 -犄 2836 -犇 2837 -犊 2838 -犒 2839 -犟 2840 -犬 2841 -犯 2842 -状 2843 -犷 2844 -犸 2845 -犹 2846 -狂 2847 -狄 2848 -狈 2849 -狐 2850 -狒 2851 -狗 2852 -狙 2853 -狞 2854 -狠 2855 -狡 2856 -狩 2857 -独 2858 -狭 2859 -狮 2860 -狰 2861 -狱 2862 -狸 2863 -狼 2864 -猁 2865 -猎 2866 -猖 2867 -猛 2868 -猜 2869 -猝 2870 -猥 2871 -猩 2872 -猪 2873 -猫 2874 -猬 2875 -献 2876 -猴 2877 -猷 2878 -猹 2879 -猾 2880 -猿 2881 -獒 2882 -獗 2883 -獭 2884 -獾 2885 -玄 2886 -率 2887 -玉 2888 -王 2889 -玑 2890 -玖 2891 -玛 2892 -玟 2893 -玥 2894 -玩 2895 -玫 2896 -玮 2897 -环 2898 -现 2899 -玲 2900 -玳 2901 -玷 2902 -玹 2903 -玺 2904 -玻 2905 -珀 2906 -珂 2907 -珈 2908 -珉 2909 -珊 2910 -珍 2911 -珏 2912 -珑 2913 -珙 2914 -珞 2915 -珠 2916 -珥 2917 -班 2918 -珮 2919 -珲 2920 -珺 2921 -球 2922 -琅 2923 -理 2924 -琉 2925 -琊 2926 -琏 2927 -琐 2928 -琛 2929 -琢 2930 -琤 2931 -琥 2932 -琦 2933 -琨 2934 -琪 2935 -琬 2936 -琮 2937 -琰 2938 -琳 2939 -琴 2940 -琵 2941 -琶 2942 -琼 2943 -瑁 2944 -瑄 2945 -瑕 2946 -瑙 2947 -瑚 2948 -瑛 2949 -瑜 2950 -瑞 2951 -瑟 2952 -瑠 2953 -瑭 2954 -瑰 2955 -瑶 2956 -瑷 2957 -瑾 2958 -璀 2959 -璃 2960 -璇 2961 -璋 2962 -璐 2963 -璞 2964 -璟 2965 -璧 2966 -璨 2967 -瓜 2968 -瓢 2969 -瓣 2970 -瓦 2971 -瓮 2972 -瓯 2973 -瓶 2974 -瓷 2975 -甄 2976 -甘 2977 -甚 2978 -甜 2979 -生 2980 -甥 2981 -用 2982 -甩 2983 -甫 2984 -甬 
2985 -甭 2986 -田 2987 -由 2988 -甲 2989 -申 2990 -电 2991 -男 2992 -甸 2993 -町 2994 -画 2995 -畅 2996 -畈 2997 -畊 2998 -界 2999 -畏 3000 -畔 3001 -留 3002 -畜 3003 -略 3004 -番 3005 -畴 3006 -畸 3007 -畿 3008 -疃 3009 -疆 3010 -疏 3011 -疑 3012 -疖 3013 -疗 3014 -疙 3015 -疚 3016 -疝 3017 -疟 3018 -疡 3019 -疣 3020 -疤 3021 -疫 3022 -疮 3023 -疯 3024 -疱 3025 -疲 3026 -疴 3027 -疵 3028 -疸 3029 -疹 3030 -疼 3031 -疽 3032 -疾 3033 -病 3034 -症 3035 -痉 3036 -痊 3037 -痍 3038 -痒 3039 -痔 3040 -痕 3041 -痘 3042 -痛 3043 -痞 3044 -痢 3045 -痣 3046 -痧 3047 -痨 3048 -痪 3049 -痫 3050 -痰 3051 -痱 3052 -痴 3053 -痹 3054 -痼 3055 -瘀 3056 -瘁 3057 -瘙 3058 -瘟 3059 -瘠 3060 -瘢 3061 -瘤 3062 -瘦 3063 -瘩 3064 -瘪 3065 -瘫 3066 -瘳 3067 -瘴 3068 -瘸 3069 -瘾 3070 -癌 3071 -癖 3072 -癜 3073 -癞 3074 -癣 3075 -癫 3076 -登 3077 -白 3078 -百 3079 -皂 3080 -的 3081 -皆 3082 -皇 3083 -皋 3084 -皎 3085 -皑 3086 -皓 3087 -皖 3088 -皙 3089 -皮 3090 -皱 3091 -皿 3092 -盂 3093 -盅 3094 -盆 3095 -盈 3096 -益 3097 -盎 3098 -盏 3099 -盐 3100 -监 3101 -盒 3102 -盔 3103 -盖 3104 -盗 3105 -盘 3106 -盛 3107 -盟 3108 -目 3109 -盯 3110 -盱 3111 -盲 3112 -直 3113 -相 3114 -盹 3115 -盼 3116 -盾 3117 -省 3118 -眈 3119 -眉 3120 -看 3121 -眙 3122 -真 3123 -眠 3124 -眨 3125 -眩 3126 -眬 3127 -眯 3128 -眶 3129 -眷 3130 -眸 3131 -眺 3132 -眼 3133 -着 3134 -睁 3135 -睇 3136 -睐 3137 -睑 3138 -睛 3139 -睡 3140 -睢 3141 -督 3142 -睦 3143 -睫 3144 -睬 3145 -睹 3146 -睽 3147 -睾 3148 -睿 3149 -瞄 3150 -瞅 3151 -瞌 3152 -瞎 3153 -瞑 3154 -瞒 3155 -瞟 3156 -瞠 3157 -瞥 3158 -瞧 3159 -瞩 3160 -瞪 3161 -瞬 3162 -瞭 3163 -瞰 3164 -瞳 3165 -瞻 3166 -瞿 3167 -矍 3168 -矗 3169 -矛 3170 -矜 3171 -矢 3172 -矣 3173 -知 3174 -矩 3175 -矫 3176 -矬 3177 -短 3178 -矮 3179 -石 3180 -矶 3181 -矸 3182 -矽 3183 -矾 3184 -矿 3185 -砀 3186 -码 3187 -砂 3188 -砌 3189 -砍 3190 -砒 3191 -研 3192 -砖 3193 -砚 3194 -砝 3195 -砣 3196 -砥 3197 -砭 3198 -砰 3199 -破 3200 -砷 3201 -砸 3202 -砺 3203 -砼 3204 -砾 3205 -础 3206 -硅 3207 -硌 3208 -硒 3209 -硕 3210 -硖 3211 -硚 3212 -硝 3213 -硫 3214 -硬 3215 -确 3216 -硼 3217 -碉 3218 -碌 3219 -碍 3220 -碎 3221 -碑 3222 -碓 3223 -碗 3224 -碘 3225 -碚 3226 -碜 3227 -碟 3228 -碣 3229 -碧 3230 -碰 3231 -碱 3232 -碳 3233 -碴 3234 -碾 3235 -磁 3236 -磅 3237 -磊 3238 -磋 3239 -磐 3240 -磕 3241 -磨 3242 -磴 3243 -磷 3244 -磺 3245 -礁 3246 -示 3247 -礼 3248 -社 3249 -祀 3250 -祁 3251 -祈 3252 -祉 3253 -祎 3254 -祐 3255 -祖 3256 -祚 3257 -祛 3258 -祝 3259 -神 3260 -祟 3261 -祠 3262 -祢 3263 -祥 3264 -票 3265 -祭 3266 -祯 3267 -祷 3268 -祸 3269 -祺 3270 -禀 3271 -禁 3272 -禄 3273 -禅 3274 -福 3275 -禧 3276 -禹 3277 -禺 3278 -离 3279 -禽 3280 -禾 3281 -秀 3282 -私 3283 -秃 3284 -秆 3285 -秉 3286 -秋 3287 -种 3288 -科 3289 -秒 3290 -秘 3291 -租 3292 -秣 3293 -秤 3294 -秦 3295 -秧 3296 -秩 3297 -积 3298 -称 3299 -秸 3300 -移 3301 -秽 3302 -稀 3303 -程 3304 -稍 3305 -税 3306 -稔 3307 -稚 3308 -稞 3309 -稠 3310 -稣 3311 -稳 3312 -稷 3313 -稹 3314 -稻 3315 -稼 3316 -稽 3317 -稿 3318 -穆 3319 -穗 3320 -穴 3321 -究 3322 -穷 3323 -穹 3324 -空 3325 -穿 3326 -突 3327 -窃 3328 -窄 3329 -窈 3330 -窍 3331 -窑 3332 -窒 3333 -窕 3334 -窖 3335 -窗 3336 -窘 3337 -窜 3338 -窝 3339 -窟 3340 -窠 3341 -窥 3342 -窦 3343 -窨 3344 -窿 3345 -立 3346 -竖 3347 -站 3348 -竞 3349 -竟 3350 -章 3351 -竣 3352 -童 3353 -竭 3354 -端 3355 -竹 3356 -竺 3357 -竽 3358 -竿 3359 -笃 3360 -笆 3361 -笈 3362 -笋 3363 -笑 3364 -笔 3365 -笙 3366 -笛 3367 -笠 3368 -符 3369 -笨 3370 -第 3371 -笳 3372 -笸 3373 -笼 3374 -等 3375 -筋 3376 -筏 3377 -筐 3378 -筑 3379 -筒 3380 -答 3381 -策 3382 -筛 3383 -筝 3384 -筠 3385 -筱 3386 -筵 3387 -筷 3388 -筹 3389 -签 3390 -简 3391 -箍 3392 -箔 3393 -箕 3394 -算 3395 -管 3396 -箩 3397 -箫 3398 -箭 3399 -箱 3400 -箴 3401 -篁 3402 -篆 3403 -篇 3404 -篑 3405 -篓 3406 -篝 3407 -篡 3408 -篦 3409 -篪 3410 -篮 3411 -篱 3412 -篷 3413 -篼 3414 -簇 3415 -簋 3416 -簧 3417 -簪 3418 -簸 3419 -簿 3420 -籁 3421 -籍 3422 -米 3423 -类 3424 -籼 3425 -籽 3426 -粉 3427 -粑 3428 -粒 
3429 -粕 3430 -粗 3431 -粘 3432 -粟 3433 -粤 3434 -粥 3435 -粪 3436 -粮 3437 -粱 3438 -粲 3439 -粳 3440 -粹 3441 -粼 3442 -粽 3443 -精 3444 -糊 3445 -糕 3446 -糖 3447 -糗 3448 -糙 3449 -糟 3450 -糠 3451 -糯 3452 -系 3453 -紊 3454 -素 3455 -索 3456 -紧 3457 -紫 3458 -累 3459 -絮 3460 -綦 3461 -繁 3462 -纂 3463 -纠 3464 -纡 3465 -红 3466 -纣 3467 -纤 3468 -约 3469 -级 3470 -纨 3471 -纪 3472 -纫 3473 -纬 3474 -纭 3475 -纯 3476 -纰 3477 -纱 3478 -纲 3479 -纳 3480 -纵 3481 -纶 3482 -纷 3483 -纸 3484 -纹 3485 -纺 3486 -纽 3487 -纾 3488 -线 3489 -绀 3490 -练 3491 -组 3492 -绅 3493 -细 3494 -织 3495 -终 3496 -绉 3497 -绊 3498 -绋 3499 -绌 3500 -绍 3501 -绎 3502 -经 3503 -绑 3504 -绒 3505 -结 3506 -绔 3507 -绕 3508 -绘 3509 -给 3510 -绚 3511 -绛 3512 -络 3513 -绝 3514 -绞 3515 -统 3516 -绢 3517 -绣 3518 -绥 3519 -继 3520 -绩 3521 -绪 3522 -绫 3523 -续 3524 -绮 3525 -绯 3526 -绰 3527 -绳 3528 -维 3529 -绵 3530 -绷 3531 -绸 3532 -绻 3533 -综 3534 -绽 3535 -绿 3536 -缀 3537 -缄 3538 -缅 3539 -缆 3540 -缇 3541 -缉 3542 -缎 3543 -缓 3544 -缔 3545 -缕 3546 -编 3547 -缘 3548 -缙 3549 -缚 3550 -缛 3551 -缜 3552 -缝 3553 -缠 3554 -缢 3555 -缤 3556 -缨 3557 -缩 3558 -缪 3559 -缬 3560 -缭 3561 -缮 3562 -缰 3563 -缱 3564 -缴 3565 -缸 3566 -缺 3567 -罂 3568 -罄 3569 -罐 3570 -网 3571 -罔 3572 -罕 3573 -罗 3574 -罚 3575 -罡 3576 -罢 3577 -罩 3578 -罪 3579 -置 3580 -署 3581 -罹 3582 -羁 3583 -羊 3584 -羌 3585 -美 3586 -羔 3587 -羚 3588 -羞 3589 -羡 3590 -群 3591 -羧 3592 -羯 3593 -羲 3594 -羸 3595 -羹 3596 -羽 3597 -羿 3598 -翁 3599 -翅 3600 -翊 3601 -翌 3602 -翎 3603 -翔 3604 -翘 3605 -翟 3606 -翠 3607 -翡 3608 -翩 3609 -翰 3610 -翱 3611 -翻 3612 -翼 3613 -耀 3614 -老 3615 -考 3616 -耄 3617 -者 3618 -耆 3619 -耋 3620 -而 3621 -耍 3622 -耐 3623 -耒 3624 -耕 3625 -耗 3626 -耘 3627 -耙 3628 -耜 3629 -耪 3630 -耳 3631 -耶 3632 -耷 3633 -耸 3634 -耻 3635 -耽 3636 -耿 3637 -聂 3638 -聆 3639 -聊 3640 -聋 3641 -职 3642 -联 3643 -聘 3644 -聚 3645 -聪 3646 -肃 3647 -肆 3648 -肇 3649 -肉 3650 -肋 3651 -肌 3652 -肖 3653 -肘 3654 -肚 3655 -肛 3656 -肝 3657 -肠 3658 -股 3659 -肢 3660 -肤 3661 -肥 3662 -肩 3663 -肪 3664 -肮 3665 -肯 3666 -肱 3667 -育 3668 -肴 3669 -肺 3670 -肾 3671 -肿 3672 -胀 3673 -胁 3674 -胃 3675 -胆 3676 -背 3677 -胎 3678 -胖 3679 -胗 3680 -胚 3681 -胛 3682 -胜 3683 -胞 3684 -胡 3685 -胤 3686 -胥 3687 -胧 3688 -胫 3689 -胭 3690 -胯 3691 -胰 3692 -胱 3693 -胳 3694 -胶 3695 -胸 3696 -胺 3697 -能 3698 -脂 3699 -脆 3700 -脉 3701 -脊 3702 -脍 3703 -脏 3704 -脐 3705 -脑 3706 -脓 3707 -脖 3708 -脚 3709 -脯 3710 -脱 3711 -脸 3712 -脾 3713 -腆 3714 -腈 3715 -腊 3716 -腋 3717 -腌 3718 -腐 3719 -腑 3720 -腓 3721 -腔 3722 -腕 3723 -腥 3724 -腩 3725 -腭 3726 -腮 3727 -腰 3728 -腱 3729 -腴 3730 -腹 3731 -腺 3732 -腻 3733 -腼 3734 -腾 3735 -腿 3736 -膀 3737 -膈 3738 -膊 3739 -膏 3740 -膑 3741 -膛 3742 -膜 3743 -膝 3744 -膨 3745 -膳 3746 -膺 3747 -臀 3748 -臂 3749 -臃 3750 -臆 3751 -臊 3752 -臣 3753 -臧 3754 -自 3755 -臬 3756 -臭 3757 -至 3758 -致 3759 -臻 3760 -臼 3761 -舀 3762 -舅 3763 -舆 3764 -舌 3765 -舍 3766 -舐 3767 -舒 3768 -舔 3769 -舛 3770 -舜 3771 -舞 3772 -舟 3773 -航 3774 -舫 3775 -般 3776 -舰 3777 -舱 3778 -舵 3779 -舶 3780 -舷 3781 -舸 3782 -船 3783 -艇 3784 -艋 3785 -艘 3786 -艮 3787 -良 3788 -艰 3789 -色 3790 -艳 3791 -艺 3792 -艾 3793 -艿 3794 -节 3795 -芊 3796 -芋 3797 -芍 3798 -芒 3799 -芗 3800 -芙 3801 -芜 3802 -芝 3803 -芥 3804 -芦 3805 -芩 3806 -芪 3807 -芬 3808 -芭 3809 -芮 3810 -芯 3811 -花 3812 -芳 3813 -芷 3814 -芸 3815 -芹 3816 -芽 3817 -芾 3818 -苇 3819 -苋 3820 -苍 3821 -苏 3822 -苑 3823 -苓 3824 -苔 3825 -苗 3826 -苛 3827 -苞 3828 -苟 3829 -苡 3830 -苣 3831 -若 3832 -苦 3833 -苫 3834 -苯 3835 -英 3836 -苷 3837 -苹 3838 -茁 3839 -茂 3840 -范 3841 -茄 3842 -茅 3843 -茆 3844 -茉 3845 -茌 3846 -茎 3847 -茗 3848 -茛 3849 -茜 3850 -茧 3851 -茨 3852 -茫 3853 -茬 3854 -茯 3855 -茱 3856 -茳 3857 -茴 3858 -茵 3859 -茶 3860 -茸 3861 -茹 3862 -茼 3863 -荀 3864 -荃 3865 -荆 3866 -荇 3867 -草 3868 -荏 3869 -荐 3870 -荒 3871 -荔 3872 -荚 
3873 -荛 3874 -荞 3875 -荟 3876 -荠 3877 -荡 3878 -荣 3879 -荤 3880 -荧 3881 -荨 3882 -荫 3883 -药 3884 -荷 3885 -荸 3886 -荻 3887 -荼 3888 -莅 3889 -莆 3890 -莉 3891 -莎 3892 -莒 3893 -莓 3894 -莘 3895 -莜 3896 -莞 3897 -莠 3898 -莪 3899 -莫 3900 -莱 3901 -莲 3902 -莴 3903 -获 3904 -莹 3905 -莺 3906 -莽 3907 -菀 3908 -菁 3909 -菅 3910 -菇 3911 -菊 3912 -菌 3913 -菏 3914 -菖 3915 -菘 3916 -菜 3917 -菠 3918 -菡 3919 -菩 3920 -菱 3921 -菲 3922 -萃 3923 -萄 3924 -萋 3925 -萌 3926 -萍 3927 -萎 3928 -萝 3929 -萤 3930 -营 3931 -萦 3932 -萧 3933 -萨 3934 -萱 3935 -萸 3936 -落 3937 -葆 3938 -著 3939 -葚 3940 -葛 3941 -葡 3942 -董 3943 -葩 3944 -葫 3945 -葬 3946 -葱 3947 -葳 3948 -葵 3949 -葺 3950 -蒂 3951 -蒋 3952 -蒙 3953 -蒜 3954 -蒯 3955 -蒲 3956 -蒸 3957 -蒿 3958 -蓁 3959 -蓄 3960 -蓉 3961 -蓓 3962 -蓝 3963 -蓟 3964 -蓥 3965 -蓦 3966 -蓬 3967 -蓼 3968 -蔑 3969 -蔓 3970 -蔗 3971 -蔚 3972 -蔡 3973 -蔫 3974 -蔬 3975 -蔷 3976 -蔺 3977 -蔻 3978 -蔼 3979 -蔽 3980 -蕃 3981 -蕉 3982 -蕊 3983 -蕙 3984 -蕨 3985 -蕲 3986 -蕴 3987 -蕾 3988 -薄 3989 -薇 3990 -薏 3991 -薛 3992 -薪 3993 -薯 3994 -薰 3995 -薷 3996 -藁 3997 -藉 3998 -藏 3999 -藐 4000 -藓 4001 -藕 4002 -藜 4003 -藠 4004 -藤 4005 -藩 4006 -藻 4007 -藿 4008 -蘑 4009 -蘸 4010 -虎 4011 -虏 4012 -虐 4013 -虑 4014 -虔 4015 -虚 4016 -虞 4017 -虫 4018 -虱 4019 -虹 4020 -虻 4021 -虽 4022 -虾 4023 -蚀 4024 -蚁 4025 -蚂 4026 -蚊 4027 -蚌 4028 -蚓 4029 -蚕 4030 -蚝 4031 -蚣 4032 -蚤 4033 -蚪 4034 -蚬 4035 -蚯 4036 -蚱 4037 -蚴 4038 -蛀 4039 -蛆 4040 -蛇 4041 -蛉 4042 -蛊 4043 -蛋 4044 -蛎 4045 -蛐 4046 -蛔 4047 -蛙 4048 -蛛 4049 -蛟 4050 -蛤 4051 -蛮 4052 -蛰 4053 -蛳 4054 -蛹 4055 -蛾 4056 -蜀 4057 -蜂 4058 -蜃 4059 -蜇 4060 -蜈 4061 -蜊 4062 -蜍 4063 -蜒 4064 -蜓 4065 -蜕 4066 -蜗 4067 -蜘 4068 -蜚 4069 -蜜 4070 -蜡 4071 -蜢 4072 -蜥 4073 -蜱 4074 -蜴 4075 -蜷 4076 -蜻 4077 -蜿 4078 -蝇 4079 -蝈 4080 -蝉 4081 -蝌 4082 -蝎 4083 -蝗 4084 -蝙 4085 -蝠 4086 -蝮 4087 -蝴 4088 -蝶 4089 -蝽 4090 -螂 4091 -螃 4092 -螈 4093 -融 4094 -螨 4095 -螳 4096 -螺 4097 -蟀 4098 -蟆 4099 -蟊 4100 -蟋 4101 -蟑 4102 -蟒 4103 -蟠 4104 -蟹 4105 -蟾 4106 -蠊 4107 -蠕 4108 -蠡 4109 -蠢 4110 -血 4111 -衅 4112 -行 4113 -衍 4114 -衔 4115 -街 4116 -衙 4117 -衡 4118 -衢 4119 -衣 4120 -补 4121 -表 4122 -衩 4123 -衫 4124 -衬 4125 -衮 4126 -衰 4127 -衲 4128 -衷 4129 -袁 4130 -袂 4131 -袄 4132 -袅 4133 -袈 4134 -袋 4135 -袍 4136 -袒 4137 -袖 4138 -袜 4139 -被 4140 -袭 4141 -袱 4142 -裁 4143 -裂 4144 -装 4145 -裆 4146 -裔 4147 -裕 4148 -裘 4149 -裙 4150 -裟 4151 -裤 4152 -裨 4153 -裱 4154 -裳 4155 -裴 4156 -裸 4157 -裹 4158 -褂 4159 -褐 4160 -褒 4161 -褓 4162 -褔 4163 -褚 4164 -褛 4165 -褥 4166 -褪 4167 -褴 4168 -褶 4169 -襁 4170 -襄 4171 -襟 4172 -西 4173 -要 4174 -覃 4175 -覆 4176 -见 4177 -观 4178 -规 4179 -觅 4180 -视 4181 -览 4182 -觉 4183 -觊 4184 -觎 4185 -觐 4186 -觑 4187 -角 4188 -觞 4189 -解 4190 -觥 4191 -触 4192 -言 4193 -訾 4194 -詹 4195 -誉 4196 -誓 4197 -警 4198 -譬 4199 -计 4200 -订 4201 -讣 4202 -认 4203 -讥 4204 -讧 4205 -讨 4206 -让 4207 -讪 4208 -训 4209 -议 4210 -讯 4211 -记 4212 -讲 4213 -讳 4214 -讴 4215 -讶 4216 -讷 4217 -许 4218 -讹 4219 -论 4220 -讼 4221 -讽 4222 -设 4223 -访 4224 -诀 4225 -证 4226 -诃 4227 -评 4228 -诅 4229 -识 4230 -诈 4231 -诉 4232 -诊 4233 -诋 4234 -词 4235 -诏 4236 -译 4237 -诓 4238 -试 4239 -诗 4240 -诘 4241 -诙 4242 -诚 4243 -诛 4244 -话 4245 -诞 4246 -诟 4247 -诠 4248 -诡 4249 -询 4250 -诣 4251 -诤 4252 -该 4253 -详 4254 -诧 4255 -诩 4256 -诫 4257 -诬 4258 -语 4259 -误 4260 -诱 4261 -诲 4262 -说 4263 -诵 4264 -诶 4265 -请 4266 -诸 4267 -诹 4268 -诺 4269 -读 4270 -诽 4271 -课 4272 -诿 4273 -谀 4274 -谁 4275 -调 4276 -谅 4277 -谆 4278 -谈 4279 -谊 4280 -谋 4281 -谌 4282 -谍 4283 -谎 4284 -谏 4285 -谐 4286 -谑 4287 -谓 4288 -谕 4289 -谖 4290 -谘 4291 -谙 4292 -谚 4293 -谛 4294 -谜 4295 -谟 4296 -谢 4297 -谣 4298 -谤 4299 -谦 4300 -谧 4301 -谨 4302 -谩 4303 -谬 4304 -谭 4305 -谮 4306 -谯 4307 -谱 4308 -谴 4309 -谶 4310 -谷 4311 -豁 4312 -豆 4313 -豇 4314 -豉 4315 -豌 4316 -豚 
4317 -象 4318 -豢 4319 -豪 4320 -豫 4321 -豹 4322 -豺 4323 -貂 4324 -貅 4325 -貉 4326 -貌 4327 -貔 4328 -贝 4329 -贞 4330 -负 4331 -贡 4332 -财 4333 -责 4334 -贤 4335 -败 4336 -账 4337 -货 4338 -质 4339 -贩 4340 -贪 4341 -贫 4342 -贬 4343 -购 4344 -贮 4345 -贯 4346 -贰 4347 -贱 4348 -贲 4349 -贴 4350 -贵 4351 -贷 4352 -贸 4353 -费 4354 -贺 4355 -贻 4356 -贼 4357 -贾 4358 -贿 4359 -赁 4360 -赂 4361 -赃 4362 -资 4363 -赅 4364 -赈 4365 -赉 4366 -赊 4367 -赋 4368 -赌 4369 -赎 4370 -赏 4371 -赐 4372 -赓 4373 -赔 4374 -赖 4375 -赘 4376 -赚 4377 -赛 4378 -赝 4379 -赞 4380 -赠 4381 -赡 4382 -赢 4383 -赣 4384 -赤 4385 -赦 4386 -赫 4387 -走 4388 -赳 4389 -赴 4390 -赵 4391 -赶 4392 -起 4393 -趁 4394 -超 4395 -越 4396 -趋 4397 -趟 4398 -趣 4399 -足 4400 -趴 4401 -趵 4402 -趸 4403 -趺 4404 -趾 4405 -跃 4406 -跄 4407 -跆 4408 -跋 4409 -跌 4410 -跎 4411 -跑 4412 -跚 4413 -跛 4414 -距 4415 -跟 4416 -跤 4417 -跨 4418 -跪 4419 -跬 4420 -路 4421 -跳 4422 -践 4423 -跶 4424 -跷 4425 -跹 4426 -跺 4427 -跻 4428 -踉 4429 -踊 4430 -踌 4431 -踏 4432 -踝 4433 -踞 4434 -踢 4435 -踩 4436 -踪 4437 -踮 4438 -踯 4439 -踱 4440 -踵 4441 -踹 4442 -踺 4443 -蹁 4444 -蹂 4445 -蹄 4446 -蹈 4447 -蹉 4448 -蹊 4449 -蹋 4450 -蹒 4451 -蹚 4452 -蹦 4453 -蹩 4454 -蹬 4455 -蹭 4456 -蹲 4457 -蹴 4458 -蹶 4459 -蹼 4460 -蹿 4461 -躁 4462 -躅 4463 -躇 4464 -躏 4465 -身 4466 -躬 4467 -躯 4468 -躲 4469 -躺 4470 -车 4471 -轧 4472 -轨 4473 -轩 4474 -轫 4475 -转 4476 -轮 4477 -软 4478 -轰 4479 -轱 4480 -轲 4481 -轳 4482 -轴 4483 -轶 4484 -轸 4485 -轻 4486 -轼 4487 -载 4488 -轿 4489 -较 4490 -辄 4491 -辅 4492 -辆 4493 -辈 4494 -辉 4495 -辊 4496 -辍 4497 -辐 4498 -辑 4499 -输 4500 -辕 4501 -辖 4502 -辗 4503 -辘 4504 -辙 4505 -辛 4506 -辜 4507 -辞 4508 -辟 4509 -辣 4510 -辨 4511 -辩 4512 -辫 4513 -辰 4514 -辱 4515 -边 4516 -辽 4517 -达 4518 -迁 4519 -迂 4520 -迄 4521 -迅 4522 -过 4523 -迈 4524 -迎 4525 -运 4526 -近 4527 -返 4528 -还 4529 -这 4530 -进 4531 -远 4532 -违 4533 -连 4534 -迟 4535 -迢 4536 -迥 4537 -迦 4538 -迩 4539 -迪 4540 -迫 4541 -迭 4542 -述 4543 -迷 4544 -迸 4545 -迹 4546 -追 4547 -退 4548 -送 4549 -适 4550 -逃 4551 -逅 4552 -逆 4553 -选 4554 -逊 4555 -逋 4556 -逍 4557 -透 4558 -逐 4559 -逑 4560 -递 4561 -途 4562 -逗 4563 -通 4564 -逛 4565 -逝 4566 -逞 4567 -速 4568 -造 4569 -逡 4570 -逢 4571 -逮 4572 -逯 4573 -逵 4574 -逸 4575 -逻 4576 -逼 4577 -逾 4578 -遁 4579 -遂 4580 -遇 4581 -遍 4582 -遏 4583 -遐 4584 -遑 4585 -道 4586 -遗 4587 -遛 4588 -遢 4589 -遣 4590 -遥 4591 -遨 4592 -遭 4593 -遮 4594 -遴 4595 -遵 4596 -避 4597 -邀 4598 -邂 4599 -邃 4600 -邋 4601 -邑 4602 -邓 4603 -邕 4604 -邙 4605 -邛 4606 -邝 4607 -邡 4608 -邢 4609 -那 4610 -邦 4611 -邪 4612 -邬 4613 -邮 4614 -邯 4615 -邰 4616 -邱 4617 -邳 4618 -邵 4619 -邸 4620 -邹 4621 -邺 4622 -邻 4623 -郁 4624 -郅 4625 -郇 4626 -郊 4627 -郎 4628 -郑 4629 -郓 4630 -郜 4631 -郝 4632 -郡 4633 -郧 4634 -部 4635 -郫 4636 -郭 4637 -郯 4638 -郴 4639 -郸 4640 -都 4641 -鄂 4642 -鄙 4643 -鄞 4644 -鄢 4645 -鄱 4646 -酉 4647 -酊 4648 -酋 4649 -酌 4650 -配 4651 -酐 4652 -酒 4653 -酗 4654 -酚 4655 -酝 4656 -酞 4657 -酣 4658 -酥 4659 -酩 4660 -酪 4661 -酬 4662 -酮 4663 -酯 4664 -酰 4665 -酱 4666 -酵 4667 -酶 4668 -酷 4669 -酸 4670 -酿 4671 -醇 4672 -醉 4673 -醋 4674 -醍 4675 -醐 4676 -醒 4677 -醛 4678 -醺 4679 -采 4680 -釉 4681 -释 4682 -里 4683 -重 4684 -野 4685 -量 4686 -金 4687 -釜 4688 -鉴 4689 -銮 4690 -鏖 4691 -鑫 4692 -钇 4693 -针 4694 -钉 4695 -钊 4696 -钎 4697 -钏 4698 -钐 4699 -钒 4700 -钓 4701 -钗 4702 -钙 4703 -钛 4704 -钜 4705 -钝 4706 -钞 4707 -钟 4708 -钠 4709 -钢 4710 -钣 4711 -钥 4712 -钦 4713 -钧 4714 -钨 4715 -钩 4716 -钮 4717 -钯 4718 -钰 4719 -钱 4720 -钲 4721 -钳 4722 -钴 4723 -钵 4724 -钻 4725 -钼 4726 -钾 4727 -钿 4728 -铀 4729 -铁 4730 -铂 4731 -铃 4732 -铄 4733 -铅 4734 -铆 4735 -铉 4736 -铋 4737 -铍 4738 -铎 4739 -铐 4740 -铑 4741 -铖 4742 -铛 4743 -铜 4744 -铝 4745 -铟 4746 -铠 4747 -铡 4748 -铣 4749 -铤 4750 -铧 4751 -铨 4752 -铩 4753 -铬 4754 -铭 4755 -铮 4756 -铰 4757 -铲 4758 -银 4759 -铷 4760 -铸 
4761 -铺 4762 -链 4763 -铿 4764 -销 4765 -锁 4766 -锂 4767 -锄 4768 -锅 4769 -锆 4770 -锈 4771 -锉 4772 -锋 4773 -锌 4774 -锏 4775 -锐 4776 -锑 4777 -锒 4778 -错 4779 -锚 4780 -锟 4781 -锡 4782 -锢 4783 -锣 4784 -锤 4785 -锥 4786 -锦 4787 -锨 4788 -锭 4789 -键 4790 -锯 4791 -锰 4792 -锲 4793 -锴 4794 -锵 4795 -锷 4796 -锹 4797 -锻 4798 -镀 4799 -镁 4800 -镂 4801 -镇 4802 -镉 4803 -镊 4804 -镌 4805 -镍 4806 -镏 4807 -镐 4808 -镑 4809 -镔 4810 -镕 4811 -镖 4812 -镜 4813 -镣 4814 -镭 4815 -镯 4816 -镰 4817 -镳 4818 -镶 4819 -长 4820 -门 4821 -闩 4822 -闪 4823 -闫 4824 -闭 4825 -问 4826 -闯 4827 -闰 4828 -闲 4829 -闳 4830 -间 4831 -闵 4832 -闷 4833 -闸 4834 -闹 4835 -闺 4836 -闻 4837 -闽 4838 -闾 4839 -阀 4840 -阁 4841 -阂 4842 -阄 4843 -阅 4844 -阆 4845 -阉 4846 -阎 4847 -阐 4848 -阑 4849 -阔 4850 -阕 4851 -阖 4852 -阙 4853 -阚 4854 -阜 4855 -队 4856 -阡 4857 -阪 4858 -阮 4859 -阱 4860 -防 4861 -阳 4862 -阴 4863 -阵 4864 -阶 4865 -阻 4866 -阿 4867 -陀 4868 -陂 4869 -附 4870 -际 4871 -陆 4872 -陇 4873 -陈 4874 -陉 4875 -陋 4876 -陌 4877 -降 4878 -限 4879 -陕 4880 -陛 4881 -陡 4882 -院 4883 -除 4884 -陨 4885 -险 4886 -陪 4887 -陬 4888 -陵 4889 -陶 4890 -陷 4891 -隅 4892 -隆 4893 -隋 4894 -隍 4895 -随 4896 -隐 4897 -隔 4898 -隗 4899 -隘 4900 -隙 4901 -障 4902 -隧 4903 -隶 4904 -隼 4905 -隽 4906 -难 4907 -雀 4908 -雁 4909 -雄 4910 -雅 4911 -集 4912 -雇 4913 -雉 4914 -雌 4915 -雍 4916 -雏 4917 -雒 4918 -雕 4919 -雨 4920 -雪 4921 -雯 4922 -雳 4923 -零 4924 -雷 4925 -雹 4926 -雾 4927 -需 4928 -霁 4929 -霄 4930 -霆 4931 -震 4932 -霈 4933 -霉 4934 -霍 4935 -霎 4936 -霏 4937 -霓 4938 -霖 4939 -霜 4940 -霞 4941 -霪 4942 -露 4943 -霸 4944 -霹 4945 -霾 4946 -靑 4947 -青 4948 -靓 4949 -靖 4950 -静 4951 -靛 4952 -非 4953 -靠 4954 -靡 4955 -面 4956 -革 4957 -靳 4958 -靴 4959 -靶 4960 -鞅 4961 -鞋 4962 -鞍 4963 -鞑 4964 -鞘 4965 -鞠 4966 -鞭 4967 -韦 4968 -韧 4969 -韩 4970 -韫 4971 -韬 4972 -韭 4973 -音 4974 -韵 4975 -韶 4976 -页 4977 -顶 4978 -顷 4979 -项 4980 -顺 4981 -须 4982 -顽 4983 -顾 4984 -顿 4985 -颀 4986 -颁 4987 -颂 4988 -预 4989 -颅 4990 -领 4991 -颇 4992 -颈 4993 -颊 4994 -颌 4995 -颍 4996 -颐 4997 -频 4998 -颓 4999 -颖 5000 -颗 5001 -题 5002 -颚 5003 -颜 5004 -额 5005 -颠 5006 -颢 5007 -颤 5008 -颦 5009 -颧 5010 -风 5011 -飒 5012 -飓 5013 -飘 5014 -飙 5015 -飚 5016 -飞 5017 -食 5018 -飧 5019 -餍 5020 -餐 5021 -餮 5022 -饕 5023 -饥 5024 -饨 5025 -饪 5026 -饭 5027 -饮 5028 -饯 5029 -饰 5030 -饱 5031 -饲 5032 -饴 5033 -饵 5034 -饶 5035 -饷 5036 -饺 5037 -饼 5038 -饽 5039 -饿 5040 -馀 5041 -馁 5042 -馄 5043 -馅 5044 -馆 5045 -馈 5046 -馊 5047 -馋 5048 -馍 5049 -馏 5050 -馑 5051 -馒 5052 -馕 5053 -首 5054 -馗 5055 -香 5056 -馥 5057 -馨 5058 -马 5059 -驭 5060 -驮 5061 -驯 5062 -驰 5063 -驱 5064 -驳 5065 -驴 5066 -驶 5067 -驷 5068 -驸 5069 -驹 5070 -驻 5071 -驼 5072 -驾 5073 -驿 5074 -骁 5075 -骂 5076 -骄 5077 -骅 5078 -骆 5079 -骇 5080 -骈 5081 -骊 5082 -骋 5083 -验 5084 -骏 5085 -骐 5086 -骑 5087 -骓 5088 -骗 5089 -骚 5090 -骛 5091 -骜 5092 -骝 5093 -骞 5094 -骠 5095 -骡 5096 -骤 5097 -骥 5098 -骨 5099 -骰 5100 -骷 5101 -骸 5102 -骺 5103 -骼 5104 -髂 5105 -髅 5106 -髋 5107 -髌 5108 -髓 5109 -高 5110 -髦 5111 -髯 5112 -鬃 5113 -鬓 5114 -鬟 5115 -鬼 5116 -魁 5117 -魂 5118 -魄 5119 -魅 5120 -魇 5121 -魉 5122 -魍 5123 -魏 5124 -魔 5125 -魟 5126 -鱼 5127 -鱿 5128 -鲁 5129 -鲅 5130 -鲈 5131 -鲍 5132 -鲑 5133 -鲜 5134 -鲟 5135 -鲠 5136 -鲢 5137 -鲤 5138 -鲨 5139 -鲫 5140 -鲭 5141 -鲳 5142 -鲶 5143 -鲷 5144 -鲸 5145 -鲼 5146 -鳃 5147 -鳄 5148 -鳅 5149 -鳌 5150 -鳍 5151 -鳕 5152 -鳖 5153 -鳗 5154 -鳝 5155 -鳞 5156 -鳟 5157 -鸟 5158 -鸠 5159 -鸡 5160 -鸢 5161 -鸣 5162 -鸥 5163 -鸦 5164 -鸩 5165 -鸪 5166 -鸫 5167 -鸭 5168 -鸯 5169 -鸳 5170 -鸵 5171 -鸽 5172 -鸾 5173 -鸿 5174 -鹁 5175 -鹂 5176 -鹃 5177 -鹅 5178 -鹉 5179 -鹊 5180 -鹌 5181 -鹏 5182 -鹑 5183 -鹜 5184 -鹞 5185 -鹤 5186 -鹦 5187 -鹧 5188 -鹫 5189 -鹭 5190 -鹰 5191 -鹳 5192 -鹿 5193 -麂 5194 -麋 5195 -麒 5196 -麓 5197 -麝 5198 -麟 5199 -麦 5200 -麸 5201 -麻 5202 -麾 5203 -黄 5204 -黍 
5205 -黎 5206 -黏 5207 -黑 5208 -黔 5209 -默 5210 -黛 5211 -黝 5212 -黟 5213 -黯 5214 -鼎 5215 -鼓 5216 -鼠 5217 -鼬 5218 -鼹 5219 -鼻 5220 -鼾 5221 -齐 5222 -齿 5223 -龃 5224 -龄 5225 -龅 5226 -龈 5227 -龉 5228 -龊 5229 -龌 5230 -龙 5231 -龚 5232 -龟 5233 - 5234 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/test/resources/librispeech.train_960_unigram5000.bpemodel b/models/audio/speech_recognition/conformer/igie/wenet/test/resources/librispeech.train_960_unigram5000.bpemodel deleted file mode 100644 index 3d24c47cf1a19b69928d186fdb93ab31e964ca75..0000000000000000000000000000000000000000 Binary files a/models/audio/speech_recognition/conformer/igie/wenet/test/resources/librispeech.train_960_unigram5000.bpemodel and /dev/null differ diff --git a/models/audio/speech_recognition/conformer/igie/wenet/test/resources/librispeech.words.txt b/models/audio/speech_recognition/conformer/igie/wenet/test/resources/librispeech.words.txt deleted file mode 100644 index 23a5adcbe4b3d883596e1675a7efbc16afacf4f1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/test/resources/librispeech.words.txt +++ /dev/null @@ -1,5002 +0,0 @@ - 0 - 1 -' 2 -▁ 3 -A 4 -▁A 5 -▁AB 6 -▁ABANDON 7 -ABETH 8 -ABILITY 9 -ABLE 10 -▁ABLE 11 -ABLY 12 -▁ABODE 13 -ABOUT 14 -▁ABOUT 15 -▁ABOVE 16 -▁ABRAHAM 17 -▁ABROAD 18 -▁ABSENCE 19 -▁ABSENT 20 -▁ABSOLUTE 21 -▁ABSOLUTELY 22 -▁ABSORB 23 -▁ABSTRACT 24 -▁ABSURD 25 -▁ABUNDANCE 26 -▁ABUNDANT 27 -▁ABUSE 28 -AC 29 -▁ACCENT 30 -▁ACCEPT 31 -▁ACCEPTED 32 -▁ACCESS 33 -▁ACCIDENT 34 -▁ACCOMPANIED 35 -▁ACCOMPANY 36 -▁ACCOMPLISH 37 -▁ACCORD 38 -▁ACCORDING 39 -▁ACCORDINGLY 40 -▁ACCOUNT 41 -▁ACCUMULAT 42 -▁ACCURATE 43 -▁ACCUSE 44 -▁ACCUSTOMED 45 -ACH 46 -▁ACHIEVE 47 -ACIOUS 48 -▁ACKNOWLEDG 49 -▁ACQUAINTANCE 50 -▁ACQUAINTED 51 -▁ACQUIRED 52 -▁ACROSS 53 -▁ACT 54 -▁ACTION 55 -▁ACTIVE 56 -▁ACTIVITY 57 -▁ACTUAL 58 -▁ACTUALLY 59 -AD 60 -▁ADAM 61 -▁ADAPT 62 -▁ADD 63 -▁ADDED 64 -▁ADDITION 65 -▁ADDRESS 66 -▁ADDRESSED 67 -ADE 68 -▁ADHERE 69 -▁ADJUST 70 -▁ADMINISTER 71 -▁ADMINISTRATION 72 -▁ADMIRABLE 73 -▁ADMIRAL 74 -▁ADMIRATION 75 -▁ADMIRE 76 -▁ADMIRING 77 -▁ADMIT 78 -▁ADMITTED 79 -▁ADOPT 80 -▁ADORN 81 -▁ADVANCE 82 -▁ADVANCED 83 -▁ADVANCING 84 -▁ADVANTAGE 85 -▁ADVENTURE 86 -▁ADVERTISE 87 -▁ADVICE 88 -▁ADVISE 89 -▁AFFAIR 90 -▁AFFAIRS 91 -▁AFFECT 92 -▁AFFECTED 93 -▁AFFECTION 94 -▁AFFECTIONATE 95 -▁AFFIRM 96 -▁AFFLICT 97 -▁AFFORD 98 -▁AFRAID 99 -▁AFRICA 100 -▁AFTER 101 -▁AFTERNOON 102 -▁AFTERWARD 103 -▁AFTERWARDS 104 -AG 105 -▁AGAIN 106 -▁AGAINST 107 -AGE 108 -▁AGE 109 -▁AGENT 110 -▁AGITATED 111 -▁AGITATION 112 -▁AGO 113 -▁AGONY 114 -▁AGREE 115 -▁AGREEABLE 116 -▁AGREED 117 -AH 118 -▁AH 119 -▁AHEAD 120 -▁AID 121 -▁AIM 122 -▁AIR 123 -AK 124 -AL 125 -▁AL 126 -▁ALADDIN 127 -▁ALARM 128 -▁ALAS 129 -▁ALBERT 130 -▁ALEXANDER 131 -▁ALICE 132 -▁ALIVE 133 -▁ALL 134 -▁ALLOW 135 -▁ALLOWED 136 -ALLY 137 -▁ALMOST 138 -▁ALONE 139 -▁ALONG 140 -▁ALOUD 141 -▁ALREADY 142 -▁ALSO 143 -▁ALTAR 144 -▁ALTER 145 -▁ALTHOUGH 146 -▁ALTOGETHER 147 -▁ALWAYS 148 -▁ALYOSHA 149 -AM 150 -▁AM 151 -▁AMA 152 -▁AMBASSADOR 153 -▁AMBITION 154 -▁AMBITIOUS 155 -▁AMELIA 156 -▁AMERICA 157 -▁AMERICAN 158 -▁AMIABLE 159 -▁AMID 160 -▁AMONG 161 -▁AMOUNT 162 -▁AMUSEMENT 163 -AN 164 -▁AN 165 -ANCE 166 -▁ANCHOR 167 -▁ANCIENT 168 -AND 169 -▁AND 170 -▁ANDREW 171 -ANG 172 -▁ANGEL 173 -▁ANGER 174 -▁ANGLE 175 -▁ANGRILY 176 -▁ANGRY 177 -▁ANGUISH 178 -▁ANIMAL 179 -▁ANIMALS 180 -▁ANNA 181 -▁ANNE 182 -▁ANNOUNC 183 -▁ANNOUNCED 184 -▁ANOTHER 185 -ANS 186 -▁ANSWER 187 -▁ANSWERED 188 -ANT 189 -▁ANTHEA 190 -▁ANTI 191 -▁ANTICIPATE 192 
-▁ANXIETY 193 -▁ANXIOUS 194 -▁ANY 195 -▁ANYBODY 196 -▁ANYHOW 197 -▁ANYONE 198 -▁ANYTHING 199 -▁ANYWHERE 200 -AP 201 -▁APART 202 -▁APARTMENT 203 -▁APOLOGI 204 -▁APPARATUS 205 -▁APPARENT 206 -▁APPARENTLY 207 -▁APPEAL 208 -▁APPEAR 209 -▁APPEARANCE 210 -▁APPEARED 211 -▁APPETITE 212 -▁APPLE 213 -▁APPLICATION 214 -▁APPLIED 215 -▁APPLY 216 -▁APPOINTED 217 -▁APPOINTMENT 218 -▁APPRECIATE 219 -▁APPREHEND 220 -▁APPREHENSION 221 -▁APPROACH 222 -▁APPROACHED 223 -▁APPROACHING 224 -▁APPROPRIATE 225 -▁APPROVE 226 -▁APRIL 227 -▁APRON 228 -▁APT 229 -AR 230 -▁AR 231 -▁ARAB 232 -▁ARAMIS 233 -▁ARCH 234 -▁ARCHITECT 235 -ARD 236 -▁ARDENT 237 -▁ARE 238 -▁ARGUE 239 -▁ARGUMENT 240 -ARIES 241 -▁ARISE 242 -▁ARISTOCRAT 243 -▁ARM 244 -▁ARMS 245 -▁ARMY 246 -▁AROSE 247 -▁AROUND 248 -▁ARRANGED 249 -▁ARRANGEMENT 250 -▁ARRAY 251 -▁ARREST 252 -▁ARRIVAL 253 -▁ARRIVE 254 -▁ARRIVED 255 -▁ARRIVING 256 -▁ARROW 257 -▁ART 258 -ARTAGNAN 259 -▁ARTHUR 260 -▁ARTICLE 261 -▁ARTIFICIAL 262 -▁ARTIST 263 -ARY 264 -AS 265 -▁AS 266 -▁ASCEND 267 -▁ASCERTAIN 268 -▁ASHAMED 269 -▁ASHES 270 -▁ASHORE 271 -▁ASIDE 272 -▁ASK 273 -▁ASKED 274 -▁ASKING 275 -▁ASLEEP 276 -▁ASPECT 277 -▁ASSASSIN 278 -▁ASSAULT 279 -▁ASSEMBLED 280 -▁ASSEMBLY 281 -▁ASSERT 282 -▁ASSIST 283 -▁ASSISTANCE 284 -▁ASSISTANT 285 -▁ASSOCIATE 286 -▁ASSOCIATION 287 -▁ASSUME 288 -▁ASSUMED 289 -▁ASSURANCE 290 -▁ASSURE 291 -▁ASSURED 292 -▁ASTONISHED 293 -▁ASTONISHMENT 294 -AT 295 -▁AT 296 -ATE 297 -ATED 298 -ATH 299 -ATING 300 -ATION 301 -ATIONS 302 -ATIVE 303 -▁ATLANTIC 304 -▁ATMOSPHERE 305 -ATOR 306 -ATORY 307 -▁ATTACHED 308 -▁ATTACHMENT 309 -▁ATTACK 310 -▁ATTAIN 311 -▁ATTEMPT 312 -▁ATTEND 313 -▁ATTENDANT 314 -▁ATTENTION 315 -▁ATTENTIVE 316 -▁ATTITUDE 317 -▁ATTORNEY 318 -▁ATTRACT 319 -▁ATTRIBUTE 320 -AU 321 -▁AUDIENCE 322 -▁AUGUST 323 -▁AUNT 324 -▁AUTHOR 325 -▁AUTHORITIES 326 -▁AUTHORITY 327 -▁AUTUMN 328 -AV 329 -▁AVAIL 330 -▁AVENUE 331 -▁AVERAGE 332 -▁AVOID 333 -AW 334 -▁AWAIT 335 -▁AWAKE 336 -▁AWAKENED 337 -▁AWARE 338 -▁AWAY 339 -▁AWFUL 340 -▁AWHILE 341 -▁AWKWARD 342 -▁AWOKE 343 -▁AXE 344 -AY 345 -B 346 -▁B 347 -BA 348 -▁BA 349 -▁BABY 350 -▁BACHELOR 351 -▁BACK 352 -▁BACKGROUND 353 -▁BACKWARD 354 -▁BAD 355 -▁BADE 356 -▁BAG 357 -▁BAKE 358 -▁BAL 359 -▁BALANCE 360 -▁BALL 361 -▁BALLOON 362 -▁BAN 363 -▁BAND 364 -▁BANK 365 -▁BAPTI 366 -▁BAR 367 -▁BARBAR 368 -▁BARE 369 -▁BARGAIN 370 -▁BARK 371 -▁BARON 372 -▁BARREL 373 -▁BARRICADE 374 -▁BARRIER 375 -▁BASE 376 -▁BASIN 377 -▁BASKET 378 -▁BATH 379 -▁BATTER 380 -▁BATTLE 381 -▁BAY 382 -BBE 383 -BBLE 384 -BE 385 -▁BE 386 -▁BEAR 387 -▁BEARD 388 -▁BEARING 389 -▁BEAST 390 -▁BEAT 391 -▁BEATEN 392 -▁BEAUTIFUL 393 -▁BEAUTY 394 -▁BECAME 395 -▁BECAUSE 396 -▁BECOME 397 -▁BECOMING 398 -▁BED 399 -▁BEDROOM 400 -▁BEEN 401 -▁BEFORE 402 -▁BEG 403 -▁BEGAN 404 -▁BEGGAR 405 -▁BEGGED 406 -▁BEGIN 407 -▁BEGINNING 408 -▁BEGUN 409 -▁BEHALF 410 -▁BEHAVE 411 -▁BEHAVIOUR 412 -▁BEHELD 413 -▁BEHIND 414 -▁BEHOLD 415 -▁BEING 416 -BEL 417 -▁BELIEF 418 -▁BELIEVE 419 -▁BELIEVED 420 -▁BELIEVING 421 -▁BELL 422 -▁BELONG 423 -▁BELOVED 424 -▁BELOW 425 -▁BENCH 426 -▁BENDING 427 -▁BENEATH 428 -▁BENEFIT 429 -▁BENT 430 -BER 431 -▁BERNARD 432 -▁BESIDE 433 -▁BESIDES 434 -▁BEST 435 -▁BESTOW 436 -▁BETRAY 437 -▁BETTER 438 -▁BETWEEN 439 -▁BEWILDERED 440 -▁BEYOND 441 -BI 442 -▁BI 443 -▁BIBLE 444 -▁BID 445 -▁BIG 446 -▁BILL 447 -▁BILLY 448 -▁BIND 449 -▁BIRD 450 -▁BIRDS 451 -▁BIRTH 452 -▁BISHOP 453 -▁BIT 454 -▁BITTER 455 -▁BLA 456 -▁BLACK 457 -▁BLADE 458 -▁BLAME 459 -▁BLANK 460 -▁BLANKET 461 -BLE 462 -▁BLESS 463 -▁BLEW 464 -▁BLIND 465 -▁BLISS 466 -▁BLOCK 467 -▁BLOOD 468 -▁BLOOM 469 -▁BLOSSOM 470 -▁BLOW 
471 -▁BLU 472 -▁BLUE 473 -▁BLUSH 474 -BO 475 -▁BO 476 -BOARD 477 -▁BOARD 478 -▁BOAST 479 -▁BOAT 480 -▁BOB 481 -▁BODIES 482 -▁BODY 483 -▁BOIL 484 -▁BOLD 485 -▁BOLT 486 -▁BON 487 -▁BOND 488 -▁BONNET 489 -▁BOOK 490 -▁BOOKS 491 -▁BOOT 492 -▁BOOTS 493 -▁BORDER 494 -▁BORE 495 -▁BORN 496 -▁BORNE 497 -BOROUGH 498 -▁BORROW 499 -▁BOSOM 500 -▁BOSTON 501 -▁BOTH 502 -▁BOTTLE 503 -▁BOTTOM 504 -▁BOUGHT 505 -▁BOUND 506 -▁BOW 507 -▁BOWED 508 -▁BOWL 509 -▁BOX 510 -▁BOY 511 -▁BOYS 512 -BRA 513 -▁BRA 514 -▁BRAIN 515 -▁BRANCH 516 -▁BRANCHES 517 -▁BRAND 518 -▁BRAVE 519 -▁BREAD 520 -▁BREAK 521 -▁BREAKFAST 522 -▁BREAKING 523 -▁BREAST 524 -▁BREATH 525 -▁BREE 526 -▁BRETHREN 527 -▁BRETON 528 -▁BRI 529 -▁BRICK 530 -▁BRIDE 531 -▁BRIDGE 532 -▁BRIDLE 533 -▁BRIEF 534 -▁BRIG 535 -▁BRIGHT 536 -▁BRILLIANT 537 -▁BRING 538 -▁BRINGING 539 -▁BRISK 540 -▁BRITAIN 541 -▁BRITISH 542 -▁BRO 543 -▁BROAD 544 -▁BROKE 545 -▁BROKEN 546 -▁BROOD 547 -▁BROOK 548 -▁BROTHER 549 -▁BROTHERS 550 -▁BROUGHT 551 -▁BROW 552 -▁BROWN 553 -▁BRUCE 554 -▁BRUSH 555 -▁BRUTAL 556 -▁BRUTE 557 -BU 558 -▁BU 559 -▁BUCK 560 -▁BUILD 561 -▁BUILDING 562 -▁BUILT 563 -▁BULK 564 -▁BULL 565 -▁BULLET 566 -▁BUNCH 567 -▁BUNDLE 568 -▁BUR 569 -▁BURDEN 570 -BURG 571 -▁BURIED 572 -BURN 573 -▁BURN 574 -▁BURNING 575 -▁BURST 576 -BURY 577 -▁BUSH 578 -▁BUSHES 579 -▁BUSINESS 580 -▁BUSY 581 -▁BUT 582 -▁BUTTER 583 -▁BUTTERFLY 584 -▁BUY 585 -BY 586 -▁BY 587 -C 588 -▁C 589 -CA 590 -▁CA 591 -▁CAB 592 -▁CABIN 593 -▁CAESAR 594 -▁CAKE 595 -▁CAL 596 -▁CALAMIT 597 -▁CALCULATED 598 -▁CALIFORNIA 599 -▁CALL 600 -▁CALLED 601 -▁CALLING 602 -▁CALM 603 -▁CAME 604 -▁CAMP 605 -▁CAMPAIGN 606 -▁CAN 607 -▁CANDID 608 -▁CANDLE 609 -▁CANNON 610 -▁CANNOT 611 -▁CANOE 612 -▁CANVAS 613 -▁CAP 614 -▁CAPABLE 615 -▁CAPACITY 616 -▁CAPITAL 617 -▁CAPTAIN 618 -▁CAPTURE 619 -CAR 620 -▁CAR 621 -▁CARD 622 -▁CARDINAL 623 -▁CARE 624 -▁CAREFUL 625 -▁CAREFULLY 626 -▁CARELESS 627 -▁CARLYLE 628 -▁CARPENTER 629 -▁CARPET 630 -▁CARR 631 -▁CARRIAGE 632 -▁CARRIED 633 -▁CARRY 634 -▁CARRYING 635 -▁CART 636 -▁CARVED 637 -▁CASE 638 -CAST 639 -▁CAST 640 -▁CASTLE 641 -▁CASUAL 642 -▁CAT 643 -▁CATCH 644 -▁CATHEDRAL 645 -▁CATHERINE 646 -▁CATHOLIC 647 -▁CATTLE 648 -▁CAUGHT 649 -▁CAUSE 650 -▁CAUSED 651 -▁CAUTION 652 -▁CAVALRY 653 -▁CAVE 654 -CE 655 -▁CE 656 -▁CEASE 657 -▁CEASED 658 -▁CEILING 659 -▁CELEBRAT 660 -▁CELL 661 -▁CELLAR 662 -CENT 663 -▁CENT 664 -▁CENTER 665 -▁CENTRAL 666 -▁CENTRE 667 -▁CENTURIES 668 -▁CENTURY 669 -▁CEREMONY 670 -▁CERTAIN 671 -▁CERTAINLY 672 -▁CETERA 673 -CH 674 -▁CH 675 -CHA 676 -▁CHA 677 -▁CHAIN 678 -▁CHAIR 679 -▁CHALLENGE 680 -▁CHAMBER 681 -▁CHAMPION 682 -▁CHANCE 683 -▁CHANCELLOR 684 -▁CHANGE 685 -▁CHANGED 686 -▁CHANGING 687 -▁CHANNEL 688 -▁CHAP 689 -▁CHAPTER 690 -▁CHAR 691 -▁CHARACTER 692 -▁CHARACTERISTIC 693 -▁CHARGE 694 -▁CHARIOT 695 -▁CHARLES 696 -▁CHARLOTTE 697 -▁CHARM 698 -▁CHARMING 699 -▁CHASE 700 -▁CHATEAU 701 -▁CHATTER 702 -▁CHAUVELIN 703 -CHE 704 -▁CHE 705 -▁CHEAP 706 -▁CHECK 707 -CHED 708 -▁CHEEK 709 -▁CHEEKS 710 -▁CHEER 711 -▁CHEERFUL 712 -▁CHEESE 713 -▁CHERISH 714 -▁CHEST 715 -CHI 716 -▁CHI 717 -▁CHICAGO 718 -▁CHICKEN 719 -▁CHIEF 720 -▁CHILD 721 -▁CHILDHOOD 722 -▁CHILDREN 723 -▁CHILL 724 -▁CHIMNEY 725 -▁CHIN 726 -▁CHINA 727 -▁CHINESE 728 -CHO 729 -▁CHOICE 730 -▁CHOOSE 731 -▁CHOP 732 -▁CHORUS 733 -▁CHOSE 734 -▁CHOSEN 735 -▁CHRIS 736 -▁CHRIST 737 -▁CHRISTIAN 738 -▁CHRISTMAS 739 -▁CHU 740 -▁CHUCK 741 -▁CHURCH 742 -CI 743 -▁CIGAR 744 -▁CIRCLE 745 -▁CIRCULAR 746 -▁CIRCULAT 747 -▁CIRCUMSTANCE 748 -▁CIRCUMSTANCES 749 -▁CITI 750 -▁CITIES 751 -▁CITY 752 -▁CIVIL 753 -▁CIVILI 754 -CK 755 -▁CL 756 -▁CLAIM 757 
-▁CLAIR 758 -▁CLAPP 759 -▁CLARA 760 -▁CLASP 761 -▁CLASS 762 -▁CLASSES 763 -▁CLAW 764 -▁CLAY 765 -▁CLEAN 766 -▁CLEAR 767 -▁CLEARLY 768 -▁CLERGY 769 -▁CLERK 770 -▁CLEVER 771 -▁CLIFF 772 -▁CLIMATE 773 -▁CLIMB 774 -▁CLO 775 -▁CLOAK 776 -CLOCK 777 -▁CLOCK 778 -CLOSE 779 -▁CLOSE 780 -▁CLOSED 781 -▁CLOSELY 782 -▁CLOTH 783 -▁CLOTHES 784 -▁CLOUD 785 -▁CLOUDS 786 -▁CLUB 787 -▁CLUSTER 788 -▁CLUTCH 789 -CO 790 -▁CO 791 -▁COACH 792 -▁COAL 793 -▁COARSE 794 -▁COAST 795 -▁COAT 796 -▁COCK 797 -▁COFFEE 798 -▁COFFIN 799 -▁COIN 800 -▁COL 801 -▁COLD 802 -▁COLLAR 803 -▁COLLECT 804 -▁COLLEGE 805 -▁COLONEL 806 -▁COLONI 807 -▁COLONY 808 -▁COLOR 809 -▁COLOUR 810 -▁COLUMN 811 -▁COM 812 -COMB 813 -▁COMB 814 -▁COMBAT 815 -▁COMBINATION 816 -▁COMBINED 817 -▁COME 818 -▁COMES 819 -▁COMFORT 820 -▁COMFORTABLE 821 -▁COMING 822 -▁COMMAND 823 -▁COMMENCED 824 -▁COMMEND 825 -▁COMMENT 826 -▁COMMERCE 827 -▁COMMERCIAL 828 -▁COMMISSION 829 -▁COMMIT 830 -▁COMMITTED 831 -▁COMMITTEE 832 -▁COMMON 833 -▁COMMUN 834 -▁COMMUNICAT 835 -▁COMMUNICATION 836 -▁COMMUNITY 837 -▁COMP 838 -▁COMPANION 839 -▁COMPANIONS 840 -▁COMPANY 841 -▁COMPARATIVELY 842 -▁COMPARE 843 -▁COMPARISON 844 -▁COMPASS 845 -▁COMPELLED 846 -▁COMPLAIN 847 -▁COMPLETE 848 -▁COMPLETELY 849 -▁COMPLEX 850 -▁COMPLIMENT 851 -▁COMPOSED 852 -▁COMPOSITION 853 -▁COMPREHEND 854 -▁COMRADE 855 -CON 856 -▁CON 857 -▁CONCEAL 858 -▁CONCEIVE 859 -▁CONCENTRAT 860 -▁CONCEPTION 861 -▁CONCERN 862 -▁CONCERNED 863 -▁CONCERNING 864 -▁CONCERT 865 -▁CONCLUD 866 -▁CONCLUDED 867 -▁CONCLUSION 868 -▁CONDEMN 869 -▁CONDITION 870 -▁CONDITIONS 871 -▁CONDUCT 872 -▁CONF 873 -▁CONFESS 874 -▁CONFIDE 875 -▁CONFIDENCE 876 -▁CONFIDENT 877 -▁CONFINED 878 -▁CONFIRM 879 -▁CONFLICT 880 -▁CONFOUND 881 -▁CONFRONT 882 -▁CONFUSED 883 -▁CONFUSION 884 -▁CONGRESS 885 -▁CONJECTURE 886 -▁CONNECTED 887 -▁CONNECTION 888 -▁CONQUER 889 -▁CONQUEST 890 -▁CONSCIENCE 891 -▁CONSCIOUS 892 -▁CONSCIOUSNESS 893 -▁CONSEIL 894 -▁CONSENT 895 -▁CONSEQUENCE 896 -▁CONSEQUENTLY 897 -▁CONSIDER 898 -▁CONSIDERABLE 899 -▁CONSIDERATION 900 -▁CONSIDERED 901 -▁CONSIST 902 -▁CONSOLATION 903 -▁CONSPICUOUS 904 -▁CONSTANCE 905 -▁CONSTANT 906 -▁CONSTANTLY 907 -▁CONSTITUTE 908 -▁CONSTITUTION 909 -▁CONSTRUCT 910 -▁CONSULT 911 -▁CONSUM 912 -▁CONTACT 913 -▁CONTAIN 914 -▁CONTEMPLATE 915 -▁CONTEMPT 916 -▁CONTEND 917 -▁CONTENT 918 -▁CONTEST 919 -▁CONTINENT 920 -▁CONTINUAL 921 -▁CONTINUALLY 922 -▁CONTINUE 923 -▁CONTINUED 924 -▁CONTRACT 925 -▁CONTRADICT 926 -▁CONTRARY 927 -▁CONTRAST 928 -▁CONTRIBUTE 929 -▁CONTROL 930 -▁CONVENIENT 931 -▁CONVENT 932 -▁CONVENTION 933 -▁CONVERSATION 934 -▁CONVERSE 935 -▁CONVERT 936 -▁CONVEY 937 -▁CONVICT 938 -▁CONVICTION 939 -▁CONVINCE 940 -▁CONVINCED 941 -▁CONVULS 942 -▁COOK 943 -▁COOL 944 -▁COPIE 945 -▁COPPER 946 -▁COPY 947 -▁COR 948 -▁CORDIAL 949 -▁CORN 950 -▁CORNER 951 -▁CORPORAL 952 -▁CORPSE 953 -▁CORRECT 954 -▁CORRESPOND 955 -▁CORRIDOR 956 -▁CORRUPT 957 -▁COSETTE 958 -▁COST 959 -▁COSTUME 960 -▁COTTAGE 961 -▁COTTON 962 -▁COUCH 963 -▁COULD 964 -▁COULDN 965 -▁COUNCIL 966 -▁COUNSEL 967 -▁COUNT 968 -▁COUNTENANCE 969 -▁COUNTER 970 -▁COUNTESS 971 -▁COUNTRIES 972 -▁COUNTRY 973 -▁COUPLE 974 -▁COURAGE 975 -▁COURSE 976 -▁COURT 977 -▁COUSIN 978 -▁COVER 979 -▁COVERED 980 -▁COW 981 -▁COWARD 982 -▁CRA 983 -▁CRACK 984 -▁CRAFT 985 -▁CRAWL 986 -▁CRE 987 -▁CREAM 988 -▁CREATED 989 -▁CREATURE 990 -▁CREATURES 991 -▁CREDIT 992 -▁CREEK 993 -▁CREEP 994 -▁CREP 995 -▁CREW 996 -▁CRIED 997 -▁CRIES 998 -▁CRIME 999 -▁CRIMINAL 1000 -▁CRIMSON 1001 -▁CRISTO 1002 -▁CRITIC 1003 -▁CRO 1004 -▁CROSS 1005 -▁CROSSED 1006 -▁CROW 1007 -▁CROWD 1008 -▁CROWN 1009 -▁CRU 1010 
-▁CRUEL 1011 -▁CRUMBS 1012 -▁CRUSH 1013 -▁CRY 1014 -▁CRYING 1015 -▁CRYSTAL 1016 -CTOR 1017 -CU 1018 -▁CU 1019 -▁CULTIVATE 1020 -▁CULTURE 1021 -CUM 1022 -▁CUNNING 1023 -▁CUP 1024 -▁CUR 1025 -▁CURIOSITY 1026 -▁CURIOUS 1027 -▁CURL 1028 -▁CURRENT 1029 -▁CURSE 1030 -▁CURTAIN 1031 -▁CUSHION 1032 -▁CUSTOM 1033 -▁CUT 1034 -▁CUTTING 1035 -CY 1036 -▁CYRIL 1037 -D 1038 -▁D 1039 -DA 1040 -▁DA 1041 -▁DAGGER 1042 -▁DAILY 1043 -▁DAMAGE 1044 -▁DAMN 1045 -▁DAMP 1046 -▁DAMSEL 1047 -▁DAN 1048 -▁DANCE 1049 -▁DANCING 1050 -▁DANGER 1051 -▁DANGEROUS 1052 -▁DANGLARS 1053 -▁DANIEL 1054 -▁DAR 1055 -▁DARE 1056 -▁DARED 1057 -▁DARK 1058 -▁DARKNESS 1059 -▁DARLING 1060 -▁DASH 1061 -▁DATE 1062 -▁DAUGHTER 1063 -▁DAVID 1064 -▁DAWN 1065 -▁DAY 1066 -▁DAYS 1067 -DDING 1068 -DDLE 1069 -DE 1070 -▁DE 1071 -▁DEAD 1072 -▁DEAF 1073 -▁DEAL 1074 -▁DEAR 1075 -▁DEAREST 1076 -▁DEATH 1077 -▁DEBATE 1078 -▁DEBT 1079 -▁DECAY 1080 -▁DECEIVE 1081 -▁DECEMBER 1082 -▁DECIDE 1083 -▁DECIDED 1084 -▁DECISION 1085 -▁DECK 1086 -▁DECLARE 1087 -▁DECLARED 1088 -▁DECLINE 1089 -▁DECORAT 1090 -▁DECREE 1091 -▁DEEP 1092 -▁DEEPLY 1093 -▁DEFEAT 1094 -▁DEFECT 1095 -▁DEFENCE 1096 -▁DEFEND 1097 -▁DEFENSE 1098 -▁DEFI 1099 -▁DEFINITE 1100 -▁DEGREE 1101 -▁DELAY 1102 -▁DELIBERATE 1103 -▁DELICACY 1104 -▁DELICATE 1105 -▁DELICIOUS 1106 -▁DELIGHT 1107 -▁DELIGHTED 1108 -▁DELIGHTFUL 1109 -▁DELIVER 1110 -▁DEMAND 1111 -▁DEMANDED 1112 -▁DEMOCRATIC 1113 -▁DEMON 1114 -DEN 1115 -▁DEN 1116 -▁DENIED 1117 -▁DENY 1118 -▁DEPART 1119 -▁DEPARTMENT 1120 -▁DEPARTURE 1121 -▁DEPEND 1122 -▁DEPOSIT 1123 -▁DEPRESS 1124 -▁DEPRIVED 1125 -▁DEPTH 1126 -DER 1127 -▁DERIVED 1128 -▁DESCEND 1129 -▁DESCENDED 1130 -▁DESCENT 1131 -▁DESCRIBE 1132 -▁DESCRIBED 1133 -▁DESCRIPTION 1134 -▁DESERT 1135 -▁DESERVE 1136 -▁DESIGN 1137 -▁DESIRABLE 1138 -▁DESIRE 1139 -▁DESIRED 1140 -▁DESIROUS 1141 -▁DESK 1142 -▁DESOLATE 1143 -▁DESPAIR 1144 -▁DESPATCH 1145 -▁DESPERATE 1146 -▁DESPISE 1147 -▁DESPITE 1148 -▁DESTINED 1149 -▁DESTINY 1150 -▁DESTROY 1151 -▁DESTROYED 1152 -▁DESTRUCTION 1153 -▁DETAIL 1154 -▁DETAIN 1155 -▁DETECT 1156 -▁DETECTIVE 1157 -▁DETERMIN 1158 -▁DETERMINATION 1159 -▁DETERMINED 1160 -▁DEVELOP 1161 -▁DEVELOPMENT 1162 -▁DEVICE 1163 -▁DEVIL 1164 -▁DEVOTED 1165 -▁DEVOTION 1166 -▁DEVOUR 1167 -▁DEXTER 1168 -▁DI 1169 -▁DIAMOND 1170 -▁DIANA 1171 -▁DICK 1172 -▁DID 1173 -▁DIDN 1174 -▁DIE 1175 -▁DIED 1176 -▁DIFFER 1177 -▁DIFFERENCE 1178 -▁DIFFERENT 1179 -▁DIFFICULT 1180 -▁DIFFICULTIES 1181 -▁DIFFICULTY 1182 -▁DIG 1183 -▁DIGNIFIED 1184 -▁DIGNITY 1185 -▁DIM 1186 -▁DIMINISH 1187 -▁DIN 1188 -▁DINNER 1189 -▁DIRECT 1190 -▁DIRECTED 1191 -▁DIRECTION 1192 -▁DIRECTLY 1193 -▁DIRTY 1194 -▁DIS 1195 -▁DISAGREEABLE 1196 -▁DISAPPEAR 1197 -▁DISAPPEARED 1198 -▁DISAPPOINT 1199 -▁DISAPPOINTMENT 1200 -▁DISC 1201 -▁DISCERN 1202 -▁DISCHARGE 1203 -▁DISCIPLE 1204 -▁DISCIPLINE 1205 -▁DISCOURAGE 1206 -▁DISCOURSE 1207 -▁DISCOVER 1208 -▁DISCOVERED 1209 -▁DISCOVERY 1210 -▁DISCUSS 1211 -▁DISCUSSION 1212 -▁DISDAIN 1213 -▁DISEASE 1214 -▁DISGRACE 1215 -▁DISGUISE 1216 -▁DISGUST 1217 -▁DISH 1218 -▁DISLIKE 1219 -▁DISMAL 1220 -▁DISMAY 1221 -▁DISMISS 1222 -▁DISORDER 1223 -▁DISPLAY 1224 -▁DISPOSED 1225 -▁DISPOSITION 1226 -▁DISPUTE 1227 -▁DISSOLV 1228 -▁DISTANCE 1229 -▁DISTANT 1230 -▁DISTINCT 1231 -▁DISTINCTION 1232 -▁DISTINGUISH 1233 -▁DISTINGUISHED 1234 -▁DISTRACT 1235 -▁DISTRESS 1236 -▁DISTRIBUT 1237 -▁DISTRICT 1238 -▁DISTRUST 1239 -▁DISTURB 1240 -▁DIV 1241 -▁DIVERS 1242 -▁DIVIDE 1243 -▁DIVIDED 1244 -▁DIVINE 1245 -▁DIVISION 1246 -▁DIXON 1247 -DO 1248 -▁DO 1249 -▁DOCTOR 1250 -▁DOCTRINE 1251 -▁DOCUMENT 1252 -▁DOES 1253 -▁DOESN 1254 -▁DOG 1255 -▁DOGS 1256 
-▁DOING 1257 -▁DOLLARS 1258 -DOLPH 1259 -▁DOMESTIC 1260 -▁DOMINION 1261 -▁DON 1262 -▁DONE 1263 -▁DONKEY 1264 -▁DOOR 1265 -▁DOORS 1266 -▁DOORWAY 1267 -▁DOROTHY 1268 -▁DOUBLE 1269 -▁DOUBT 1270 -▁DOUBTFUL 1271 -▁DOUBTLESS 1272 -▁DOWN 1273 -▁DOWNSTAIRS 1274 -▁DRAG 1275 -▁DRAGG 1276 -▁DRAGON 1277 -▁DRAIN 1278 -▁DRAKE 1279 -▁DRAMA 1280 -▁DRANK 1281 -▁DRAP 1282 -▁DRAUGHT 1283 -▁DRAW 1284 -▁DRAWING 1285 -▁DRAWN 1286 -▁DREAD 1287 -▁DREADFUL 1288 -▁DREAM 1289 -▁DREARY 1290 -▁DRESS 1291 -▁DRESSED 1292 -▁DREW 1293 -▁DRI 1294 -▁DRIFT 1295 -▁DRINK 1296 -▁DRIVE 1297 -▁DRIVEN 1298 -▁DRIVER 1299 -▁DRIVING 1300 -▁DROOP 1301 -▁DROP 1302 -▁DROPPED 1303 -▁DROPPING 1304 -▁DROVE 1305 -▁DROWN 1306 -▁DRUG 1307 -▁DRUM 1308 -▁DRUNK 1309 -▁DRY 1310 -▁DU 1311 -▁DUCHESS 1312 -▁DUCK 1313 -▁DUE 1314 -▁DUKE 1315 -▁DULL 1316 -▁DUMB 1317 -▁DUN 1318 -▁DUNBAR 1319 -▁DUR 1320 -▁DUSK 1321 -▁DUST 1322 -▁DUTCH 1323 -▁DUTIES 1324 -▁DUTY 1325 -▁DWARF 1326 -▁DWELL 1327 -▁DWELT 1328 -DY 1329 -▁DYING 1330 -E 1331 -▁E 1332 -EA 1333 -▁EACH 1334 -▁EAGER 1335 -▁EAGERLY 1336 -▁EAGLE 1337 -▁EAR 1338 -▁EARL 1339 -▁EARLIER 1340 -▁EARLIEST 1341 -▁EARLY 1342 -▁EARN 1343 -▁EARNEST 1344 -▁EARS 1345 -▁EARTH 1346 -▁EASE 1347 -▁EASIER 1348 -▁EASILY 1349 -▁EAST 1350 -▁EASTERN 1351 -▁EASY 1352 -▁EAT 1353 -▁EATEN 1354 -▁EATING 1355 -▁ECHO 1356 -ED 1357 -▁EDGE 1358 -▁EDITH 1359 -▁EDITOR 1360 -▁EDUCAT 1361 -▁EDUCATION 1362 -▁EDWARD 1363 -EF 1364 -▁EFFECT 1365 -▁EFFORT 1366 -▁EGGS 1367 -▁EGYPT 1368 -▁EGYPTIAN 1369 -▁EIGHT 1370 -▁EIGHTEEN 1371 -▁EIGHTY 1372 -▁EITHER 1373 -EL 1374 -▁EL 1375 -▁ELABORATE 1376 -▁ELBOW 1377 -▁ELDER 1378 -▁ELDEST 1379 -▁ELEANOR 1380 -▁ELECT 1381 -▁ELECTRIC 1382 -▁ELEGANT 1383 -▁ELEMENT 1384 -▁ELEPHANT 1385 -▁ELEVEN 1386 -▁ELI 1387 -ELLA 1388 -▁ELSE 1389 -▁ELSEWHERE 1390 -▁ELSIE 1391 -EM 1392 -▁EM 1393 -▁EMBARK 1394 -▁EMBARRASS 1395 -▁EMBRACE 1396 -▁EMBROIDER 1397 -EMENT 1398 -▁EMERG 1399 -▁EMILY 1400 -▁EMINENT 1401 -▁EMOTION 1402 -▁EMPEROR 1403 -▁EMPHASI 1404 -▁EMPIRE 1405 -▁EMPLOY 1406 -▁EMPLOYED 1407 -▁EMPTY 1408 -EN 1409 -▁EN 1410 -▁ENABLE 1411 -ENCE 1412 -▁ENCHANT 1413 -ENCIES 1414 -▁ENCLOS 1415 -▁ENCOUNTER 1416 -▁ENCOURAGE 1417 -▁END 1418 -▁ENDEAVOR 1419 -▁ENDEAVOUR 1420 -▁ENDURE 1421 -ENED 1422 -▁ENEMIES 1423 -▁ENEMY 1424 -▁ENERGETIC 1425 -▁ENERGY 1426 -▁ENGAGE 1427 -▁ENGAGED 1428 -▁ENGAGEMENT 1429 -▁ENGINE 1430 -▁ENGLAND 1431 -▁ENGLISH 1432 -▁ENJOY 1433 -▁ENJOYMENT 1434 -▁ENLIGHTEN 1435 -▁ENORMOUS 1436 -▁ENOUGH 1437 -ENS 1438 -▁ENSU 1439 -ENT 1440 -▁ENTER 1441 -▁ENTERED 1442 -▁ENTERPRISE 1443 -▁ENTERTAIN 1444 -▁ENTHUSIASM 1445 -▁ENTIRE 1446 -▁ENTIRELY 1447 -▁ENTITLED 1448 -▁ENTRANCE 1449 -▁ENTREAT 1450 -▁ENVELOPE 1451 -▁ENVY 1452 -▁EPI 1453 -▁EQUAL 1454 -▁EQUALLY 1455 -ER 1456 -▁ER 1457 -▁ERE 1458 -▁ERECT 1459 -▁ERRAND 1460 -▁ERROR 1461 -ERS 1462 -ES 1463 -▁ESCAPE 1464 -▁ESCAPED 1465 -▁ESCORT 1466 -▁ESPECIALLY 1467 -▁ESSENCE 1468 -▁ESSENTIAL 1469 -EST 1470 -▁ESTABLISH 1471 -▁ESTABLISHED 1472 -▁ESTABLISHMENT 1473 -▁ESTATE 1474 -▁ESTEEM 1475 -▁ESTIMATE 1476 -▁ESTRALLA 1477 -ET 1478 -▁ETERNAL 1479 -▁ETERNITY 1480 -ETH 1481 -ETT 1482 -ETTE 1483 -▁EUROPE 1484 -▁EUSTACE 1485 -EV 1486 -▁EVA 1487 -▁EVEN 1488 -▁EVENING 1489 -▁EVENTS 1490 -EVER 1491 -▁EVER 1492 -▁EVERY 1493 -▁EVERYBODY 1494 -▁EVERYONE 1495 -▁EVERYTHING 1496 -▁EVERYWHERE 1497 -▁EVIDENCE 1498 -▁EVIDENT 1499 -▁EVIDENTLY 1500 -▁EVIL 1501 -EX 1502 -▁EX 1503 -▁EXACT 1504 -▁EXACTLY 1505 -▁EXAMINATION 1506 -▁EXAMINE 1507 -▁EXAMINED 1508 -▁EXAMINING 1509 -▁EXAMPLE 1510 -▁EXCEED 1511 -▁EXCEEDINGLY 1512 -▁EXCELLENCY 1513 -▁EXCELLENT 1514 -▁EXCEPT 1515 -▁EXCEPTION 1516 
-▁EXCESS 1517 -▁EXCHANGE 1518 -▁EXCITE 1519 -▁EXCITED 1520 -▁EXCITEMENT 1521 -▁EXCITING 1522 -▁EXCLAIMED 1523 -▁EXCLAMATION 1524 -▁EXCLUSIVE 1525 -▁EXCURSION 1526 -▁EXCUSE 1527 -▁EXECUT 1528 -▁EXECUTION 1529 -▁EXERCISE 1530 -▁EXHAUST 1531 -▁EXHIBIT 1532 -▁EXIST 1533 -▁EXISTENCE 1534 -▁EXPAND 1535 -▁EXPECT 1536 -▁EXPECTATION 1537 -▁EXPECTED 1538 -▁EXPEDITION 1539 -▁EXPENSE 1540 -▁EXPERIENCE 1541 -▁EXPERIMENT 1542 -▁EXPLAIN 1543 -▁EXPLAINED 1544 -▁EXPLANATION 1545 -▁EXPLORE 1546 -▁EXPOSED 1547 -▁EXPRESS 1548 -▁EXPRESSED 1549 -▁EXPRESSION 1550 -▁EXQUISITE 1551 -▁EXTEND 1552 -▁EXTENDED 1553 -▁EXTENSIVE 1554 -▁EXTENT 1555 -▁EXTERNAL 1556 -▁EXTRA 1557 -▁EXTRACT 1558 -▁EXTRAORDINARY 1559 -▁EXTREME 1560 -▁EXTREMELY 1561 -▁EXTREMITY 1562 -EY 1563 -▁EYE 1564 -▁EYEBROWS 1565 -▁EYES 1566 -F 1567 -▁F 1568 -FA 1569 -▁FA 1570 -▁FACE 1571 -▁FACES 1572 -▁FACILIT 1573 -▁FACING 1574 -▁FACT 1575 -▁FACULTIES 1576 -▁FACULTY 1577 -▁FADED 1578 -▁FAIL 1579 -▁FAILED 1580 -▁FAILURE 1581 -▁FAINT 1582 -▁FAIR 1583 -▁FAIRLY 1584 -▁FAIRY 1585 -▁FAITH 1586 -▁FAITHFUL 1587 -FALL 1588 -▁FALL 1589 -▁FALLEN 1590 -▁FALLING 1591 -▁FALSE 1592 -▁FAME 1593 -▁FAMILIAR 1594 -▁FAMILIES 1595 -▁FAMILY 1596 -▁FAMOUS 1597 -▁FAN 1598 -▁FANCIED 1599 -▁FANCIES 1600 -▁FANCY 1601 -▁FANNY 1602 -▁FANTASTIC 1603 -▁FAR 1604 -▁FAREWELL 1605 -▁FARM 1606 -▁FARMER 1607 -▁FARTHER 1608 -▁FASHION 1609 -▁FAST 1610 -▁FASTENED 1611 -▁FAT 1612 -▁FATAL 1613 -▁FATE 1614 -▁FATHER 1615 -▁FATIGUE 1616 -▁FAULT 1617 -▁FAVOR 1618 -▁FAVORITE 1619 -▁FAVOUR 1620 -▁FAVOURITE 1621 -FE 1622 -▁FE 1623 -▁FEAR 1624 -▁FEARFUL 1625 -▁FEAST 1626 -▁FEATHER 1627 -▁FEATURE 1628 -▁FEATURES 1629 -▁FEBRUARY 1630 -▁FEDERAL 1631 -▁FEEBLE 1632 -▁FEED 1633 -▁FEEL 1634 -▁FEELING 1635 -▁FEELINGS 1636 -▁FEET 1637 -▁FELICITY 1638 -▁FELL 1639 -▁FELLOW 1640 -▁FELT 1641 -▁FEMALE 1642 -▁FEMININE 1643 -▁FENCE 1644 -FER 1645 -▁FER 1646 -▁FERTIL 1647 -▁FETCH 1648 -▁FEVER 1649 -▁FEW 1650 -FF 1651 -FI 1652 -▁FI 1653 -FIELD 1654 -▁FIELD 1655 -▁FIELDS 1656 -▁FIERCE 1657 -▁FIFTEEN 1658 -▁FIFTH 1659 -▁FIFTY 1660 -▁FIGHT 1661 -▁FIGHTING 1662 -▁FIGURE 1663 -▁FILL 1664 -▁FILLED 1665 -▁FILM 1666 -▁FIN 1667 -▁FINAL 1668 -▁FINALLY 1669 -▁FIND 1670 -▁FINDING 1671 -▁FINE 1672 -▁FINGER 1673 -▁FINGERS 1674 -▁FINISH 1675 -▁FINISHED 1676 -▁FIRE 1677 -▁FIRM 1678 -▁FIRMLY 1679 -▁FIRST 1680 -▁FISH 1681 -▁FISHERMAN 1682 -▁FIT 1683 -▁FITTED 1684 -▁FIVE 1685 -▁FIX 1686 -▁FIXED 1687 -▁FL 1688 -▁FLAG 1689 -▁FLAME 1690 -▁FLANK 1691 -▁FLASH 1692 -▁FLAT 1693 -▁FLATTER 1694 -▁FLED 1695 -▁FLEE 1696 -▁FLEET 1697 -▁FLESH 1698 -▁FLEW 1699 -▁FLICKER 1700 -▁FLIGHT 1701 -▁FLO 1702 -▁FLOCK 1703 -▁FLOOD 1704 -▁FLOOR 1705 -▁FLORENCE 1706 -▁FLOUR 1707 -▁FLOURISH 1708 -▁FLOW 1709 -▁FLOWER 1710 -▁FLOWERS 1711 -▁FLU 1712 -▁FLUTTER 1713 -▁FLY 1714 -▁FLYING 1715 -▁FO 1716 -▁FOG 1717 -FOLD 1718 -▁FOLD 1719 -FOLK 1720 -▁FOLK 1721 -▁FOLLOW 1722 -▁FOLLOWED 1723 -▁FOLLOWING 1724 -▁FOLLY 1725 -▁FOND 1726 -▁FOOD 1727 -▁FOOL 1728 -▁FOOLISH 1729 -FOOT 1730 -▁FOOT 1731 -▁FOOTSTEPS 1732 -FOR 1733 -▁FOR 1734 -▁FORBID 1735 -▁FORCE 1736 -▁FORCED 1737 -FORD 1738 -▁FORE 1739 -▁FOREHEAD 1740 -▁FOREIGN 1741 -▁FORESEE 1742 -▁FOREST 1743 -▁FORGET 1744 -▁FORGIVE 1745 -▁FORGOT 1746 -▁FORGOTTEN 1747 -FORM 1748 -▁FORM 1749 -▁FORMED 1750 -▁FORMER 1751 -▁FORMIDABLE 1752 -▁FORSAKE 1753 -▁FORTH 1754 -▁FORTNIGHT 1755 -▁FORTUNATE 1756 -▁FORTUNE 1757 -▁FORTY 1758 -▁FORWARD 1759 -▁FOUGHT 1760 -▁FOUND 1761 -▁FOUNTAIN 1762 -▁FOUR 1763 -▁FOURTEEN 1764 -▁FOURTH 1765 -▁FOWL 1766 -▁FOX 1767 -▁FRA 1768 -▁FRAGMENT 1769 -▁FRAME 1770 -▁FRANCE 1771 -▁FRANCIS 1772 -▁FRANCS 1773 -▁FRANK 
1774 -▁FRED 1775 -▁FREDERICK 1776 -▁FREE 1777 -▁FREEDOM 1778 -▁FRENCH 1779 -▁FREQUENT 1780 -▁FREQUENTLY 1781 -▁FRESH 1782 -▁FRI 1783 -▁FRIDAY 1784 -▁FRIEND 1785 -▁FRIENDLY 1786 -▁FRIENDS 1787 -▁FRIENDSHIP 1788 -▁FRIGHT 1789 -▁FRIGHTENED 1790 -▁FRIGHTFUL 1791 -▁FRINGE 1792 -▁FRO 1793 -▁FROG 1794 -▁FROM 1795 -▁FRONT 1796 -▁FROWN 1797 -▁FRUIT 1798 -FT 1799 -▁FU 1800 -FUL 1801 -▁FULFIL 1802 -▁FULL 1803 -▁FULLY 1804 -▁FUN 1805 -▁FUNCTION 1806 -▁FUNDAMENTAL 1807 -▁FUNERAL 1808 -▁FUNNY 1809 -▁FUR 1810 -▁FURIOUS 1811 -▁FURNISH 1812 -▁FURNITURE 1813 -▁FURTHER 1814 -▁FUTURE 1815 -G 1816 -▁G 1817 -GA 1818 -▁GA 1819 -▁GAIN 1820 -▁GAINED 1821 -▁GALL 1822 -▁GALLANT 1823 -▁GALLERY 1824 -▁GALLOP 1825 -▁GAME 1826 -GAN 1827 -GAR 1828 -▁GAR 1829 -▁GARDEN 1830 -▁GARRISON 1831 -▁GASP 1832 -GATE 1833 -▁GATE 1834 -▁GATHER 1835 -▁GATHERED 1836 -▁GAVE 1837 -▁GAY 1838 -GE 1839 -▁GE 1840 -GED 1841 -GEN 1842 -▁GEN 1843 -▁GENERAL 1844 -▁GENERALLY 1845 -▁GENERATION 1846 -▁GENEROSITY 1847 -▁GENEROUS 1848 -▁GENIUS 1849 -▁GENTLE 1850 -▁GENTLEMAN 1851 -▁GENTLEMEN 1852 -▁GENTLY 1853 -▁GENUINE 1854 -▁GEORGE 1855 -GER 1856 -▁GER 1857 -▁GERMAN 1858 -▁GESTURE 1859 -▁GET 1860 -▁GETTING 1861 -GG 1862 -▁GHASTL 1863 -▁GHOST 1864 -GI 1865 -▁GI 1866 -▁GIANT 1867 -▁GIFT 1868 -▁GIGANTIC 1869 -▁GIL 1870 -▁GILBERT 1871 -GING 1872 -▁GIRL 1873 -▁GIRLS 1874 -▁GIVE 1875 -▁GIVEN 1876 -▁GIVING 1877 -▁GLA 1878 -▁GLACIER 1879 -▁GLAD 1880 -▁GLANCE 1881 -▁GLANCING 1882 -▁GLASS 1883 -▁GLEAM 1884 -▁GLEN 1885 -▁GLID 1886 -▁GLIMMER 1887 -▁GLIMPSE 1888 -▁GLITTER 1889 -▁GLOBE 1890 -▁GLOOM 1891 -▁GLOOMY 1892 -▁GLORIOUS 1893 -▁GLORY 1894 -▁GLOVE 1895 -▁GLOW 1896 -GN 1897 -GO 1898 -▁GO 1899 -▁GOAT 1900 -▁GOD 1901 -▁GODDESS 1902 -▁GOES 1903 -▁GOING 1904 -▁GOLD 1905 -▁GOLDEN 1906 -▁GONE 1907 -▁GOOD 1908 -▁GORGEOUS 1909 -▁GOSPEL 1910 -▁GOSSIP 1911 -▁GOT 1912 -▁GOVERN 1913 -▁GOVERNMENT 1914 -▁GOVERNOR 1915 -▁GOWN 1916 -GRA 1917 -▁GRA 1918 -▁GRACE 1919 -▁GRACEFUL 1920 -▁GRACIOUS 1921 -▁GRADUALLY 1922 -▁GRAND 1923 -▁GRANDFATHER 1924 -▁GRANDMOTHER 1925 -▁GRANITE 1926 -▁GRANT 1927 -▁GRASP 1928 -▁GRASS 1929 -▁GRATEFUL 1930 -▁GRATIFY 1931 -▁GRATITUDE 1932 -▁GRAVE 1933 -▁GRAVITY 1934 -▁GRAY 1935 -▁GRE 1936 -▁GREAT 1937 -▁GREATER 1938 -▁GREATEST 1939 -▁GREATLY 1940 -▁GREEK 1941 -▁GREEN 1942 -▁GREW 1943 -▁GREY 1944 -▁GRI 1945 -▁GRIEF 1946 -▁GRIEVE 1947 -▁GRIM 1948 -▁GRIN 1949 -▁GRO 1950 -▁GROAN 1951 -▁GROUND 1952 -▁GROUP 1953 -▁GROVE 1954 -▁GROW 1955 -▁GROWING 1956 -▁GROWL 1957 -▁GROWN 1958 -▁GROWTH 1959 -GU 1960 -▁GU 1961 -▁GUARD 1962 -GUE 1963 -▁GUESS 1964 -▁GUEST 1965 -▁GUIDE 1966 -▁GUILT 1967 -▁GUILTY 1968 -▁GUINEA 1969 -▁GUN 1970 -H 1971 -HA 1972 -▁HA 1973 -▁HABIT 1974 -▁HABITUAL 1975 -▁HAD 1976 -▁HAIR 1977 -▁HALE 1978 -▁HALF 1979 -▁HALL 1980 -▁HALT 1981 -HAM 1982 -▁HAM 1983 -▁HAMILTON 1984 -▁HAMMER 1985 -HAN 1986 -▁HAND 1987 -▁HANDKERCHIEF 1988 -▁HANDS 1989 -▁HANDSOME 1990 -▁HANG 1991 -▁HANGING 1992 -▁HANS 1993 -▁HAPPEN 1994 -▁HAPPENED 1995 -▁HAPPIER 1996 -▁HAPPILY 1997 -▁HAPPINESS 1998 -▁HAPPY 1999 -HAR 2000 -▁HAR 2001 -▁HARBOR 2002 -▁HARBOUR 2003 -▁HARD 2004 -▁HARDLY 2005 -▁HARM 2006 -▁HARMONI 2007 -▁HARMONY 2008 -▁HARRY 2009 -▁HARSH 2010 -▁HARVEST 2011 -▁HAS 2012 -▁HASTE 2013 -▁HASTENED 2014 -▁HASTILY 2015 -▁HAT 2016 -▁HATE 2017 -▁HATH 2018 -▁HATRED 2019 -▁HAUNT 2020 -▁HAVE 2021 -▁HAVEN 2022 -▁HAVING 2023 -▁HAWK 2024 -▁HAY 2025 -HE 2026 -▁HE 2027 -HEAD 2028 -▁HEAD 2029 -▁HEADS 2030 -▁HEALTH 2031 -▁HEAP 2032 -▁HEAR 2033 -▁HEARD 2034 -▁HEARING 2035 -▁HEART 2036 -▁HEAT 2037 -▁HEAVEN 2038 -▁HEAVILY 2039 -▁HEAVY 2040 -▁HEBREW 2041 -▁HEDGE 2042 -▁HEIGHT 2043 -▁HELD 
2044 -▁HELEN 2045 -▁HELP 2046 -▁HELPLESS 2047 -HEN 2048 -▁HENCE 2049 -▁HENRY 2050 -HER 2051 -▁HER 2052 -▁HERBERT 2053 -▁HERCULES 2054 -▁HERE 2055 -▁HERO 2056 -▁HERSELF 2057 -▁HESITATE 2058 -▁HESITATED 2059 -▁HESITATION 2060 -HI 2061 -▁HI 2062 -▁HID 2063 -▁HIDDEN 2064 -▁HIDE 2065 -▁HIDEOUS 2066 -▁HIGH 2067 -▁HIGHER 2068 -▁HIGHEST 2069 -▁HILL 2070 -▁HILLS 2071 -▁HIM 2072 -▁HIMSELF 2073 -HIN 2074 -▁HIND 2075 -▁HINT 2076 -▁HIS 2077 -▁HISTORY 2078 -▁HIT 2079 -▁HITHER 2080 -▁HITHERTO 2081 -HO 2082 -▁HO 2083 -▁HOARSE 2084 -HOLD 2085 -▁HOLD 2086 -▁HOLDING 2087 -▁HOLE 2088 -▁HOLIDAY 2089 -▁HOLLAND 2090 -▁HOLLOW 2091 -▁HOLY 2092 -▁HOME 2093 -▁HONEST 2094 -▁HONEY 2095 -▁HONOR 2096 -▁HONOUR 2097 -HOOD 2098 -▁HOOK 2099 -▁HOPE 2100 -▁HOPED 2101 -▁HOPELESS 2102 -▁HOPING 2103 -▁HORI 2104 -▁HORN 2105 -▁HORRIBLE 2106 -▁HORRID 2107 -▁HORROR 2108 -▁HORSE 2109 -▁HORSES 2110 -▁HOSPITAL 2111 -▁HOST 2112 -▁HOT 2113 -▁HOTEL 2114 -▁HOUR 2115 -▁HOURS 2116 -HOUSE 2117 -▁HOUSE 2118 -▁HOUSEHOLD 2119 -▁HOUSEKEEPER 2120 -▁HOUSES 2121 -▁HOW 2122 -▁HOWEVER 2123 -HU 2124 -▁HU 2125 -▁HUGE 2126 -▁HUM 2127 -▁HUMAN 2128 -▁HUMANITY 2129 -▁HUMBLE 2130 -▁HUMOR 2131 -▁HUMOUR 2132 -▁HUNDRED 2133 -▁HUNG 2134 -▁HUNGER 2135 -▁HUNGRY 2136 -▁HUNT 2137 -▁HUNTER 2138 -▁HUNTING 2139 -▁HURRIED 2140 -▁HURRY 2141 -HURST 2142 -▁HURT 2143 -▁HUSBAND 2144 -▁HUSH 2145 -▁HUT 2146 -HY 2147 -▁HY 2148 -▁HYMN 2149 -▁HYPNOTI 2150 -I 2151 -▁I 2152 -IA 2153 -IAL 2154 -IAN 2155 -IANS 2156 -IB 2157 -IBLE 2158 -IC 2159 -ICAL 2160 -▁ICE 2161 -ICK 2162 -ID 2163 -▁IDEA 2164 -▁IDEAL 2165 -▁IDEAS 2166 -▁IDENTITY 2167 -▁IDIOT 2168 -▁IDLE 2169 -IE 2170 -IED 2171 -IER 2172 -IES 2173 -IF 2174 -▁IF 2175 -IFICATION 2176 -IFIED 2177 -IFYING 2178 -IG 2179 -IGHT 2180 -IGN 2181 -▁IGNOR 2182 -▁IGNORANCE 2183 -▁IGNORANT 2184 -IL 2185 -ILE 2186 -ILITY 2187 -▁ILL 2188 -▁ILLUSION 2189 -▁ILLUSTRAT 2190 -ILY 2191 -IM 2192 -▁IMAGE 2193 -▁IMAGINATION 2194 -▁IMAGINE 2195 -▁IMITAT 2196 -▁IMMEDIATE 2197 -▁IMMEDIATELY 2198 -▁IMMENSE 2199 -▁IMMORTAL 2200 -▁IMP 2201 -▁IMPART 2202 -▁IMPATIENCE 2203 -▁IMPATIENT 2204 -▁IMPERFECT 2205 -▁IMPERIAL 2206 -▁IMPORT 2207 -▁IMPORTANCE 2208 -▁IMPORTANT 2209 -▁IMPOSSIBLE 2210 -▁IMPRESSED 2211 -▁IMPRESSION 2212 -▁IMPROVE 2213 -▁IMPROVEMENT 2214 -▁IMPULSE 2215 -IN 2216 -▁IN 2217 -INA 2218 -▁INCAPABLE 2219 -▁INCENSE 2220 -▁INCESSANT 2221 -▁INCHES 2222 -▁INCIDENT 2223 -▁INCLINATION 2224 -▁INCLINED 2225 -▁INCLUD 2226 -▁INCOME 2227 -▁INCREASE 2228 -▁INCREASED 2229 -▁INCREASING 2230 -▁INDEED 2231 -▁INDEPENDENCE 2232 -▁INDEPENDENT 2233 -▁INDIA 2234 -▁INDIAN 2235 -▁INDIANS 2236 -▁INDIFFERENCE 2237 -▁INDIFFERENT 2238 -▁INDIGNANT 2239 -▁INDIGNATION 2240 -▁INDIVIDUAL 2241 -▁INDUCE 2242 -▁INDULGE 2243 -▁INDUSTRY 2244 -INE 2245 -INESS 2246 -▁INEVITABLE 2247 -▁INFANT 2248 -▁INFERIOR 2249 -▁INFINITE 2250 -▁INFLICT 2251 -▁INFLUENCE 2252 -▁INFORMATION 2253 -▁INFORMED 2254 -ING 2255 -▁INHABIT 2256 -▁INHABITANTS 2257 -▁INHERIT 2258 -▁INJURED 2259 -▁INJURY 2260 -▁INJUSTICE 2261 -▁INNOCENCE 2262 -▁INNOCENT 2263 -▁INNUMERABLE 2264 -▁INQUIRE 2265 -▁INQUIRED 2266 -▁INQUIRIES 2267 -▁INQUIRY 2268 -▁INSECT 2269 -▁INSIDE 2270 -▁INSIST 2271 -▁INSPECTOR 2272 -▁INSTANCE 2273 -▁INSTANT 2274 -▁INSTANTLY 2275 -▁INSTEAD 2276 -▁INSTINCT 2277 -▁INSTINCTIVELY 2278 -▁INSTITUTION 2279 -▁INSTRUCT 2280 -▁INSTRUMENT 2281 -▁INSULT 2282 -▁INTELLECT 2283 -▁INTELLECTUAL 2284 -▁INTELLIGENCE 2285 -▁INTELLIGENT 2286 -▁INTELLIGIBLE 2287 -▁INTEND 2288 -▁INTENDED 2289 -▁INTENSE 2290 -▁INTENSITY 2291 -▁INTENT 2292 -▁INTENTION 2293 -▁INTER 2294 -▁INTERCOURSE 2295 -▁INTEREST 2296 -▁INTERESTED 2297 
-▁INTERESTING 2298 -▁INTERFERE 2299 -▁INTERNAL 2300 -▁INTERPOSED 2301 -▁INTERPRET 2302 -▁INTERRUPT 2303 -▁INTERRUPTED 2304 -▁INTERVAL 2305 -▁INTERVEN 2306 -▁INTERVIEW 2307 -▁INTIMACY 2308 -▁INTIMATE 2309 -▁INTO 2310 -▁INTRODUCED 2311 -▁INVARIABLY 2312 -▁INVENT 2313 -▁INVESTIGAT 2314 -▁INVISIBLE 2315 -▁INVITATION 2316 -▁INVITED 2317 -IO 2318 -ION 2319 -IONS 2320 -IOUS 2321 -IP 2322 -IR 2323 -IRE 2324 -▁IRELAND 2325 -▁IRISH 2326 -▁IRON 2327 -▁IRRE 2328 -▁IRREGULAR 2329 -▁IRRESISTIBLE 2330 -IS 2331 -▁IS 2332 -▁ISABEL 2333 -ISH 2334 -▁ISLAND 2335 -ISM 2336 -▁ISN 2337 -ISON 2338 -▁ISSUE 2339 -IST 2340 -ISTIC 2341 -ISTS 2342 -IT 2343 -▁IT 2344 -▁ITALIAN 2345 -▁ITALY 2346 -ITCH 2347 -ITE 2348 -ITIES 2349 -▁ITS 2350 -▁ITSELF 2351 -ITUDE 2352 -ITY 2353 -IUM 2354 -IUS 2355 -IVE 2356 -J 2357 -▁J 2358 -JA 2359 -▁JA 2360 -▁JACK 2361 -▁JACKSON 2362 -▁JACOB 2363 -▁JAMES 2364 -▁JANE 2365 -▁JANUARY 2366 -▁JAPANESE 2367 -▁JAR 2368 -▁JASPER 2369 -▁JAW 2370 -▁JE 2371 -▁JEALOUS 2372 -▁JEAN 2373 -▁JERK 2374 -▁JERRY 2375 -▁JERUSALEM 2376 -▁JEST 2377 -▁JESUS 2378 -▁JEW 2379 -▁JEWEL 2380 -JI 2381 -▁JIM 2382 -▁JIMMIE 2383 -▁JIMMY 2384 -JO 2385 -▁JO 2386 -▁JOB 2387 -▁JOE 2388 -▁JOHN 2389 -▁JOHNSON 2390 -▁JOIN 2391 -▁JOINED 2392 -▁JOKE 2393 -▁JOLLY 2394 -▁JONES 2395 -▁JOSEPH 2396 -▁JOURNAL 2397 -▁JOURNEY 2398 -▁JOY 2399 -JU 2400 -▁JU 2401 -▁JUD 2402 -▁JUDGE 2403 -▁JUDGMENT 2404 -▁JUICE 2405 -▁JULIA 2406 -▁JULIE 2407 -▁JULIUS 2408 -▁JUMP 2409 -▁JUMPED 2410 -▁JUNE 2411 -▁JUNGLE 2412 -▁JUST 2413 -▁JUSTICE 2414 -▁JUSTIFY 2415 -K 2416 -▁K 2417 -KA 2418 -▁KA 2419 -▁KATE 2420 -▁KATY 2421 -KE 2422 -▁KEEN 2423 -KEEP 2424 -▁KEEP 2425 -▁KEEPING 2426 -▁KEITH 2427 -▁KEN 2428 -▁KENNEDY 2429 -▁KEPT 2430 -KER 2431 -▁KETTLE 2432 -▁KEY 2433 -KI 2434 -▁KI 2435 -▁KICK 2436 -▁KILL 2437 -▁KILLED 2438 -KIN 2439 -▁KIND 2440 -▁KINDLY 2441 -▁KINDNESS 2442 -KING 2443 -▁KING 2444 -▁KINGDOM 2445 -▁KISS 2446 -▁KISSED 2447 -▁KIT 2448 -▁KITCHEN 2449 -▁KITTY 2450 -▁KNEE 2451 -▁KNEES 2452 -▁KNELT 2453 -▁KNEW 2454 -▁KNIFE 2455 -▁KNIGHT 2456 -▁KNIT 2457 -▁KNOCK 2458 -▁KNOT 2459 -▁KNOW 2460 -▁KNOWING 2461 -▁KNOWLEDGE 2462 -▁KNOWN 2463 -▁KNOWS 2464 -KO 2465 -▁KO 2466 -KY 2467 -L 2468 -LA 2469 -▁LA 2470 -▁LABOR 2471 -▁LABOUR 2472 -LAC 2473 -▁LACE 2474 -▁LACK 2475 -▁LAD 2476 -▁LADDER 2477 -▁LADIES 2478 -▁LADY 2479 -▁LAID 2480 -▁LAKE 2481 -▁LAMB 2482 -▁LAMENT 2483 -▁LAMP 2484 -LAN 2485 -LAND 2486 -▁LAND 2487 -▁LANDLORD 2488 -▁LANDSCAPE 2489 -▁LANE 2490 -▁LANGUAGE 2491 -▁LANTERN 2492 -▁LAP 2493 -LAR 2494 -▁LARGE 2495 -▁LARGER 2496 -▁LAST 2497 -▁LATE 2498 -▁LATER 2499 -▁LATTER 2500 -▁LAUGH 2501 -▁LAUGHED 2502 -▁LAUGHING 2503 -▁LAUGHTER 2504 -▁LAUNCELOT 2505 -▁LAUNCH 2506 -▁LAURA 2507 -▁LAW 2508 -▁LAWS 2509 -▁LAWYER 2510 -▁LAY 2511 -LD 2512 -LE 2513 -▁LE 2514 -▁LEAD 2515 -▁LEADER 2516 -▁LEADING 2517 -▁LEAF 2518 -▁LEAGUE 2519 -▁LEAN 2520 -▁LEANED 2521 -▁LEANING 2522 -▁LEAP 2523 -▁LEARN 2524 -▁LEARNED 2525 -▁LEAST 2526 -▁LEATHER 2527 -▁LEAVE 2528 -▁LEAVES 2529 -▁LEAVING 2530 -▁LECTURE 2531 -LED 2532 -▁LED 2533 -▁LEFT 2534 -▁LEG 2535 -▁LEGEND 2536 -▁LEGISLATURE 2537 -▁LEGS 2538 -LEIGH 2539 -▁LEISURE 2540 -▁LEMON 2541 -▁LEND 2542 -▁LENGTH 2543 -▁LEONORA 2544 -LER 2545 -LES 2546 -LESS 2547 -▁LESS 2548 -▁LESSON 2549 -▁LEST 2550 -LET 2551 -▁LET 2552 -▁LETTER 2553 -▁LETTERS 2554 -▁LEVEL 2555 -▁LEVIN 2556 -LEY 2557 -LF 2558 -LI 2559 -▁LI 2560 -▁LIBERAL 2561 -▁LIBERTY 2562 -▁LIBRARY 2563 -LIE 2564 -▁LIE 2565 -▁LIES 2566 -▁LIEUTENANT 2567 -▁LIFE 2568 -▁LIFT 2569 -▁LIFTED 2570 -LIGHT 2571 -▁LIGHT 2572 -▁LIGHTNING 2573 -LIKE 2574 -▁LIKE 2575 -▁LIKED 2576 -▁LIKELY 2577 
-▁LIKEWISE 2578 -▁LIMB 2579 -▁LIMIT 2580 -LIN 2581 -▁LIN 2582 -▁LINCOLN 2583 -LINE 2584 -▁LINE 2585 -▁LINES 2586 -LINESS 2587 -LING 2588 -▁LINGER 2589 -▁LION 2590 -▁LIPS 2591 -▁LIQUID 2592 -▁LIQUOR 2593 -▁LIST 2594 -▁LISTEN 2595 -▁LISTENED 2596 -▁LISTENING 2597 -▁LITERALLY 2598 -▁LITERARY 2599 -▁LITERATURE 2600 -▁LITTLE 2601 -▁LIVE 2602 -▁LIVED 2603 -▁LIVES 2604 -▁LIVING 2605 -LL 2606 -LO 2607 -▁LO 2608 -▁LOAD 2609 -▁LOAF 2610 -▁LOCAL 2611 -LOCK 2612 -▁LOCK 2613 -▁LOCKED 2614 -▁LODGE 2615 -▁LODGING 2616 -▁LOFTY 2617 -▁LOG 2618 -LON 2619 -▁LONDON 2620 -▁LONELY 2621 -LONG 2622 -▁LONG 2623 -▁LONGER 2624 -▁LOOK 2625 -▁LOOKED 2626 -▁LOOKING 2627 -▁LOOKS 2628 -▁LOOSE 2629 -▁LORD 2630 -▁LOSE 2631 -▁LOSING 2632 -▁LOSS 2633 -▁LOST 2634 -▁LOT 2635 -▁LOUD 2636 -▁LOUIS 2637 -▁LOUNG 2638 -▁LOVE 2639 -▁LOVED 2640 -▁LOVELY 2641 -▁LOVER 2642 -▁LOVING 2643 -LOW 2644 -▁LOW 2645 -▁LOWER 2646 -▁LOYAL 2647 -LT 2648 -▁LU 2649 -▁LUC 2650 -▁LUCK 2651 -▁LUCY 2652 -▁LUNCH 2653 -LUNG 2654 -LUS 2655 -▁LUXURY 2656 -LY 2657 -▁LYING 2658 -▁LYN 2659 -M 2660 -▁M 2661 -MA 2662 -▁MA 2663 -▁MAC 2664 -▁MACHINE 2665 -▁MAD 2666 -▁MADAM 2667 -▁MADAME 2668 -▁MADE 2669 -▁MADEMOISELLE 2670 -▁MAGGIE 2671 -▁MAGIC 2672 -▁MAGICIAN 2673 -▁MAGISTRATE 2674 -▁MAGNIFICENT 2675 -▁MAID 2676 -▁MAIDEN 2677 -▁MAIN 2678 -▁MAINTAIN 2679 -▁MAJESTY 2680 -▁MAJOR 2681 -▁MAJORITY 2682 -▁MAKE 2683 -▁MAKES 2684 -▁MAKING 2685 -▁MAL 2686 -▁MALE 2687 -▁MAMMA 2688 -MAN 2689 -▁MAN 2690 -▁MANAGE 2691 -▁MANAGED 2692 -▁MANIFEST 2693 -▁MANKIND 2694 -▁MANNER 2695 -▁MANUFACTURE 2696 -▁MANUSCRIPT 2697 -▁MANY 2698 -▁MAR 2699 -▁MARBLE 2700 -▁MARCH 2701 -▁MARGARET 2702 -▁MARGUERITE 2703 -▁MARIAN 2704 -▁MARILLA 2705 -▁MARK 2706 -▁MARKED 2707 -▁MARKET 2708 -▁MARQUIS 2709 -▁MARRIAGE 2710 -▁MARRIED 2711 -▁MARRY 2712 -▁MARSH 2713 -▁MARTHA 2714 -▁MARTIAN 2715 -▁MARTIN 2716 -▁MARTYR 2717 -▁MARVEL 2718 -▁MARVELLOUS 2719 -▁MARY 2720 -▁MASK 2721 -▁MASS 2722 -▁MASTER 2723 -▁MAT 2724 -▁MATCH 2725 -▁MATE 2726 -▁MATERIAL 2727 -▁MATTER 2728 -▁MATTERS 2729 -▁MATTHEW 2730 -▁MAXIM 2731 -▁MAY 2732 -▁MAYBE 2733 -MBLED 2734 -ME 2735 -▁ME 2736 -▁MEADOW 2737 -▁MEAL 2738 -▁MEAN 2739 -▁MEANING 2740 -▁MEANS 2741 -▁MEANT 2742 -▁MEANTIME 2743 -▁MEANWHILE 2744 -▁MEASURE 2745 -▁MEAT 2746 -▁MECHANICAL 2747 -▁MEDI 2748 -▁MEDICAL 2749 -▁MEDICINE 2750 -▁MEET 2751 -▁MEETING 2752 -▁MELANCHOLY 2753 -▁MEMBER 2754 -▁MEMBERS 2755 -▁MEMORIES 2756 -▁MEMORY 2757 -MEN 2758 -▁MEN 2759 -MENT 2760 -▁MENTAL 2761 -▁MENTION 2762 -▁MENTIONED 2763 -MENTS 2764 -MER 2765 -▁MER 2766 -▁MERCHANT 2767 -▁MERCY 2768 -▁MERE 2769 -▁MERELY 2770 -▁MERIT 2771 -▁MERRY 2772 -▁MESSAGE 2773 -▁MESSENGER 2774 -▁MET 2775 -▁METAL 2776 -▁METHOD 2777 -▁MEXICAN 2778 -MI 2779 -▁MI 2780 -▁MICHAEL 2781 -▁MID 2782 -▁MIDDLE 2783 -▁MIDNIGHT 2784 -MIDST 2785 -▁MIDST 2786 -▁MIGHT 2787 -▁MIGHTY 2788 -▁MIL 2789 -▁MILD 2790 -▁MILE 2791 -▁MILES 2792 -▁MILITARY 2793 -▁MILK 2794 -▁MILL 2795 -▁MILLION 2796 -▁MIN 2797 -▁MIND 2798 -▁MINE 2799 -▁MINGLED 2800 -▁MINISTER 2801 -▁MINUTE 2802 -▁MINUTES 2803 -▁MIRACLE 2804 -▁MIRROR 2805 -▁MIRTH 2806 -▁MIS 2807 -▁MISCHIEF 2808 -▁MISERABLE 2809 -▁MISERY 2810 -▁MISFORTUNE 2811 -▁MISS 2812 -▁MISSION 2813 -▁MISSUS 2814 -▁MIST 2815 -▁MISTAKE 2816 -▁MISTAKEN 2817 -▁MISTER 2818 -▁MISTRESS 2819 -▁MITYA 2820 -▁MIX 2821 -▁MIXTURE 2822 -MMED 2823 -MO 2824 -▁MO 2825 -▁MOCK 2826 -▁MODE 2827 -▁MODERATE 2828 -▁MODERN 2829 -▁MODEST 2830 -▁MOMENT 2831 -▁MON 2832 -▁MONARCH 2833 -MOND 2834 -▁MONDAY 2835 -▁MONEY 2836 -▁MONK 2837 -▁MONKEY 2838 -▁MONSIEUR 2839 -▁MONSTER 2840 -▁MONSTROUS 2841 -MONT 2842 -▁MONTE 2843 -▁MONTH 2844 -▁MONTHS 
2845 -▁MONUMENT 2846 -▁MOOD 2847 -▁MOON 2848 -▁MOONLIGHT 2849 -▁MOR 2850 -▁MORAL 2851 -MORE 2852 -▁MORE 2853 -▁MOREOVER 2854 -▁MORNING 2855 -▁MORROW 2856 -▁MORTAL 2857 -▁MOSCOW 2858 -MOST 2859 -▁MOST 2860 -▁MOTHER 2861 -▁MOTION 2862 -▁MOTIONLESS 2863 -▁MOTIVE 2864 -▁MOTOR 2865 -▁MOULD 2866 -▁MOUNT 2867 -▁MOUNTAIN 2868 -▁MOUNTAINS 2869 -▁MOUNTED 2870 -▁MOURN 2871 -▁MOUSE 2872 -MOUTH 2873 -▁MOUTH 2874 -▁MOVE 2875 -▁MOVED 2876 -▁MOVEMENT 2877 -▁MOVING 2878 -MP 2879 -▁MU 2880 -▁MUCH 2881 -▁MUD 2882 -▁MULE 2883 -▁MULTITUDE 2884 -▁MURDER 2885 -▁MURDERER 2886 -▁MURMUR 2887 -▁MURMURED 2888 -▁MUSCLE 2889 -▁MUSCULAR 2890 -▁MUSIC 2891 -▁MUSKET 2892 -▁MUST 2893 -▁MUTTERED 2894 -▁MUTUAL 2895 -MY 2896 -▁MY 2897 -▁MYSELF 2898 -▁MYSTERIOUS 2899 -▁MYSTERY 2900 -N 2901 -NA 2902 -▁NA 2903 -▁NAIL 2904 -▁NAKED 2905 -▁NAME 2906 -▁NAMED 2907 -▁NANCY 2908 -▁NAPOLEON 2909 -▁NARRAT 2910 -▁NARROW 2911 -▁NATASHA 2912 -▁NATION 2913 -▁NATIONAL 2914 -▁NATIVE 2915 -▁NATURAL 2916 -▁NATURALLY 2917 -▁NATURE 2918 -▁NAUGHT 2919 -▁NAUTILUS 2920 -▁NAV 2921 -▁NAVIGAT 2922 -▁NAY 2923 -NCE 2924 -ND 2925 -NE 2926 -▁NE 2927 -▁NEAR 2928 -▁NEARER 2929 -▁NEAREST 2930 -▁NEARLY 2931 -▁NEAT 2932 -▁NECESSARILY 2933 -▁NECESSARY 2934 -▁NECESSITY 2935 -▁NECK 2936 -NED 2937 -▁NEED 2938 -▁NEEDED 2939 -▁NEGLECT 2940 -▁NEGRO 2941 -▁NEIGHBOR 2942 -▁NEIGHBORHOOD 2943 -▁NEIGHBOUR 2944 -▁NEIGHBOURHOOD 2945 -▁NEITHER 2946 -▁NEPHEW 2947 -NER 2948 -▁NERVE 2949 -▁NERVOUS 2950 -NESS 2951 -▁NEST 2952 -▁NEVER 2953 -▁NEVERTHELESS 2954 -▁NEW 2955 -▁NEWS 2956 -▁NEWSPAPER 2957 -▁NEXT 2958 -NEY 2959 -NG 2960 -NI 2961 -▁NI 2962 -NIC 2963 -▁NICE 2964 -▁NICHOLAS 2965 -▁NIECE 2966 -▁NIGH 2967 -▁NIGHT 2968 -▁NIGHTINGALE 2969 -▁NINE 2970 -▁NINETEEN 2971 -▁NINETY 2972 -NING 2973 -▁NINTH 2974 -NNIE 2975 -NNY 2976 -NO 2977 -▁NO 2978 -▁NOBILITY 2979 -▁NOBLE 2980 -▁NOBODY 2981 -▁NODDED 2982 -▁NOISE 2983 -▁NONE 2984 -▁NONSENSE 2985 -▁NOR 2986 -▁NORMAL 2987 -▁NORMAN 2988 -▁NORTH 2989 -▁NORTHERN 2990 -▁NOSE 2991 -▁NOT 2992 -▁NOTE 2993 -▁NOTHING 2994 -▁NOTICE 2995 -▁NOTICED 2996 -▁NOTWITHSTANDING 2997 -▁NOVEL 2998 -▁NOVEMBER 2999 -▁NOW 3000 -▁NOWHERE 3001 -NT 3002 -▁NU 3003 -▁NUMBER 3004 -▁NUMEROUS 3005 -▁NURSE 3006 -▁NUT 3007 -NY 3008 -O 3009 -▁O 3010 -▁OAK 3011 -▁OATH 3012 -▁OB 3013 -▁OBEDIENCE 3014 -▁OBEY 3015 -▁OBJECT 3016 -▁OBJECTION 3017 -▁OBLIGATION 3018 -▁OBLIGED 3019 -▁OBSCURE 3020 -▁OBSERVATION 3021 -▁OBSERVE 3022 -▁OBSERVED 3023 -▁OBSERVING 3024 -▁OBSTACLE 3025 -▁OBSTINATE 3026 -▁OBTAIN 3027 -▁OBTAINED 3028 -▁OBVIOUS 3029 -OC 3030 -▁OCCASION 3031 -▁OCCASIONALLY 3032 -▁OCCUPATION 3033 -▁OCCUPIED 3034 -▁OCCUPY 3035 -▁OCCUR 3036 -▁OCCURRED 3037 -▁OCCURRENCE 3038 -▁OCEAN 3039 -▁OCTOBER 3040 -OD 3041 -▁ODD 3042 -▁OF 3043 -▁OFF 3044 -▁OFFEND 3045 -▁OFFER 3046 -▁OFFERED 3047 -▁OFFICE 3048 -▁OFFICER 3049 -▁OFFICERS 3050 -▁OFFICIAL 3051 -▁OFTEN 3052 -OG 3053 -▁OH 3054 -▁OIL 3055 -OL 3056 -▁OLD 3057 -▁OLIVER 3058 -OLOGICAL 3059 -OLOGIST 3060 -OLOGY 3061 -OM 3062 -ON 3063 -▁ON 3064 -▁ONCE 3065 -ONE 3066 -▁ONE 3067 -▁ONLY 3068 -OO 3069 -OOK 3070 -OON 3071 -OP 3072 -▁OPEN 3073 -▁OPENED 3074 -▁OPENING 3075 -▁OPERA 3076 -▁OPERATION 3077 -▁OPINION 3078 -▁OPPONENT 3079 -▁OPPORTUNITY 3080 -▁OPPOSITE 3081 -▁OPPOSITION 3082 -▁OPPRESS 3083 -OR 3084 -▁OR 3085 -▁ORANGE 3086 -▁ORCHARD 3087 -ORD 3088 -▁ORDER 3089 -▁ORDERED 3090 -▁ORDERS 3091 -▁ORDINARY 3092 -▁ORGAN 3093 -▁ORGANI 3094 -▁ORIGIN 3095 -▁ORIGINAL 3096 -▁ORNAMENT 3097 -ORS 3098 -ORY 3099 -OS 3100 -OT 3101 -▁OTHER 3102 -▁OTHERS 3103 -▁OTHERWISE 3104 -OU 3105 -▁OUGHT 3106 -▁OUNCE 3107 -OUR 3108 -▁OUR 3109 -▁OURSELVES 3110 -OUS 3111 
-▁OUT 3112 -▁OUTRAGE 3113 -▁OUTSIDE 3114 -OV 3115 -▁OVEN 3116 -▁OVER 3117 -▁OVERCOME 3118 -▁OVERFLOW 3119 -▁OVERLOOK 3120 -▁OVERTAKE 3121 -▁OVERWHELM 3122 -OW 3123 -▁OWE 3124 -▁OWING 3125 -▁OWL 3126 -▁OWN 3127 -▁OYSTER 3128 -P 3129 -▁P 3130 -PA 3131 -▁PA 3132 -▁PACE 3133 -▁PACIFIC 3134 -▁PACK 3135 -▁PAGE 3136 -▁PAID 3137 -▁PAIN 3138 -▁PAINFUL 3139 -▁PAINTED 3140 -▁PAIR 3141 -▁PAL 3142 -▁PALACE 3143 -▁PALE 3144 -▁PALM 3145 -▁PAN 3146 -▁PAPA 3147 -▁PAPER 3148 -▁PAPERS 3149 -▁PAR 3150 -▁PARA 3151 -▁PARADISE 3152 -▁PARALLEL 3153 -▁PARCEL 3154 -▁PARDON 3155 -▁PARENTS 3156 -▁PARIS 3157 -▁PARK 3158 -▁PARLIAMENT 3159 -▁PARLOR 3160 -▁PARLOUR 3161 -▁PART 3162 -▁PARTICLE 3163 -▁PARTICULAR 3164 -▁PARTICULARLY 3165 -▁PARTIES 3166 -▁PARTNER 3167 -▁PARTS 3168 -▁PARTY 3169 -▁PASS 3170 -▁PASSAGE 3171 -▁PASSED 3172 -▁PASSENGER 3173 -▁PASSING 3174 -▁PASSION 3175 -▁PASSIONATE 3176 -▁PAST 3177 -▁PAT 3178 -▁PATCH 3179 -▁PATH 3180 -▁PATIENCE 3181 -▁PATIENT 3182 -▁PATRIOT 3183 -▁PAUL 3184 -▁PAUSE 3185 -▁PAUSED 3186 -▁PAVEMENT 3187 -▁PAW 3188 -▁PAY 3189 -PE 3190 -▁PE 3191 -▁PEA 3192 -▁PEACE 3193 -▁PEAK 3194 -▁PEARL 3195 -▁PEASANT 3196 -PEC 3197 -▁PECULIAR 3198 -PED 3199 -▁PEEP 3200 -▁PEER 3201 -▁PEN 3202 -▁PENCIL 3203 -▁PENETRATE 3204 -▁PENNY 3205 -▁PEOPLE 3206 -▁PEPPER 3207 -PER 3208 -▁PER 3209 -▁PERCEIVE 3210 -▁PERCEIVED 3211 -▁PERCEIVING 3212 -▁PERCEPTION 3213 -▁PERCH 3214 -▁PERFECT 3215 -▁PERFECTION 3216 -▁PERFECTLY 3217 -▁PERFORM 3218 -▁PERFORMANCE 3219 -▁PERFUME 3220 -▁PERHAPS 3221 -▁PERIL 3222 -▁PERIOD 3223 -▁PERISH 3224 -▁PERMANENT 3225 -▁PERMISSION 3226 -▁PERMIT 3227 -▁PERMITTED 3228 -▁PERPETUAL 3229 -▁PERPLEX 3230 -▁PERSECUT 3231 -▁PERSIST 3232 -▁PERSON 3233 -▁PERSONAL 3234 -▁PERSONS 3235 -▁PERSUADE 3236 -▁PET 3237 -▁PETER 3238 -PHA 3239 -▁PHARAOH 3240 -▁PHENOMENA 3241 -▁PHENOMENON 3242 -▁PHIL 3243 -▁PHILADELPHIA 3244 -▁PHILIP 3245 -▁PHILOSOPHER 3246 -▁PHILOSOPHY 3247 -▁PHOENIX 3248 -▁PHOTOGRAPH 3249 -▁PHRASE 3250 -▁PHYSICAL 3251 -▁PHYSICIAN 3252 -▁PI 3253 -▁PIANO 3254 -▁PICK 3255 -▁PICKED 3256 -▁PICTURE 3257 -PIECE 3258 -▁PIECE 3259 -▁PIECES 3260 -▁PIERCED 3261 -▁PIERRE 3262 -▁PIG 3263 -▁PILE 3264 -▁PILGRIM 3265 -▁PILL 3266 -▁PILLOW 3267 -▁PILOT 3268 -▁PIN 3269 -▁PINE 3270 -▁PINK 3271 -▁PINOCCHIO 3272 -▁PIPE 3273 -▁PIRATE 3274 -▁PISTOL 3275 -▁PIT 3276 -▁PITCH 3277 -▁PITIFUL 3278 -▁PITY 3279 -▁PLA 3280 -▁PLAC 3281 -▁PLACE 3282 -▁PLACED 3283 -▁PLACES 3284 -▁PLAGUE 3285 -▁PLAIN 3286 -▁PLAINLY 3287 -▁PLAN 3288 -▁PLANET 3289 -▁PLANT 3290 -▁PLATE 3291 -▁PLATFORM 3292 -▁PLAY 3293 -▁PLAYED 3294 -▁PLAYING 3295 -PLE 3296 -▁PLEA 3297 -▁PLEASANT 3298 -▁PLEASE 3299 -▁PLEASED 3300 -▁PLEASURE 3301 -▁PLEDGE 3302 -▁PLENTY 3303 -▁PLOT 3304 -▁PLOUGH 3305 -▁PLUCK 3306 -▁PLUM 3307 -▁PLUNDER 3308 -▁PLUNGE 3309 -PO 3310 -▁PO 3311 -▁POCKET 3312 -▁POEM 3313 -▁POET 3314 -▁POETRY 3315 -▁POINT 3316 -▁POINTED 3317 -▁POISON 3318 -▁POLE 3319 -▁POLICE 3320 -▁POLICY 3321 -▁POLISH 3322 -▁POLITE 3323 -▁POLITICAL 3324 -▁POLITICS 3325 -▁POLLY 3326 -▁POND 3327 -▁PONY 3328 -▁POOL 3329 -▁POOR 3330 -▁POPE 3331 -▁POPULAR 3332 -▁POPULATION 3333 -▁PORCH 3334 -PORT 3335 -▁PORT 3336 -▁PORTHOS 3337 -▁PORTION 3338 -▁PORTRAIT 3339 -POSE 3340 -▁POSITION 3341 -▁POSITIVE 3342 -▁POSSESS 3343 -▁POSSESSED 3344 -▁POSSESSION 3345 -▁POSSIBILITY 3346 -▁POSSIBLE 3347 -▁POSSIBLY 3348 -▁POST 3349 -▁POT 3350 -▁POUND 3351 -▁POUNDS 3352 -▁POUR 3353 -▁POVERTY 3354 -▁POWDER 3355 -▁POWER 3356 -▁POWERFUL 3357 -▁POWERS 3358 -PP 3359 -PPING 3360 -▁PRA 3361 -▁PRACTICAL 3362 -▁PRACTICE 3363 -▁PRACTISE 3364 -▁PRAIRIE 3365 -▁PRAISE 3366 -▁PRAY 3367 -▁PRAYER 3368 -▁PRE 3369 
-▁PREACH 3370 -▁PRECAUTION 3371 -▁PRECEDE 3372 -▁PRECEDING 3373 -▁PRECIOUS 3374 -▁PRECISE 3375 -▁PRECISELY 3376 -▁PREFER 3377 -▁PREFERRED 3378 -▁PREJUDICE 3379 -▁PREPARATION 3380 -▁PREPARE 3381 -▁PREPARED 3382 -▁PREPARING 3383 -▁PRESENCE 3384 -▁PRESENT 3385 -▁PRESENTED 3386 -▁PRESENTLY 3387 -▁PRESERV 3388 -▁PRESIDENT 3389 -▁PRESS 3390 -▁PRESSED 3391 -▁PRESSURE 3392 -▁PRESUME 3393 -▁PRETEND 3394 -▁PRETTY 3395 -▁PREVAIL 3396 -▁PREVENT 3397 -▁PREVIOUS 3398 -▁PRI 3399 -▁PRICE 3400 -▁PRIDE 3401 -▁PRIEST 3402 -▁PRIMITIVE 3403 -▁PRINCE 3404 -▁PRINCESS 3405 -▁PRINCIPAL 3406 -▁PRINCIPLE 3407 -▁PRINT 3408 -▁PRISCILLA 3409 -▁PRISON 3410 -▁PRISONER 3411 -▁PRIVATE 3412 -▁PRIVILEGE 3413 -▁PRO 3414 -▁PROBABILITY 3415 -▁PROBABLE 3416 -▁PROBABLY 3417 -▁PROBLEM 3418 -▁PROCEED 3419 -▁PROCEEDED 3420 -▁PROCESS 3421 -▁PROCLAIM 3422 -▁PROCURE 3423 -▁PRODUCE 3424 -▁PRODUCED 3425 -▁PRODUCING 3426 -▁PRODUCT 3427 -▁PROFESS 3428 -▁PROFESSION 3429 -▁PROFESSOR 3430 -▁PROFIT 3431 -▁PROFOUND 3432 -▁PROGRESS 3433 -▁PROHIBIT 3434 -▁PROJECT 3435 -▁PROMINENT 3436 -▁PROMISE 3437 -▁PROMISED 3438 -▁PROMISING 3439 -▁PROMOTE 3440 -▁PROMPT 3441 -▁PRONOUNC 3442 -▁PROOF 3443 -▁PROP 3444 -▁PROPER 3445 -▁PROPERLY 3446 -▁PROPERTY 3447 -▁PROPHET 3448 -▁PROPORTION 3449 -▁PROPOSAL 3450 -▁PROPOSE 3451 -▁PROPOSED 3452 -▁PROPOSITION 3453 -▁PROPRIETOR 3454 -▁PROSPECT 3455 -▁PROSPERITY 3456 -▁PROTECT 3457 -▁PROTECTION 3458 -▁PROTEST 3459 -▁PROUD 3460 -▁PROVE 3461 -▁PROVED 3462 -▁PROVERB 3463 -▁PROVIDE 3464 -▁PROVIDED 3465 -▁PROVINCE 3466 -▁PROVISION 3467 -▁PROVOKE 3468 -▁PRUDENCE 3469 -▁PRUDENT 3470 -PS 3471 -▁PSMITH 3472 -▁PU 3473 -▁PUBLIC 3474 -▁PUBLISH 3475 -▁PUFF 3476 -▁PULL 3477 -▁PULLED 3478 -▁PULSE 3479 -▁PUNISH 3480 -▁PUNISHMENT 3481 -▁PUPIL 3482 -▁PUR 3483 -▁PURCHASE 3484 -▁PURE 3485 -▁PURPLE 3486 -▁PURPOSE 3487 -▁PURSE 3488 -▁PURSUE 3489 -▁PURSUED 3490 -▁PURSUIT 3491 -▁PUSH 3492 -▁PUSHED 3493 -▁PUT 3494 -▁PUTTING 3495 -Q 3496 -QUA 3497 -▁QUA 3498 -▁QUAINT 3499 -▁QUALITIES 3500 -▁QUALITY 3501 -▁QUANTITY 3502 -▁QUARREL 3503 -▁QUARTER 3504 -QUE 3505 -▁QUEEN 3506 -▁QUEER 3507 -▁QUESTION 3508 -▁QUESTIONS 3509 -QUI 3510 -▁QUI 3511 -▁QUICK 3512 -▁QUICKLY 3513 -▁QUIET 3514 -▁QUIETLY 3515 -▁QUITE 3516 -▁QUIVER 3517 -▁QUIXOTE 3518 -▁QUO 3519 -▁QUOTH 3520 -R 3521 -▁R 3522 -RA 3523 -▁RA 3524 -▁RABBIT 3525 -▁RACE 3526 -▁RACHEL 3527 -▁RADIANT 3528 -▁RAG 3529 -▁RAGE 3530 -▁RAIL 3531 -▁RAILROAD 3532 -▁RAILWAY 3533 -▁RAIN 3534 -▁RAINBOW 3535 -▁RAISE 3536 -▁RAISED 3537 -▁RAISING 3538 -▁RALPH 3539 -▁RAM 3540 -RAN 3541 -▁RAN 3542 -▁RANG 3543 -▁RANGE 3544 -▁RANK 3545 -▁RAOUL 3546 -▁RAPID 3547 -▁RAPIDLY 3548 -▁RARE 3549 -▁RASCAL 3550 -RATE 3551 -▁RATE 3552 -▁RATHER 3553 -▁RATIONAL 3554 -▁RATTL 3555 -▁RAVEN 3556 -▁RAY 3557 -RE 3558 -▁RE 3559 -▁REACH 3560 -▁REACHED 3561 -▁REACTION 3562 -▁READ 3563 -▁READER 3564 -▁READILY 3565 -▁READING 3566 -▁READY 3567 -▁REAL 3568 -▁REALI 3569 -▁REALITY 3570 -▁REALLY 3571 -▁REAR 3572 -▁REASON 3573 -▁REBECCA 3574 -▁REBEL 3575 -▁RECALL 3576 -▁RECEIVE 3577 -▁RECEIVED 3578 -▁RECEIVING 3579 -▁RECENT 3580 -▁RECEPTION 3581 -▁RECESS 3582 -▁RECIT 3583 -▁RECKLESS 3584 -▁RECKON 3585 -▁RECOGNI 3586 -▁RECOLLECT 3587 -▁RECOLLECTION 3588 -▁RECOMMEND 3589 -▁RECONCIL 3590 -▁RECORD 3591 -▁RECOVER 3592 -▁RECOVERED 3593 -RED 3594 -▁RED 3595 -▁REDUCED 3596 -▁REFER 3597 -▁REFERENCE 3598 -▁REFINED 3599 -▁REFLECT 3600 -▁REFLECTION 3601 -▁REFORM 3602 -▁REFRAIN 3603 -▁REFRESH 3604 -▁REFUGE 3605 -▁REFUSE 3606 -▁REFUSED 3607 -▁REGAIN 3608 -▁REGARD 3609 -▁REGARDED 3610 -▁REGIMENT 3611 -▁REGION 3612 -▁REGRET 3613 -▁REGULAR 3614 -▁REGULAT 3615 -▁REIGN 
3616 -▁REJECT 3617 -▁REJOICE 3618 -▁REJOICING 3619 -▁RELATE 3620 -▁RELATED 3621 -▁RELATION 3622 -▁RELATIVE 3623 -▁RELAX 3624 -▁RELEASE 3625 -▁RELI 3626 -▁RELIEF 3627 -▁RELIEVE 3628 -▁RELIGION 3629 -▁RELIGIOUS 3630 -▁RELUCTANT 3631 -▁REMAIN 3632 -▁REMAINED 3633 -▁REMARK 3634 -▁REMARKABLE 3635 -▁REMARKED 3636 -▁REMEDY 3637 -▁REMEMBER 3638 -▁REMEMBERED 3639 -▁REMEMBRANCE 3640 -▁REMIND 3641 -▁REMORSE 3642 -▁REMOTE 3643 -▁REMOVE 3644 -▁REMOVED 3645 -▁RENDER 3646 -▁RENDERED 3647 -▁RENEW 3648 -▁RENT 3649 -▁REP 3650 -▁REPAIR 3651 -▁REPEAT 3652 -▁REPEATED 3653 -▁REPENT 3654 -▁REPLIED 3655 -▁REPLY 3656 -▁REPORT 3657 -▁REPRESENT 3658 -▁REPRESENTATIVE 3659 -▁REPROACH 3660 -▁REPUBLIC 3661 -▁REPUTATION 3662 -▁REQUEST 3663 -▁REQUIRE 3664 -▁REQUIRED 3665 -▁RESCUE 3666 -▁RESEMBLANCE 3667 -▁RESEMBLE 3668 -▁RESERVE 3669 -▁RESIDENCE 3670 -▁RESIGN 3671 -▁RESIST 3672 -▁RESISTANCE 3673 -▁RESOLUTE 3674 -▁RESOLUTION 3675 -▁RESOLVED 3676 -▁RESORT 3677 -▁RESOURCE 3678 -▁RESPECT 3679 -▁RESPONSE 3680 -▁RESPONSIBILITY 3681 -▁RESPONSIBLE 3682 -RESS 3683 -▁REST 3684 -▁RESTLESS 3685 -▁RESTORE 3686 -▁RESTRAIN 3687 -▁RESULT 3688 -▁RESUMED 3689 -▁RETAIN 3690 -▁RETIRE 3691 -▁RETIRED 3692 -▁RETORTED 3693 -▁RETREAT 3694 -▁RETURN 3695 -▁RETURNED 3696 -▁RETURNING 3697 -▁REV 3698 -▁REVEAL 3699 -▁REVELATION 3700 -▁REVENGE 3701 -▁REVER 3702 -▁REVIEW 3703 -▁REVOLT 3704 -▁REVOLUTION 3705 -▁REWARD 3706 -RG 3707 -RI 3708 -▁RI 3709 -▁RIBBON 3710 -RIC 3711 -▁RICH 3712 -▁RICHARD 3713 -▁RICHMOND 3714 -RICK 3715 -▁RID 3716 -▁RIDE 3717 -RIDGE 3718 -▁RIDICULOUS 3719 -▁RIDING 3720 -RIES 3721 -▁RIFLE 3722 -RIGHT 3723 -▁RIGHT 3724 -▁RIGHTEOUS 3725 -▁RIGID 3726 -RIN 3727 -RING 3728 -▁RING 3729 -▁RIPE 3730 -RIS 3731 -▁RISE 3732 -▁RISING 3733 -▁RISK 3734 -▁RIVAL 3735 -▁RIVER 3736 -RK 3737 -RN 3738 -RO 3739 -▁RO 3740 -▁ROAD 3741 -▁ROAR 3742 -▁ROAST 3743 -▁ROB 3744 -▁ROBBER 3745 -▁ROBE 3746 -▁ROBERT 3747 -▁ROBIN 3748 -▁ROCK 3749 -▁ROCKS 3750 -▁RODE 3751 -▁ROLL 3752 -▁ROLLED 3753 -▁ROMAN 3754 -▁ROME 3755 -RON 3756 -▁ROOF 3757 -▁ROOM 3758 -▁ROOT 3759 -▁ROPE 3760 -▁ROSA 3761 -▁ROSE 3762 -▁ROUGH 3763 -▁ROUND 3764 -ROUS 3765 -▁ROUSED 3766 -▁ROUTE 3767 -ROW 3768 -▁ROW 3769 -▁ROYAL 3770 -RS 3771 -RT 3772 -RU 3773 -▁RU 3774 -▁RUB 3775 -▁RUBBED 3776 -▁RUBBING 3777 -▁RUDE 3778 -▁RUIN 3779 -▁RULE 3780 -▁RUM 3781 -▁RUN 3782 -▁RUNNING 3783 -▁RUSH 3784 -▁RUSHED 3785 -▁RUSSIA 3786 -▁RUSSIAN 3787 -▁RUTH 3788 -RY 3789 -S 3790 -▁S 3791 -▁SA 3792 -▁SACRED 3793 -▁SACRIFICE 3794 -▁SAD 3795 -▁SADDLE 3796 -▁SAFE 3797 -▁SAFETY 3798 -▁SAID 3799 -SAIL 3800 -▁SAIL 3801 -▁SAILOR 3802 -▁SAINT 3803 -▁SAKE 3804 -▁SAL 3805 -▁SALT 3806 -▁SALUTE 3807 -▁SAM 3808 -▁SAME 3809 -▁SAMUEL 3810 -▁SAN 3811 -▁SANCHO 3812 -▁SAND 3813 -▁SANG 3814 -▁SANK 3815 -▁SARAH 3816 -▁SAT 3817 -▁SATISFACTION 3818 -▁SATISFACTORY 3819 -▁SATISFIED 3820 -▁SATISFY 3821 -▁SATURDAY 3822 -▁SAUCE 3823 -▁SAVAGE 3824 -▁SAVE 3825 -▁SAVED 3826 -▁SAVING 3827 -▁SAW 3828 -▁SAY 3829 -▁SAYING 3830 -▁SAYS 3831 -▁SC 3832 -▁SCA 3833 -▁SCALE 3834 -▁SCANDAL 3835 -▁SCAR 3836 -▁SCARCE 3837 -▁SCARCELY 3838 -▁SCARECROW 3839 -▁SCARLET 3840 -▁SCATTERED 3841 -▁SCENE 3842 -▁SCENT 3843 -▁SCH 3844 -▁SCHEME 3845 -▁SCHOLAR 3846 -▁SCHOOL 3847 -▁SCIENCE 3848 -▁SCIENTIFIC 3849 -▁SCOLD 3850 -▁SCORE 3851 -▁SCORN 3852 -▁SCOTCH 3853 -▁SCOTLAND 3854 -▁SCOTT 3855 -▁SCOUNDREL 3856 -▁SCOUT 3857 -▁SCRAMBLE 3858 -▁SCRAP 3859 -▁SCRATCH 3860 -▁SCREAM 3861 -▁SCREEN 3862 -▁SCREW 3863 -▁SCROOGE 3864 -SE 3865 -▁SE 3866 -▁SEA 3867 -▁SEAL 3868 -▁SEARCH 3869 -▁SEASON 3870 -▁SEAT 3871 -▁SEATED 3872 -▁SECOND 3873 -▁SECRET 3874 -▁SECRETARY 3875 -▁SECTION 3876 
-▁SECURE 3877 -▁SECURITY 3878 -▁SEE 3879 -▁SEEING 3880 -▁SEEK 3881 -▁SEEM 3882 -▁SEEMED 3883 -▁SEEMS 3884 -▁SEEN 3885 -▁SEI 3886 -▁SELDOM 3887 -▁SELECT 3888 -▁SELF 3889 -▁SELFISH 3890 -▁SELL 3891 -▁SENATE 3892 -▁SENATOR 3893 -▁SEND 3894 -▁SENSATION 3895 -▁SENSE 3896 -▁SENSIBLE 3897 -▁SENSITIVE 3898 -▁SENT 3899 -▁SENTENCE 3900 -▁SENTIMENT 3901 -▁SEPARATE 3902 -▁SEPARATED 3903 -▁SEPTEMBER 3904 -▁SER 3905 -▁SERENE 3906 -▁SERGEANT 3907 -▁SERIES 3908 -▁SERIOUS 3909 -▁SERMON 3910 -▁SERPENT 3911 -▁SERVANT 3912 -▁SERVANTS 3913 -▁SERVE 3914 -▁SERVED 3915 -▁SERVICE 3916 -▁SERVING 3917 -▁SET 3918 -▁SETTING 3919 -▁SETTLE 3920 -▁SETTLED 3921 -▁SEVEN 3922 -▁SEVENTEEN 3923 -▁SEVENTY 3924 -▁SEVERAL 3925 -▁SEVERE 3926 -▁SEX 3927 -SH 3928 -▁SH 3929 -▁SHA 3930 -▁SHADE 3931 -▁SHADOW 3932 -▁SHAGGY 3933 -▁SHAKE 3934 -▁SHAKESPEARE 3935 -▁SHAKING 3936 -▁SHALL 3937 -▁SHAME 3938 -▁SHAPE 3939 -▁SHARE 3940 -▁SHARP 3941 -▁SHARPLY 3942 -▁SHAWL 3943 -▁SHE 3944 -▁SHEEP 3945 -▁SHELTER 3946 -▁SHEPHERD 3947 -▁SHERIFF 3948 -▁SHIELD 3949 -▁SHIFT 3950 -▁SHILLING 3951 -▁SHINE 3952 -▁SHINING 3953 -SHIP 3954 -▁SHIP 3955 -▁SHIPS 3956 -SHIRE 3957 -▁SHIRT 3958 -▁SHIVER 3959 -▁SHOCK 3960 -▁SHOE 3961 -▁SHOES 3962 -▁SHONE 3963 -▁SHOOK 3964 -▁SHOOT 3965 -▁SHOP 3966 -▁SHORE 3967 -▁SHORT 3968 -▁SHOT 3969 -▁SHOULD 3970 -▁SHOULDER 3971 -▁SHOULDERS 3972 -▁SHOUT 3973 -▁SHOUTED 3974 -▁SHOW 3975 -▁SHOWED 3976 -▁SHOWN 3977 -▁SHREWD 3978 -▁SHRIEK 3979 -▁SHRILL 3980 -▁SHRINK 3981 -▁SHUDDER 3982 -▁SHUT 3983 -▁SI 3984 -▁SICK 3985 -SIDE 3986 -▁SIDE 3987 -▁SIDES 3988 -▁SIEGE 3989 -▁SIGH 3990 -▁SIGHED 3991 -▁SIGHT 3992 -▁SIGN 3993 -▁SIGNAL 3994 -▁SIGNIFICANCE 3995 -▁SIGNIFICANT 3996 -▁SILENCE 3997 -▁SILENT 3998 -▁SILK 3999 -▁SILLY 4000 -▁SILVER 4001 -▁SIMILAR 4002 -▁SIMON 4003 -▁SIMPLE 4004 -▁SIMPLICITY 4005 -▁SIMPLY 4006 -▁SIN 4007 -▁SINCE 4008 -▁SING 4009 -▁SINGING 4010 -▁SINGLE 4011 -▁SINGULAR 4012 -▁SINK 4013 -▁SIR 4014 -▁SISTER 4015 -▁SIT 4016 -▁SITTING 4017 -▁SITUATED 4018 -▁SITUATION 4019 -▁SIX 4020 -▁SIXTEEN 4021 -▁SIXTY 4022 -▁SKETCH 4023 -▁SKI 4024 -▁SKILFUL 4025 -▁SKILL 4026 -▁SKIN 4027 -▁SKIRT 4028 -▁SKULL 4029 -▁SKY 4030 -▁SLAIN 4031 -▁SLAUGHTER 4032 -▁SLAVE 4033 -▁SLAVERY 4034 -▁SLAVES 4035 -▁SLEDGE 4036 -▁SLEEP 4037 -▁SLEEVE 4038 -▁SLENDER 4039 -▁SLEPT 4040 -▁SLEW 4041 -▁SLICE 4042 -▁SLID 4043 -▁SLIGHT 4044 -▁SLIGHTEST 4045 -▁SLIGHTLY 4046 -▁SLIM 4047 -▁SLIP 4048 -▁SLIPPED 4049 -▁SLO 4050 -▁SLOPE 4051 -▁SLOW 4052 -▁SLOWLY 4053 -▁SLUMBER 4054 -▁SMALL 4055 -▁SMART 4056 -▁SMASH 4057 -▁SMELL 4058 -▁SMILE 4059 -▁SMILED 4060 -▁SMILING 4061 -▁SMITH 4062 -▁SMOKE 4063 -▁SMOKING 4064 -▁SMOOTH 4065 -▁SMOT 4066 -▁SNAKE 4067 -▁SNAP 4068 -▁SNATCH 4069 -▁SNEER 4070 -▁SNOW 4071 -▁SO 4072 -▁SOCIAL 4073 -▁SOCIETY 4074 -▁SOFT 4075 -▁SOFTLY 4076 -▁SOIL 4077 -▁SOLD 4078 -▁SOLDIER 4079 -▁SOLDIERS 4080 -▁SOLEMN 4081 -▁SOLICIT 4082 -▁SOLID 4083 -▁SOLITARY 4084 -▁SOLITUDE 4085 -▁SOLOMON 4086 -▁SOLUTION 4087 -SOME 4088 -▁SOME 4089 -▁SOMEBODY 4090 -▁SOMEHOW 4091 -▁SOMEONE 4092 -▁SOMETHING 4093 -▁SOMETIMES 4094 -▁SOMEWHAT 4095 -▁SOMEWHERE 4096 -SON 4097 -▁SON 4098 -▁SONG 4099 -▁SOON 4100 -▁SOONER 4101 -▁SOOTH 4102 -▁SORROW 4103 -▁SORRY 4104 -▁SORT 4105 -▁SOUGHT 4106 -▁SOUL 4107 -▁SOUND 4108 -▁SOURCE 4109 -▁SOUTH 4110 -▁SOUTHERN 4111 -▁SOVEREIGN 4112 -▁SP 4113 -▁SPACE 4114 -▁SPAIN 4115 -▁SPAKE 4116 -▁SPANIARD 4117 -▁SPANISH 4118 -▁SPAR 4119 -▁SPARE 4120 -▁SPARK 4121 -▁SPEAK 4122 -▁SPEAKING 4123 -▁SPEAR 4124 -▁SPECIAL 4125 -▁SPECIES 4126 -▁SPECIMEN 4127 -▁SPECK 4128 -▁SPECTACLE 4129 -▁SPECTATOR 4130 -▁SPECULAT 4131 -▁SPEECH 4132 -▁SPEED 4133 -▁SPELL 4134 -▁SPEND 4135 
-▁SPENT 4136 -▁SPHERE 4137 -▁SPI 4138 -▁SPIN 4139 -▁SPIRIT 4140 -▁SPIRITS 4141 -▁SPIRITUAL 4142 -▁SPITE 4143 -▁SPLASH 4144 -▁SPLENDID 4145 -▁SPLENDOR 4146 -▁SPLIT 4147 -▁SPOIL 4148 -▁SPOKE 4149 -▁SPOKEN 4150 -▁SPOON 4151 -▁SPORT 4152 -▁SPOT 4153 -▁SPRANG 4154 -▁SPREAD 4155 -▁SPRING 4156 -▁SPRINKLE 4157 -▁SPUR 4158 -▁SQU 4159 -▁SQUARE 4160 -▁SQUEE 4161 -▁SQUIRE 4162 -▁SQUIRREL 4163 -ST 4164 -▁ST 4165 -▁STA 4166 -▁STABLE 4167 -▁STAFF 4168 -▁STAGE 4169 -▁STAGGER 4170 -▁STAIRCASE 4171 -▁STAIRS 4172 -▁STALK 4173 -▁STAMP 4174 -▁STAND 4175 -▁STANDARD 4176 -▁STANDING 4177 -▁STAR 4178 -▁STARED 4179 -▁STARS 4180 -▁START 4181 -▁STARTED 4182 -▁STARTLED 4183 -▁STATE 4184 -▁STATEMENT 4185 -▁STATES 4186 -▁STATION 4187 -▁STATUE 4188 -▁STAY 4189 -▁STE 4190 -STEAD 4191 -▁STEADILY 4192 -▁STEADY 4193 -▁STEAL 4194 -▁STEAM 4195 -▁STEEL 4196 -▁STEEP 4197 -▁STEP 4198 -▁STEPHEN 4199 -▁STEPPED 4200 -▁STEPS 4201 -STER 4202 -▁STERN 4203 -▁STICK 4204 -▁STIFF 4205 -▁STILL 4206 -▁STIR 4207 -▁STIRRED 4208 -▁STO 4209 -▁STOCK 4210 -▁STOLE 4211 -▁STOMACH 4212 -STONE 4213 -▁STONE 4214 -▁STONES 4215 -▁STOOD 4216 -▁STOOPED 4217 -▁STOP 4218 -▁STOPPED 4219 -▁STOPPING 4220 -▁STORE 4221 -▁STORIES 4222 -▁STORM 4223 -▁STORY 4224 -▁STOUT 4225 -STRA 4226 -▁STRAIGHT 4227 -▁STRAIN 4228 -▁STRAIT 4229 -▁STRANGE 4230 -▁STRANGER 4231 -▁STRAP 4232 -▁STRAT 4233 -▁STRAW 4234 -▁STRAY 4235 -▁STREAK 4236 -▁STREAM 4237 -▁STREET 4238 -▁STREETS 4239 -▁STRENGTH 4240 -▁STRETCH 4241 -▁STRETCHED 4242 -▁STREW 4243 -▁STRICKEN 4244 -▁STRICT 4245 -▁STRIKE 4246 -▁STRIKING 4247 -▁STRING 4248 -▁STRIP 4249 -▁STRO 4250 -▁STROKE 4251 -▁STRONG 4252 -▁STRUCK 4253 -▁STRUCTURE 4254 -▁STRUGGLE 4255 -▁STRUGGLING 4256 -▁STUCK 4257 -▁STUDENT 4258 -▁STUDIED 4259 -▁STUDIES 4260 -▁STUDIO 4261 -▁STUDY 4262 -▁STUFF 4263 -▁STUMBLE 4264 -▁STUMP 4265 -▁STUPID 4266 -▁STYLE 4267 -▁SU 4268 -▁SUB 4269 -▁SUBDUED 4270 -▁SUBJECT 4271 -▁SUBLIME 4272 -▁SUBMIT 4273 -▁SUBSEQUENT 4274 -▁SUBSTANCE 4275 -▁SUBSTANTIAL 4276 -▁SUBTLE 4277 -▁SUCCEED 4278 -▁SUCCEEDED 4279 -▁SUCCESS 4280 -▁SUCCESSFUL 4281 -▁SUCH 4282 -▁SUDDEN 4283 -▁SUDDENLY 4284 -▁SUFFER 4285 -▁SUFFERED 4286 -▁SUFFERING 4287 -▁SUFFICE 4288 -▁SUFFICIENT 4289 -▁SUFFICIENTLY 4290 -▁SUFFRAGE 4291 -▁SUGAR 4292 -▁SUGGEST 4293 -▁SUGGESTED 4294 -▁SUGGESTION 4295 -▁SUIT 4296 -▁SULLEN 4297 -▁SULTAN 4298 -▁SUM 4299 -▁SUMMER 4300 -▁SUMMIT 4301 -▁SUMMON 4302 -▁SUN 4303 -▁SUNDAY 4304 -▁SUNK 4305 -▁SUNLIGHT 4306 -▁SUNRISE 4307 -▁SUNSET 4308 -▁SUNSHINE 4309 -▁SUPER 4310 -▁SUPERINTEND 4311 -▁SUPERIOR 4312 -▁SUPPER 4313 -▁SUPPLIED 4314 -▁SUPPLIES 4315 -▁SUPPLY 4316 -▁SUPPORT 4317 -▁SUPPOSE 4318 -▁SUPPOSED 4319 -▁SUPPOSING 4320 -▁SUPPRESS 4321 -▁SUPREME 4322 -▁SUR 4323 -▁SURE 4324 -▁SURELY 4325 -▁SURFACE 4326 -▁SURGEON 4327 -▁SURPASS 4328 -▁SURPRISE 4329 -▁SURPRISED 4330 -▁SURPRISING 4331 -▁SURRENDER 4332 -▁SURROUNDED 4333 -▁SURROUNDING 4334 -▁SURVEY 4335 -▁SURVIV 4336 -▁SUSAN 4337 -▁SUSPECT 4338 -▁SUSPICION 4339 -▁SUSPICIOUS 4340 -▁SUSTAIN 4341 -▁SW 4342 -▁SWA 4343 -▁SWALLOW 4344 -▁SWARM 4345 -▁SWEAR 4346 -▁SWEAT 4347 -▁SWEEP 4348 -▁SWEET 4349 -▁SWELL 4350 -▁SWEPT 4351 -▁SWIFT 4352 -▁SWIM 4353 -▁SWIMMING 4354 -▁SWORD 4355 -▁SWORE 4356 -▁SWUNG 4357 -▁SY 4358 -▁SYLVIA 4359 -▁SYMBOL 4360 -▁SYMPATHETIC 4361 -▁SYMPATHI 4362 -▁SYMPATHY 4363 -▁SYMPTOM 4364 -▁SYSTEM 4365 -T 4366 -▁T 4367 -TA 4368 -▁TA 4369 -▁TABLE 4370 -▁TAIL 4371 -▁TAKE 4372 -▁TAKEN 4373 -▁TAKING 4374 -▁TALE 4375 -▁TALENT 4376 -▁TALK 4377 -▁TALKED 4378 -▁TALKING 4379 -▁TALL 4380 -TAN 4381 -▁TANG 4382 -▁TANK 4383 -▁TAP 4384 -▁TAR 4385 -▁TASK 4386 -▁TASTE 4387 -▁TAUGHT 4388 -▁TAX 4389 -TE 
4390 -▁TE 4391 -▁TEA 4392 -▁TEACH 4393 -▁TEACHER 4394 -▁TEAR 4395 -▁TEARS 4396 -TED 4397 -▁TEETH 4398 -▁TELEGRAPH 4399 -▁TELEPHONE 4400 -▁TELL 4401 -▁TELLING 4402 -▁TEMPER 4403 -▁TEMPERAMENT 4404 -▁TEMPERATURE 4405 -▁TEMPEST 4406 -▁TEMPLE 4407 -▁TEMPORARY 4408 -▁TEMPT 4409 -▁TEMPTATION 4410 -TEN 4411 -▁TEN 4412 -▁TENDENCY 4413 -▁TENDER 4414 -▁TENDERNESS 4415 -TER 4416 -TERIOR 4417 -▁TERM 4418 -▁TERMS 4419 -▁TERRACE 4420 -▁TERRIBLE 4421 -▁TERRIBLY 4422 -▁TERRIFIED 4423 -▁TERRITORY 4424 -▁TERROR 4425 -▁TEST 4426 -▁TESTIMONY 4427 -▁TEXT 4428 -TH 4429 -▁TH 4430 -▁THAN 4431 -▁THANK 4432 -▁THAT 4433 -THE 4434 -▁THE 4435 -▁THEATRE 4436 -▁THEIR 4437 -▁THEM 4438 -▁THEMSELVES 4439 -▁THEN 4440 -THER 4441 -▁THERE 4442 -▁THEREFORE 4443 -▁THEREUPON 4444 -▁THESE 4445 -▁THEY 4446 -▁THICK 4447 -▁THIEF 4448 -▁THIEVES 4449 -▁THIN 4450 -▁THING 4451 -▁THINGS 4452 -THINK 4453 -▁THINK 4454 -▁THINKING 4455 -▁THIRD 4456 -▁THIRST 4457 -▁THIRTEEN 4458 -▁THIRTY 4459 -▁THIS 4460 -▁THITHER 4461 -▁THOMAS 4462 -▁THORNTON 4463 -▁THOROUGH 4464 -▁THOROUGHLY 4465 -THORPE 4466 -▁THOSE 4467 -▁THOU 4468 -▁THOUGH 4469 -▁THOUGHT 4470 -▁THOUGHTFULLY 4471 -▁THOUGHTS 4472 -▁THOUSAND 4473 -▁THREAD 4474 -▁THREAT 4475 -▁THREATENED 4476 -▁THREATENING 4477 -▁THREE 4478 -▁THRESHOLD 4479 -▁THREW 4480 -▁THRILL 4481 -▁THRO 4482 -▁THROAT 4483 -▁THRONE 4484 -▁THRONG 4485 -▁THROUGH 4486 -▁THROUGHOUT 4487 -▁THROW 4488 -▁THROWING 4489 -▁THROWN 4490 -▁THRUST 4491 -▁THUMB 4492 -▁THUNDER 4493 -▁THUS 4494 -▁THY 4495 -▁THYSELF 4496 -TI 4497 -▁TI 4498 -TIC 4499 -▁TICKET 4500 -▁TIDE 4501 -▁TIDINGS 4502 -▁TIED 4503 -TIES 4504 -▁TIGHT 4505 -▁TILL 4506 -▁TIMBER 4507 -TIME 4508 -▁TIME 4509 -▁TIMES 4510 -▁TIMID 4511 -TIN 4512 -▁TIN 4513 -TING 4514 -▁TINY 4515 -TION 4516 -▁TIP 4517 -▁TIRED 4518 -▁TITLE 4519 -TO 4520 -▁TO 4521 -▁TOBACCO 4522 -▁TODAY 4523 -▁TOGETHER 4524 -▁TOLD 4525 -▁TOM 4526 -▁TOMB 4527 -▁TOMORROW 4528 -TON 4529 -▁TONE 4530 -▁TONGUE 4531 -▁TOO 4532 -▁TOOK 4533 -▁TOP 4534 -▁TORMENT 4535 -▁TORRENT 4536 -▁TORTURE 4537 -▁TOTAL 4538 -▁TOUCH 4539 -▁TOUCHED 4540 -▁TOWARD 4541 -▁TOWARDS 4542 -▁TOWER 4543 -▁TOWN 4544 -▁TRA 4545 -▁TRACE 4546 -▁TRACK 4547 -▁TRADE 4548 -▁TRADITION 4549 -▁TRAGEDY 4550 -▁TRAGIC 4551 -▁TRAIL 4552 -▁TRAIN 4553 -▁TRAITOR 4554 -▁TRAMP 4555 -▁TRANQUIL 4556 -▁TRANS 4557 -▁TRANSPORT 4558 -▁TRAP 4559 -▁TRAVEL 4560 -▁TRAVELLER 4561 -▁TRE 4562 -▁TREAD 4563 -▁TREASURE 4564 -▁TREAT 4565 -▁TREATED 4566 -▁TREATMENT 4567 -▁TREE 4568 -▁TREES 4569 -▁TREMBLE 4570 -▁TREMBLED 4571 -▁TREMBLING 4572 -▁TREMENDOUS 4573 -▁TRENCH 4574 -TRI 4575 -▁TRI 4576 -▁TRIAL 4577 -▁TRIBE 4578 -▁TRICK 4579 -▁TRIED 4580 -▁TRIFLE 4581 -▁TRIFLING 4582 -▁TRIP 4583 -▁TRISTRAM 4584 -▁TRIUMPH 4585 -▁TRIUMPHANT 4586 -TRO 4587 -▁TROOP 4588 -▁TROOPS 4589 -▁TROT 4590 -▁TROUBLE 4591 -▁TROUBLED 4592 -▁TROUSERS 4593 -▁TROUT 4594 -▁TRU 4595 -▁TRUE 4596 -▁TRULY 4597 -▁TRUMPET 4598 -▁TRUNK 4599 -▁TRUST 4600 -▁TRUTH 4601 -▁TRY 4602 -▁TRYING 4603 -TTE 4604 -TTERED 4605 -TTLE 4606 -▁TU 4607 -▁TUESDAY 4608 -▁TULLIVER 4609 -▁TUMBLE 4610 -▁TUMULT 4611 -TUR 4612 -▁TURKEY 4613 -▁TURN 4614 -▁TURNED 4615 -▁TURNING 4616 -▁TURTLE 4617 -▁TWAS 4618 -▁TWELVE 4619 -▁TWENTIETH 4620 -▁TWENTY 4621 -▁TWICE 4622 -▁TWILIGHT 4623 -▁TWIN 4624 -▁TWIST 4625 -▁TWO 4626 -TY 4627 -▁TYPE 4628 -▁TYRANT 4629 -U 4630 -UB 4631 -UC 4632 -UCH 4633 -UD 4634 -UG 4635 -UGH 4636 -▁UGLY 4637 -UL 4638 -ULATION 4639 -▁ULTIMATE 4640 -UM 4641 -▁UMBRELLA 4642 -UN 4643 -▁UN 4644 -▁UNABLE 4645 -▁UNC 4646 -▁UNCERTAIN 4647 -▁UNCLE 4648 -▁UNCOMFORTABLE 4649 -▁UNCOMMON 4650 -▁UNCONSCIOUS 4651 -UND 4652 -▁UND 4653 -▁UNDER 4654 
-▁UNDERNEATH 4655 -▁UNDERSTAND 4656 -▁UNDERSTANDING 4657 -▁UNDERSTOOD 4658 -▁UNDERTAKE 4659 -▁UNDERTAKING 4660 -▁UNDOUBTEDLY 4661 -▁UNEASINESS 4662 -▁UNEASY 4663 -▁UNEXPECTED 4664 -▁UNFORTUNATE 4665 -▁UNHAPPY 4666 -▁UNIFORM 4667 -▁UNION 4668 -▁UNITED 4669 -▁UNIVERSAL 4670 -▁UNIVERSE 4671 -▁UNIVERSITY 4672 -▁UNJUST 4673 -▁UNKNOWN 4674 -▁UNLESS 4675 -▁UNLIKE 4676 -▁UNNATURAL 4677 -▁UNNECESSARY 4678 -▁UNPLEASANT 4679 -▁UNRE 4680 -▁UNSEEN 4681 -▁UNTIL 4682 -▁UNTO 4683 -▁UNUSUAL 4684 -▁UNWILLING 4685 -▁UNWORTHY 4686 -UOUS 4687 -UP 4688 -▁UP 4689 -▁UPON 4690 -▁UPPER 4691 -▁UPSTAIRS 4692 -UR 4693 -URE 4694 -▁URGE 4695 -US 4696 -▁US 4697 -USE 4698 -▁USE 4699 -▁USED 4700 -▁USEFUL 4701 -▁USELESS 4702 -▁USUAL 4703 -▁USUALLY 4704 -UT 4705 -▁UTILI 4706 -▁UTMOST 4707 -▁UTTER 4708 -▁UTTERED 4709 -▁UTTERLY 4710 -UX 4711 -V 4712 -VA 4713 -▁VA 4714 -▁VACANT 4715 -▁VAGUE 4716 -▁VAIN 4717 -VAL 4718 -▁VAL 4719 -▁VALENTINE 4720 -▁VALJEAN 4721 -▁VALLEY 4722 -▁VALUABLE 4723 -▁VALUE 4724 -VAN 4725 -▁VAN 4726 -▁VANISHED 4727 -▁VARI 4728 -▁VARIETY 4729 -▁VARIOUS 4730 -▁VAST 4731 -▁VAULT 4732 -VE 4733 -▁VE 4734 -▁VEGETABLE 4735 -▁VEHICLE 4736 -▁VEIL 4737 -▁VELVET 4738 -▁VEN 4739 -▁VENGEANCE 4740 -▁VENTURE 4741 -▁VENTURED 4742 -VER 4743 -▁VER 4744 -▁VERSE 4745 -▁VERY 4746 -▁VESSEL 4747 -▁VEXED 4748 -VI 4749 -▁VI 4750 -▁VIBRAT 4751 -▁VICE 4752 -▁VICTIM 4753 -▁VICTOR 4754 -▁VICTORY 4755 -▁VIEW 4756 -▁VIGOROUS 4757 -▁VILLAGE 4758 -▁VILLAIN 4759 -VILLE 4760 -▁VILLEFORT 4761 -▁VINE 4762 -▁VIOLENCE 4763 -▁VIOLENT 4764 -▁VIOLET 4765 -▁VIRGIN 4766 -▁VIRGINIA 4767 -▁VIRTUE 4768 -▁VIRTUOUS 4769 -▁VISIBLE 4770 -▁VISION 4771 -▁VISIT 4772 -▁VISITOR 4773 -▁VITAL 4774 -▁VIVID 4775 -VO 4776 -▁VO 4777 -VOCATION 4778 -▁VOICE 4779 -▁VOL 4780 -▁VOLUME 4781 -▁VOLUNTEER 4782 -▁VOTE 4783 -▁VOW 4784 -▁VOYAGE 4785 -▁VULGAR 4786 -W 4787 -▁W 4788 -WA 4789 -▁WA 4790 -▁WAG 4791 -▁WAGON 4792 -▁WAIST 4793 -▁WAISTCOAT 4794 -▁WAIT 4795 -▁WAITED 4796 -▁WAITING 4797 -▁WAKE 4798 -▁WAL 4799 -▁WALK 4800 -▁WALKED 4801 -▁WALKING 4802 -▁WALL 4803 -▁WALLS 4804 -▁WALTER 4805 -▁WANDER 4806 -▁WANDERING 4807 -▁WANT 4808 -▁WANTED 4809 -WAR 4810 -▁WAR 4811 -WARD 4812 -▁WARM 4813 -▁WARN 4814 -▁WARNING 4815 -▁WARRANT 4816 -▁WARRIOR 4817 -▁WAS 4818 -▁WASH 4819 -▁WASHINGTON 4820 -▁WATCH 4821 -▁WATCHED 4822 -▁WATCHING 4823 -▁WATER 4824 -▁WAVE 4825 -▁WAVES 4826 -▁WAVING 4827 -▁WAX 4828 -WAY 4829 -▁WAY 4830 -▁WAYS 4831 -WE 4832 -▁WE 4833 -▁WEAK 4834 -▁WEAKNESS 4835 -▁WEALTH 4836 -▁WEAPON 4837 -▁WEAR 4838 -▁WEARY 4839 -▁WEATHER 4840 -▁WEDDING 4841 -▁WEEK 4842 -▁WEEKS 4843 -▁WEEP 4844 -▁WEIGH 4845 -▁WEIGHT 4846 -▁WELCOME 4847 -▁WELFARE 4848 -WELL 4849 -▁WELL 4850 -▁WENT 4851 -▁WEPT 4852 -▁WERE 4853 -▁WEST 4854 -▁WESTERN 4855 -▁WH 4856 -▁WHALE 4857 -▁WHAT 4858 -▁WHATEVER 4859 -▁WHEAT 4860 -▁WHEEL 4861 -▁WHEN 4862 -▁WHENCE 4863 -▁WHERE 4864 -▁WHEREFORE 4865 -▁WHEREUPON 4866 -▁WHETHER 4867 -▁WHI 4868 -▁WHICH 4869 -▁WHILE 4870 -▁WHILST 4871 -▁WHIP 4872 -▁WHIRL 4873 -▁WHISK 4874 -▁WHISPER 4875 -▁WHISPERED 4876 -▁WHISTLE 4877 -▁WHITE 4878 -▁WHITHER 4879 -▁WHO 4880 -▁WHOLE 4881 -▁WHOLLY 4882 -▁WHOM 4883 -▁WHOSE 4884 -▁WHY 4885 -WI 4886 -▁WI 4887 -WICK 4888 -▁WICKED 4889 -▁WIDE 4890 -▁WIDOW 4891 -▁WIFE 4892 -▁WILD 4893 -▁WILDERNESS 4894 -▁WILL 4895 -▁WILLIAM 4896 -▁WILLING 4897 -▁WILSON 4898 -▁WILT 4899 -WIN 4900 -▁WIN 4901 -▁WIND 4902 -▁WINDOW 4903 -▁WINDOWS 4904 -▁WINE 4905 -▁WINGS 4906 -▁WINTER 4907 -▁WIP 4908 -▁WIRE 4909 -▁WISDOM 4910 -▁WISE 4911 -▁WISH 4912 -▁WISHED 4913 -▁WISHES 4914 -▁WIT 4915 -▁WITCH 4916 -▁WITH 4917 -▁WITHDRAW 4918 -▁WITHDREW 4919 -▁WITHIN 4920 -▁WITHOUT 
4921 -▁WITNESS 4922 -▁WIVES 4923 -WN 4924 -▁WOE 4925 -▁WOKE 4926 -▁WOLF 4927 -▁WOLVES 4928 -▁WOMAN 4929 -▁WOMEN 4930 -▁WON 4931 -▁WONDER 4932 -▁WONDERED 4933 -▁WONDERFUL 4934 -▁WONDERING 4935 -WOOD 4936 -▁WOOD 4937 -▁WOODEN 4938 -▁WOODS 4939 -▁WORD 4940 -▁WORDS 4941 -▁WORE 4942 -WORK 4943 -▁WORK 4944 -▁WORKED 4945 -▁WORKING 4946 -▁WORLD 4947 -▁WORM 4948 -▁WORN 4949 -▁WORRIED 4950 -▁WORRY 4951 -▁WORSE 4952 -▁WORSHIP 4953 -▁WORST 4954 -WORTH 4955 -▁WORTH 4956 -▁WORTHY 4957 -▁WOULD 4958 -▁WOULDN 4959 -▁WOUND 4960 -▁WOUNDED 4961 -▁WRAP 4962 -▁WRAPPED 4963 -▁WRATH 4964 -▁WRECK 4965 -▁WREN 4966 -▁WRETCH 4967 -▁WRETCHED 4968 -▁WRINKL 4969 -▁WRIST 4970 -▁WRITE 4971 -▁WRITER 4972 -▁WRITING 4973 -▁WRITTEN 4974 -▁WRONG 4975 -▁WROTE 4976 -▁WROUGHT 4977 -X 4978 -Y 4979 -▁YA 4980 -▁YARD 4981 -▁YE 4982 -▁YEAR 4983 -▁YEARS 4984 -▁YELLOW 4985 -▁YES 4986 -▁YESTERDAY 4987 -▁YET 4988 -▁YIELD 4989 -▁YO 4990 -▁YONDER 4991 -▁YORK 4992 -▁YOU 4993 -▁YOUNG 4994 -▁YOUR 4995 -▁YOURSELF 4996 -▁YOURSELVES 4997 -▁YOUTH 4998 -Z 4999 -ZZ 5000 - 5001 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/test/resources/non-linguistic-symbols.invalid b/models/audio/speech_recognition/conformer/igie/wenet/test/resources/non-linguistic-symbols.invalid deleted file mode 100644 index 131d3ff322e80fdfca92e9df9dd1e7492545ff5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/test/resources/non-linguistic-symbols.invalid +++ /dev/null @@ -1,4 +0,0 @@ -#1 -<> -{{BBB}} -[[ccc]] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/test/resources/non-linguistic-symbols.valid b/models/audio/speech_recognition/conformer/igie/wenet/test/resources/non-linguistic-symbols.valid deleted file mode 100644 index 307b9966d76ad278478d0ef74c4d3fa4c152a99b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/test/resources/non-linguistic-symbols.valid +++ /dev/null @@ -1,6 +0,0 @@ -{~!@#$%^&*()_+`1234567890-=[]|\\:;"'<>,./?} -[~!@#$%^&*()_+`1234567890-={}|\\:;"'<>,./?] 
-<~!@#$%^&*()_+`1234567890-={}|\\:;"'[],./?> -{qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM} -[qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM] - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/test/test_file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/test/test_file_utils.py deleted file mode 100644 index cc38ae3bc0084ae13fe16f5b570c473e0f4fec1d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/test/test_file_utils.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright [2021-12-04] - -import pytest - -from wenet.utils.file_utils import read_non_lang_symbols - - -@pytest.mark.parametrize( - "non_lang_symbol_table_path", - [ - "test/resources/non-linguistic-symbols.valid", - "test/resources/non-linguistic-symbols.invalid" - ] -) -def test_read_non_lang_symbols(non_lang_symbol_table_path): - path = non_lang_symbol_table_path - try: - syms = read_non_lang_symbols(path) - assert syms[0] == "{~!@#$%^&*()_+`1234567890-=[]|\\\\:;\"'<>,./?}" - assert syms[1] == "[~!@#$%^&*()_+`1234567890-={}|\\\\:;\"'<>,./?]" - assert syms[2] == "<~!@#$%^&*()_+`1234567890-={}|\\\\:;\"'[],./?>" - assert syms[3] == "{qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM}" - assert syms[4] == "[qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM]" - assert syms[5] == "" - except Exception as e: - assert path == "test/resources/non-linguistic-symbols.invalid" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/test/test_tokenize.py b/models/audio/speech_recognition/conformer/igie/wenet/test/test_tokenize.py deleted file mode 100644 index 157d79a372bd079e0c538fbd082cf34985840a56..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/test/test_tokenize.py +++ /dev/null @@ -1,126 +0,0 @@ -import pytest - -import wenet.dataset.processor as processor - -@pytest.mark.parametrize( - "symbol_table_path", - [ - "test/resources/librispeech.words.txt", - "test/resources/aishell2.words.txt" - ] -) -def test_tokenize(symbol_table_path): - txts = [ - {"txt": "震东好帅"}, - {"txt": " 吴迪也好帅 "}, - {"txt": "binbin is also handsome"}, - {"txt": " life is short i use wenet "}, - {"txt": "超哥 is the most handsome 吧"}, - {"txt": " 人生苦短i use wenet "}, - {"txt": "人生苦短I USE WENET"}, - {"txt": "zhendong ist so schön"}, - {"txt": " zhendong ist so schön "}, - {"txt": "It's okay"} - ] - if symbol_table_path == "test/resources/librispeech.words.txt": - bpe_model = "test/resources/librispeech.train_960_unigram5000.bpemodel" - refs = [ - {"tokens": ['震', '东', '好', '帅'], - "label": [1, 1, 1, 1]}, - {"tokens": ['吴', '迪', '也', '好', '帅'], - "label": [1, 1, 1, 1, 1]}, - {"tokens": ['▁B', 'IN', 'B', 'IN', '▁IS', '▁ALSO', "▁HANDSOME"], - "label": [347, 2216, 346, 2216, 2332, 143, 1990]}, - {"tokens": ['▁LIFE', '▁IS', '▁SHORT', '▁I', '▁USE', '▁WE', - 'NE', 'T'], - "label": [2568, 2332, 3968, 2152, 4699, 4833, 2926, 4366]}, - {"tokens": ['超', '哥', '▁IS', '▁THE', '▁MOST', '▁HANDSOME', '吧'], - "label": [1, 1, 2332, 4435, 2860, 1990, 1]}, - {"tokens": ['人', '生', '苦', '短', '▁I', '▁USE', '▁WE', 'NE', 'T'], - "label": [1, 1, 1, 1, 2152, 4699, 4833, 2926, 4366]}, - {"tokens": ['人', '生', '苦', '短', '▁I', '▁USE', '▁WE', 'NE', 'T'], - "label": [1, 1, 1, 1, 2152, 4699, 4833, 2926, 4366]}, - {"tokens": ['▁', 'Z', 'HEN', 'DO', 'NG', '▁IS', 'T', '▁SO', '▁SCH', - 'Ö', 'N'], - "label": [3, 4999, 2048, 1248, 2960, 2332, 4366, 4072, 3844, - 1, 2901]}, - {"tokens": ['▁', 'Z', 'HEN', 
'DO', 'NG', '▁IS', 'T', '▁SO', '▁SCH', - 'Ö', 'N'], - "label": [3, 4999, 2048, 1248, 2960, 2332, 4366, 4072, 3844, - 1, 2901]}, - {"tokens": ['▁IT', "'", 'S', '▁O', 'KA', 'Y'], - "label": [2344, 2, 3790, 3010, 2418, 4979]} - ] - else: - bpe_model = None - refs = [ - {"tokens": ['震', '东', '好', '帅'], - "label": [4932, 80, 1059, 1375]}, - {"tokens": ['吴', '迪', '也', '好', '帅'], - "label": [656, 4540, 117, 1059, 1375]}, - {"tokens": ['b', 'i', 'n', 'b', 'i', 'n', '▁', 'i', 's', '▁', - 'a', 'l', 's', 'o', '▁', 'h', 'a', 'n', 'd', 's', - 'o', 'm', 'e'], - "label": [9, 23, 33, 9, 23, 33, 1, 23, 43, 1, 7, 29, 43, 35, - 1, 21, 7, 33, 13, 43, 35, 31, 15]}, - {"tokens": ['l', 'i', 'f', 'e', '▁', 'i', 's', '▁', 's', 'h', - 'o', 'r', 't', '▁', 'i', '▁', 'u', 's', 'e', '▁', - 'w', 'e', 'n', 'e', 't'], - "label": [29, 23, 17, 15, 1, 23, 43, 1, 43, 21, 35, 41, 46, - 1, 23, 1, 48, 43, 15, 1, 52, 15, 33, 15, 46]}, - {"tokens": ['超', '哥', '▁', 'i', 's', '▁', 't', 'h', 'e', '▁', - 'm', 'o', 's', 't', '▁', 'h', 'a', 'n', 'd', 's', 'o', - 'm', 'e', '▁', '吧'], - "label": [4395, 736, 1, 23, 43, 1, 46, 21, 15, 1, 31, 35, 43, 46, - 1, 21, 7, 33, 13, 43, 35, 31, 15, 1, 647]}, - {"tokens": ['人', '生', '苦', '短', 'i', '▁', 'u', 's', 'e', '▁', - 'w', 'e', 'n', 'e', 't'], - "label": [155, 2980, 3833, 3178, 23, 1, 48, 43, 15, 1, 52, 15, 33, - 15, 46]}, - {"tokens": ['人', '生', '苦', '短', 'I', '▁', 'U', 'S', 'E', '▁', - 'W', 'E', 'N', 'E', 'T'], - "label": [155, 2980, 3833, 3178, 24, 1, 49, 44, 16, 1, 53, 16, 34, - 16, 47]}, - {"tokens": ['z', 'h', 'e', 'n', 'd', 'o', 'n', 'g', '▁', 'i', 's', - 't', '▁', 's', 'o', '▁', 's', 'c', 'h', 'ö', 'n'], - "label": [58, 21, 15, 33, 13, 35, 33, 19, 1, 23, 43, 46, 1, 43, - 35, 1, 43, 11, 21, 1, 33]}, - {"tokens": ['z', 'h', 'e', 'n', 'd', 'o', 'n', 'g', '▁', 'i', 's', - 't', '▁', 's', 'o', '▁', 's', 'c', 'h', 'ö', 'n'], - "label": [58, 21, 15, 33, 13, 35, 33, 19, 1, 23, 43, 46, 1, 43, - 35, 1, 43, 11, 21, 1, 33]}, - {"tokens": ['I', 't', "'", 's', '▁', 'o', 'k', 'a', 'y'], - "label": [24, 46, 2, 43, 1, 35, 27, 7, 56]} - ] - symbol_table = {} - with open(symbol_table_path, 'r') as f: - lines = f.readlines() - for l in lines: - l = l.strip().split() - symbol_table[l[0]] = int(l[1]) - outs = processor.tokenize( - txts, symbol_table, bpe_model, split_with_space=False - ) - for (hyp, ref) in zip(outs, refs): - assert(len(hyp["tokens"]) == len(ref["tokens"])) - assert(all([h == r for h, r in zip(hyp["tokens"], ref["tokens"])])) - assert(len(hyp["label"]) == len(ref["label"])) - assert(all([h == r for h, r in zip(hyp["label"], ref["label"])])) - -@pytest.mark.parametrize("use_pbe_model", [True, False]) -def test_non_lang_symbol_tokenize(use_pbe_model): - data = [{"txt": "我是{NOISE}"}] - symbol_table = {"我": 1, "是": 2, "{NOISE}": 3} - - if use_pbe_model: - bpe_model = "test/resources/librispeech.train_960_unigram5000.bpemodel" - - sample = next(processor.tokenize(data, symbol_table, bpe_model, - non_lang_syms=["{NOISE}"])) - - assert sample["tokens"] == ["我", "是", "{NOISE}"] - else: - sample = next(processor.tokenize(data, symbol_table, - non_lang_syms=["{NOISE}"])) - - assert sample["tokens"] == ["我", "是", "{NOISE}"] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/alignment.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/alignment.sh deleted file mode 100644 index 64d860bb61761cadca750c9baf91eddb49e56728..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/alignment.sh +++ /dev/null @@ -1,49 +0,0 @@ 
-#!/bin/bash - -# Copyright 2019 Mobvoi Inc. All Rights Reserved. -. ./path.sh || exit 1; - -stage=0 # start from 0 if you need to start from data preparation -stop_stage=0 - -nj=16 -feat_dir=raw_wav -dict=data/dict/lang_char.txt - -dir=exp/ -config=$dir/train.yaml -checkpoint= -checkpoint=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/avg_20.pt -config=/home/diwu/github/latest/wenet/examples/aishell/s0/exp/transformer/train.yaml -set= -ali_format=$feat_dir/$set/format.data -ali_format=format.data -ali_result=$dir/ali - -. tools/parse_options.sh || exit 1; - -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - nj=32 - # Prepare required data for ctc alignment - echo "Prepare data, prepare required format" - for x in $set; do - tools/format_data.sh --nj ${nj} \ - --feat-type wav --feat $feat_dir/$x/wav.scp \ - $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp - - done -fi - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # Test model, please specify the model you want to use by --checkpoint - python wenet/bin/alignment_deprecated.py --gpu -1 \ - --config $config \ - --input_file $ali_format \ - --checkpoint $checkpoint \ - --batch_size 1 \ - --dict $dict \ - --result_file $ali_result \ - -fi - - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/analyze_dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/analyze_dataset.py deleted file mode 100644 index d4373b065c301972fe0164b6df3591166000acfc..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/analyze_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2022 Horizon Inc. (authors: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Analyze Dataset, Duration/TextLength/Speed etc. - -Usage: -. 
./path.sh && python3 tools/analyze_dataset.py \ - --data_type "shard" \ - --data_list data/test/data.list \ - --output_dir exp/analyze_test \ - --num_thread 32 -""" - -import os -import json -import math -import time -import numpy -import logging -import librosa -import tarfile -import argparse -import torchaudio -import multiprocessing - -from wenet.utils.file_utils import read_lists -from wenet.dataset.processor import AUDIO_FORMAT_SETS - - -def get_args(): - parser = argparse.ArgumentParser(description='Analyze dataset') - parser.add_argument('--data_type', - default='wav_scp', - choices=['wav_scp', 'raw', 'shard'], - help='dataset type') - parser.add_argument('--output_dir', type=str, - default="exp", help='write info to output dir') - parser.add_argument('--data_list', default=None, - help='used in raw/shard mode') - parser.add_argument('--wav_scp', default=None, - help='used in wav_scp mode') - parser.add_argument('--text', default=None, - help='used in wav_scp mode') - parser.add_argument('--num_thread', type=int, - default=4, help='number of threads') - args = parser.parse_args() - print(args) - return args - - -def analyze(datas, output_file, thread_id): - with open(output_file, "w", encoding='utf8') as f: - for i, data in enumerate(datas): - if type(data['wav']) is numpy.ndarray: - y, sample_rate = data['wav'], data['sample_rate'] - data['wav'] = "None" # NOTE(xcsong): Do not save wav. - elif type(data['wav'] is str): - y, sample_rate = librosa.load(data['wav'], sr=16000) - data['dur'] = len(y) / sample_rate - data['txt_length'] = len(data['txt']) - data['speed'] = data['txt_length'] / data['dur'] - # Trim the beginning and ending silence - _, index = librosa.effects.trim(y, top_db=30) - data['leading_sil'] = librosa.get_duration( - y=y[:index[0]], sr=16000) * 1000 if index[0] > 0 else 0 - data['trailing_sil'] = librosa.get_duration( - y=y[index[1]:], sr=16000) * 1000 if index[1] < len(y) else 0 - data_str = json.dumps(data, ensure_ascii=False) - f.write("{}\n".format(data_str)) - if thread_id == 0 and i % 100 == 0: - logging.info("\tThread-{}: processed {}/{}".format( - thread_id, i, len(datas))) - - -def read_tar(file): - try: - with tarfile.open(fileobj=open(file, "rb"), mode="r|*") as stream: - prev_prefix = None - data = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - data['key'] = prev_prefix - if valid: - yield data - data = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - data['txt'] = file_obj.read().decode( - 'utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load( - file_obj) - # single channel - data['wav'] = waveform.numpy()[0, :] - data['sample_rate'] = sample_rate - else: - data[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning( - 'error: {} when parse {}'.format(ex, name)) - prev_prefix = prefix - # The last data in tar - if prev_prefix is not None: - data['key'] = prev_prefix - yield data - except Exception as ex: - logging.warning( - 'tar_file error: {} when processing {}'.format(ex, file)) - - -def main(): - start_time = time.time() - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.makedirs(args.output_dir, exist_ok=True) - os.makedirs(args.output_dir + "/partition", exist_ok=True) - datas = [[] for i in 
range(args.num_thread)] - - logging.info("Stage-1: Loading data.list OR wav.scp...") - if args.data_type == "shard": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - total = 0 - for line in lists: - for data in read_tar(line): - datas[total % args.num_thread].append(data) - total = total + 1 - elif args.data_type == "raw": - assert args.data_list is not None - lists = read_lists(args.data_list) - # partition - for i, line in enumerate(lists): - data = json.loads(line) - datas[i % args.num_thread].append(data) - elif args.data_type == "wav_scp": - assert args.wav_scp is not None - assert args.text is not None - wavs, texts = {}, {} - # wavs - for line in read_lists(args.wav_scp): - line = line.strip().split() - wavs[line[0]] = line[1] - # texts - for line in read_lists(args.text): - line = line.strip().split(maxsplit=1) - texts[line[0]] = line[1] - sorted(wavs) - sorted(texts) - # partition - for i, (key1, key2) in enumerate(zip(wavs, texts)): - assert key1 == key2 - datas[i % args.num_thread].append( - {'key': key1, "wav": wavs[key1], "txt": texts[key1]} - ) - - logging.info("Stage-2: Start Analyze") - # threads - pool = multiprocessing.Pool(processes=args.num_thread) - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - pool.apply_async(analyze, (datas[i], output_file, i)) - pool.close() - pool.join() - - logging.info("Stage-3: Sort and Write Result") - datas = [] - for i in range(args.num_thread): - output_file = os.path.join( - args.output_dir, "partition", "part-{}".format(i)) - with open(output_file, "r", encoding='utf8') as f: - for line in f.readlines(): - data = json.loads(line) - datas.append(data) - total_dur = sum([x['dur'] for x in datas]) - total_len = sum([x['txt_length'] for x in datas]) - total_leading_sil = sum([x['leading_sil'] for x in datas]) - total_trailing_sil = sum([x['trailing_sil'] for x in datas]) - num_datas = len(datas) - names = ['key', 'dur', 'txt_length', 'speed', - 'leading_sil', 'trailing_sil'] - units = ['', 's', '', 'char/s', 'ms', 'ms'] - avgs = [0, total_dur / num_datas, total_len / num_datas, - total_len / total_dur, total_leading_sil / num_datas, - total_trailing_sil / num_datas] - stds = [0, sum([(x['dur'] - avgs[1])**2 for x in datas]), - sum([(x['txt_length'] - avgs[2])**2 for x in datas]), - sum([(x['txt_length'] / x['dur'] - avgs[3])**2 for x in datas]), - sum([(x['leading_sil'] - avgs[4])**2 for x in datas]), - sum([(x['trailing_sil'] - avgs[5])**2 for x in datas])] - stds = [math.sqrt(x / num_datas) for x in stds] - parts = ['max', 'P99', 'P75', 'P50', 'P25', 'min'] - index = [num_datas - 1, int(num_datas * 0.99), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - - with open(args.output_dir + "/analyze_result_brief", - "w", encoding='utf8') as f: - for i, (name, unit, avg, std) in enumerate( - zip(names, units, avgs, stds)): - if name == 'key': - continue - f.write("==================\n") - - datas.sort(key=lambda x: x[name]) - for p, j in zip(parts, index): - f.write("{} {}: {:.3f} {} (wav_id: {})\n".format( - p, name, datas[j][name], unit, datas[j]['key'])) - f.write("avg {}: {:.3f} {}\n".format( - name, avg, unit)) - f.write("std {}: {:.3f}\n".format( - name, std)) - os.system("cat {}".format(args.output_dir + "/analyze_result_brief")) - - datas.sort(key=lambda x: x['dur']) - with open(args.output_dir + "/analyze_result", "w", encoding='utf8') as f: - for data in datas: - f.write("{}\n".format(json.dumps(data, 
ensure_ascii=False))) - - end_time = time.time() - logging.info("Time Cost: {:.3f}s".format(end_time - start_time)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/cmvn_kaldi2json.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/cmvn_kaldi2json.py deleted file mode 100644 index 9966046c95a9d50438c4857b785cb7985182e376..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/cmvn_kaldi2json.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import logging -import sys -import json - -def kaldi2json(kaldi_cmvn_file): - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - cmvn_info = {'mean_stat:' : means, - 'var_stat' : variance, - 'frame_num' : count} - return cmvn_info - -if __name__ == '__main__': - with open(sys.argv[2], 'w') as fout: - cmvn = kaldi2json(sys.argv[1]) - fout.write(json.dumps(cmvn)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/combine_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/combine_data.sh deleted file mode 100644 index 8a56c43f1a2a238d78270f94f3d22f1af540e912..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/combine_data.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 David Snyder - -# This script combines the data from multiple source directories into -# a single destination directory. - -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information -# about what these directories contain. - -# Begin configuration section. -extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." -skip_fix=false # skip the fix_data_dir.sh in the end -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -if [ -f parse_options.sh ]; then . parse_options.sh || exit 1; fi - -if [ $# -lt 2 ]; then - echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." - echo "Note, files that don't appear in all source dirs will not be combined," - echo "with the exception of utt2uniq and segments, which are created where necessary." - exit 1 -fi - -dest=$1; -shift; - -first_src=$1; - -rm -r $dest 2>/dev/null -mkdir -p $dest; - -export LC_ALL=C - -for dir in $*; do - if [ ! -f $dir/utt2spk ]; then - echo "$0: no such file $dir/utt2spk" - exit 1; - fi -done - -# Check that frame_shift are compatible, where present together with features. -dir_with_frame_shift= -for dir in $*; do - if [[ -f $dir/feats.scp && -f $dir/frame_shift ]]; then - if [[ $dir_with_frame_shift ]] && - ! cmp -s $dir_with_frame_shift/frame_shift $dir/frame_shift; then - echo "$0:error: different frame_shift in directories $dir and " \ - "$dir_with_frame_shift. 
Cannot combine features." - exit 1; - fi - dir_with_frame_shift=$dir - fi -done - -# W.r.t. utt2uniq file the script has different behavior compared to other files -# it is not compulsary for it to exist in src directories, but if it exists in -# even one it should exist in all. We will create the files where necessary -has_utt2uniq=false -for in_dir in $*; do - if [ -f $in_dir/utt2uniq ]; then - has_utt2uniq=true - break - fi -done - -if $has_utt2uniq; then - # we are going to create an utt2uniq file in the destdir - for in_dir in $*; do - if [ ! -f $in_dir/utt2uniq ]; then - # we assume that utt2uniq is a one to one mapping - cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' - else - cat $in_dir/utt2uniq - fi - done | sort -k1 > $dest/utt2uniq - echo "$0: combined utt2uniq" -else - echo "$0 [info]: not combining utt2uniq as it does not exist" -fi -# some of the old scripts might provide utt2uniq as an extrafile, so just remove it -extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") - -# segments are treated similarly to utt2uniq. If it exists in some, but not all -# src directories, then we generate segments where necessary. -has_segments=false -for in_dir in $*; do - if [ -f $in_dir/segments ]; then - has_segments=true - break - fi -done - -if $has_segments; then - for in_dir in $*; do - if [ ! -f $in_dir/segments ]; then - echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 - utils/data/get_segments_for_data.sh $in_dir - else - cat $in_dir/segments - fi - done | sort -k1 > $dest/segments - echo "$0: combined segments" -else - echo "$0 [info]: not combining segments as it does not exist" -fi - -for file in utt2spk utt2lang utt2dur utt2num_frames reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do - exists_somewhere=false - absent_somewhere=false - for d in $*; do - if [ -f $d/$file ]; then - exists_somewhere=true - else - absent_somewhere=true - fi - done - - if ! $absent_somewhere; then - set -o pipefail - ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; - set +o pipefail - echo "$0: combined $file" - else - if ! $exists_somewhere; then - echo "$0 [info]: not combining $file as it does not exist" - else - echo "$0 [info]: **not combining $file as it does not exist everywhere**" - fi - fi -done - -tools/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt - -if [[ $dir_with_frame_shift ]]; then - cp $dir_with_frame_shift/frame_shift $dest -fi - -if ! 
$skip_fix ; then - tools/fix_data_dir.sh $dest || exit 1; -fi - -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/compute-cer.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/compute-cer.py deleted file mode 100644 index a0a8f8fe1f59251c5d8fefeb62ef469276fc6063..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/compute-cer.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import sys -import unicodedata -import codecs - -remove_tag = True -spacelist = [' ', '\t', '\r', '\n'] -puncts = ['!', ',', '?', - '、', '。', '!', ',', ';', '?', - ':', '「', '」', '︰', '『', '』', '《', '》'] - -def characterize(string) : - res = [] - i = 0 - while i < len(string): - char = string[i] - if char in puncts: - i += 1 - continue - cat1 = unicodedata.category(char) - # https://unicodebook.readthedocs.io/unicode.html#unicode-categories - if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned - i += 1 - continue - if cat1 == 'Lo': # letter-other - res.append(char) - i += 1 - else: - # some input looks like: , we want to separate it to two words. - sep = ' ' - if char == '<': - sep = '>' - j = i + 1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c == sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: - return '' - chars = [] - i = 0 - T = len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - if x.isalnum(): - for k in x: - new_sentence.append(k) - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, - 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i - 1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j - 1]['dist'] 
+ self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i - 1][j - 1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab': [], 'rec': [], 'all': 0, 'cor': 0, 'sub': 0, - 'ins': 0, 'del': 0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i={i} , j={j} , \ - error={error}'. 
- format(i=i, j=j, error=self.space[i][j]['error'])) - return result - - def overall(self) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def cluster(self, data) : - result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [unicodedata.name(char) for char in word] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names) - 1) : - if unicode_names[i] != unicode_names[i + 1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) \ - and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] \ - [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] \ - [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose = 1 - padding_symbol = ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose = 0 - try: - verbose = int(b) - except Exception: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol = ' ' - elif b == 'underline': - padding_symbol = '_' - continue - if True or sys.argv[1].startswith('-'): - # ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig = set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, - case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : 
- if tochar: - array = characterize(line) - else: - array = line.rstrip('\n').split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length - len_lab) - space['rec'].append(length - len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end=' ') - else: - print('lab:', end=' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['lab'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end=' ') - else: - print('rec:', end=' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token=token), end='') - for n in range(space['rec'][idx]) : - print(padding_symbol, end='') - print(' ', end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===================================================' - '========================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster(k for k in default_clusters[cluster_id]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + - result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end=' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], - result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like
-                    if token[0:2] == '</' and \
-                            token.lstrip('</').rstrip('>') == cluster_id :
-                        result = calculator.cluster(cluster)
-                        if result['all'] != 0 :
-                            wer = float(result['ins'] + result['sub'] +
-                                        result['del']) * 100.0 / result['all']
-                        else :
-                            wer = 0.0
-                        print('%s -> %4.2f %%' % (cluster_id, wer), end=' ')
-                        print('N=%d C=%d S=%d D=%d I=%d' %
-                              (result['all'], result['cor'], result['sub'],
-                               result['del'], result['ins']))
-                        cluster_id = ''
-                        cluster = []
-                    # begin of cluster reached, like <Keyword>
-                    elif (token[0] == '<' and token[len(token) - 1] == '>' and
-                          cluster_id == ''):
-                        cluster_id = token.lstrip('<').rstrip('>')
-                        cluster = []
-                    # general terms, like WEATHER / CAR / ...
-                    else :
-                        cluster.append(token)
-    print()
-    print('======================================='
-          '====================================')
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/compute-wer.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/compute-wer.py
deleted file mode 100644
index a3eefc0dc7b67f252e685da71a5189312e74ef85..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/tools/compute-wer.py
+++ /dev/null
@@ -1,500 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-
-import re, sys, unicodedata
-import codecs
-
-remove_tag = True
-spacelist= [' ', '\t', '\r', '\n']
-puncts = ['!', ',', '?',
-          '、', '。', '!', ',', ';', '?',
-          ':', '「', '」', '︰', '『', '』', '《', '》']
-
-def characterize(string) :
-  res = []
-  i = 0
-  while i < len(string):
-    char = string[i]
-    if char in puncts:
-      i += 1
-      continue
-    cat1 = unicodedata.category(char)
-    #https://unicodebook.readthedocs.io/unicode.html#unicode-categories
-    if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned
-      i += 1
-      continue
-    if cat1 == 'Lo': # letter-other
-      res.append(char)
-      i += 1
-    else:
-      # some input looks like: , we want to separate it to two words.
- sep = ' ' - if char == '<': sep = '>' - j = i+1 - while j < len(string): - c = string[j] - if ord(c) >= 128 or (c in spacelist) or (c==sep): - break - j += 1 - if j < len(string) and string[j] == '>': - j += 1 - res.append(string[i:j]) - i = j - return res - -def stripoff_tags(x): - if not x: return '' - chars = [] - i = 0; T=len(x) - while i < T: - if x[i] == '<': - while i < T and x[i] != '>': - i += 1 - i += 1 - else: - chars.append(x[i]) - i += 1 - return ''.join(chars) - - -def normalize(sentence, ignore_words, cs, split=None): - """ sentence, ignore_words are both in unicode - """ - new_sentence = [] - for token in sentence: - x = token - if not cs: - x = x.upper() - if x in ignore_words: - continue - if remove_tag: - x = stripoff_tags(x) - if not x: - continue - if split and x in split: - new_sentence += split[x] - else: - new_sentence.append(x) - return new_sentence - -class Calculator : - def __init__(self) : - self.data = {} - self.space = [] - self.cost = {} - self.cost['cor'] = 0 - self.cost['sub'] = 1 - self.cost['del'] = 1 - self.cost['ins'] = 1 - def calculate(self, lab, rec) : - # Initialization - lab.insert(0, '') - rec.insert(0, '') - while len(self.space) < len(lab) : - self.space.append([]) - for row in self.space : - for element in row : - element['dist'] = 0 - element['error'] = 'non' - while len(row) < len(rec) : - row.append({'dist' : 0, 'error' : 'non'}) - for i in range(len(lab)) : - self.space[i][0]['dist'] = i - self.space[i][0]['error'] = 'del' - for j in range(len(rec)) : - self.space[0][j]['dist'] = j - self.space[0][j]['error'] = 'ins' - self.space[0][0]['error'] = 'non' - for token in lab : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - for token in rec : - if token not in self.data and len(token) > 0 : - self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0} - # Computing edit distance - for i, lab_token in enumerate(lab) : - for j, rec_token in enumerate(rec) : - if i == 0 or j == 0 : - continue - min_dist = sys.maxsize - min_error = 'none' - dist = self.space[i-1][j]['dist'] + self.cost['del'] - error = 'del' - if dist < min_dist : - min_dist = dist - min_error = error - dist = self.space[i][j-1]['dist'] + self.cost['ins'] - error = 'ins' - if dist < min_dist : - min_dist = dist - min_error = error - if lab_token == rec_token : - dist = self.space[i-1][j-1]['dist'] + self.cost['cor'] - error = 'cor' - else : - dist = self.space[i-1][j-1]['dist'] + self.cost['sub'] - error = 'sub' - if dist < min_dist : - min_dist = dist - min_error = error - self.space[i][j]['dist'] = min_dist - self.space[i][j]['error'] = min_error - # Tracing back - result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - i = len(lab) - 1 - j = len(rec) - 1 - while True : - if self.space[i][j]['error'] == 'cor' : # correct - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1 - result['all'] = result['all'] + 1 - result['cor'] = result['cor'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - j = j - 1 - elif self.space[i][j]['error'] == 'sub' : # substitution - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1 - result['all'] = result['all'] + 1 - result['sub'] = result['sub'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, rec[j]) - i = i - 1 - 
j = j - 1 - elif self.space[i][j]['error'] == 'del' : # deletion - if len(lab[i]) > 0 : - self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1 - self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1 - result['all'] = result['all'] + 1 - result['del'] = result['del'] + 1 - result['lab'].insert(0, lab[i]) - result['rec'].insert(0, "") - i = i - 1 - elif self.space[i][j]['error'] == 'ins' : # insertion - if len(rec[j]) > 0 : - self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1 - result['ins'] = result['ins'] + 1 - result['lab'].insert(0, "") - result['rec'].insert(0, rec[j]) - j = j - 1 - elif self.space[i][j]['error'] == 'non' : # starting point - break - else : # shouldn't reach here - print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error'])) - return result - def overall(self) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def cluster(self, data) : - result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0} - for token in data : - if token in self.data : - result['all'] = result['all'] + self.data[token]['all'] - result['cor'] = result['cor'] + self.data[token]['cor'] - result['sub'] = result['sub'] + self.data[token]['sub'] - result['ins'] = result['ins'] + self.data[token]['ins'] - result['del'] = result['del'] + self.data[token]['del'] - return result - def keys(self) : - return list(self.data.keys()) - -def width(string): - return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string) - -def default_cluster(word) : - unicode_names = [ unicodedata.name(char) for char in word ] - for i in reversed(range(len(unicode_names))) : - if unicode_names[i].startswith('DIGIT') : # 1 - unicode_names[i] = 'Number' # 'DIGIT' - elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or - unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) : - # 明 / 郎 - unicode_names[i] = 'Mandarin' # 'CJK IDEOGRAPH' - elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or - unicode_names[i].startswith('LATIN SMALL LETTER')) : - # A / a - unicode_names[i] = 'English' # 'LATIN LETTER' - elif unicode_names[i].startswith('HIRAGANA LETTER') : # は こ め - unicode_names[i] = 'Japanese' # 'GANA LETTER' - elif (unicode_names[i].startswith('AMPERSAND') or - unicode_names[i].startswith('APOSTROPHE') or - unicode_names[i].startswith('COMMERCIAL AT') or - unicode_names[i].startswith('DEGREE CELSIUS') or - unicode_names[i].startswith('EQUALS SIGN') or - unicode_names[i].startswith('FULL STOP') or - unicode_names[i].startswith('HYPHEN-MINUS') or - unicode_names[i].startswith('LOW LINE') or - unicode_names[i].startswith('NUMBER SIGN') or - unicode_names[i].startswith('PLUS SIGN') or - unicode_names[i].startswith('SEMICOLON')) : - # & / ' / @ / ℃ / = / . 
/ - / _ / # / + / ; - del unicode_names[i] - else : - return 'Other' - if len(unicode_names) == 0 : - return 'Other' - if len(unicode_names) == 1 : - return unicode_names[0] - for i in range(len(unicode_names)-1) : - if unicode_names[i] != unicode_names[i+1] : - return 'Other' - return unicode_names[0] - -def usage() : - print("compute-wer.py : compute word error rate (WER) and align recognition results and references.") - print(" usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer") - -if __name__ == '__main__': - if len(sys.argv) == 1 : - usage() - sys.exit(0) - calculator = Calculator() - cluster_file = '' - ignore_words = set() - tochar = False - verbose= 1 - padding_symbol= ' ' - case_sensitive = False - max_words_per_line = sys.maxsize - split = None - while len(sys.argv) > 3: - a = '--maxw=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):] - del sys.argv[1] - max_words_per_line = int(b) - continue - a = '--rt=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - remove_tag = (b == 'true') or (b != '0') - continue - a = '--cs=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - case_sensitive = (b == 'true') or (b != '0') - continue - a = '--cluster=' - if sys.argv[1].startswith(a): - cluster_file = sys.argv[1][len(a):] - del sys.argv[1] - continue - a = '--splitfile=' - if sys.argv[1].startswith(a): - split_file = sys.argv[1][len(a):] - del sys.argv[1] - split = dict() - with codecs.open(split_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - words = line.strip().split() - if len(words) >= 2: - split[words[0]] = words[1:] - continue - a = '--ig=' - if sys.argv[1].startswith(a): - ignore_file = sys.argv[1][len(a):] - del sys.argv[1] - with codecs.open(ignore_file, 'r', 'utf-8') as fh: - for line in fh: # line in unicode - line = line.strip() - if len(line) > 0: - ignore_words.add(line) - continue - a = '--char=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - tochar = (b == 'true') or (b != '0') - continue - a = '--v=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - verbose=0 - try: - verbose=int(b) - except: - if b == 'true' or b != '0': - verbose = 1 - continue - a = '--padding-symbol=' - if sys.argv[1].startswith(a): - b = sys.argv[1][len(a):].lower() - del sys.argv[1] - if b == 'space': - padding_symbol= ' ' - elif b == 'underline': - padding_symbol= '_' - continue - if True or sys.argv[1].startswith('-'): - #ignore invalid switch - del sys.argv[1] - continue - - if not case_sensitive: - ig=set([w.upper() for w in ignore_words]) - ignore_words = ig - - default_clusters = {} - default_words = {} - - ref_file = sys.argv[1] - hyp_file = sys.argv[2] - rec_set = {} - if split and not case_sensitive: - newsplit = dict() - for w in split: - words = split[w] - for i in range(len(words)): - words[i] = words[i].upper() - newsplit[w.upper()] = words - split = newsplit - - with codecs.open(hyp_file, 'r', 'utf-8') as fh: - for line in fh: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array)==0: continue - fid = array[0] - rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split) - - # compute error rate on the interaction of reference file and hyp file - for line in open(ref_file, 'r', encoding='utf-8') : - if tochar: - array = characterize(line) 
- else: - array = line.rstrip('\n').split() - if len(array)==0: continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - if verbose: - print('\nutt: %s' % fid) - - for word in rec + lab : - if word not in default_words : - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters : - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name] : - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - - result = calculator.calculate(lab, rec) - if verbose: - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('WER: %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - space = {} - space['lab'] = [] - space['rec'] = [] - for idx in range(len(result['lab'])) : - len_lab = width(result['lab'][idx]) - len_rec = width(result['rec'][idx]) - length = max(len_lab, len_rec) - space['lab'].append(length-len_lab) - space['rec'].append(length-len_rec) - upper_lab = len(result['lab']) - upper_rec = len(result['rec']) - lab1, rec1 = 0, 0 - while lab1 < upper_lab or rec1 < upper_rec: - if verbose > 1: - print('lab(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('lab:', end = ' ') - lab2 = min(upper_lab, lab1 + max_words_per_line) - for idx in range(lab1, lab2): - token = result['lab'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['lab'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print() - if verbose > 1: - print('rec(%s):' % fid.encode('utf-8'), end = ' ') - else: - print('rec:', end = ' ') - rec2 = min(upper_rec, rec1 + max_words_per_line) - for idx in range(rec1, rec2): - token = result['rec'][idx] - print('{token}'.format(token = token), end = '') - for n in range(space['rec'][idx]) : - print(padding_symbol, end = '') - print(' ',end='') - print('\n', end='\n') - lab1 = lab2 - rec1 = rec2 - - if verbose: - print('===========================================================================') - print() - - result = calculator.overall() - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('Overall -> %4.2f %%' % wer, end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if not verbose: - print() - - if verbose: - for cluster_id in default_clusters : - result = calculator.cluster([ k for k in default_clusters[cluster_id] ]) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - if len(cluster_file) > 0 : # compute separated WERs for word clusters - cluster_id = '' - cluster = [] - for line in open(cluster_file, 'r', encoding='utf-8') : - for token in line.decode('utf-8').rstrip('\n').split() : - # end of cluster reached, like - if token[0:2] == '' and \ - token.lstrip('') == cluster_id : - result = calculator.cluster(cluster) - if result['all'] != 0 : - wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all'] - else : - wer = 0.0 - 
print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ') - print('N=%d C=%d S=%d D=%d I=%d' % - (result['all'], result['cor'], result['sub'], result['del'], result['ins'])) - cluster_id = '' - cluster = [] - # begin of cluster reached, like - elif token[0] == '<' and token[len(token)-1] == '>' and \ - cluster_id == '' : - cluster_id = token.lstrip('<').rstrip('>') - cluster = [] - # general terms, like WEATHER / CAR / ... - else : - cluster.append(token) - print() - print('===========================================================================') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/compute_cmvn_stats.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/compute_cmvn_stats.py deleted file mode 100644 index 9c89789c47be0c855939469e86040f10398e9d89..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/compute_cmvn_stats.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys -import argparse -import json -import codecs -import yaml - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.utils.data import Dataset, DataLoader - -torchaudio.set_audio_backend("sox_io") - - -class CollateFunc(object): - ''' Collate function for AudioDataset - ''' - - def __init__(self, feat_dim, resample_rate): - self.feat_dim = feat_dim - self.resample_rate = resample_rate - pass - - def __call__(self, batch): - mean_stat = torch.zeros(self.feat_dim) - var_stat = torch.zeros(self.feat_dim) - number = 0 - for item in batch: - value = item[1].strip().split(",") - assert len(value) == 3 or len(value) == 1 - wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info(wav_path).sample_rate - resample_rate = sample_rate - # len(value) == 3 means segmented wav.scp, - # len(value) == 1 means original wav.scp - if len(value) == 3: - start_frame = int(float(value[1]) * sample_rate) - end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( - filepath=wav_path, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(item[1]) - - waveform = waveform * (1 << 15) - if self.resample_rate != 0 and self.resample_rate != sample_rate: - resample_rate = self.resample_rate - waveform = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - - mat = kaldi.fbank(waveform, - num_mel_bins=self.feat_dim, - dither=0.0, - energy_floor=0.0, - sample_frequency=resample_rate) - mean_stat += torch.sum(mat, axis=0) - var_stat += torch.sum(torch.square(mat), axis=0) - number += mat.shape[0] - return number, mean_stat, var_stat - - -class AudioDataset(Dataset): - def __init__(self, data_file): - self.items = [] - with codecs.open(data_file, 'r', encoding='utf-8') as f: - for line in f: - arr = line.strip().split() - self.items.append((arr[0], arr[1])) - - def __len__(self): - return len(self.items) - - def __getitem__(self, idx): - return self.items[idx] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='extract CMVN stats') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for processing') - parser.add_argument('--train_config', - default='', - help='training yaml conf') - parser.add_argument('--in_scp', default=None, help='wav scp file') - parser.add_argument('--out_cmvn', - default='global_cmvn', - help='global cmvn file') - - doc = 
"Print log after every log_interval audios are processed." - parser.add_argument("--log_interval", type=int, default=1000, help=doc) - args = parser.parse_args() - - with open(args.train_config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - feat_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - resample_rate = 0 - if 'resample_conf' in configs['dataset_conf']: - resample_rate = configs['dataset_conf']['resample_conf']['resample_rate'] - print('using resample and new sample rate is {}'.format(resample_rate)) - - collate_func = CollateFunc(feat_dim, resample_rate) - dataset = AudioDataset(args.in_scp) - batch_size = 20 - data_loader = DataLoader(dataset, - batch_size=batch_size, - shuffle=True, - sampler=None, - num_workers=args.num_workers, - collate_fn=collate_func) - - with torch.no_grad(): - all_number = 0 - all_mean_stat = torch.zeros(feat_dim) - all_var_stat = torch.zeros(feat_dim) - wav_number = 0 - for i, batch in enumerate(data_loader): - number, mean_stat, var_stat = batch - all_mean_stat += mean_stat - all_var_stat += var_stat - all_number += number - wav_number += batch_size - - if wav_number % args.log_interval == 0: - print(f'processed {wav_number} wavs, {all_number} frames', - file=sys.stderr, - flush=True) - - cmvn_info = { - 'mean_stat': list(all_mean_stat.tolist()), - 'var_stat': list(all_var_stat.tolist()), - 'frame_num': all_number - } - - with open(args.out_cmvn, 'w') as fout: - fout.write(json.dumps(cmvn_info)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/compute_fbank_feats.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/compute_fbank_feats.py deleted file mode 100644 index 4cc7dae54de6e8b24b14148bd3930d19b4d7b28c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/compute_fbank_feats.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -import torchaudio -import torchaudio.compliance.kaldi as kaldi - -import wenet.dataset.kaldi_io as kaldi_io - -# The "sox" backends are deprecated and will be removed in 0.9.0 release. 
-# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - - -def parse_opts(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--num_mel_bins', - default=80, - type=int, - help='Number of triangular mel-frequency bins') - parser.add_argument('--frame_length', - type=int, - default=25, - help='Frame length in milliseconds') - parser.add_argument('--frame_shift', - type=int, - default=10, - help='Frame shift in milliseconds') - parser.add_argument('--dither', - type=int, - default=0.0, - help='Dithering constant (0.0 means no dither)') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_scp', help='wav scp file') - parser.add_argument('out_ark', help='output ark file') - parser.add_argument('out_scp', help='output scp file') - args = parser.parse_args() - return args - - -# wav format: -def load_wav_scp(wav_scp_file): - wav_list = [] - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_list.append((arr[0], arr[1])) - return wav_list - - -# wav format: -def load_wav_scp_dict(wav_scp_file): - wav_dict = {} - with open(wav_scp_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_dict[arr[0]] = arr[1] - return wav_dict - - -# Segments format: -def load_wav_segments(wav_scp_file, segments_file): - wav_dict = load_wav_scp_dict(wav_scp_file) - audio_list = [] - with open(segments_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - key = arr[0] - wav_file = wav_dict[arr[1]] - start = float(arr[2]) - end = float(arr[3]) - audio_list.append((key, wav_file, start, end)) - return audio_list - - -if __name__ == '__main__': - args = parse_opts() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - if args.segments is None: - audio_list = load_wav_scp(args.wav_scp) - else: - audio_list = load_wav_segments(args.wav_scp, args.segments) - - count = 0 - with open(args.out_ark, 'wb') as ark_fout, \ - open(args.out_scp, 'w', encoding='utf8') as scp_fout: - for item in audio_list: - if len(item) == 2: - key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) - else: - assert len(item) == 4 - key, wav_path, start, end = item - sample_rate = torchaudio.info(wav_path).sample_rate - frame_offset = int(start * sample_rate) - num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( - wav_path, frame_offset, num_frames) - - mat = kaldi.fbank(waveform, - num_mel_bins=args.num_mel_bins, - frame_length=args.frame_length, - frame_shift=args.frame_shift, - dither=args.dither, - energy_floor=0.0, - sample_frequency=sample_rate) - mat = mat.detach().numpy() - kaldi_io.write_ark_scp(key, mat, ark_fout, scp_fout) - count += 1 - if count % 10000 == 0: - logging.info('Progress {}/{}'.format(count, len(audio_list))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/copy_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/copy_data_dir.sh deleted file mode 100644 index ee880c4c3ca398a58a4e306467c639b0a76310bb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/copy_data_dir.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# Apache 2.0 - -# This script operates on a 
directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# wav.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/segments ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir -else # no segments->wav indexed by utt. 
- if [ -f $srcdir/wav.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - fi -fi - -if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/utt2num_frames ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2num_frames >$destdir/utt2num_frames -fi -if [ -f $srcdir/reco2dur ]; then - if [ -f $srcdir/segments ]; then - cp $srcdir/reco2dur $destdir/reco2dur - else - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur - fi -fi -if [ -f $srcdir/spk2gender ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi -for f in frame_shift stm glm ctm; do - if [ -f $srcdir/$f ]; then - cp $srcdir/$f $destdir - fi -done - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -echo $validate_opts -echo $destdir -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/decode.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/decode.sh deleted file mode 100644 index 1d49b0e48631f4818fb9c464df66904170275a33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/decode.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2021 Mobvoi Inc. All Rights Reserved. -# Author: binbinzhang@mobvoi.com (Binbin Zhang) -export GLOG_logtostderr=1 -export GLOG_v=2 - -set -e - -nj=1 -chunk_size=-1 -ctc_weight=0.0 -reverse_weight=0.0 -rescoring_weight=1.0 -# For CTC WFST based decoding -fst_path= -dict_path= -acoustic_scale=1.0 -beam=15.0 -lattice_beam=12.0 -min_active=200 -max_active=7000 -blank_skip_thresh=1.0 -length_penalty=0.0 - -. tools/parse_options.sh || exit 1; -if [ $# != 5 ]; then - echo "Usage: $0 [options] " - exit 1; -fi - -if ! which decoder_main > /dev/null; then - echo "decoder_main is not built, please go to runtime/libtorch to build it." - exit 1; -fi - -scp=$1 -label_file=$2 -model_file=$3 -unit_file=$4 -dir=$5 - -mkdir -p $dir/split${nj} - -# Step 1. Split wav.scp -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${dir}/split${nj}/wav.${n}.scp" -done -tools/data/split_scp.pl ${scp} ${split_scps} - -# Step 2. Parallel decoding -wfst_decode_opts= -if [ ! 
-z $fst_path ]; then - wfst_decode_opts="--fst_path $fst_path" - wfst_decode_opts="$wfst_decode_opts --beam $beam" - wfst_decode_opts="$wfst_decode_opts --dict_path $dict_path" - wfst_decode_opts="$wfst_decode_opts --lattice_beam $lattice_beam" - wfst_decode_opts="$wfst_decode_opts --max_active $max_active" - wfst_decode_opts="$wfst_decode_opts --min_active $min_active" - wfst_decode_opts="$wfst_decode_opts --acoustic_scale $acoustic_scale" - wfst_decode_opts="$wfst_decode_opts --blank_skip_thresh $blank_skip_thresh" - wfst_decode_opts="$wfst_decode_opts --length_penalty $length_penalty" - echo $wfst_decode_opts > $dir/config -fi -for n in $(seq ${nj}); do -{ - decoder_main \ - --rescoring_weight $rescoring_weight \ - --ctc_weight $ctc_weight \ - --reverse_weight $reverse_weight \ - --chunk_size $chunk_size \ - --wav_scp ${dir}/split${nj}/wav.${n}.scp \ - --model_path $model_file \ - --unit_path $unit_file \ - $wfst_decode_opts \ - --result ${dir}/split${nj}/${n}.text &> ${dir}/split${nj}/${n}.log -} & -done -wait - -# Step 3. Merge files -for n in $(seq ${nj}); do - cat ${dir}/split${nj}/${n}.text -done > ${dir}/text -tail $dir/split${nj}/*.log | grep RTF | awk '{sum+=$NF}END{print sum/NR}' > $dir/rtf - -# Step 4. Compute WER -python3 tools/compute-wer.py --char=1 --v=1 \ - $label_file $dir/text > $dir/wer diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/feat_to_shape.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/feat_to_shape.sh deleted file mode 100644 index ab6d45c60709dd05a38f8da269d617233d0d39f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/feat_to_shape.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# Begin configuration section. -nj=4 -cmd=run.pl -verbose=0 -filetype="" -preprocess_conf="" -# End configuration section. - -help_message=$(cat << EOF -Usage: $0 [options] [] -e.g.: $0 data/train/feats.scp data/train/shape.scp data/train/log -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) - -echo "$0 $*" 1>&2 # Print the command line for logging - -. parse_options.sh || exit 1; - -if [ $# -lt 2 ] || [ $# -gt 3 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -scp=$1 -outscp=$2 -data=$(dirname ${scp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -split_scps="" -for n in $(seq ${nj}); do - split_scps="${split_scps} ${logdir}/feats.${n}.scp" -done - -utils/split_scp.pl ${scp} ${split_scps} - -if [ -n "${preprocess_conf}" ]; then - preprocess_opt="--preprocess-conf ${preprocess_conf}" -else - preprocess_opt="" -fi -if [ -n "${filetype}" ]; then - filetype_opt="--filetype ${filetype}" -else - filetype_opt="" -fi - -${cmd} JOB=1:${nj} ${logdir}/feat_to_shape.JOB.log \ - feat-to-len --verbose=${verbose} \ - scp:${logdir}/feats.JOB.scp ark,t:${logdir}/shape.JOB.scp - -feat_dim=$(feat-to-dim scp:$logdir/feats.1.scp -) - -# concatenate the .scp files together. 
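Step 4 of decode.sh above scores the merged transcripts with tools/compute-wer.py --char=1, i.e. a character error rate. As a hedged sketch of what that number measures, CER is the Levenshtein distance between reference and hypothesis character sequences divided by the reference length; the real tool additionally handles tokenization, verbose alignment output, and per-utterance accumulation.

```python
def cer(ref: str, hyp: str) -> float:
    """Character error rate: character edit distance / reference length (sketch only)."""
    r, h = list(ref), list(hyp)
    # dp[i][j] = edit distance between the first i chars of ref and the first j chars of hyp
    dp = [[0] * (len(h) + 1) for _ in range(len(r) + 1)]
    for i in range(len(r) + 1):
        dp[i][0] = i
    for j in range(len(h) + 1):
        dp[0][j] = j
    for i in range(1, len(r) + 1):
        for j in range(1, len(h) + 1):
            sub = dp[i - 1][j - 1] + (r[i - 1] != h[j - 1])
            dp[i][j] = min(sub, dp[i - 1][j] + 1, dp[i][j - 1] + 1)
    return dp[len(r)][len(h)] / max(len(r), 1)

print(f"{cer('今天天气很好', '今天天汽很好'):.4f}")  # 0.1667: one substitution out of six characters
```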
-for n in $(seq ${nj}); do - sed "s:\ *$:,$feat_dim:g" ${logdir}/shape.${n}.scp -done > ${outscp} - -rm -f ${logdir}/feats.*.scp 2>/dev/null diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/filter_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/filter_scp.pl deleted file mode 100644 index b76d37f41be0886470281978bfacf97f6b8ae976..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/filter_scp.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation -# Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# This script takes a list of utterance-ids or any file whose first field -# of each line is an utterance-id, and filters an scp -# file (or any file whose "n-th" field is an utterance id), printing -# out only those lines whose "n-th" field is in id_list. The index of -# the "n-th" field is 1, by default, but can be changed by using -# the -f switch - -$exclude = 0; -$field = 1; -$shifted = 0; - -do { - $shifted=0; - if ($ARGV[0] eq "--exclude") { - $exclude = 1; - shift @ARGV; - $shifted=1; - } - if ($ARGV[0] eq "-f") { - $field = $ARGV[1]; - shift @ARGV; shift @ARGV; - $shifted=1 - } -} while ($shifted); - -if(@ARGV < 1 || @ARGV > 2) { - die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . - "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . - "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . - "only the lines that were *not* in id_list.\n" . - "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . - "If your older scripts (written before Oct 2014) stopped working and you used the\n" . - "-f option, add 1 to the argument.\n" . - "See also: utils/filter_scp.pl .\n"; -} - - -$idlist = shift @ARGV; -open(F, "<$idlist") || die "Could not open id-list file $idlist"; -while() { - @A = split; - @A>=1 || die "Invalid id-list file line $_"; - $seen{$A[0]} = 1; -} - -if ($field == 1) { # Treat this as special case, since it is common. - while(<>) { - $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; - # $1 is what we filter on. - if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { - print $_; - } - } -} else { - while(<>) { - @A = split; - @A > 0 || die "Invalid scp file line $_"; - @A >= $field || die "Invalid scp file line $_"; - if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { - print $_; - } - } -} - -# tests: -# the following should print "foo 1" -# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) -# the following should print "bar 2". 
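The self-test comments above capture the whole contract of filter_scp.pl: keep (or, with --exclude, drop) the lines whose f-th field appears in the id list. A minimal Python re-statement of that logic, for illustration only; the Perl script also special-cases field 1 for speed.

```python
def filter_scp(id_list, lines, field=1, exclude=False):
    """Keep lines whose `field`-th (1-based) token is in id_list; invert with exclude=True."""
    seen = {line.split()[0] for line in id_list if line.split()}
    kept = []
    for line in lines:
        parts = line.split()
        if len(parts) < field:
            raise ValueError(f"Invalid scp line: {line!r}")
        if (parts[field - 1] in seen) != exclude:
            kept.append(line)
    return kept

print(filter_scp(["foo"], ["foo 1", "bar 2"]))         # ['foo 1']
print(filter_scp(["2"], ["foo 1", "bar 2"], field=2))  # ['bar 2']
```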
-# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fix_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/fix_data_dir.sh deleted file mode 100644 index d1644c1cac4264c78eae7d91b03c4126baf7ec4c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fix_data_dir.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/bin/bash - -# This script makes sure that only the segments present in -# all of "feats.scp", "wav.scp" [if present], segments [if present] -# text, and utt2spk are present in any of them. -# It puts the original contents of data-dir into -# data-dir/.backup - -cmd="$@" - -utt_extra_files= -spk_extra_files= - -. tools/parse_options.sh - -if [ $# != 1 ]; then - echo "Usage: utils/data/fix_data_dir.sh " - echo "e.g.: utils/data/fix_data_dir.sh data/train" - echo "This script helps ensure that the various files in a data directory" - echo "are correctly sorted and filtered, for example removing utterances" - echo "that have no features (if feats.scp is present)" - exit 1 -fi - -data=$1 - -if [ -f $data/images.scp ]; then - image/fix_data_dir.sh $cmd - exit $? -fi - -mkdir -p $data/.backup - -[ ! -d $data ] && echo "$0: no such directory $data" && exit 1; - -[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; - -set -e -o pipefail -u - -tmpdir=$(mktemp -d /tmp/kaldi.XXXX); -trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM - -export LC_ALL=C - -function check_sorted { - file=$1 - sort -k1,1 -u <$file >$file.tmp - if ! cmp -s $file $file.tmp; then - echo "$0: file $1 is not in sorted order or not unique, sorting it" - mv $file.tmp $file - else - rm $file.tmp - fi -} - -for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \ - reco2file_and_channel spk2gender utt2lang utt2emo utt2uniq utt2dur reco2dur utt2num_frames; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - check_sorted $data/$x - fi -done - - -function filter_file { - filter=$1 - file_to_filter=$2 - cp $file_to_filter ${file_to_filter}.tmp - tools/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter - if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=$(cat ${file_to_filter}.tmp | wc -l) - length2=$(cat ${file_to_filter} | wc -l) - if [ $length1 -ne $length2 ]; then - echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." - fi - fi - rm $file_to_filter.tmp -} - -function filter_recordings { - # We call this once before the stage when we filter on utterance-id, and once - # after. - - if [ -f $data/segments ]; then - # We have a segments file -> we need to filter this and the file wav.scp, and - # reco2file_and_utt, if it exists, to make sure they have the same list of - # recording-ids. - - if [ ! -f $data/wav.scp ]; then - echo "$0: $data/segments exists but not $data/wav.scp" - exit 1; - fi - awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=$(cat $tmpdir/recordings | wc -l) - [ ! -s $tmpdir/recordings ] && \ - echo "Empty list of recordings (bad file $data/segments)?" 
&& exit 1; - tools/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp - mv $tmpdir/recordings.tmp $tmpdir/recordings - - - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - filter_file $tmpdir/recordings $data/segments - cp $data/segments{,.tmp}; awk '{print $2, $1, $3, $4}' <$data/segments.tmp >$data/segments - rm $data/segments.tmp - - filter_file $tmpdir/recordings $data/wav.scp - [ -f $data/reco2file_and_channel ] && filter_file $tmpdir/recordings $data/reco2file_and_channel - [ -f $data/reco2dur ] && filter_file $tmpdir/recordings $data/reco2dur - true - fi -} - -function filter_speakers { - # throughout this program, we regard utt2spk as primary and spk2utt as derived, so... - tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - for s in cmvn.scp spk2gender; do - f=$data/$s - if [ -f $f ]; then - filter_file $f $tmpdir/speakers - fi - done - - filter_file $tmpdir/speakers $data/spk2utt - tools/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - - for s in cmvn.scp spk2gender $spk_extra_files; do - f=$data/$s - if [ -f $f ]; then - filter_file $tmpdir/speakers $f - fi - done -} - -function filter_utts { - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - echo "$(cat $tmpdir/utts | wc -l)" - ! cat $data/utt2spk | sort | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order (fix this yourself)" && exit 1; - - ! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \ - echo "utt2spk is not in sorted order when sorted first on speaker-id " && \ - echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1; - - ! cat $data/spk2utt | sort | cmp - $data/spk2utt && \ - echo "spk2utt is not in sorted order (fix this yourself)" && exit 1; - - if [ -f $data/utt2uniq ]; then - ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \ - echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1; - fi - - maybe_wav= - maybe_reco2dur= - [ ! -f $data/segments ] && maybe_wav=wav.scp # wav indexed by utts only if segments does not exist. - [ -s $data/reco2dur ] && [ ! -f $data/segments ] && maybe_reco2dur=reco2dur # reco2dur indexed by utts - - maybe_utt2dur= - if [ -f $data/utt2dur ]; then - cat $data/utt2dur | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2dur.ok || exit 1 - maybe_utt2dur=utt2dur.ok - fi - - maybe_utt2num_frames= - if [ -f $data/utt2num_frames ]; then - cat $data/utt2num_frames | \ - awk '{ if (NF == 2 && $2 > 0) { print }}' > $data/utt2num_frames.ok || exit 1 - maybe_utt2num_frames=utt2num_frames.ok - fi - - for x in feats.scp text segments utt2lang utt2emo $maybe_wav $maybe_utt2dur $maybe_utt2num_frames; do - if [ -f $data/$x ]; then - tools/filter_scp.pl $data/$x $tmpdir/utts > $tmpdir/utts.tmp - echo "$data/$x, $(cat $tmpdir/utts | wc -l), $(cat $tmpdir/utts.tmp | wc -l)" - mv $tmpdir/utts.tmp $tmpdir/utts - # echo "$tmpdir/utts" - fi - done - rm $data/utt2dur.ok 2>/dev/null || true - rm $data/utt2num_frames.ok 2>/dev/null || true - - [ ! -s $tmpdir/utts ] && echo "fix_data_dir.sh: no utterances remained: not proceeding further." && \ - rm $tmpdir/utts && exit 1; - - - if [ -f $data/utt2spk ]; then - new_nutts=$(cat $tmpdir/utts | wc -l) - old_nutts=$(cat $data/utt2spk | wc -l) - if [ $new_nutts -ne $old_nutts ]; then - echo "fix_data_dir.sh: kept $new_nutts utterances out of $old_nutts" - else - echo "fix_data_dir.sh: kept all $old_nutts utterances." 
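Stripped of the backups and the recording/speaker passes, the core of fix_data_dir.sh is a set intersection: an utterance survives only if it appears in every per-utterance table that exists. A small illustrative sketch of that invariant, not the script itself.

```python
def surviving_utts(tables):
    """tables: dicts keyed by utterance id (utt2spk, feats.scp, text, ...); return common ids."""
    ids = set(tables[0])
    for table in tables[1:]:
        ids &= set(table)
    return sorted(ids)  # keep the LC_ALL=C-style lexicographic order the tools expect

utt2spk = {"utt1": "spkA", "utt2": "spkB", "utt3": "spkA"}
feats   = {"utt1": "feats.ark:12", "utt3": "feats.ark:99"}  # utt2 has no features
text    = {"utt1": "你好", "utt2": "谢谢", "utt3": "再见"}
print(surviving_utts([utt2spk, feats, text]))  # ['utt1', 'utt3']
```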
- fi - fi - - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2emo utt2dur utt2num_frames $maybe_wav $maybe_reco2dur $utt_extra_files; do - if [ -f $data/$x ]; then - cp $data/$x $data/.backup/$x - if ! cmp -s $data/$x <( tools/filter_scp.pl $tmpdir/utts $data/$x ) ; then - tools/filter_scp.pl $tmpdir/utts $data/.backup/$x > $data/$x - fi - fi - done - -} - -filter_recordings -filter_speakers -filter_utts -filter_speakers -filter_recordings - -tools/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt - -echo "fix_data_dir.sh: old files are kept in $data/.backup" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/flake8_hook.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/flake8_hook.py deleted file mode 100644 index bbe21bf4aa8ab460aca0eba5a24785e4d6b2c39d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/flake8_hook.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -import sys - -from flake8.main import git - -if __name__ == '__main__': - sys.exit( - git.hook( - strict=True, - lazy=git.config_for('lazy'), - ) - ) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/format_data.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/format_data.sh deleted file mode 100644 index 51f4602dfa0bac7873541c7f621ef4bb9eb29c94..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/format_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Mobvoi Corporation (Author: Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -echo "$0 $*" >&2 # Print the command line for logging -. ./path.sh - -nj=1 -cmd=run.pl -nlsyms="" -lang="" -feat="" -feat_type="kaldi" -oov="" -bpecode="" -allow_one_column=false -raw="" -verbose=0 -trans_type=char -filetype="" -preprocess_conf="" -category="" -out="" # If omitted, write in stdout -help_message=$(cat << EOF -Usage: $0 -e.g. $0 data/train data/lang_1char/train_units.txt -Options: - --nj # number of parallel jobs - --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. - --feat # feat.scp or feat1.scp,feat2.scp,... - --feat-type # kaldi or wav - --oov # Default: - --out # If omitted, write in stdout - --filetype # Specify the format of feats file - --preprocess-conf # Apply preprocess to feats when creating shape.scp - --verbose # Default: 0 -EOF -) -. tools/parse_options.sh - -if [ $# != 2 ]; then - echo "${help_message}" 1>&2 - exit 1; -fi - -set -euo pipefail - -dir=$1 -dic=$2 -tmpdir=$(mktemp -d ${dir}/tmp-XXXXX) -#trap 'rm -rf ${tmpdir}' EXIT - -# 1. 
Create scp files for inputs -# These are not necessary for decoding mode, and make it as an option -input= -if [ -n "${feat}" ]; then - _feat_scps=$(echo "${feat}" | tr ',' ' ' ) - read -r -a feat_scps <<< $_feat_scps - num_feats=${#feat_scps[@]} - - for (( i=1; i<=num_feats; i++ )); do - feat=${feat_scps[$((i-1))]} - mkdir -p ${tmpdir}/input_${i} - input+="input_${i} " - cat ${feat} > ${tmpdir}/input_${i}/feat.scp - - # Dump in the "legacy" style JSON format - if [ -n "${filetype}" ]; then - awk -v filetype=${filetype} '{print $1 " " filetype}' ${feat} \ - > ${tmpdir}/input_${i}/filetype.scp - fi - - if [ ${feat_type} == "kaldi" ]; then - tools/feat_to_shape.sh --cmd "${cmd}" --nj ${nj} \ - --filetype "${filetype}" \ - --preprocess-conf "${preprocess_conf}" \ - --verbose ${verbose} ${feat} ${tmpdir}/input_${i}/shape.scp - elif [ ${feat_type} == "wav" ] || [ ${feat_type} == "flac" ] || [ ${feat_type} == "opus" ]; then - if [ -f $dir/segments ]; then - # used for segmented wav.scp - awk '{print $1" "$4-$3}' $dir/segments > $dir/utt2dur - fi - if [ ! -f $dir/utt2dur ]; then - tools/wav_to_duration.sh --nj ${nj} \ - ${feat} ${tmpdir}/input_${i}/shape.scp - # use the existed utt2dur as shape.scp directly - else - cp $dir/utt2dur ${tmpdir}/input_${i}/shape.scp - fi - fi - done -fi - -# 2. Create scp files for outputs -mkdir -p ${tmpdir}/output -if [ -n "${bpecode}" ]; then - if [ "${trans_type}" == "cn_char_en_bpe" ]; then - tools/text2token.py -s 1 -n 1 -m ${bpecode} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp - else - paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text \ - | tools/spm_encode --model=${bpecode} --output_format=piece) \ - > ${tmpdir}/output/token.scp - fi -elif [ -n "${nlsyms}" ]; then - tools/text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -elif [ -n "${raw}" ]; then - cat $dir/text > ${tmpdir}/output/token.scp -else - tools/text2token.py -s 1 -n 1 ${dir}/text --trans_type ${trans_type} > ${tmpdir}/output/token.scp -fi -< ${tmpdir}/output/token.scp tools/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/output/tokenid.scp -odim=$(cat ${dic} | wc -l) -< ${tmpdir}/output/tokenid.scp awk -v odim=${odim} '{print $1 " " NF-1 "," odim}' > ${tmpdir}/output/shape.scp - -cat ${dir}/text > ${tmpdir}/output/text.scp - -# 3. Create scp files for the others -mkdir -p ${tmpdir}/other -if [ -n "${lang}" ]; then - awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/other/lang.scp -fi - -if [ -n "${category}" ]; then - awk -v category=${category} '{print $1 " " category}' ${dir}/text \ - > ${tmpdir}/other/category.scp -fi -#cat ${dir}/utt2spk > ${tmpdir}/other/utt2spk.scp - -# 4. 
Merge scp files into a one file -opts="" -for intype in ${input} output other; do - if [ -z "$(find "${tmpdir}/${intype}" -name "*.scp")" ]; then - continue - fi - - if [ ${intype} != other ]; then - opts+="--${intype%_*}-scps " - else - opts+="--scps " - fi - - for x in "${tmpdir}/${intype}"/*.scp; do - k=$(basename ${x} .scp) - if [ ${k} = shape ]; then - opts+="shape:${x}:shape " - else - opts+="${k}:${x} " - fi - done -done - -if ${allow_one_column}; then - opts+="--allow-one-column true " -else - opts+="--allow-one-column false " -fi - -if [ -n "${out}" ]; then - opts+="-O ${out}" -fi - -tools/merge_scp2txt.py --verbose ${verbose} ${opts} - -#rm -fr ${tmpdir} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/add_lex_disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/add_lex_disambig.pl deleted file mode 100644 index dd8a25de6e1140a6d19b1e876f2e76f528532edf..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/add_lex_disambig.pl +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2013-2016 Johns Hopkins University (author: Daniel Povey) -# 2015 Hainan Xu -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Adds disambiguation symbols to a lexicon. -# Outputs still in the normal lexicon format. -# Disambig syms are numbered #1, #2, #3, etc. (#0 -# reserved for symbol in grammar). -# Outputs the number of disambig syms to the standard output. -# With the --pron-probs option, expects the second field -# of each lexicon line to be a pron-prob. -# With the --sil-probs option, expects three additional -# fields after the pron-prob, representing various components -# of the silence probability model. - -$pron_probs = 0; -$sil_probs = 0; -$first_allowed_disambig = 1; - -for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { - if ($ARGV[0] eq "--pron-probs") { - $pron_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--sil-probs") { - $sil_probs = 1; - shift @ARGV; - } - if ($ARGV[0] eq "--first-allowed-disambig") { - $first_allowed_disambig = 0 + $ARGV[1]; - if ($first_allowed_disambig < 1) { - die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; - } - shift @ARGV; - shift @ARGV; - } -} - -if (@ARGV != 2) { - die "Usage: add_lex_disambig.pl [opts] \n" . - "This script adds disambiguation symbols to a lexicon in order to\n" . - "make decoding graphs determinizable; it adds pseudo-phone\n" . - "disambiguation symbols #1, #2 and so on at the ends of phones\n" . - "to ensure that all pronunciations are different, and that none\n" . - "is a prefix of another.\n" . - "It prints to the standard output the number of the largest-numbered" . - "disambiguation symbol that was used.\n" . - "\n" . - "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . - " --sil-probs [should be with --pron-probs option]\n" . 
- " Expect 3 extra fields after the pron-probs, for aspects of\n" . - " the silence probability model\n" . - " --first-allowed-disambig The number of the first disambiguation symbol\n" . - " that this script is allowed to add. By default this is\n" . - " #1, but you can set this to a larger value using this option.\n" . - "e.g.:\n" . - " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . - " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; -} - - -$lexfn = shift @ARGV; -$lexoutfn = shift @ARGV; - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - -# (1) Read in the lexicon. -@L = ( ); -while() { - @A = split(" ", $_); - push @L, join(" ", @A); -} - -# (2) Work out the count of each phone-sequence in the -# lexicon. - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { - $p = shift @A; - if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } - } - if ($sil_probs) { - $silp = shift @A; - if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - $correction = shift @A; - if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } - } - if (!(@A)) { - die "Bad lexicon line $1, no phone in phone list"; - } - $count{join(" ",@A)}++; -} - -# (3) For each left sub-sequence of each phone-sequence, note down -# that it exists (for identifying prefixes of longer strings). - -foreach $l (@L) { - @A = split(" ", $l); - shift @A; # Remove word. - if ($pron_probs) { shift @A; } # remove pron-prob. - if ($sil_probs) { - shift @A; # Remove silprob - shift @A; # Remove silprob - } - while(@A > 0) { - pop @A; # Remove last phone - $issubseq{join(" ",@A)} = 1; - } -} - -# (4) For each entry in the lexicon: -# if the phone sequence is unique and is not a -# prefix of another word, no diambig symbol. -# Else output #1, or #2, #3, ... if the same phone-seq -# has already been assigned a disambig symbol. - - -open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; - -# max_disambig will always be the highest-numbered disambiguation symbol that -# has been used so far. -$max_disambig = $first_allowed_disambig - 1; - -foreach $l (@L) { - @A = split(" ", $l); - $word = shift @A; - if ($pron_probs) { - $pron_prob = shift @A; - } - if ($sil_probs) { - $sil_word_prob = shift @A; - $word_sil_correction = shift @A; - $prev_nonsil_correction = shift @A - } - $phnseq = join(" ", @A); - if (!defined $issubseq{$phnseq} - && $count{$phnseq} == 1) { - ; # Do nothing. - } else { - if ($phnseq eq "") { # need disambig symbols for the empty string - # that are not use anywhere else. - $max_disambig++; - $reserved_for_the_empty_string{$max_disambig} = 1; - $phnseq = "#$max_disambig"; - } else { - $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; - if (!defined $cur_disambig) { - $cur_disambig = $first_allowed_disambig; - } else { - $cur_disambig++; # Get a number that has not been used yet for - # this phone sequence. - } - while (defined $reserved_for_the_empty_string{$cur_disambig}) { - $cur_disambig++; - } - if ($cur_disambig > $max_disambig) { - $max_disambig = $cur_disambig; - } - $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; - $phnseq = $phnseq . " #" . 
$cur_disambig; - } - } - if ($pron_probs) { - if ($sil_probs) { - print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; - } else { - print O "$word\t$pron_prob\t$phnseq\n"; - } - } else { - print O "$word\t$phnseq\n"; - } -} - -print $max_disambig . "\n"; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/compile_lexicon_token_fst.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/compile_lexicon_token_fst.sh deleted file mode 100644 index b67814fe3f3244b14b8e494bfe46c4829c4f8bd6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/compile_lexicon_token_fst.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2015 Yajie Miao (Carnegie Mellon University) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the -# phoneme and character-based lexicons. -set -eo pipefail -. tools/parse_options.sh - -if [ $# -ne 3 ]; then - echo "usage: tools/fst/compile_lexicon_token_fst.sh " - echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang" - echo " should contain the following files:" - echo "lexicon.txt units.txt" - echo "options: " - exit 1; -fi - -srcdir=$1 -tmpdir=$2 -dir=$3 -mkdir -p $dir $tmpdir - -[ -f path.sh ] && . ./path.sh - -export LC_ALL=C - -cp $srcdir/units.txt $dir - -# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0. -# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is. -perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1; - -# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst. -# Without these symbols, determinization will fail. -ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt` -ndisambig=$[$ndisambig+1]; - -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list - -# Get the full list of CTC tokens used in FST. These tokens include , the blank , -# the actual model unit, and the disambiguation symbols. -cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list -(echo '';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt - -# ctc_token_fst_corrected is too big and too slow for character based chinese modeling, -# so here use ctc_token_fst_compact -tools/fst/ctc_token_fst_compact.py $dir/tokens.txt | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/T.fst || exit 1; - -# Encode the words with indices. Will be used in lexicon and language model FST compiling. 
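The reason add_lex_disambig.pl is needed is easiest to see on a toy lexicon: a pronunciation shared by several words, or one that is a prefix of another pronunciation, gets a #k suffix so that L.fst stays determinizable. A hedged Python sketch of just that core rule; the Perl script additionally supports pron-probs, sil-probs, the empty-pronunciation case, and a configurable first disambiguation index.

```python
from collections import Counter

def add_disambig(lexicon):
    """lexicon: list of (word, phone_tuple); append '#k' where pronunciations collide."""
    counts = Counter(phones for _, phones in lexicon)
    prefixes = set()
    for _, phones in lexicon:
        for n in range(1, len(phones)):
            prefixes.add(phones[:n])
    last_used, out = {}, []
    for word, phones in lexicon:
        if counts[phones] == 1 and phones not in prefixes:
            out.append((word, phones))            # unique and not a prefix: nothing to add
        else:
            k = last_used.get(phones, 0) + 1      # #1, #2, ... per colliding pronunciation
            last_used[phones] = k
            out.append((word, phones + (f"#{k}",)))
    return out

lex = [("好", ("h", "ao3")), ("号", ("h", "ao3")), ("好吗", ("h", "ao3", "m", "a"))]
for word, phones in add_disambig(lex):
    print(word, " ".join(phones))
# 好 h ao3 #1
# 号 h ao3 #2
# 好吗 h ao3 m a
```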
-cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; - -# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time. -token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'` -word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'` - -tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \ - fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; - -echo "Lexicon and token FSTs compiling succeeded" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/ctc_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/ctc_token_fst.py deleted file mode 100644 index d81644b9cd216177a10a17772781d3293abe084f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/ctc_token_fst.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 1 ') -print('1 1 ') -print('2 2 ') -print('2 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 3 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(1, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 2, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/ctc_token_fst_compact.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/ctc_token_fst_compact.py deleted file mode 100644 index d3018d8b14ce25108cb1acc637cecded5d41be13..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/ctc_token_fst_compact.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - node = 1 - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, node, phone, phone)) - print('{} {} {} {}'.format(node, node, phone, '')) - print('{} {} {} {}'.format(node, 0, '', '')) - node += 1 -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/ctc_token_fst_corrected.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/ctc_token_fst_corrected.py deleted file mode 100644 index 81f7079eccb9e6447c46cdfdf6378aca7efe4a09..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/ctc_token_fst_corrected.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import sys - - -def il(n): - return n + 1 - - -def ol(n): - return n + 1 - - -def s(n): - return n - - -if __name__ == "__main__": - with open(sys.argv[1]) as f: - lines = f.readlines() - phone_count = 0 - disambig_count = 0 - for line in lines: - sp = line.split() - phone = sp[0] - 
if phone == '' or phone == '': - continue - if phone.startswith('#'): - disambig_count += 1 - else: - phone_count += 1 - - # 1. add start state - print('0 0 {} 0'.format(il(0))) - - # 2. 0 -> i, i -> i, i -> 0 - for i in range(1, phone_count + 1): - print('0 {} {} {}'.format(s(i), il(i), ol(i))) - print('{} {} {} 0'.format(s(i), s(i), il(i))) - print('{} 0 {} 0'.format(s(i), il(0))) - - # 3. i -> other phone - for i in range(1, phone_count + 1): - for j in range(1, phone_count + 1): - if i != j: - print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j))) - - # 4. add disambiguous arcs on every final state - for i in range(0, phone_count + 1): - for j in range(phone_count + 2, phone_count + disambig_count + 2): - print('{} {} {} {}'.format(s(i), s(i), 0, j)) - - # 5. every i is final state - for i in range(0, phone_count + 1): - print(s(i)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/eps2disambig.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/eps2disambig.pl deleted file mode 100644 index e1d84a6bf56703596a0e4552d184f7168f724bcb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/eps2disambig.pl +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation -# 2015 Guoguo Chen - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces epsilon with #0 on the input side only, of the G.fst -# acceptor. - -while(<>){ - if (/\s+#0\s+/) { - print STDERR "$0: ERROR: LM has word #0, " . - "which is reserved as disambiguation symbol\n"; - exit 1; - } - s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; - print; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/make_lexicon_fst.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/make_lexicon_fst.pl deleted file mode 100644 index f97129c05cb3ba6460be401e92001261acfaf746..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/make_lexicon_fst.pl +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation -# 2013 Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional). 
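A compact CTC token FST like the one T.fst is compiled from above can be emitted as OpenFst text with a few lines: a blank self-loop at state 0, one state per token that absorbs repeats, and epsilon-input arcs for the disambiguation symbols. This sketch assumes the conventional <eps>/<blank> spellings for the special symbols and is an illustration, not the original generator.

```python
def compact_ctc_token_fst(tokens):
    """Return the text form of a compact CTC token FST over `tokens` (assumed conventions)."""
    lines = ["0 0 <blank> <eps>"]                       # consume blanks at the start state
    node = 1
    for phone in tokens:
        if phone in ("<eps>", "<blank>"):
            continue
        if "#" in phone:                                # disambiguation symbol: epsilon input
            lines.append(f"0 0 <eps> {phone}")
        else:
            lines.append(f"0 {node} {phone} {phone}")     # emit the token once
            lines.append(f"{node} {node} {phone} <eps>")  # absorb repeated frames
            lines.append(f"{node} 0 <eps> <eps>")         # return to the start state
            node += 1
    lines.append("0")                                   # state 0 is final
    return "\n".join(lines)

print(compact_ctc_token_fst(["<eps>", "<blank>", "你", "好", "#0", "#1"]))
```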
- -$pron_probs = 0; - -if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { - $pron_probs = 1; - shift @ARGV; -} - -if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; - print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; - print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; - print STDERR " word phone1 phone2 ... phoneN;\n"; - print STDERR "if the --pron-probs option is used, each line is:\n"; - print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; - print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; - print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; - print STDERR "this is your responsibility.\n\n"; - print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; - print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; - print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; - exit(1); -} - -$lexfn = shift @ARGV; -if (@ARGV == 0) { - $silprob = 0.0; -} elsif (@ARGV == 2) { - ($silprob,$silphone) = @ARGV; -} else { - ($silprob,$silphone,$sildisambig) = @ARGV; -} -if ($silprob != 0.0) { - $silprob < 1.0 || die "Sil prob cannot be >= 1.0"; - $silcost = -log($silprob); - $nosilcost = -log(1.0 - $silprob); -} - - -open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; - - -if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero. - $loopstate = 0; - $nextstate = 1; # next unallocated state. - while () { - @A = split(" ", $_); - @A == 0 && die "Empty lexicon line."; - foreach $a (@A) { - if ($a eq "") { - die "Bad lexicon line $_ ( is forbidden)"; - } - } - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - } else { - $ns = $loopstate; - } - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; # so we only print it on the first arc of the word. - $s = $ns; - } - } - print "$loopstate\t0\n"; # final-cost. -} else { # have silence probs. - $startstate = 0; - $loopstate = 1; - $silstate = 2; # state from where we go to loopstate after emitting silence. - print "$startstate\t$loopstate\t\t\t$nosilcost\n"; # no silence. - if (!defined $sildisambig) { - print "$startstate\t$loopstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$loopstate\t$silphone\t\n"; # no cost. - $nextstate = 3; - } else { - $disambigstate = 3; - $nextstate = 4; - print "$startstate\t$disambigstate\t$silphone\t\t$silcost\n"; # silence. - print "$silstate\t$disambigstate\t$silphone\t\n"; # no cost. - print "$disambigstate\t$loopstate\t$sildisambig\t\n"; # silence disambiguation symbol. - } - while () { - @A = split(" ", $_); - $w = shift @A; - if (! $pron_probs) { - $pron_cost = 0.0; - } else { - $pron_prob = shift @A; - if (! 
defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) { - die "Bad pronunciation probability in line $_"; - } - $pron_cost = -log($pron_prob); - } - if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; } - $s = $loopstate; - $word_or_eps = $w; - while (@A > 0) { - $p = shift @A; - if (@A > 0) { - $ns = $nextstate++; - print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n"; - $word_or_eps = ""; - $pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time. - $s = $ns; - } elsif (!defined($silphone) || $p ne $silphone) { - # This is non-deterministic but relatively compact, - # and avoids epsilons. - $local_nosilcost = $nosilcost + $pron_cost; - $local_silcost = $silcost + $pron_cost; - print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n"; - print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n"; - } else { - # no point putting opt-sil after silence word. - print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n"; - } - } - } - print "$loopstate\t0\n"; # final-cost. -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/make_tlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/make_tlg.sh deleted file mode 100644 index 98694e5540968760f0c27eaf30a6668f4c46c50d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/make_tlg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# - -if [ -f path.sh ]; then . path.sh; fi - -lm_dir=$1 -src_lang=$2 -tgt_lang=$3 - -arpa_lm=${lm_dir}/lm.arpa -[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1; - -rm -rf $tgt_lang -cp -r $src_lang $tgt_lang - -# Compose the language model to FST -cat $arpa_lm | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v ' ' | \ - grep -v -i '' | \ - grep -v -i '' | \ - arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \ - tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \ - --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ - fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst - - -echo "Checking how stochastic G is (the first of these numbers should be small):" -fstisstochastic $tgt_lang/G.fst - -# Compose the token, lexicon and language-model FST into the final decoding graph -fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \ - fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1; -fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1; - -echo "Composing decoding graph TLG.fst succeeded" -#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/prepare_dict.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/prepare_dict.py deleted file mode 100644 index 8a6a3cfe7cfded0c863637deef0bae2f2ede5557..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/prepare_dict.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -# sys.argv[1]: e2e model unit file(lang_char.txt) -# sys.argv[2]: raw lexicon file -# sys.argv[3]: output lexicon file -# sys.argv[4]: bpemodel - -unit_table = set() -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for line in fin: - unit = line.split()[0] - unit_table.add(unit) - - -def contain_oov(units): - for unit in units: - if 
unit not in unit_table: - return True - return False - - -bpemode = len(sys.argv) > 4 -if bpemode: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.Load(sys.argv[4]) -lexicon_table = set() -with open(sys.argv[2], 'r', encoding='utf8') as fin, \ - open(sys.argv[3], 'w', encoding='utf8') as fout: - for line in fin: - word = line.split()[0] - if word == 'SIL' and not bpemode: # `sil` might be a valid piece in bpemodel - continue - elif word == '': - continue - else: - # each word only has one pronunciation for e2e system - if word in lexicon_table: - continue - if bpemode: - # We assume that the lexicon does not contain code-switch, - # i.e. the word contains both English and Chinese. - # see PR https://github.com/wenet-e2e/wenet/pull/1693 - # and Issue https://github.com/wenet-e2e/wenet/issues/1653 - if word.encode('utf8').isalpha(): - pieces = sp.EncodeAsPieces(word) - else: - pieces = word - if contain_oov(pieces): - print( - 'Ignoring words {}, which contains oov unit'.format( - ''.join(word).strip('▁')) - ) - continue - chars = ' '.join( - [p if p in unit_table else '' for p in pieces]) - else: - # ignore words with OOV - if contain_oov(word): - print('Ignoring words {}, which contains oov unit'.format(word)) - continue - # Optional, append ▁ in front of english word - # we assume the model unit of our e2e system is char now. - if word.encode('utf8').isalpha() and '▁' in unit_table: - word = '▁' + word - chars = ' '.join(word) # word is a char list - fout.write('{} {}\n'.format(word, chars)) - lexicon_table.add(word) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/remove_oovs.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/remove_oovs.pl deleted file mode 100644 index ac914c3bd9363eded791cdeb309fd05e980c4f2e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/remove_oovs.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script removes lines that contain these OOVs on either the -# third or fourth fields of the line. It is intended to remove arcs -# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). 
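For the character-based (non-BPE) branch of prepare_dict.py above, a lexicon entry is simply the word spelled out as its characters, with any word containing an out-of-vocabulary unit dropped. A minimal sketch of that branch only; the sentencepiece/BPE path and the leading-▁ handling for English words are omitted.

```python
def build_char_lexicon(words, unit_table):
    """Map each word to a space-separated character pronunciation, skipping OOV words."""
    lexicon = {}
    for word in words:
        chars = list(word)
        if any(c not in unit_table for c in chars):
            print(f"Ignoring word {word}, which contains an OOV unit")
            continue
        lexicon.setdefault(word, " ".join(chars))  # one pronunciation per word
    return lexicon

units = {"你", "好", "谢"}
print(build_char_lexicon(["你好", "谢谢", "再见"], units))
# {'你好': '你 好', '谢谢': '谢 谢'}   ('再见' is dropped: its characters are not model units)
```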
- -if ( @ARGV < 1 && @ARGV > 2) { - die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; -} - -$unklist = shift @ARGV; -open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; -while(){ - @A = split(" ", $_); - @A == 1 || die "Bad line in unknown-symbol list: $_"; - $unk{$A[0]} = 1; -} - -$num_removed = 0; -while(<>){ - @A = split(" ", $_); - if(defined $unk{$A[2]} || defined $unk{$A[3]}) { - $num_removed++; - } else { - print; - } -} -print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/rnnt_token_fst.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/rnnt_token_fst.py deleted file mode 100644 index cc6def1703311ab700a4a01f22c1adda32db9b0d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/rnnt_token_fst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python - -import sys - -print('0 0 ') - -with open(sys.argv[1], 'r', encoding='utf8') as fin: - for entry in fin: - fields = entry.strip().split(' ') - phone = fields[0] - if phone == '' or phone == '': - continue - elif '#' in phone: # disambiguous phone - print('{} {} {} {}'.format(0, 0, '', phone)) - else: - print('{} {} {} {}'.format(0, 0, phone, phone)) -print('0') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/s2eps.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/s2eps.pl deleted file mode 100644 index ffeeb8eb6af3c4f319f31ebff80be388d8f59e1a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/fst/s2eps.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This script replaces and with (on both input and output sides), -# for the G.fst acceptor. - -while(<>){ - @A = split(" ", $_); - if ( @A >= 4 ) { - if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } - if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } - } - print join("\t", @A) . 
"\n"; -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/git-pre-commit b/models/audio/speech_recognition/conformer/igie/wenet/tools/git-pre-commit deleted file mode 100644 index b6e448ed375a0ddf502ce332685de8a99e88dc08..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/git-pre-commit +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -echo "Running pre-commit flake8" -python tools/flake8_hook.py diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/install_srilm.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/install_srilm.sh deleted file mode 100644 index 4aa113c14722a73fd3d3f84430025d44173c207b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/install_srilm.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2022 Binbin Zhang(binbzha@qq.com) - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -srilm_url="https://github.com/BitSpeech/SRILM/archive/refs/tags/1.7.3.tar.gz" - -if [ ! -f ./srilm.tar.gz ]; then - if ! wget -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 - fi -fi - -tar -zxvf srilm.tar.gz -mv SRILM-1.7.3 srilm - -# set the SRILM variable in the top-level Makefile to this directory. -cd srilm -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -make || exit -cd .. - -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/k2/make_hlg.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/k2/make_hlg.sh deleted file mode 100644 index 18c2268487410824ae11b199cf06f37acd717c88..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/k2/make_hlg.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) - -lexion_dir=$1 -lm_dir=$2 -tgt_dir=$3 - -# k2 and icefall updates very fast. Below commits are veryfied in this script. -# k2 3dc222f981b9fdbc8061b3782c3b385514a2d444, icefall 499ac24ecba64f687ff244c7d66baa5c222ecf0f - -# For k2 installation, please refer to https://github.com/k2-fsa/k2/ -python -c "import k2; print(k2.__file__)" -python -c "import torch; import _k2; print(_k2.__file__)" - -# Prepare necessary icefall scripts -if [ ! 
-d tools/k2/icefall ]; then - git clone --depth 1 https://github.com/k2-fsa/icefall.git tools/k2/icefall -fi -pip3 install -r tools/k2/icefall/requirements.txt -export PYTHONPATH=`pwd`/tools/k2/icefall:`pwd`/tools/k2/icefall/egs/aishell/ASR/local:$PYTHONPATH - -# 8.1 Prepare char based lang -mkdir -p $tgt_dir -python tools/k2/prepare_char.py $lexion_dir/units.txt $lm_dir/wordlist $tgt_dir -echo "Compile lexicon L.pt L_disambig.pt succeeded" - -# 8.2 Prepare G -mkdir -p data/lm -python -m kaldilm \ - --read-symbol-table="$tgt_dir/words.txt" \ - --disambig-symbol='#0' \ - --max-order=3 \ - $lm_dir/lm.arpa > data/lm/G_3_gram.fst.txt - -# 8.3 Compile HLG -python tools/k2/icefall/egs/aishell/ASR/local/compile_hlg.py --lang-dir $tgt_dir -echo "Compile decoding graph HLG.pt succeeded" \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/k2/prepare_char.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/k2/prepare_char.py deleted file mode 100644 index 6e05042c42eb280135f6be7cdb3566b185258b90..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/k2/prepare_char.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, -# Wei Kang) -# Copyright 2022 Ximalaya Speech Team (author: Xiang Lyu) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -""" - -This script generates the following files in the directory sys.argv[3]: - - - lexicon.txt - - lexicon_disambig.txt - - L.pt - - L_disambig.pt - - tokens.txt - - words.txt -""" - -import sys -from pathlib import Path -from typing import Dict, List - -import k2 -import torch -from prepare_lang import ( - Lexicon, - add_disambig_symbols, - add_self_loops, - write_lexicon, - write_mapping, -) - - -def lexicon_to_fst_no_sil( - lexicon: Lexicon, - token2id: Dict[str, int], - word2id: Dict[str, int], - need_self_loops: bool = False, -) -> k2.Fsa: - """Convert a lexicon to an FST (in k2 format). - - Args: - lexicon: - The input lexicon. See also :func:`read_lexicon` - token2id: - A dict mapping tokens to IDs. - word2id: - A dict mapping words to IDs. - need_self_loops: - If True, add self-loop to states with non-epsilon output symbols - on at least one arc out of the state. The input label for this - self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. - Returns: - Return an instance of `k2.Fsa` representing the given lexicon. 
- """ - loop_state = 0 # words enter and leave from here - next_state = 1 # the next un-allocated state, will be incremented as we go - - arcs = [] - - # The blank symbol is defined in local/train_bpe_model.py - assert token2id[""] == 0 - assert word2id[""] == 0 - - eps = 0 - - for word, pieces in lexicon: - assert len(pieces) > 0, f"{word} has no pronunciations" - cur_state = loop_state - - word = word2id[word] - pieces = [ - token2id[i] if i in token2id else token2id[""] for i in pieces - ] - - for i in range(len(pieces) - 1): - w = word if i == 0 else eps - arcs.append([cur_state, next_state, pieces[i], w, 0]) - - cur_state = next_state - next_state += 1 - - # now for the last piece of this word - i = len(pieces) - 1 - w = word if i == 0 else eps - arcs.append([cur_state, loop_state, pieces[i], w, 0]) - - if need_self_loops: - disambig_token = token2id["#0"] - disambig_word = word2id["#0"] - arcs = add_self_loops( - arcs, - disambig_token=disambig_token, - disambig_word=disambig_word, - ) - - final_state = next_state - arcs.append([loop_state, final_state, -1, -1, 0]) - arcs.append([final_state]) - - arcs = sorted(arcs, key=lambda arc: arc[0]) - arcs = [[str(i) for i in arc] for arc in arcs] - arcs = [" ".join(arc) for arc in arcs] - arcs = "\n".join(arcs) - - fsa = k2.Fsa.from_str(arcs, acceptor=False) - return fsa - - -def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool: - """Check if all the given tokens are in token symbol table. - - Args: - token_sym_table: - Token symbol table that contains all the valid tokens. - tokens: - A list of tokens. - Returns: - Return True if there is any token not in the token_sym_table, - otherwise False. - """ - for tok in tokens: - if tok not in token_sym_table: - return True - return False - - -def generate_lexicon( - token_sym_table: Dict[str, int], words: List[str] -) -> Lexicon: - """Generate a lexicon from a word list and token_sym_table. - - Args: - token_sym_table: - Token symbol table that mapping token to token ids. - words: - A list of strings representing words. - Returns: - Return a dict whose keys are words and values are the corresponding - tokens. - """ - lexicon = [] - for word in words: - chars = list(word.strip(" \t")) - if contain_oov(token_sym_table, chars): - continue - lexicon.append((word, chars)) - - # The OOV word is - lexicon.append(("", [""])) - return lexicon - - -def generate_tokens(text_file: str) -> Dict[str, int]: - """Generate tokens from the given text file. - - Args: - text_file: - A file that contains text lines to generate tokens. - Returns: - Return a dict whose keys are tokens and values are token ids ranged - from 0 to len(keys) - 1. - """ - token2id: Dict[str, int] = dict() - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - char, index = line.replace('\n', '').split() - assert char not in token2id - token2id[char] = int(index) - assert token2id[''] == 0 - return token2id - - -def generate_words(text_file: str) -> Dict[str, int]: - """Generate words from the given text file. - - Args: - text_file: - A file that contains text lines to generate words. - Returns: - Return a dict whose keys are words and values are words ids ranged - from 0 to len(keys) - 1. 
- """ - words = [] - with open(text_file, "r", encoding="utf-8") as f: - for line in f: - word = line.replace('\n', '') - assert word not in words - words.append(word) - words.sort() - - # We put '' '' at begining of word2id - # '#0', '', '' at end of word2id - words = [word for word in words - if word not in ['', '', '#0', '', '']] - words.insert(0, '') - words.insert(1, '') - words.append('#0') - words.append('') - words.append('') - word2id = {j: i for i, j in enumerate(words)} - return word2id - - -def main(): - token2id = generate_tokens(sys.argv[1]) - word2id = generate_words(sys.argv[2]) - tgt_dir = Path(sys.argv[3]) - - words = [word for word in word2id.keys() - if word not in - ["", "!SIL", "", "", "#0", "", ""]] - lexicon = generate_lexicon(token2id, words) - - lexicon_disambig, max_disambig = add_disambig_symbols(lexicon) - next_token_id = max(token2id.values()) + 1 - for i in range(max_disambig + 1): - disambig = f"#{i}" - assert disambig not in token2id - token2id[disambig] = next_token_id - next_token_id += 1 - - write_mapping(tgt_dir / "tokens.txt", token2id) - write_mapping(tgt_dir / "words.txt", word2id) - write_lexicon(tgt_dir / "lexicon.txt", lexicon) - write_lexicon(tgt_dir / "lexicon_disambig.txt", lexicon_disambig) - - L = lexicon_to_fst_no_sil( - lexicon, - token2id=token2id, - word2id=word2id, - ) - L_disambig = lexicon_to_fst_no_sil( - lexicon_disambig, - token2id=token2id, - word2id=word2id, - need_self_loops=True, - ) - torch.save(L.as_dict(), tgt_dir / "L.pt") - torch.save(L_disambig.as_dict(), tgt_dir / "L_disambig.pt") - - -if __name__ == "__main__": - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/latency_metrics.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/latency_metrics.py deleted file mode 100644 index df2d8eee45f8e2d7c8536f208d44fafaeac3341f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/latency_metrics.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2022 Horizon Inc. (author: Xingchen Song) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import argparse -import logging -import librosa -import torch -import torchaudio -import yaml - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.font_manager as fm -import torchaudio.compliance.kaldi as kaldi - -from wenet.utils.init_model import init_model -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.mask import make_pad_mask -from wenet.utils.common import replace_duplicates_with_blank - - -def get_args(): - parser = argparse.ArgumentParser( - description='Analyze latency and plot CTC-Spike.') - parser.add_argument('--config', required=True, - type=str, help='configration') - parser.add_argument('--gpu', - type=int, - default=0, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--ckpt', required=True, - type=str, help='model checkpoint') - parser.add_argument('--tag', required=True, - type=str, help='image subtitle') - parser.add_argument('--wavscp', required=True, - type=str, help='wav.scp') - parser.add_argument('--alignment', required=True, - type=str, help='force alignment, generated by Kaldi.') - parser.add_argument('--chunk_size', required=True, - type=int, help='chunk size') - parser.add_argument('--left_chunks', default=-1, - type=int, help='left chunks') - parser.add_argument('--font', required=True, - type=str, help='font file') - parser.add_argument('--dict', required=True, - type=str, help='dict file') - parser.add_argument('--result_dir', required=True, - type=str, help='saving pdf') - parser.add_argument('--model_type', default='ctc', - choices=['ctc', 'transducer'], - help='show latency metrics from ctc models or rnn-t models') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - torch.manual_seed(777) - - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - symbol_table = read_symbol_table(args.dict) - char_dict = {v: k for k, v in symbol_table.items()} - - # 1. Load model - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - - model = init_model(conf) - load_checkpoint(model, args.ckpt) - model = model.eval().to(device) - - subsampling = model.encoder.embed.subsampling_rate - eos = model.eos_symbol() - - with open(args.wavscp, 'r') as fin: - wavs = fin.readlines() - - # 2. 
Forward model (get streaming_timestamps) - timestamps = {} - for idx, wav in enumerate(wavs): - if idx % 100 == 0: - logging.info("processed {}.".format(idx)) - key, wav = wav.strip().split(' ', 1) - waveform, sr = torchaudio.load(wav) - resample_rate = conf['dataset_conf']['resample_conf']['resample_rate'] - waveform = torchaudio.transforms.Resample( - orig_freq=sr, new_freq=resample_rate)(waveform) - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank( - waveform, - num_mel_bins=conf['dataset_conf']['fbank_conf']['num_mel_bins'], - frame_length=conf['dataset_conf']['fbank_conf']['frame_length'], - frame_shift=conf['dataset_conf']['fbank_conf']['frame_shift'], - dither=0.0, energy_floor=0.0, - sample_frequency=resample_rate, - ) - - speech = mat.unsqueeze(0).to(device) - speech_lengths = torch.tensor([mat.size(0)]).to(device) - - # Let's assume batch_size = 1 - encoder_out, encoder_mask = model.encoder( - speech, speech_lengths, args.chunk_size, args.left_chunks) - - maxlen = encoder_out.size(1) # (B, maxlen, encoder_dim) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # CTC greedy search - if args.model_type == 'ctc': - ctc_probs = model.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - topk_prob = topk_prob.view(1, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) - topk_prob = topk_prob.masked_fill_(mask, 0.0) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - hyps = [replace_duplicates_with_blank(hyp) for hyp in hyps] - scores = [prob.tolist() for prob in topk_prob] - timestamps[key] = [hyps[0], scores[0], wav] - - if args.model_type == 'transducer': - hyps = [] - scores = [] - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = 1 - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, - padding, cache) - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - scores.append(torch.max(joint_out_probs).item()) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or \ - per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - hyps.append(model.blank) - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - timestamps[key] = [hyps, scores, wav] - - # 3. 
Analyze latency - with open(args.alignment, 'r') as fin: - aligns = fin.readlines() - not_found, len_unequal, ignored = 0, 0, 0 - datas = [] - for align in aligns: - key, align = align.strip().split(' ', 1) - if key not in timestamps: - not_found += 1 - continue - fa, st = [], [] # force_alignment, streaming_timestamps - text_fa, text_st = "", "" - for i, token in enumerate(align.split()): - if token != '': - text_fa += token - # NOTE(xcsong): W/O subsample - fa.append(i * 10) - # ignore alignment_errors >= 70ms - frames_fa = len(align.split()) - frames_st = len(timestamps[key][0]) * subsampling - if abs(frames_st - frames_fa) >= 7: - ignored += 1 - continue - for i, token_id in enumerate(timestamps[key][0]): - if token_id != 0: - text_st += char_dict[token_id] - # NOTE(xcsong): W subsample - st.append(i * subsampling * 10) - if len(fa) != len(st): - len_unequal += 1 - continue - # datas[i] = [key, text_fa, text_st, list_of_diff, - # FirstTokenDelay, LastTokenDelay, AvgTokenDelay, - # streaming_timestamps, force_alignment] - datas.append([key, text_fa, text_st, - [a - b for a, b in zip(st, fa)], - st[0] - fa[0], st[-1] - fa[-1], - (sum(st) - sum(fa)) / len(st), - timestamps[key], align.split()]) - - logging.info("not found: {}, length unequal: {}, ignored: {}, \ - valid samples: {}".format(not_found, len_unequal, ignored, len(datas))) - - # 4. Plot and print - num_datas = len(datas) - names = ['FirstTokenDelay', 'LastTokenDelay', 'AvgTokenDelay'] - names_index = [4, 5, 6] - parts = ['max', 'P90', 'P75', 'P50', 'P25', 'min'] - parts_index = [num_datas - 1, int(num_datas * 0.90), int(num_datas * 0.75), - int(num_datas * 0.50), int(num_datas * 0.25), 0] - for name, name_idx in zip(names, names_index): - def f(name_idx=name_idx): - return name_idx - datas.sort(key=lambda x: x[f()]) - logging.info("==========================") - for p, i in zip(parts, parts_index): - data = datas[i] - # i.e., LastTokenDelay P90: 270.000 ms (wav_id: BAC009S0902W0144) - logging.info("{} {}: {:.3f} ms (wav_id: {})".format( - name, p, data[f()], datas[i][0])) - - font = fm.FontProperties(fname=args.font) - plt.rcParams['axes.unicode_minus'] = False - # we will have 2 sub-plots (force-align + streaming timestamps) - # plus one wav-plot - fig, axes = plt.subplots(figsize=(60, 60), nrows=3, ncols=1) - for j in range(2): - if j == 0: - # subplot-0: streaming_timestamps - plt_prefix = args.tag + "_" + name + "_" + p - x = np.arange(len(data[7][0])) * subsampling - hyps, scores = data[7][0], data[7][1] - else: - # subplot-1: force_alignments - plt_prefix = "force_alignment" - x = np.arange(len(data[8])) - hyps = [symbol_table[d] for d in data[8]] - scores = [0.0] * len(data[8]) - axes[j].set_title(plt_prefix, fontsize=30) - for frame, token, prob in zip(x, hyps, scores): - if char_dict[token] != '': - axes[j].bar( - frame, np.exp(prob), - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].text( - frame, np.exp(prob), - '{} {:.3f} {}'.format( - char_dict[token], np.exp(prob), frame), - fontdict=dict(fontsize=24), - fontproperties=font, - ) - else: - axes[j].bar( - frame, 0.01, - label='{} {:.3f}'.format( - char_dict[token], np.exp(prob)), - ) - axes[j].tick_params(labelsize=25) - - # subplot-2: wav - # wav, hardcode sample_rate to 16000 - samples, sr = librosa.load(data[7][2], sr=16000) - time = np.arange(0, len(samples)) * (1.0 / sr) - axes[-1].plot(time, samples) - - # i.e., RESULT_DIR/LTD_P90_120ms_BAC009S0768W0342.pdf - plt.savefig(args.result_dir + "/" + name + "_" + - p + "_" + str(data[f()]) 
+ "ms" + "_" + data[0] + ".pdf") - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/make_raw_list.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/make_raw_list.py deleted file mode 100644 index 2f84f015542bb38da027b8ea61e8638f873cec33..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/make_raw_list.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('output_file', help='output list file') - args = parser.parse_args() - - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - if args.segments is not None: - segments_table = {} - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - with open(args.text_file, 'r', encoding='utf8') as fin, \ - open(args.output_file, 'w', encoding='utf8') as fout: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if args.segments is None: - assert key in wav_table - wav = wav_table[key] - line = dict(key=key, wav=wav, txt=txt) - else: - assert key in segments_table - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - line = dict(key=key, wav=wav, txt=txt, start=start, end=end) - json_line = json.dumps(line, ensure_ascii=False) - fout.write(json_line + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/make_shard_list.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/make_shard_list.py deleted file mode 100644 index 1f7d82829808c9cc181bbc5e0f60cccef8795bae..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/make_shard_list.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import io -import logging -import os -import tarfile -import time -import multiprocessing - -import torch -import torchaudio -import torchaudio.backend.sox_io_backend as sox - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def write_tar_file(data_list, - no_segments, - tar_file, - resample=16000, - index=0, - total=1): - logging.info('Processing {} {}/{}'.format(tar_file, index, total)) - read_time = 0.0 - save_time = 0.0 - write_time = 0.0 - with tarfile.open(tar_file, "w") as tar: - prev_wav = None - for item in data_list: - if no_segments: - key, txt, wav = item - else: - key, txt, wav, start, end = item - - suffix = wav.split('.')[-1] - assert suffix in AUDIO_FORMAT_SETS - if no_segments: - ts = time.time() - with open(wav, 'rb') as fin: - data = fin.read() - read_time += (time.time() - ts) - else: - if wav != prev_wav: - ts = time.time() - waveforms, sample_rate = sox.load(wav, normalize=False) - read_time += (time.time() - ts) - prev_wav = wav - start = int(start * sample_rate) - end = int(end * sample_rate) - audio = waveforms[:1, start:end] - - # resample - if sample_rate != resample: - if not audio.is_floating_point(): - # normalize the audio before resample - # because resample can't process int audio - audio = audio / (1 << 15) - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - audio = (audio * (1 << 15)).short() - else: - audio = torchaudio.transforms.Resample( - sample_rate, resample)(audio) - - ts = time.time() - f = io.BytesIO() - sox.save(f, audio, resample, format="wav", bits_per_sample=16) - # Save to wav for segments file - suffix = "wav" - f.seek(0) - data = f.read() - save_time += (time.time() - ts) - - assert isinstance(txt, str) - ts = time.time() - txt_file = key + '.txt' - txt = txt.encode('utf8') - txt_data = io.BytesIO(txt) - txt_info = tarfile.TarInfo(txt_file) - txt_info.size = len(txt) - tar.addfile(txt_info, txt_data) - - wav_file = key + '.' 
+ suffix - wav_data = io.BytesIO(data) - wav_info = tarfile.TarInfo(wav_file) - wav_info.size = len(data) - tar.addfile(wav_info, wav_data) - write_time += (time.time() - ts) - logging.info('read {} save {} write {}'.format(read_time, save_time, - write_time)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - parser.add_argument('--num_utts_per_shard', - type=int, - default=1000, - help='num utts per shard') - parser.add_argument('--num_threads', - type=int, - default=1, - help='num threads for make shards') - parser.add_argument('--prefix', - default='shards', - help='prefix of shards tar file') - parser.add_argument('--segments', default=None, help='segments file') - parser.add_argument('--resample', - type=int, - default=16000, - help='segments file') - parser.add_argument('wav_file', help='wav file') - parser.add_argument('text_file', help='text file') - parser.add_argument('shards_dir', help='output shards dir') - parser.add_argument('shards_list', help='output shards list file') - args = parser.parse_args() - logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s %(message)s') - - torch.set_num_threads(1) - wav_table = {} - with open(args.wav_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - wav_table[arr[0]] = arr[1] - - no_segments = True - segments_table = {} - if args.segments is not None: - no_segments = False - with open(args.segments, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 4 - segments_table[arr[0]] = (arr[1], float(arr[2]), float(arr[3])) - - data = [] - with open(args.text_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split(maxsplit=1) - key = arr[0] - txt = arr[1] if len(arr) > 1 else '' - if no_segments: - assert key in wav_table - wav = wav_table[key] - data.append((key, txt, wav)) - else: - wav_key, start, end = segments_table[key] - wav = wav_table[wav_key] - data.append((key, txt, wav, start, end)) - - num = args.num_utts_per_shard - chunks = [data[i:i + num] for i in range(0, len(data), num)] - os.makedirs(args.shards_dir, exist_ok=True) - - # Using thread pool to speedup - pool = multiprocessing.Pool(processes=args.num_threads) - shards_list = [] - tasks_list = [] - num_chunks = len(chunks) - for i, chunk in enumerate(chunks): - tar_file = os.path.join(args.shards_dir, - '{}_{:09d}.tar'.format(args.prefix, i)) - shards_list.append(tar_file) - pool.apply_async( - write_tar_file, - (chunk, no_segments, tar_file, args.resample, i, num_chunks)) - - pool.close() - pool.join() - - with open(args.shards_list, 'w', encoding='utf8') as fout: - for name in shards_list: - fout.write(name + '\n') diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/merge_scp2txt.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/merge_scp2txt.py deleted file mode 100644 index 51f1c42f272f0fd9fec0a7d69ee860d2f1eb6158..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/merge_scp2txt.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -from distutils.util import strtobool -from io import open -import logging -import sys - -PY2 = sys.version_info[0] == 2 -sys.stdin = codecs.getreader('utf-8')(sys.stdin if PY2 else sys.stdin.buffer) -sys.stdout = codecs.getwriter('utf-8')( - 
sys.stdout if PY2 else sys.stdout.buffer) - - -# Special types: -def shape(x): - """Change str to List[int] - - >>> shape('3,5') - [3, 5] - >>> shape(' [3, 5] ') - [3, 5] - - """ - - # x: ' [3, 5] ' -> '3, 5' - x = x.strip() - if x[0] == '[': - x = x[1:] - if x[-1] == ']': - x = x[:-1] - - return list(map(int, x.split(','))) - - -def get_parser(): - parser = argparse.ArgumentParser( - description='Given each file paths with such format as ' - '::. type> can be omitted and the default ' - 'is "str". e.g. {} ' - '--input-scps feat:data/feats.scp shape:data/utt2feat_shape:shape ' - '--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape ' - '--output-scps text:data/text shape:data/utt2text_shape:shape ' - '--scps utt2spk:data/utt2spk'.format(sys.argv[0]), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the inputs') - parser.add_argument('--output-scps', - type=str, - nargs='*', - action='append', - default=[], - help='files for the outputs') - parser.add_argument('--scps', - type=str, - nargs='+', - default=[], - help='The files except for the input and outputs') - parser.add_argument('--verbose', - '-V', - default=1, - type=int, - help='Verbose option') - parser.add_argument('--allow-one-column', - type=strtobool, - default=False, - help='Allow one column in input scp files. ' - 'In this case, the value will be empty string.') - parser.add_argument('--out', - '-O', - type=str, - help='The output filename. ' - 'If omitted, then output to sys.stdout') - return parser - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - args.scps = [args.scps] - - # logging info - logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s" - if args.verbose > 0: - logging.basicConfig(level=logging.INFO, format=logfmt) - else: - logging.basicConfig(level=logging.WARN, format=logfmt) - - inputs = {} - assert (len(args.input_scps) == 1) - for f in args.input_scps[0]: - arr = f.strip().split(':') - inputs[arr[0]] = arr[1] - assert ('feat' in inputs) - assert ('shape' in inputs) - - outputs = {} - assert (len(args.output_scps) == 1) - for f in args.output_scps[0]: - arr = f.strip().split(':') - outputs[arr[0]] = arr[1] - assert ('shape' in outputs) - assert ('text' in outputs) - assert ('token' in outputs) - assert ('tokenid' in outputs) - - files = [ - inputs['feat'], inputs['shape'], outputs['text'], outputs['token'], - outputs['tokenid'], outputs['shape'] - ] - fields = ['feat', 'feat_shape', 'text', 'token', 'tokenid', 'token_shape'] - fids = [open(f, 'r', encoding='utf-8') for f in files] - - if args.out is None: - out = sys.stdout - else: - out = open(args.out, 'w', encoding='utf-8') - done = False - while not done: - for i, fid in enumerate(fids): - line = fid.readline() - if line == '': - done = True - break - arr = line.strip().split() - content = ' '.join(arr[1:]) - if i == 0: - out.write('utt:{}'.format(arr[0])) - out.write('\t') - out.write('{}:{}'.format(fields[i], content)) - out.write('\n') - - for f in fids: - f.close() - if args.out is not None: - out.close() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/onnx2horizonbin.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/onnx2horizonbin.py deleted file mode 100644 index a94b647fb19d1446d4bc506c399c85677dddde9f..0000000000000000000000000000000000000000 --- 
a/models/audio/speech_recognition/conformer/igie/wenet/tools/onnx2horizonbin.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. specific decoding method: ctc_greedy_search -""" - -import argparse -import copy -import logging -import os -import sys -import random -import torch -import yaml -import numpy as np - -from torch.utils.data import DataLoader - -from wenet.utils.common import remove_duplicates_and_blank -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import to_numpy -from wenet.bin.export_onnx_bpu import export_encoder, export_ctc - - -try: - import hbdk # noqa: F401 - import horizon_nn # noqa: F401 - from horizon_tc_ui import HB_ONNXRuntime -except ImportError: - print('Please install hbdk,horizon_nn,horizon_tc_ui !') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -def save_data(tensor, dirs, prefix): - if tensor.requires_grad: - data = tensor.detach().numpy().astype(np.float32) - else: - data = tensor.numpy().astype(np.float32) - os.makedirs(dirs, exist_ok=True) - data.tofile(dirs + "/" + prefix + ".bin") - - -def make_calibration_data(enc, args, conf): - conf['shuffle'] = True - logger.info(conf) - dataset = Dataset( - "shard", args.cali_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - cal_data_dir = os.path.join(args.output_dir, 'cal_data_dir') - for batch_idx, batch in enumerate(dataloader): - if batch_idx >= args.max_samples: - break - if 
batch_idx % 100 == 0: - logger.info("processed {} samples.".format(batch_idx)) - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - - # Feed forward overlap input step by step - random_high = (num_frames - context) // stride - num_rand = random.randint(0, random_high) - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - if i == num_rand: - save_data(chunk, "{}/chunk".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_cache, "{}/att_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(cnn_cache, "{}/cnn_cache".format(cal_data_dir), - prefix + "." + str(i)) - save_data(att_mask, "{}/att_mask".format(cal_data_dir), - prefix + "." + str(i)) - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - # NOTE(xcsong): It's fast to calibrate ctc.onnx, - # so it's okay to save all chunks - save_data(y, "{}/hidden".format(cal_data_dir), - prefix + "." 
+ str(i)) - - -def check_wer(enc, ctc, args, conf): - conf['shuffle'] = False - dataset = Dataset( - "shard", args.wer_datalist, args.symbol_table, conf, - bpe_model=args.bpe_model, non_lang_syms=None, partition=False) - dataloader = DataLoader(dataset, batch_size=None, num_workers=0) - char_dict = {v: k for k, v in args.symbol_table.items()} - eos = len(char_dict) - 1 - - enc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_encoder/encoder_quantized_model.onnx") - ctc_session = HB_ONNXRuntime( - model_file=args.output_dir + - "/hb_makertbin_output_ctc/ctc_quantized_model.onnx") - torch_file = open(args.output_dir + "/torch_text", 'w', encoding="utf-8") - onnx_file = open(args.output_dir + "/onnx_text", 'w', encoding="utf-8") - subsampling = enc.embed.subsampling_rate - context = enc.embed.right_context + 1 # Add current frame - stride = subsampling * args.chunk_size - decoding_window = (args.chunk_size - 1) * subsampling + context - required_cache_size = args.chunk_size * args.num_decoding_left_chunks - num_layers = len(enc.encoders) - head, d_k = enc.encoders[0].self_attn.h, enc.encoders[0].self_attn.d_k - dim, lorder = enc._output_size, enc.encoders[0].conv_module.lorder - chunk_size, left_chunks = args.chunk_size, args.num_decoding_left_chunks - for batch_idx, batch in enumerate(dataloader): - keys, feats, target, feats_lengths, target_lengths = batch - num_frames, prefix = feats.size(1), keys[0] - att_cache = torch.zeros( - [1, head * num_layers, d_k * 2, required_cache_size], - dtype=feats.dtype, device=feats.device) - att_mask = torch.ones( - [1, head, chunk_size, required_cache_size + chunk_size], - dtype=feats.dtype, device=feats.device) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros( - [1, dim, num_layers, lorder], - dtype=feats.dtype, device=feats.device) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - - # Feed forward overlap input step by step - torch_out, onnx_out = [], [] - for i, cur in enumerate(range(0, num_frames - context + 1, stride)): - att_mask[:, :, :, -(chunk_size * (i + 1)):] = 1 - end = min(cur + decoding_window, num_frames) - chunk = feats[:, cur:end, :].unsqueeze(0) # (1, 1, window, mel) - if end == num_frames and end - cur < decoding_window: # last chunk - pad_len = decoding_window - (end - cur) # 67 - (35) - pad_chunk = torch.zeros((1, 1, pad_len, chunk.size(-1)), - device=feats.device) - chunk = torch.cat((chunk, pad_chunk), - dim=2) # (1, 1, win, mel) - if pad_len >= subsampling: - att_mask[:, :, :, -(pad_len // subsampling):] = 0 - # Torch model - (y, att_cache, cnn_cache) = enc.forward( - xs=chunk, att_cache=att_cache, - cnn_cache=cnn_cache, att_mask=att_mask) - torch_out.append(ctc.forward(y).transpose(1, 3).squeeze(2)) - # Quantized onnx model - ort_inputs = { - 'chunk': to_numpy(chunk), 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': to_numpy(att_mask)} - ort_outs = enc_session.run_feature( - enc_session.output_names, ort_inputs, input_offset=0) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_y = ctc_session.run_feature( - ctc_session.output_names, {'hidden': ort_outs[0]}, input_offset=0) - onnx_out.append(torch.from_numpy( - np.squeeze(onnx_y[0].transpose(0, 3, 2, 1), axis=2))) - - def post_process(list_out, file_obj, keys): - probs = torch.cat(list_out, dim=1) - maxlen = probs.size(1) - topk_prob, topk_index = probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(1, maxlen) # (B, maxlen) - hyps = 
[hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - for i, key in enumerate(keys): - content = '' - for w in hyps[i]: - if w == eos: - break - content += char_dict[w] - file_obj.write('{} {}\n'.format(key, content)) - return key, content - - if len(torch_out) > 0 and len(onnx_out) > 0: - key, content = post_process(torch_out, torch_file, keys) - logger.info('torch: {} {}'.format(key, content)) - key, content = post_process(onnx_out, onnx_file, keys) - logger.info('onnx : {} {}'.format(key, content)) - torch_file.close() - onnx_file.close() - - -def generate_config(enc_session, ctc_session, args): - template = """ -# 模型参数组 -model_parameters: - # 原始Onnx浮点模型文件 - onnx_model: '{}' - # 转换的目标AI芯片架构 - march: 'bernoulli2' - # 模型转换输出的用于上板执行的模型文件的名称前缀 - output_model_file_prefix: '{}' - # 模型转换输出的结果的存放目录 - working_dir: '{}' - # 指定转换后混合异构模型是否保留输出各层的中间结果的能力 - layer_out_dump: False - # 转换过程中日志生成级别 - log_level: 'debug' -# 输入信息参数组 -input_parameters: - # 原始浮点模型的输入节点名称 - input_name: '{}' - # 原始浮点模型的输入数据格式(数量/顺序与input_name一致) - input_type_train: '{}' - # 原始浮点模型的输入数据排布(数量/顺序与input_name一致) - input_layout_train: '{}' - # 原始浮点模型的输入数据尺寸 - input_shape: '{}' - # 网络实际执行时,输入给网络的batch_size 默认值为1 - # input_batch: 1 - # 在模型中添加的输入数据预处理方法 - norm_type: '{}' - # 预处理方法的图像减去的均值; 如果是通道均值,value之间必须用空格分隔 - # mean_value: '' - # 预处理方法的图像缩放比例,如果是通道缩放比例,value之间必须用空格分隔 - # scale_value: '' - # 转换后混合异构模型需要适配的输入数据格式(数量/顺序与input_name一致) - input_type_rt: '{}' - # 输入数据格式的特殊制式 - input_space_and_range: '' - # 转换后混合异构模型需要适配的输入数据排布(数量/顺序与input_name一致) - input_layout_rt: '{}' -# 校准参数组 -calibration_parameters: - # 模型校准使用的标定样本的存放目录 - cal_data_dir: '{}' - # 开启图片校准样本自动处理(skimage read resize到输入节点尺寸) - preprocess_on: False - # 校准使用的算法类型 - calibration_type: '{}' - # max 校准方式的参数 - max_percentile: 1.0 - # 强制指定OP在CPU上运行 - run_on_cpu: '{}' - # 强制指定OP在BPU上运行 - run_on_bpu: '{}' -# 编译参数组 -compiler_parameters: - # 编译策略选择 - compile_mode: 'latency' - # 是否打开编译的debug信息 - debug: False - # 模型运行核心数 - core_num: 1 - # 模型编译的优化等级选择 - optimize_level: 'O3' -""" - output_dir = os.path.realpath(args.output_dir) - cal_data_dir = os.path.join(output_dir, 'cal_data_dir') - os.makedirs(cal_data_dir, exist_ok=True) - enc_dic = enc_session.get_modelmeta().custom_metadata_map - enc_onnx_path = os.path.join(output_dir, 'encoder.onnx') - enc_log_path = os.path.join(output_dir, 'hb_makertbin_output_encoder') - enc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in enc_dic['input_name'].split(';')]) - ctc_dic = ctc_session.get_modelmeta().custom_metadata_map - ctc_onnx_path = os.path.join(output_dir, 'ctc.onnx') - ctc_log_path = os.path.join(output_dir, 'hb_makertbin_output_ctc') - ctc_cal_data = ";".join( - [cal_data_dir + "/" + x for x in ctc_dic['input_name'].split(';')]) - enc_config = template.format( - enc_onnx_path, "encoder", enc_log_path, - enc_dic['input_name'], enc_dic['input_type'], - enc_dic['input_layout_train'], enc_dic['input_shape'], - enc_dic['norm_type'], enc_dic['input_type'], enc_dic['input_layout_rt'], - enc_cal_data, args.calibration_type, args.extra_ops_run_on_cpu, "") - ctc_config = template.format( - ctc_onnx_path, "ctc", ctc_log_path, - ctc_dic['input_name'], ctc_dic['input_type'], - ctc_dic['input_layout_train'], ctc_dic['input_shape'], - ctc_dic['norm_type'], ctc_dic['input_type'], ctc_dic['input_layout_rt'], - ctc_cal_data, "default", "", "") - with open(output_dir + "/config_encoder.yaml", "w") as enc_yaml: - enc_yaml.write(enc_config) - with open(output_dir + 
"/config_ctc.yaml", "w") as ctc_yaml: - ctc_yaml.write(ctc_config) - - -def get_args(): - parser = argparse.ArgumentParser(description='convert onnx to horizon .bin') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - parser.add_argument('--dict', type=str, required=True, help='dict file') - parser.add_argument('--max_samples', type=int, required=True, - help='maximum samples') - parser.add_argument('--cali_datalist', type=str, default=None, - help='make calibration data') - parser.add_argument('--wer_datalist', type=str, default=None, - help='check wer') - parser.add_argument('--wer_text', type=str, default=None, - help='check wer') - parser.add_argument('--bpe_model', default=None, type=str, - help='bpe model for english part') - parser.add_argument('--ln_run_on_bpu', action='store_true', - help='layernorm running on bpu') - parser.add_argument('--extra_ops_run_on_cpu', type=str, default=None, - help='extra operations running on cpu.') - parser.add_argument('--calibration_type', type=str, default='default', - help='kl / max / default.') - return parser - - -if __name__ == '__main__': - random.seed(777) - parser = get_args() - args = parser.parse_args() - # NOTE(xcsong): X3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - conf = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(conf) - load_checkpoint(model, args.checkpoint) - model.eval() - - symbol_table = read_symbol_table(args.dict) - args.symbol_table = symbol_table - args.feature_size = conf['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - logger.info("Stage-1: Export onnx") - enc, enc_session = export_encoder(model, args) - ctc, ctc_session = export_ctc(model, args) - - conf = copy.deepcopy(conf['dataset_conf']) - conf['filter_conf']['max_length'] = 102400 - conf['filter_conf']['min_length'] = 0 - conf['filter_conf']['token_max_length'] = 102400 - conf['filter_conf']['token_min_length'] = 0 - conf['filter_conf']['max_output_input_ratio'] = 102400 - conf['filter_conf']['min_output_input_ratio'] = 0 - conf['speed_perturb'] = False - conf['spec_aug'] = False - conf['spec_sub'] = False - conf['spec_trim'] = False - conf['shuffle'] = False - conf['sort'] = False - if 'fbank_conf' in conf: - conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in conf: - conf['mfcc_conf']['dither'] = 0.0 - conf['batch_conf']['batch_type'] = "static" - conf['batch_conf']['batch_size'] = 1 - - if args.cali_datalist is not None: - logger.info("Stage-2: Generate config") - # FIXME(xcsong): Remove hard code - logger.info("torch version: {}".format(torch.__version__)) - if int(torch.__version__[:4].split('.')[1]) >= 13: - args.extra_ops_run_on_cpu = "/Split;" + \ - "/encoders.0/self_attn/Split;/encoders.1/self_attn/Split;" + \ - 
"/encoders.2/self_attn/Split;/encoders.3/self_attn/Split;" + \ - "/encoders.4/self_attn/Split;/encoders.5/self_attn/Split;" + \ - "/encoders.6/self_attn/Split;/encoders.7/self_attn/Split;" + \ - "/encoders.8/self_attn/Split;/encoders.9/self_attn/Split;" + \ - "/encoders.10/self_attn/Split;/encoders.11/self_attn/Split;" + \ - "/encoders.0/self_attn/Mul;/encoders.1/self_attn/Mul;" + \ - "/encoders.2/self_attn/Mul;/encoders.3/self_attn/Mul;" + \ - "/encoders.4/self_attn/Mul;/encoders.5/self_attn/Mul;" + \ - "/encoders.6/self_attn/Mul;/encoders.7/self_attn/Mul;" + \ - "/encoders.8/self_attn/Mul;/encoders.9/self_attn/Mul;" + \ - "/encoders.10/self_attn/Mul;/encoders.11/self_attn/Mul;" - else: - args.extra_ops_run_on_cpu = "Split_17;Split_67;Split_209;" + \ - "Split_351;Split_493;Split_635;Split_777;Split_919;Split_1061;" + \ - "Split_1203;Split_1345;Split_1487;Split_1629;" + \ - "Mul_72;Mul_214;Mul_356;Mul_498;Mul_640;Mul_782;" + \ - "Mul_924;Mul_1066;Mul_1208;Mul_1350;Mul_1492;Mul_1634;" - generate_config(enc_session, ctc_session, args) - - logger.info("Stage-3: Make calibration data") - make_calibration_data(enc, args, conf) - - output_dir = os.path.realpath(args.output_dir) - logger.info("Stage-4: Make ctc.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_ctc".format(output_dir) + - " && cd hb_makertbin_log_ctc &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_ctc.yaml") - ) - logger.info("Stage-5: Make encoder.bin") - os.system( - "cd {} && mkdir -p hb_makertbin_log_encoder ".format(output_dir) + - " && cd hb_makertbin_log_encoder &&" + - " hb_mapper makertbin --model-type \"onnx\" --config \"{}\"".format( - output_dir + "/config_encoder.yaml") - ) - - if args.wer_datalist is not None: - logger.info("Stage-6: Check wer between torch model and quantized onnx") - assert args.wer_text is not None - check_wer(enc, ctc, args, conf) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/torch_text", - args.output_dir + "/torch_wer") - ) - os.system( - "python3 tools/compute-wer.py --char=1 --v=1 {} {} > {}".format( - args.wer_text, args.output_dir + "/onnx_text", - args.output_dir + "/onnx_wer") - ) - os.system("tail {} {}".format( - args.output_dir + "/torch_wer", args.output_dir + "/onnx_wer")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/parse_options.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/parse_options.sh deleted file mode 100644 index 34476fdb37a4b14d5fe6e0edbebe97e760d2be5a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/parse_options.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal, Karel Vesely - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# Parse command-line options. -# To be sourced by another script (as in ". 
parse_options.sh"). -# Option format is: --option-name arg -# and shell variable "option_name" gets set to value "arg." -# The exception is --help, which takes no arguments, but prints the -# $help_message variable (if defined). - - -### -### The --config file options have lower priority to command line -### options, so we need to import them first... -### - -# Now import all the configs specified by command-line, in left-to-right order -for ((argpos=1; argpos<$#; argpos++)); do - if [ "${!argpos}" == "--config" ]; then - argpos_plus1=$((argpos+1)) - config=${!argpos_plus1} - [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 - . $config # source the config file. - fi -done - - -### -### No we process the command line options -### -while true; do - [ -z "${1:-}" ] && break; # break if there are no arguments - case "$1" in - # If the enclosing script is called with --help option, print the help - # message and exit. Scripts should put help messages in $help_message - --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; - else printf "$help_message\n" 1>&2 ; fi; - exit 0 ;; - --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" - exit 1 ;; - # If the first command-line argument begins with "--" (e.g. --foo-bar), - # then work out the variable name as $name, which will equal "foo_bar". - --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; - # Next we test whether the variable in question is undefned-- if so it's - # an invalid option and we die. Note: $0 evaluates to the name of the - # enclosing script. - # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar - # is undefined. We then have to wrap this test inside "eval" because - # foo_bar is itself inside a variable ($name). - eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; - - oldval="`eval echo \\$$name`"; - # Work out whether we seem to be expecting a Boolean argument. - if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then - was_bool=true; - else - was_bool=false; - fi - - # Set the variable to the right value-- the escaped quotes make it work if - # the option had spaces, like --cmd "queue.pl -sync y" - eval $name=\"$2\"; - - # Check that Boolean-valued arguments are really Boolean. - if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then - echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 - exit 1; - fi - shift 2; - ;; - *) break; - esac -done - - -# Check for an empty argument to the --cmd option, which can easily occur as a -# result of scripting errors. -[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; - - -true; # so this script returns exit code 0. diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/perturb_data_dir_speed.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/perturb_data_dir_speed.sh deleted file mode 100644 index 901a4882e6481ae269067b0fe7175dba62c4db9e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/perturb_data_dir_speed.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# 2020 @kamo-naoyuki -# This file was copied from Kaldi and -# I deleted parts related to wav duration -# because we shouldn't use kaldi's command here -# and we don't need the files actually. 
- -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# 2014 Tom Ko -# 2018 Emotech LTD (author: Pawel Swietojanski) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# wav.scp -# spk2utt -# utt2spk -# text -# -# It generates the files which are used for perturbing the speed of the original data. - -export LC_ALL=C -set -euo pipefail - -if [[ $# != 3 ]]; then - echo "Usage: perturb_data_dir_speed.sh " - echo "e.g.:" - echo " $0 0.9 data/train_si284 data/train_si284p" - exit 1 -fi - -factor=$1 -srcdir=$2 -destdir=$3 -label="sp" -spk_prefix="${label}${factor}-" -utt_prefix="${label}${factor}-" - -#check is sox on the path - -! command -v sox &>/dev/null && echo "sox: command not found" && exit 1; - -if [[ ! -f ${srcdir}/utt2spk ]]; then - echo "$0: no such file ${srcdir}/utt2spk" - exit 1; -fi - -if [[ ${destdir} == "${srcdir}" ]]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -mkdir -p "${destdir}" - -<"${srcdir}"/utt2spk awk -v p="${utt_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/utt_map" -<"${srcdir}"/spk2utt awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/spk_map" -<"${srcdir}"/wav.scp awk -v p="${spk_prefix}" '{printf("%s %s%s\n", $1, p, $1);}' > "${destdir}/reco_map" -if [[ ! -f ${srcdir}/utt2uniq ]]; then - <"${srcdir}/utt2spk" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $1);}' > "${destdir}/utt2uniq" -else - <"${srcdir}/utt2uniq" awk -v p="${utt_prefix}" '{printf("%s%s %s\n", p, $1, $2);}' > "${destdir}/utt2uniq" -fi - - -<"${srcdir}"/utt2spk utils/apply_map.pl -f 1 "${destdir}"/utt_map | \ - utils/apply_map.pl -f 2 "${destdir}"/spk_map >"${destdir}"/utt2spk - -utils/utt2spk_to_spk2utt.pl <"${destdir}"/utt2spk >"${destdir}"/spk2utt - -if [[ -f ${srcdir}/segments ]]; then - - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/segments | \ - utils/apply_map.pl -f 2 "${destdir}"/reco_map | \ - awk -v factor="${factor}" \ - '{s=$3/factor; e=$4/factor; if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);} }' \ - >"${destdir}"/segments - - utils/apply_map.pl -f 1 "${destdir}"/reco_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - if [[ -f ${srcdir}/reco2file_and_channel ]]; then - utils/apply_map.pl -f 1 "${destdir}"/reco_map \ - <"${srcdir}"/reco2file_and_channel >"${destdir}"/reco2file_and_channel - fi - -else # no segments->wav indexed by utterance. 
- if [[ -f ${srcdir}/wav.scp ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/wav.scp | sed 's/| *$/ |/' | \ - # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename" - awk -v factor="${factor}" \ - '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} - else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" } - else {print wid " sox" $_ " -t wav - speed " factor " |"}}' \ - > "${destdir}"/wav.scp - fi -fi - -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi -if [[ -f ${srcdir}/spk2gender ]]; then - utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender -fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - -rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null -echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - -utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/reduce_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/reduce_data_dir.sh deleted file mode 100644 index 16194dcc7309a646041181a698c53cd4f46e618b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/reduce_data_dir.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -# koried, 10/29/2012 - -# Reduce a data set based on a list of turn-ids - -help_message="usage: $0 srcdir turnlist destdir" - -if [ $1 == "--help" ]; then - echo "${help_message}" - exit 0; -fi - -if [ $# != 3 ]; then - echo "${help_message}" - exit 1; -fi - -srcdir=$1 -reclist=$2 -destdir=$3 - -if [ ! -f ${srcdir}/utt2spk ]; then -echo "$0: no such file $srcdir/utt2spk" -exit 1; -fi - -function do_filtering { -# assumes the utt2spk and spk2utt files already exist. - [ -f ${srcdir}/feats.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/feats.scp >${destdir}/feats.scp - [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/text ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/text >${destdir}/text - [ -f ${srcdir}/utt2num_frames ] && utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/utt2num_frames >${destdir}/utt2num_frames - [ -f ${srcdir}/spk2gender ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/spk2gender >${destdir}/spk2gender - [ -f ${srcdir}/cmvn.scp ] && utils/filter_scp.pl ${destdir}/spk2utt <${srcdir}/cmvn.scp >${destdir}/cmvn.scp - if [ -f ${srcdir}/segments ]; then - utils/filter_scp.pl ${destdir}/utt2spk <${srcdir}/segments >${destdir}/segments - awk '{print $2;}' ${destdir}/segments | sort | uniq > ${destdir}/reco # recordings. - # The next line would override the command above for wav.scp, which would be incorrect. 
- [ -f ${srcdir}/wav.scp ] && utils/filter_scp.pl ${destdir}/reco <${srcdir}/wav.scp >${destdir}/wav.scp - [ -f ${srcdir}/reco2file_and_channel ] && \ - utils/filter_scp.pl ${destdir}/reco <${srcdir}/reco2file_and_channel >${destdir}/reco2file_and_channel - - # Filter the STM file for proper sclite scoring (this will also remove the comments lines) - [ -f ${srcdir}/stm ] && utils/filter_scp.pl ${destdir}/reco < ${srcdir}/stm > ${destdir}/stm - rm ${destdir}/reco - fi - srcutts=$(wc -l < ${srcdir}/utt2spk) - destutts=$(wc -l < ${destdir}/utt2spk) - echo "Reduced #utt from $srcutts to $destutts" -} - -mkdir -p ${destdir} - -# filter the utt2spk based on the set of recordings -utils/filter_scp.pl ${reclist} < ${srcdir}/utt2spk > ${destdir}/utt2spk - -utils/utt2spk_to_spk2utt.pl < ${destdir}/utt2spk > ${destdir}/spk2utt -do_filtering; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/remove_longshortdata.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/remove_longshortdata.py deleted file mode 100644 index 7e92f8a424d2d717acf6fc1db5503f79ba38a898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/remove_longshortdata.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='remove too long or too short data in format.data') - parser.add_argument('--data_file', - type=str, - help='input format data') - parser.add_argument('--output_data_file', - type=str, - help='output format data') - parser.add_argument('--min_input_len', type=float, - default=0, - help='minimum input seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--max_input_len', type=float, - default=20, - help='maximum output seq length, in seconds for raw wav, \ - in frame numbers for feature data') - parser.add_argument('--min_output_len', type=float, - default=0, help='minimum input seq length, in modeling units') - parser.add_argument('--max_output_len', type=float, - default=500, - help='maximum output seq length, in modeling units') - parser.add_argument('--min_output_input_ratio', type=float, default=0.05, - help='minimum output seq length/output seq length ratio') - parser.add_argument('--max_output_input_ratio', type=float, default=10, - help='maximum output seq length/output seq length ratio') - args = parser.parse_args() - - data_file = args.data_file - output_data_file = args.output_data_file - min_input_len = args.min_input_len - max_input_len = args.max_input_len - min_output_len = args.min_output_len - max_output_len = args.max_output_len - min_output_input_ratio = args.min_output_input_ratio - max_output_input_ratio = args.max_output_input_ratio - - with open(data_file, 'r') as f, open(output_data_file, 'w') as fout: - for l in f: - l = l.strip() - if l: - items = l.strip().split('\t') - token_shape = items[6] - feature_shape = items[2] - feat_len = float(feature_shape.split(':')[1].split(',')[0]) - token_len = float(token_shape.split(':')[1].split(',')[0]) - condition = [feat_len > min_input_len, - feat_len < max_input_len, - token_len > min_output_len, - token_len < max_output_len, - token_len / feat_len > min_output_input_ratio, - token_len / feat_len < max_output_input_ratio, - ] - if all(condition): - fout.write('{}\n'.format(l)) - continue diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/segment.py 
b/models/audio/speech_recognition/conformer/igie/wenet/tools/segment.py deleted file mode 100644 index a1a7f93a05fbaf42ca09c26c0e5be6a7185f0d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/segment.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2021 Mobvoi Inc. (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -import argparse - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate segmented wav.scp') - parser.add_argument('--segments', required=True, help='segments file') - parser.add_argument('--input', - required=True, - help='origin wav.scp that not segmented') - parser.add_argument('--output', - required=True, - help='output segmented wav.scp') - wav_dic = {} - args = parser.parse_args() - ori_wav = args.input - segment_file = args.segments - wav_scp = args.output - with open(ori_wav, 'r') as ori: - for l in ori: - item = l.strip().split() - wav_dic[item[0]] = item[1] - with open(wav_scp, 'w') as f, open(segment_file, 'r') as sgement: - for l in sgement: - item = l.strip().split() - if item[1] in wav_dic: - item[1] = wav_dic[item[1]] - f.write("{} {},{},{}\n".format(item[0], item[1], item[2], item[3])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/setup_anaconda.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/setup_anaconda.sh deleted file mode 100644 index f53ace9cc4c19994fc79d01e85d70f49d40d673f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/setup_anaconda.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# NOTE(hslee): this code is borrowed from ESPnet (https://github.com/espnet/espnet) -set -euo pipefail - -if [ -z "${PS1:-}" ]; then - PS1=__dummy__ -fi -CONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - -if [ $# -gt 4 ]; then - echo "Usage: $0 [output] [conda-env-name] [python-version>]" - exit 1; -elif [ $# -eq 3 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="$3" -elif [ $# -eq 2 ]; then - output_dir="$1" - name="$2" - PYTHON_VERSION="" -elif [ $# -eq 1 ]; then - output_dir="$1" - name="" - PYTHON_VERSION="" -elif [ $# -eq 0 ]; then - output_dir=venv - name="" - PYTHON_VERSION="" -fi - -if [ -e activate_python.sh ]; then - echo "Warning: activate_python.sh already exists. It will be overwritten" -fi - -if [ ! -e "${output_dir}/etc/profile.d/conda.sh" ]; then - if [ ! -e miniconda.sh ]; then - wget --tries=3 "${CONDA_URL}" -O miniconda.sh - fi - - bash miniconda.sh -b -p "${output_dir}" -fi - -# shellcheck disable=SC1090 -source "${output_dir}/etc/profile.d/conda.sh" -conda deactivate - -# If the env already exists, skip recreation -if [ -n "${name}" ] && ! conda activate ${name}; then - conda create -yn "${name}" -fi -conda activate ${name} - -if [ -n "${PYTHON_VERSION}" ]; then - conda install -y conda "python=${PYTHON_VERSION}" -else - conda install -y conda -fi - -conda install -y pip setuptools - -cat << EOF > activate_python.sh -#!/usr/bin/env bash -# THIS FILE IS GENERATED BY tools/setup_anaconda.sh -if [ -z "\${PS1:-}" ]; then - PS1=__dummy__ -fi -. 
$(cd ${output_dir}; pwd)/etc/profile.d/conda.sh && conda deactivate && conda activate ${name} -EOF diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/sph2wav.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/sph2wav.sh deleted file mode 100644 index a8f0749e3be2ee69b5831da6699c303510ecbed4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/sph2wav.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# convert sph scp to segmented wav scp -nj=1 -. tools/parse_options.sh || exit 1; - -inscp=$1 -segments=$2 -outscp=$3 -data=$(dirname ${inscp}) -if [ $# -eq 4 ]; then - logdir=$4 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -sph2pipe_version="v2.5" -if [ ! -d tools/sph2pipe_${sph2pipe_version} ]; then - echo "Download sph2pipe_${sph2pipe_version} ......" - wget -T 10 -t 3 -P tools https://www.openslr.org/resources/3/sph2pipe_${sph2pipe_version}.tar.gz || \ - wget -T 10 -c -P tools https://sourceforge.net/projects/kaldi/files/sph2pipe_${sph2pipe_version}.tar.gz; \ - tar --no-same-owner -xzf tools/sph2pipe_${sph2pipe_version}.tar.gz -C tools - cd tools/sph2pipe_${sph2pipe_version}/ && \ - gcc -o sph2pipe *.c -lm - cd - -fi -sph2pipe=`which sph2pipe` || sph2pipe=`pwd`/tools/sph2pipe_${sph2pipe_version}/sph2pipe -[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -sox=`which sox` -[ ! -x $sox ] && echo "Could not find the sox program at $sph2pipe" && exit 1; - -cat $inscp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s#-f#wav#-p#-c#1#%s#|\n", $1, sph2pipe, $2); - printf("%s-B %s#-f#wav#-p#-c#2#%s#|\n", $1, sph2pipe, $2);}' | \ - sort > $data/wav_ori.scp || exit 1; - -tools/segment.py --segments $segments --input $data/wav_ori.scp --output $data/wav_segments.scp -sed -i 's/ /,/g' $data/wav_segments.scp -sed -i 's/#/ /g' $data/wav_segments.scp - -rm -f $logdir/wav_*.slice -rm -f $logdir/*.log -split --additional-suffix .slice -d -n l/$nj $data/wav_segments.scp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - mkdir -p ${data}/wavs/${name} - cat ${slice} | awk -F ',' -v sox=$sox -v data=`pwd`/$data/wavs/$name \ - -v logdir=$logdir -v name=$name '{ - during=$4-$3 - cmd=$2 sox " - " data "/" $1 ".wav" " trim " $3 " " during; - system(cmd) - printf("%s %s/%s.wav\n", $1, data, $1); - }' | \ - sort > ${data}/wavs_${name}.scp || exit 1; -} & -done -wait -cat ${data}/wavs_*.scp > $outscp -rm ${data}/wavs_*.scp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/spk2utt_to_utt2spk.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/spk2utt_to_utt2spk.pl deleted file mode 100644 index 19fb89d501146e360912863d847d6eabb0194511..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/spk2utt_to_utt2spk.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. 
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-while(<>){
-  @A = split(" ", $_);
-  @A > 1 || die "Invalid line in spk2utt file: $_";
-  $s = shift @A;
-  foreach $u ( @A ) {
-    print "$u $s\n";
-  }
-}
-
-
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/spm_decode b/models/audio/speech_recognition/conformer/igie/wenet/tools/spm_decode
deleted file mode 100644
index 882b4f966013d7708460f8d41696583ae59f8fa9..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/tools/spm_decode
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) Facebook, Inc. and its affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# https://github.com/pytorch/fairseq/blob/master/LICENSE
-
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import argparse
-import sys
-
-import sentencepiece as spm
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--model", required=True,
-                        help="sentencepiece model to use for decoding")
-    parser.add_argument("--input", default=None, help="input file to decode")
-    parser.add_argument("--input_format", choices=["piece", "id"], default="piece")
-    args = parser.parse_args()
-
-    sp = spm.SentencePieceProcessor()
-    sp.Load(args.model)
-
-    if args.input_format == "piece":
-        def decode(l):
-            return "".join(sp.DecodePieces(l))
-    elif args.input_format == "id":
-        def decode(l):
-            return "".join(sp.DecodeIds(l))
-    else:
-        raise NotImplementedError
-
-    def tok2int(tok):
-        # remap reference-side (represented as <>) to 0
-        return int(tok) if tok != "<>" else 0
-
-    if args.input is None:
-        h = sys.stdin
-    else:
-        h = open(args.input, "r", encoding="utf-8")
-    for line in h:
-        print(decode(line.split()))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/spm_encode b/models/audio/speech_recognition/conformer/igie/wenet/tools/spm_encode
deleted file mode 100644
index 4dd2e1004f9fe393c2d34b43bade881b84a31b1f..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/tools/spm_encode
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) Facebook, Inc. and its affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in
-# https://github.com/pytorch/fairseq/blob/master/LICENSE
-
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import argparse
-import contextlib
-import sys
-
-import sentencepiece as spm
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--model", required=True,
-                        help="sentencepiece model to use for encoding")
-    parser.add_argument("--inputs", nargs="+", default=['-'],
-                        help="input files to filter/encode")
-    parser.add_argument("--outputs", nargs="+", default=['-'],
-                        help="path to save encoded outputs")
-    parser.add_argument("--output_format", choices=["piece", "id"], default="piece")
-    parser.add_argument("--min-len", type=int, metavar="N",
-                        help="filter sentence pairs with fewer than N tokens")
-    parser.add_argument("--max-len", type=int, metavar="N",
-                        help="filter sentence pairs with more than N tokens")
-    args = parser.parse_args()
-
-    assert len(args.inputs) == len(args.outputs), \
-        "number of input and output paths should match"
-
-    sp = spm.SentencePieceProcessor()
-    sp.Load(args.model)
-
-    if args.output_format == "piece":
-        def encode(l):
-            return sp.EncodeAsPieces(l)
-    elif args.output_format == "id":
-        def encode(l):
-            return list(map(str, sp.EncodeAsIds(l)))
-    else:
-        raise NotImplementedError
-
-    if args.min_len is not None or args.max_len is not None:
-        def valid(line):
-            return (
-                (args.min_len is None or len(line) >= args.min_len) and
-                (args.max_len is None or len(line) <= args.max_len)
-            )
-    else:
-        def valid(lines):
-            return True
-
-    with contextlib.ExitStack() as stack:
-        inputs = [
-            stack.enter_context(open(input, "r", encoding="utf-8"))
-            if input != "-" else sys.stdin
-            for input in args.inputs
-        ]
-        outputs = [
-            stack.enter_context(open(output, "w", encoding="utf-8"))
-            if output != "-" else sys.stdout
-            for output in args.outputs
-        ]
-
-        stats = {
-            "num_empty": 0,
-            "num_filtered": 0,
-        }
-
-        def encode_line(line):
-            line = line.strip()
-            if len(line) > 0:
-                line = encode(line)
-                if valid(line):
-                    return line
-                else:
-                    stats["num_filtered"] += 1
-            else:
-                stats["num_empty"] += 1
-            return None
-
-        for i, lines in enumerate(zip(*inputs), start=1):
-            enc_lines = list(map(encode_line, lines))
-            if not any(enc_line is None for enc_line in enc_lines):
-                for enc_line, output_h in zip(enc_lines, outputs):
-                    print(" ".join(enc_line), file=output_h)
-            if i % 10000 == 0:
-                print("processed {} lines".format(i), file=sys.stderr)
-
-        print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr)
-        print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/spm_train b/models/audio/speech_recognition/conformer/igie/wenet/tools/spm_train
deleted file mode 100644
index 0b247aee0dc5fcaa7b6cf66d89602e896619c9bb..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/tools/spm_train
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates.
-# All rights reserved.
-# -# This source code is licensed under the license found in the -# https://github.com/pytorch/fairseq/blob/master/LICENSE -import sys - -import sentencepiece as spm - - -if __name__ == "__main__": - spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/subset_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/subset_data_dir.sh deleted file mode 100644 index c35bee62d8710facb8c42a9171ed3caf0171450f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/subset_data_dir.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2010-2011 Microsoft Corporation -# 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - - -# This script operates on a data directory, such as in data/train/. -# See http://kaldi-asr.org/doc/data_prep.html#data_prep_data -# for what these directories contain. - -# This script creates a subset of that data, consisting of some specified -# number of utterances. (The selected utterances are distributed evenly -# throughout the file, by the program ./subset_scp.pl). - -# There are six options, none compatible with any other. - -# If you give the --per-spk option, it will attempt to select the supplied -# number of utterances for each speaker (typically you would supply a much -# smaller number in this case). - -# If you give the --speakers option, it selects a subset of n randomly -# selected speakers. - -# If you give the --shortest option, it will give you the n shortest utterances. - -# If you give the --first option, it will just give you the n first utterances. - -# If you give the --last option, it will just give you the n last utterances. - -# If you give the --spk-list or --utt-list option, it reads the -# speakers/utterances to keep from /" (note, -# in this case there is no positional parameter; see usage message.) - - -shortest=false -perspk=false -speakers=false -first_opt= -spk_list= -utt_list= - -expect_args=3 -case $1 in - --first|--last) first_opt=$1; shift ;; - --per-spk) perspk=true; shift ;; - --shortest) shortest=true; shift ;; - --speakers) speakers=true; shift ;; - --spk-list) shift; spk_list=$1; shift; expect_args=2 ;; - --utt-list) shift; utt_list=$1; shift; expect_args=2 ;; - --*) echo "$0: invalid option '$1'"; exit 1 -esac - -if [ $# != $expect_args ]; then - echo "Usage:" - echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] " - echo " subset_data_dir.sh [--spk-list ] " - echo " subset_data_dir.sh [--utt-list ] " - echo "By default, randomly selects utterances from the data directory." - echo "With --speakers, randomly selects enough speakers that we have utterances" - echo "With --per-spk, selects utterances per speaker, if available." - echo "With --first, selects the first utterances" - echo "With --last, selects the last utterances" - echo "With --shortest, selects the shortest utterances." - echo "With --spk-list, reads the speakers to keep from " - echo "With --utt-list, reads the utterances to keep from " - exit 1; -fi - -srcdir=$1 -if [[ $spk_list || $utt_list ]]; then - numutt= - destdir=$2 -else - numutt=$2 - destdir=$3 -fi - -export LC_ALL=C - -if [ ! -f $srcdir/utt2spk ]; then - echo "$0: no such file $srcdir/utt2spk" - exit 1 -fi - -if [[ $numutt && $numutt -gt $(wc -l <$srcdir/utt2spk) ]]; then - echo "$0: cannot subset to more utterances than you originally had." - exit 1 -fi - -if $shortest && [ ! 
-f $srcdir/feats.scp ]; then - echo "$0: you selected --shortest but no feats.scp exist." - exit 1 -fi - -mkdir -p $destdir || exit 1 - -if [[ $spk_list ]]; then - tools/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1; - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1; -elif [[ $utt_list ]]; then - tools/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1; - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1; -elif $speakers; then - tools/shuffle_list.pl < $srcdir/spk2utt | - awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | - sort > $destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -elif $perspk; then - awk '{ n='$numutt'; printf("%s ",$1); - skip=1; while(n*(skip+1) <= NF-1) { skip++; } - for(x=2; x<=NF && x <= (n*skip+1); x += skip) { printf("%s ", $x); } - printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt - tools/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk -else - if $shortest; then - # Select $numutt shortest utterances. - . ./path.sh - feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1; - sort -n -k2 $destdir/tmp.len | - awk '{print $1}' | - head -$numutt >$destdir/tmp.uttlist - tools/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk - rm $destdir/tmp.uttlist $destdir/tmp.len - else - # Select $numutt random utterances. - tools/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1; - fi - tools/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt -fi - -# Perform filtering. utt2spk and spk2utt files already exist by this point. -# Filter by utterance. -[ -f $srcdir/feats.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp -[ -f $srcdir/vad.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp -[ -f $srcdir/utt2lang ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang -[ -f $srcdir/utt2dur ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur -[ -f $srcdir/utt2num_frames ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames -[ -f $srcdir/utt2uniq ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq -[ -f $srcdir/wav.scp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp -[ -f $srcdir/utt2warp ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp -[ -f $srcdir/text ] && - tools/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text - -# Filter by speaker. -[ -f $srcdir/spk2warp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp -[ -f $srcdir/spk2gender ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender -[ -f $srcdir/cmvn.scp ] && - tools/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp - -# Filter by recording-id. -if [ -f $srcdir/segments ]; then - tools/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments - # Recording-ids are in segments. - awk '{print $2}' $destdir/segments | sort | uniq >$destdir/reco - # The next line overrides the command above for wav.scp, which would be incorrect. - #[ -f $srcdir/wav.scp ] && - # tools/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp -else - # No segments; recording-ids are in wav.scp. 
- awk '{print $1}' $destdir/wav.scp | sort | uniq >$destdir/reco -fi - -[ -f $srcdir/reco2file_and_channel ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel -[ -f $srcdir/reco2dur ] && - tools/filter_scp.pl $destdir/reco <$srcdir/reco2dur >$destdir/reco2dur - -# Filter the STM file for proper sclite scoring. -# Copy over the comments from STM file. -[ -f $srcdir/stm ] && - (grep "^;;" $srcdir/stm - tools/filter_scp.pl $destdir/reco $srcdir/stm) >$destdir/stm - -rm $destdir/reco - -# Copy frame_shift if present. -[ -f $srcdir/frame_shift ] && cp $srcdir/frame_shift $destdir - -srcutts=$(wc -l <$srcdir/utt2spk) -destutts=$(wc -l <$destdir/utt2spk) -echo "$0: reducing #utt from $srcutts to $destutts" -exit 0 diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/subset_scp.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/subset_scp.pl deleted file mode 100644 index 11fddc09a0f4e5fad8e5d63cf65e7e5e627e4af6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/subset_scp.pl +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This program selects a subset of N elements in the scp. - -# By default, it selects them evenly from throughout the scp, in order to avoid -# selecting too many from the same speaker. It prints them on the standard -# output. -# With the option --first, it just selects the N first utterances. -# With the option --last, it just selects the N last utterances. - -# Last modified by JHU & HKUST @2013 - - -$quiet = 0; -$first = 0; -$last = 0; - -if (@ARGV > 0 && $ARGV[0] eq "--quiet") { - shift; - $quiet = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--first") { - shift; - $first = 1; -} -if (@ARGV > 0 && $ARGV[0] eq "--last") { - shift; - $last = 1; -} - -if(@ARGV < 2 ) { - die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . - " --quiet causes it to not die if N < num lines in scp.\n" . - " --first and --last make it equivalent to head or tail.\n" . 
- "See also: filter_scp.pl\n"; -} - -$N = shift @ARGV; -if($N == 0) { - die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; -} -$inscp = shift @ARGV; -open(I, "<$inscp") || die "Opening input scp file $inscp"; - -@F = (); -while() { - push @F, $_; -} -$numlines = @F; -if($N > $numlines) { - if ($quiet) { - $N = $numlines; - } else { - die "You requested from subset_scp.pl more elements than available: $N > $numlines"; - } -} - -sub select_n { - my ($start,$end,$num_needed) = @_; - my $diff = $end - $start; - if ($num_needed > $diff) { - die "select_n: code error"; - } - if ($diff == 1 ) { - if ($num_needed > 0) { - print $F[$start]; - } - } else { - my $halfdiff = int($diff/2); - my $halfneeded = int($num_needed/2); - select_n($start, $start+$halfdiff, $halfneeded); - select_n($start+$halfdiff, $end, $num_needed - $halfneeded); - } -} - -if ( ! $first && ! $last) { - if ($N > 0) { - select_n(0, $numlines, $N); - } -} else { - if ($first) { # --first option: same as head. - for ($n = 0; $n < $N; $n++) { - print $F[$n]; - } - } else { # --last option: same as tail. - for ($n = @F - $N; $n < @F; $n++) { - print $F[$n]; - } - } -} diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/sym2int.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/sym2int.pl deleted file mode 100644 index cec097b6bdaefb5c3452e31fa334f0a7530b9a72..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/sym2int.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -$ignore_oov = 0; - -for($x = 0; $x < 2; $x++) { - if ($ARGV[0] eq "--map-oov") { - shift @ARGV; - $map_oov = shift @ARGV; - if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { - # disallow '-f', the empty string and anything ending in words.txt as the - # OOV symbol because these are likely command-line errors. - die "the --map-oov option requires an argument"; - } - } - if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; - if ($field_spec =~ m/^\d+$/) { - $field_begin = $field_spec - 1; $field_end = $field_spec - 1; - } - if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) - if ($1 ne "") { - $field_begin = $1 - 1; # Change to zero-based indexing. - } - if ($2 ne "") { - $field_end = $2 - 1; # Change to zero-based indexing. - } - } - if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; - } - } -} - -$symtab = shift @ARGV; -if (!defined $symtab) { - print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . - "options: [--map-oov ] [-f ]\n" . 
- "note: can look like 4-5, or 4-, or 5-, or 1.\n"; -} -open(F, "<$symtab") || die "Error opening symbol table file $symtab"; -while() { - @A = split(" ", $_); - @A == 2 || die "bad line in symbol table file: $_"; - $sym2int{$A[0]} = $A[1] + 0; -} - -if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up - if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } - $map_oov = $sym2int{$map_oov}; -} - -$num_warning = 0; -$max_warning = 20; - -while (<>) { - @A = split(" ", $_); - @B = (); - for ($n = 0; $n < @A; $n++) { - $a = $A[$n]; - if ( (!defined $field_begin || $n >= $field_begin) - && (!defined $field_end || $n <= $field_end)) { - $i = $sym2int{$a}; - if (!defined ($i)) { - if (defined $map_oov) { - if ($num_warning++ < $max_warning) { - print STDERR "sym2int.pl: replacing $a with $map_oov\n"; - if ($num_warning == $max_warning) { - print STDERR "sym2int.pl: not warning for OOVs any more times\n"; - } - } - $i = $map_oov; - } else { - $pos = $n+1; - die "sym2int.pl: undefined symbol $a (in position $pos)\n"; - } - } - $a = $i; - } - push @B, $a; - } - print join(" ", @B); - print "\n"; -} -if ($num_warning > 0) { - print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; -} - -exit(0); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/text2token.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/text2token.py deleted file mode 100644 index 4f4dcc901d436650695f0b80e0cf99e1e99269ee..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/text2token.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Johns Hopkins University (Shinji Watanabe) -# Copyright 2021 JD AI Lab. All Rights Reserved. (authors: Lu Fan) -# Copyright 2021 Mobvoi Inc. All Rights Reserved. (Di Wu) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) - -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import codecs -import re -import sys - -is_python2 = sys.version_info[0] == 2 - - -def exist_or_not(i, match_pos): - start_pos = None - end_pos = None - for pos in match_pos: - if pos[0] <= i < pos[1]: - start_pos = pos[0] - end_pos = pos[1] - break - - return start_pos, end_pos - -def seg_char(sent): - pattern = re.compile(r'([\u4e00-\u9fa5])') - chars = pattern.split(sent) - chars = [w for w in chars if len(w.strip()) > 0] - return chars - -def get_parser(): - parser = argparse.ArgumentParser( - description='convert raw text to tokenized text', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nchar', - '-n', - default=1, - type=int, - help='number of characters to split, i.e., \ - aabb -> a a b b with -n 1 and aa bb with -n 2') - parser.add_argument('--skip-ncols', - '-s', - default=0, - type=int, - help='skip first n columns') - parser.add_argument('--space', - default='', - type=str, - help='space symbol') - parser.add_argument('--bpe-model', - '-m', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--non-lang-syms', - '-l', - default=None, - type=str, - help='list of non-linguistic symobles,' - ' e.g., etc.') - parser.add_argument('text', - type=str, - default=False, - nargs='?', - help='input text') - parser.add_argument('--trans_type', - '-t', - type=str, - default="char", - choices=["char", "phn", "cn_char_en_bpe"], - help="""Transcript type. char/phn. 
e.g., for TIMIT - FADG0_SI1279 - - If trans_type is char, read from - SI1279.WRD file -> "bricks are an alternative" - Else if trans_type is phn, - read from SI1279.PHN file -> - "sil b r ih sil k s aa r er n aa l - sil t er n ih sil t ih v sil" """) - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - - rs = [] - if args.non_lang_syms is not None: - with codecs.open(args.non_lang_syms, 'r', encoding="utf-8") as f: - nls = [x.rstrip() for x in f.readlines()] - rs = [re.compile(re.escape(x)) for x in nls] - - if args.bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - - if args.text: - f = codecs.open(args.text, encoding="utf-8") - else: - f = codecs.getreader("utf-8")( - sys.stdin if is_python2 else sys.stdin.buffer) - - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer) - line = f.readline() - n = args.nchar - while line: - x = line.split() - print(' '.join(x[:args.skip_ncols]), end=" ") - a = ' '.join(x[args.skip_ncols:]) - - # get all matched positions - match_pos = [] - for r in rs: - i = 0 - while i >= 0: - m = r.search(a, i) - if m: - match_pos.append([m.start(), m.end()]) - i = m.end() - else: - break - - if len(match_pos) > 0: - chars = [] - i = 0 - while i < len(a): - start_pos, end_pos = exist_or_not(i, match_pos) - if start_pos is not None: - chars.append(a[start_pos:end_pos]) - i = end_pos - else: - chars.append(a[i]) - i += 1 - a = chars - - if (args.trans_type == "phn"): - a = a.split(" ") - elif args.trans_type == "cn_char_en_bpe": - b = seg_char(a) - a = [] - for j in b: - # we use "▁" to instead of blanks among english words - # warning: here is "▁", not "_" - for l in j.strip().split("▁"): - if not l.encode('UTF-8').isalpha(): - a.append(l) - else: - for k in sp.encode_as_pieces(l): - a.append(k) - else: - a = [a[j:j + n] for j in range(0, len(a), n)] - - a_flat = [] - for z in a: - a_flat.append("".join(z)) - - a_chars = [z.replace(' ', args.space) for z in a_flat] - if (args.trans_type == "phn"): - a_chars = [z.replace("sil", args.space) for z in a_chars] - print(' '.join(a_chars)) - line = f.readline() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/utt2spk_to_spk2utt.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/utt2spk_to_spk2utt.pl deleted file mode 100644 index 5086699ff85fdcb8667bb9ab054700c53e35fd0c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/utt2spk_to_spk2utt.pl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# converts an utt2spk file to a spk2utt file. -# Takes input from the stdin or from a file argument; -# output goes to the standard out. 
-
-if ( @ARGV > 1 ) {
-  die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt";
-}
-
-while(<>){
-  @A = split(" ", $_);
-  @A == 2 || die "Invalid line in utt2spk file: $_";
-  ($u,$s) = @A;
-  if(!$seen_spk{$s}) {
-    $seen_spk{$s} = 1;
-    push @spklist, $s;
-  }
-  push (@{$spk_hash{$s}}, "$u");
-}
-foreach $s (@spklist) {
-  $l = join(' ',@{$spk_hash{$s}});
-  print "$s $l\n";
-}
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/validate_data_dir.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/validate_data_dir.sh
deleted file mode 100644
index f4b4cbe1410111555d56380078e3d55381e7155a..0000000000000000000000000000000000000000
--- a/models/audio/speech_recognition/conformer/igie/wenet/tools/validate_data_dir.sh
+++ /dev/null
@@ -1,383 +0,0 @@
-#!/bin/bash
-
-cmd="$@"
-
-no_feats=false
-no_wav=false
-no_text=false
-no_spk_sort=false
-
-for x in `seq 4`; do
-  if [ "$1" == "--no-feats" ]; then
-    no_feats=true
-    shift;
-  fi
-  if [ "$1" == "--no-text" ]; then
-    no_text=true
-    shift;
-  fi
-  if [ "$1" == "--no-wav" ]; then
-    no_wav=true
-    shift;
-  fi
-  if [ "$1" == "--no-spk-sort" ]; then
-    no_spk_sort=true
-    shift;
-  fi
-done
-
-if [ $# -ne 1 ]; then
-  echo "Usage: $0 [--no-feats] [--no-text] [--no-wav] [--no-spk-sort] "
-  echo "The --no-xxx options mean that the script does not require "
-  echo "xxx.scp to be present, but it will check it if it is present."
-  echo "--no-spk-sort means that the script does not require the utt2spk to be "
-  echo "sorted by the speaker-id in addition to being sorted by utterance-id."
-  echo "By default, utt2spk is expected to be sorted by both, which can be "
-  echo "achieved by making the speaker-id prefixes of the utterance-ids"
-  echo "e.g.: $0 data/train"
-  exit 1;
-fi
-
-data=$1
-
-if [ ! -d $data ]; then
-  echo "$0: no such directory $data"
-  exit 1;
-fi
-
-if [ -f $data/images.scp ]; then
-  cmd=${cmd/--no-wav/} # remove --no-wav if supplied
-  image/validate_data_dir.sh $cmd
-  exit $?
-fi
-
-for f in spk2utt utt2spk; do
-  if [ ! -f $data/$f ]; then
-    echo "$0: no such file $f"
-    exit 1;
-  fi
-  if [ ! -s $data/$f ]; then
-    echo "$0: empty file $f"
-    exit 1;
-  fi
-done
-
-! cat $data/utt2spk | awk '{if (NF != 2) exit(1); }' && \
-  echo "$0: $data/utt2spk has wrong format." && exit;
-
-ns=$(wc -l < $data/spk2utt)
-if [ "$ns" == 1 ]; then
-  echo "$0: WARNING: you have only one speaker. This probably a bad idea."
-  echo " Search for the word 'bold' in http://kaldi-asr.org/doc/data_prep.html"
-  echo " for more information."
-fi
-
-
-tmpdir=$(mktemp -d /tmp/kaldi.XXXX);
-trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM
-
-export LC_ALL=C
-
-function check_sorted_and_uniq {
-  ! perl -ne '((substr $_,-1) eq "\n") or die "file $ARGV has invalid newline";' $1 && exit 1;
-  ! awk '{print $1}' $1 | sort | uniq | cmp -s - <(awk '{print $1}' $1) && \
-    echo "$0: file $1 is not in sorted order or has duplicates" && exit 1;
-}
-
-function partial_diff {
-  diff -U1 $1 $2 | (head -n 6; echo "..."; tail -n 6)
-  n1=`cat $1 | wc -l`
-  n2=`cat $2 | wc -l`
-  echo "[Lengths are $1=$n1 versus $2=$n2]"
-}
-
-check_sorted_and_uniq $data/utt2spk
-
-if ! $no_spk_sort; then
-  ! cat $data/utt2spk | sort -k2 | cmp -s - $data/utt2spk && \
-    echo "$0: utt2spk is not in sorted order when sorted first on speaker-id " && \
-    echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1;
-fi
-
-check_sorted_and_uniq $data/spk2utt
-
-!
cmp -s <(cat $data/utt2spk | awk '{print $1, $2;}') \ - <(tools/spk2utt_to_utt2spk.pl $data/spk2utt) && \ - echo "$0: spk2utt and utt2spk do not seem to match" && exit 1; - -cat $data/utt2spk | awk '{print $1;}' > $tmpdir/utts - -if [ ! -f $data/text ] && ! $no_text; then - echo "$0: no such file $data/text (if this is by design, specify --no-text)" - exit 1; -fi - -num_utts=`cat $tmpdir/utts | wc -l` -if [ -f $data/text ]; then - tools/validate_text.pl $data/text || exit 1; - check_sorted_and_uniq $data/text - text_len=`cat $data/text | wc -l` - illegal_sym_list=" #0" - for x in $illegal_sym_list; do - if grep -w "$x" $data/text > /dev/null; then - echo "$0: Error: in $data, text contains illegal symbol $x" - exit 1; - fi - done - awk '{print $1}' < $data/text > $tmpdir/utts.txt - if ! cmp -s $tmpdir/utts{,.txt}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and text" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.txt} - exit 1; - fi -fi - -if [ -f $data/segments ] && [ ! -f $data/wav.scp ]; then - echo "$0: in directory $data, segments file exists but no wav.scp" - exit 1; -fi - - -if [ ! -f $data/wav.scp ] && ! $no_wav; then - echo "$0: no such file $data/wav.scp (if this is by design, specify --no-wav)" - exit 1; -fi - -if [ -f $data/wav.scp ]; then - check_sorted_and_uniq $data/wav.scp - - if grep -E -q '^\S+\s+~' $data/wav.scp; then - # note: it's not a good idea to have any kind of tilde in wav.scp, even if - # part of a command, as it would cause compatibility problems if run by - # other users, but this used to be not checked for so we let it slide unless - # it's something of the form "foo ~/foo.wav" (i.e. a plain file name) which - # would definitely cause problems as the fopen system call does not do - # tilde expansion. - echo "$0: Please do not use tilde (~) in your wav.scp." - exit 1; - fi - - if [ -f $data/segments ]; then - - check_sorted_and_uniq $data/segments - # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. - ! cat $data/segments | \ - awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ - echo "$0: badly formatted segments file" && exit 1; - - segments_len=`cat $data/segments | wc -l` - if [ -f $data/text ]; then - ! cmp -s $tmpdir/utts <(awk '{print $1}' <$data/segments) && \ - echo "$0: Utterance list differs between $data/utt2spk and $data/segments " && \ - echo "$0: Lengths are $segments_len vs $num_utts" && \ - exit 1 - fi - - cat $data/segments | awk '{print $2}' | sort | uniq > $tmpdir/recordings - awk '{print $1}' $data/wav.scp > $tmpdir/recordings.wav - if ! cmp -s $tmpdir/recordings{,.wav}; then - echo "$0: Error: in $data, recording-ids extracted from segments and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.wav} - exit 1; - fi - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/recordings.r2fc - if ! 
cmp -s $tmpdir/recordings{,.r2fc}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.r2fc} - exit 1; - fi - fi - else - # No segments file -> assume wav.scp indexed by utterance. - cat $data/wav.scp | awk '{print $1}' > $tmpdir/utts.wav - if ! cmp -s $tmpdir/utts{,.wav}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and wav.scp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.wav} - exit 1; - fi - - if [ -f $data/reco2file_and_channel ]; then - # this file is needed only for ctm scoring; it's indexed by recording-id. - check_sorted_and_uniq $data/reco2file_and_channel - ! cat $data/reco2file_and_channel | \ - awk '{if (NF != 3 || ($3 != "A" && $3 != "B" )) { - if ( NF == 3 && $3 == "1" ) { - warning_issued = 1; - } else { - print "Bad line ", $0; exit 1; - } - } - } - END { - if (warning_issued == 1) { - print "The channel should be marked as A or B, not 1! You should change it ASAP! " - } - }' && echo "$0: badly formatted reco2file_and_channel file" && exit 1; - cat $data/reco2file_and_channel | awk '{print $1}' > $tmpdir/utts.r2fc - if ! cmp -s $tmpdir/utts{,.r2fc}; then - echo "$0: Error: in $data, utterance-ids extracted from segments and reco2file_and_channel" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.r2fc} - exit 1; - fi - fi - fi -fi - -if [ ! -f $data/feats.scp ] && ! $no_feats; then - echo "$0: no such file $data/feats.scp (if this is by design, specify --no-feats)" - exit 1; -fi - -if [ -f $data/feats.scp ]; then - check_sorted_and_uniq $data/feats.scp - cat $data/feats.scp | awk '{print $1}' > $tmpdir/utts.feats - if ! cmp -s $tmpdir/utts{,.feats}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and features" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.feats} - exit 1; - fi -fi - - -if [ -f $data/cmvn.scp ]; then - check_sorted_and_uniq $data/cmvn.scp - cat $data/cmvn.scp | awk '{print $1}' > $tmpdir/speakers.cmvn - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.cmvn}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and cmvn" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.cmvn} - exit 1; - fi -fi - -if [ -f $data/spk2gender ]; then - check_sorted_and_uniq $data/spk2gender - ! cat $data/spk2gender | awk '{if (!((NF == 2 && ($2 == "m" || $2 == "f")))) exit 1; }' && \ - echo "$0: Mal-formed spk2gender file" && exit 1; - cat $data/spk2gender | awk '{print $1}' > $tmpdir/speakers.spk2gender - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! cmp -s $tmpdir/speakers{,.spk2gender}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2gender" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2gender} - exit 1; - fi -fi - -if [ -f $data/spk2warp ]; then - check_sorted_and_uniq $data/spk2warp - ! cat $data/spk2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed spk2warp file" && exit 1; - cat $data/spk2warp | awk '{print $1}' > $tmpdir/speakers.spk2warp - cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers - if ! 
cmp -s $tmpdir/speakers{,.spk2warp}; then - echo "$0: Error: in $data, speaker lists extracted from spk2utt and spk2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/speakers{,.spk2warp} - exit 1; - fi -fi - -if [ -f $data/utt2warp ]; then - check_sorted_and_uniq $data/utt2warp - ! cat $data/utt2warp | awk '{if (!((NF == 2 && ($2 > 0.5 && $2 < 1.5)))){ print; exit 1; }}' && \ - echo "$0: Mal-formed utt2warp file" && exit 1; - cat $data/utt2warp | awk '{print $1}' > $tmpdir/utts.utt2warp - cat $data/utt2spk | awk '{print $1}' > $tmpdir/utts - if ! cmp -s $tmpdir/utts{,.utt2warp}; then - echo "$0: Error: in $data, utterance lists extracted from utt2spk and utt2warp" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2warp} - exit 1; - fi -fi - -# check some optionally-required things -for f in vad.scp utt2lang utt2uniq; do - if [ -f $data/$f ]; then - check_sorted_and_uniq $data/$f - if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \ - <( awk '{print $1}' $data/$f ); then - echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list" - exit 1; - fi - fi -done - - -if [ -f $data/utt2dur ]; then - check_sorted_and_uniq $data/utt2dur - cat $data/utt2dur | awk '{print $1}' > $tmpdir/utts.utt2dur - if ! cmp -s $tmpdir/utts{,.utt2dur}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2dur} - exit 1; - fi - cat $data/utt2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line utt2dur:" NR ":" $0; exit(1) }}' || exit 1 -fi - -if [ -f $data/utt2num_frames ]; then - check_sorted_and_uniq $data/utt2num_frames - cat $data/utt2num_frames | awk '{print $1}' > $tmpdir/utts.utt2num_frames - if ! cmp -s $tmpdir/utts{,.utt2num_frames}; then - echo "$0: Error: in $data, utterance-ids extracted from utt2spk and utt2num_frames file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/utts{,.utt2num_frames} - exit 1 - fi - awk <$data/utt2num_frames '{ - if (NF != 2 || !($2 > 0) || $2 != int($2)) { - print "Bad line utt2num_frames:" NR ":" $0 - exit 1 } }' || exit 1 -fi - -if [ -f $data/reco2dur ]; then - check_sorted_and_uniq $data/reco2dur - cat $data/reco2dur | awk '{print $1}' > $tmpdir/recordings.reco2dur - if [ -f $tmpdir/recordings ]; then - if ! cmp -s $tmpdir/recordings{,.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from segments and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/recordings{,.reco2dur} - exit 1; - fi - else - if ! cmp -s $tmpdir/{utts,recordings.reco2dur}; then - echo "$0: Error: in $data, recording-ids extracted from wav.scp and reco2dur file" - echo "$0: differ, partial diff is:" - partial_diff $tmpdir/{utts,recordings.reco2dur} - exit 1; - fi - fi - cat $data/reco2dur | \ - awk '{ if (NF != 2 || !($2 > 0)) { print "Bad line : " $0; exit(1) }}' || exit 1 -fi - - -echo "$0: Successfully validated data-directory $data" diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/validate_dict_dir.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/validate_dict_dir.pl deleted file mode 100644 index 819fca7f03caff91f3f24f0b69876a0bfc0abbe9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/validate_dict_dir.pl +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env perl - -# Apache 2.0. 
-# Copyright 2012 Guoguo Chen -# 2015 Daniel Povey -# 2017 Johns Hopkins University (Jan "Yenda" Trmal ) -# -# Validation script for 'dict' directories (e.g. data/local/dict) - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line($.) $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. $i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - print "--> text seems to be UTF-8 or ASCII, checking whitespaces\n"; - if ($has_invalid_whitespaces) { - print "--> ERROR: the text containes disallowed UTF-8 whitespace character(s)\n"; - return 0; - } else { - print "--> text contains only allowed whitespaces\n"; - } - } else { - print "--> text doesn't seem to be UTF-8 or ASCII, won't check whitespaces\n"; - } - return 1; -} - - -if(@ARGV != 1) { - die "Usage: validate_dict_dir.pl \n" . - "e.g.: validate_dict_dir.pl data/local/dict\n"; -} - -$dict = shift @ARGV; -$dict =~ s:/$::; - -$exit = 0; -$success = 1; # this is re-set each time we read a file. 
- -sub set_to_fail { $exit = 1; $success = 0; } - -# Checking silence_phones.txt ------------------------------- -print "Checking $dict/silence_phones.txt ...\n"; -if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} -$idx = 1; -%silence = (); -$crlf = 1; - -print "--> reading $dict/silence_phones.txt\n"; -check_allowed_whitespace(\*S) || set_to_fail(); -while() { - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $dict/silence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/silence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($silence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/silence_phones.txt (line $idx)\n"; - } else { - $silence{$p} = 1; - } - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(S); -$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; -print "\n"; - -# Checking optional_silence.txt ------------------------------- -print "Checking $dict/optional_silence.txt ...\n"; -if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} -if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} -$idx = 1; -$success = 1; -$crlf = 1; -print "--> reading $dict/optional_silence.txt\n"; -check_allowed_whitespace(\*OS) or exit 1; -while() { - chomp; - my @col = split(" ", $_); - if ($idx > 1 or @col > 1) { - set_to_fail(); print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; - } elsif (!$silence{$col[0]}) { - set_to_fail(); print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; - } - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/optional_silence.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - $idx ++; -} -close(OS); -$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; -print "\n"; - -# Checking nonsilence_phones.txt ------------------------------- -print "Checking $dict/nonsilence_phones.txt ...\n"; -if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} -if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} -$idx = 1; -%nonsilence = (); -$success = 1; -$crlf = 1; -print "--> reading $dict/nonsilence_phones.txt\n"; -check_allowed_whitespace(\*NS) or set_to_fail(); -while() { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/nonsilence_phones.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (! 
s/\n$//) { - print "--> ERROR: last line '$_' of $dict/nonsilence_phones.txt does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - if (@col == 0) { - set_to_fail(); - print "--> ERROR: empty line in $dict/nonsilence_phones.txt (line $idx)\n"; - } - foreach(0 .. @col-1) { - my $p = $col[$_]; - if($nonsilence{$p}) { - set_to_fail(); print "--> ERROR: phone \"$p\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; - } else { - $nonsilence{$p} = 1; - } - # phones that start with the pound sign/hash may be mistaken for - # disambiguation symbols; phones ending in _B, _E, _S or _I will cause - # problems with word-position-dependent systems, and is obviously - # confusable with epsilon. - if ($p =~ m/^#/ || $p =~ m/_[BESI]$/ || $p eq ""){ - set_to_fail(); - print "--> ERROR: phone \"$p\" has disallowed written form\n"; - } - } - $idx ++; -} -close(NS); -$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n"; -print "\n"; - -# Checking disjoint ------------------------------- -sub intersect { - my ($a, $b) = @_; - @itset = (); - %itset = (); - foreach(keys %$a) { - if(exists $b->{$_} and !$itset{$_}) { - push(@itset, $_); - $itset{$_} = 1; - } - } - return @itset; -} - -print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n"; -@itset = intersect(\%silence, \%nonsilence); -if(@itset == 0) {print "--> disjoint property is OK.\n";} -else {set_to_fail(); print "--> ERROR: silence_phones.txt and nonsilence_phones.txt has overlap: "; foreach(@itset) {print "$_ ";} print "\n";} -print "\n"; - - -sub check_lexicon { - my ($lex, $num_prob_cols, $num_skipped_cols) = @_; - print "Checking $lex\n"; - !open(L, "<$lex") && print "--> ERROR: fail to open $lex\n" && set_to_fail(); - my %seen_line = {}; - $idx = 1; $success = 1; $crlf = 1; - print "--> reading $lex\n"; - check_allowed_whitespace(\*L) or set_to_fail(); - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $lex contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - if (defined $seen_line{$_}) { - print "--> ERROR: line '$_' of $lex is repeated\n"; - set_to_fail(); - } - $seen_line{$_} = 1; - if (! s/\n$//) { - print "--> ERROR: last line '$_' of $lex does not end in newline.\n"; - set_to_fail(); - } - my @col = split(" ", $_); - $word = shift @col; - if (!defined $word) { - print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail(); - } - if ($word eq "" || $word eq "" || $word eq "" || $word eq "#0") { - print "--> ERROR: lexicon.txt contains forbidden word $word\n"; - set_to_fail(); - } - for ($n = 0; $n < $num_prob_cols; $n++) { - $prob = shift @col; - if (!($prob > 0.0 && $prob <= 1.0)) { - print "--> ERROR: bad pron-prob in lexicon-line '$_', in $lex\n"; - set_to_fail(); - } - } - for ($n = 0; $n < $num_skipped_cols; $n++) { shift @col; } - if (@col == 0) { - print "--> ERROR: lexicon.txt contains word $word with empty "; - print "pronunciation.\n"; - set_to_fail(); - } - foreach (0 .. @col-1) { - if (!$silence{@col[$_]} and !$nonsilence{@col[$_]}) { - print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence.txt "; - print "(line $idx)\n"; - set_to_fail(); - } - } - $idx ++; - } - close(L); - $success == 0 || print "--> $lex is OK\n"; - print "\n"; -} - -if (-f "$dict/lexicon.txt") { check_lexicon("$dict/lexicon.txt", 0, 0); } -if (-f "$dict/lexiconp.txt") { check_lexicon("$dict/lexiconp.txt", 1, 0); } -if (-f "$dict/lexiconp_silprob.txt") { - # If $dict/lexiconp_silprob.txt exists, we expect $dict/silprob.txt to also - # exist. 
- check_lexicon("$dict/lexiconp_silprob.txt", 2, 2); - if (-f "$dict/silprob.txt") { - !open(SP, "<$dict/silprob.txt") && - print "--> ERROR: fail to open $dict/silprob.txt\n" && set_to_fail(); - $crlf = 1; - while () { - if ($crlf == 1 && m/\r/) { - print "--> ERROR: $dict/silprob.txt contains Carriage Return (^M) characters.\n"; - set_to_fail(); - $crlf = 0; - } - chomp; my @col = split; - @col != 2 && die "--> ERROR: bad line \"$_\"\n" && set_to_fail(); - if ($col[0] eq "" || $col[0] eq "overall") { - if (!($col[1] > 0.0 && $col[1] <= 1.0)) { - set_to_fail(); - print "--> ERROR: bad probability in $dir/silprob.txt \"$_\"\n"; - } - } elsif ($col[0] eq "_s" || $col[0] eq "_n") { - if ($col[1] <= 0.0) { - set_to_fail(); - print "--> ERROR: bad correction term in $dir/silprob.txt \"$_\"\n"; - } - } else { - print "--> ERROR: unexpected line in $dir/silprob.txt \"$_\"\n"; - set_to_fail(); - } - } - close(SP); - } else { - set_to_fail(); - print "--> ERROR: expecting $dict/silprob.txt to exist\n"; - } -} - -if (!(-f "$dict/lexicon.txt" || -f "$dict/lexiconp.txt")) { - print "--> ERROR: neither lexicon.txt or lexiconp.txt exist in directory $dir\n"; - set_to_fail(); -} - -sub check_lexicon_pair { - my ($lex1, $num_prob_cols1, $num_skipped_cols1, - $lex2, $num_prob_cols2, $num_skipped_cols2) = @_; - # We have checked individual lexicons already. - open(L1, "<$lex1"); open(L2, "<$lex2"); - print "Checking lexicon pair $lex1 and $lex2\n"; - my $line_num = 0; - while() { - $line_num++; - @A = split; - $line_B = ; - if (!defined $line_B) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); last; - } - @B = split(" ", $line_B); - # Check if the word matches. - if ($A[0] ne $B[0]) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - shift @A; shift @B; - for ($n = 0; $n < $num_prob_cols1 + $num_skipped_cols1; $n ++) { shift @A; } - for ($n = 0; $n < $num_prob_cols2 + $num_skipped_cols2; $n ++) { shift @B; } - # Check if the pronunciation matches - if (join(" ", @A) ne join(" ", @B)) { - print "--> ERROR: $lex1 and $lex2 mismatch at line $line_num. sorting?\n"; - set_to_fail(); last; - } - } - $line_B = ; - if (defined $line_B && $exit == 0) { - print "--> ERROR: $lex1 and $lex2 have different number of lines.\n"; - set_to_fail(); - } - $success == 0 || print "--> lexicon pair $lex1 and $lex2 match\n\n"; -} - -# If more than one lexicon exist, we have to check if they correspond to each -# other. It could be that the user overwrote one and we need to regenerate the -# other, but we do not know which is which. -if ( -f "$dict/lexicon.txt" && -f "$dict/lexiconp.txt") { - check_lexicon_pair("$dict/lexicon.txt", 0, 0, "$dict/lexiconp.txt", 1, 0); -} -if ( -f "$dict/lexiconp.txt" && -f "$dict/lexiconp_silprob.txt") { - check_lexicon_pair("$dict/lexiconp.txt", 1, 0, - "$dict/lexiconp_silprob.txt", 2, 2); -} - -# Checking extra_questions.txt ------------------------------- -%distinguished = (); # Keep track of all phone-pairs including nonsilence that - # are distinguished (split apart) by extra_questions.txt, - # as $distinguished{$p1,$p2} = 1. This will be used to - # make sure that we don't have pairs of phones on the same - # line in nonsilence_phones.txt that can never be - # distinguished from each other by questions. 
(If any two
- # phones appear on the same line in nonsilence_phones.txt,
- # they share a tree root, and since the automatic
- # question-building treats all phones that appear on the
- # same line of nonsilence_phones.txt as being in the same
- # group, we can never distinguish them without resorting to
- # questions in extra_questions.txt.
-print "Checking $dict/extra_questions.txt ...\n";
-if (-s "$dict/extra_questions.txt") {
- if (!open(EX, "<$dict/extra_questions.txt")) {
- set_to_fail(); print "--> ERROR: fail to open $dict/extra_questions.txt\n";
- }
- $idx = 1;
- $success = 1;
- $crlf = 1;
- print "--> reading $dict/extra_questions.txt\n";
- check_allowed_whitespace(\*EX) or set_to_fail();
- while(<EX>) {
- if ($crlf == 1 && m/\r/) {
- print "--> ERROR: $dict/extra_questions.txt contains Carriage Return (^M) characters.\n";
- set_to_fail();
- $crlf = 0;
- }
- if (! s/\n$//) {
- print "--> ERROR: last line '$_' of $dict/extra_questions.txt does not end in newline.\n";
- set_to_fail();
- }
- my @col = split(" ", $_);
- if (@col == 0) {
- set_to_fail(); print "--> ERROR: empty line in $dict/extra_questions.txt\n";
- }
- foreach (0 .. @col-1) {
- if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) {
- set_to_fail(); print "--> ERROR: phone \"@col[$_]\" is not in {, non}silence_phones.txt (line $idx, block ", $_+1, ")\n";
- }
- $idx ++;
- }
- %col_hash = ();
- foreach $p (@col) { $col_hash{$p} = 1; }
- foreach $p1 (@col) {
- # Update %distinguished hash.
- foreach $p2 (keys %nonsilence) {
- if (!defined $col_hash{$p2}) { # for each p1 in this question and p2 not
- # in this question (and in nonsilence
- # phones)... mark p1,p2 as being split apart
- $distinguished{$p1,$p2} = 1;
- $distinguished{$p2,$p1} = 1;
- }
- }
- }
- }
- close(EX);
- $success == 0 || print "--> $dict/extra_questions.txt is OK\n";
-} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";}
-
-if (-f "$dict/nonterminals.txt") {
- open(NT, "<$dict/nonterminals.txt") || die "opening $dict/nonterminals.txt";
- my %nonterminals = ();
- my $line_number = 1;
- while (<NT>) {
- chop;
- my @line = split(" ", $_);
- if (@line != 1 || ! m/^#nonterm:/ || defined $nonterminals{$line[0]}) {
- print "--> ERROR: bad (or duplicate) line $line_number: '$_' in $dict/nonterminals.txt\n"; exit 1;
- }
- $nonterminals{$line[0]} = 1;
- $line_number++;
- }
- print "--> $dict/nonterminals.txt is OK\n";
-}
-
-
-# check nonsilence_phones.txt again for phone-pairs that are never
-# distnguishable. (note: this situation is normal and expected for silence
-# phones, so we don't check it.)
-if(!open(NS, "<$dict/nonsilence_phones.txt")) {
- print "--> ERROR: fail to open $dict/nonsilence_phones.txt the second time\n"; exit 1;
-}
-
-$num_warn_nosplit = 0;
-$num_warn_nosplit_limit = 10;
-while(<NS>) {
- my @col = split(" ", $_);
- foreach $p1 (@col) {
- foreach $p2 (@col) {
- if ($p1 ne $p2 && ! $distinguished{$p1,$p2}) {
- set_to_fail();
- if ($num_warn_nosplit <= $num_warn_nosplit_limit) {
- print "--> ERROR: phones $p1 and $p2 share a tree root but can never be distinguished by extra_questions.txt.\n";
- }
- if ($num_warn_nosplit == $num_warn_nosplit_limit) {
- print "... Not warning any more times about this issue.\n";
- }
- if ($num_warn_nosplit == 0) {
- print " (note: we started checking for this only recently. 
You can still build a system but\n"; - print " phones $p1 and $p2 will be acoustically indistinguishable).\n"; - } - $num_warn_nosplit++; - } - } - } -} - - -if ($exit == 1) { - print "--> ERROR validating dictionary directory $dict (see detailed error "; - print "messages above)\n\n"; - exit 1; -} else { - print "--> SUCCESS [validating dictionary directory $dict]\n\n"; -} - -exit 0; diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/validate_text.pl b/models/audio/speech_recognition/conformer/igie/wenet/tools/validate_text.pl deleted file mode 100644 index 7f75cf12f20f6e22948682e8e726e628a72dac69..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/validate_text.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl -# -#=============================================================================== -# Copyright 2017 Johns Hopkins University (author: Yenda Trmal ) -# Johns Hopkins University (author: Daniel Povey) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -# validation script for data//text -# to be called (preferably) from utils/validate_data_dir.sh -use strict; -use warnings; -use utf8; -use Fcntl qw< SEEK_SET >; - -# this function reads the opened file (supplied as a first -# parameter) into an array of lines. For each -# line, it tests whether it's a valid utf-8 compatible -# line. If all lines are valid utf-8, it returns the lines -# decoded as utf-8, otherwise it assumes the file's encoding -# is one of those 1-byte encodings, such as ISO-8859-x -# or Windows CP-X. -# Please recall we do not really care about -# the actually encoding, we just need to -# make sure the length of the (decoded) string -# is correct (to make the output formatting looking right). -sub get_utf8_or_bytestream { - use Encode qw(decode encode); - my $is_utf_compatible = 1; - my @unicode_lines; - my @raw_lines; - my $raw_text; - my $lineno = 0; - my $file = shift; - - while (<$file>) { - $raw_text = $_; - last unless $raw_text; - if ($is_utf_compatible) { - my $decoded_text = eval { decode("UTF-8", $raw_text, Encode::FB_CROAK) } ; - $is_utf_compatible = $is_utf_compatible && defined($decoded_text); - push @unicode_lines, $decoded_text; - } else { - #print STDERR "WARNING: the line $raw_text cannot be interpreted as UTF-8: $decoded_text\n"; - ; - } - push @raw_lines, $raw_text; - $lineno += 1; - } - - if (!$is_utf_compatible) { - return (0, @raw_lines); - } else { - return (1, @unicode_lines); - } -} - -# check if the given unicode string contain unicode whitespaces -# other than the usual four: TAB, LF, CR and SPACE -sub validate_utf8_whitespaces { - my $unicode_lines = shift; - use feature 'unicode_strings'; - for (my $i = 0; $i < scalar @{$unicode_lines}; $i++) { - my $current_line = $unicode_lines->[$i]; - if ((substr $current_line, -1) ne "\n"){ - print STDERR "$0: The current line (nr. 
$i) has invalid newline\n"; - return 1; - } - my @A = split(" ", $current_line); - my $utt_id = $A[0]; - # we replace TAB, LF, CR, and SPACE - # this is to simplify the test - if ($current_line =~ /\x{000d}/) { - print STDERR "$0: The line for utterance $utt_id contains CR (0x0D) character\n"; - return 1; - } - $current_line =~ s/[\x{0009}\x{000a}\x{0020}]/./g; - if ($current_line =~/\s/) { - print STDERR "$0: The line for utterance $utt_id contains disallowed Unicode whitespaces\n"; - return 1; - } - } - return 0; -} - -# checks if the text in the file (supplied as the argument) is utf-8 compatible -# if yes, checks if it contains only allowed whitespaces. If no, then does not -# do anything. The function seeks to the original position in the file after -# reading the text. -sub check_allowed_whitespace { - my $file = shift; - my $filename = shift; - my $pos = tell($file); - (my $is_utf, my @lines) = get_utf8_or_bytestream($file); - seek($file, $pos, SEEK_SET); - if ($is_utf) { - my $has_invalid_whitespaces = validate_utf8_whitespaces(\@lines); - if ($has_invalid_whitespaces) { - print STDERR "$0: ERROR: text file '$filename' contains disallowed UTF-8 whitespace character(s)\n"; - return 0; - } - } - return 1; -} - -if(@ARGV != 1) { - die "Usage: validate_text.pl \n" . - "e.g.: validate_text.pl data/train/text\n"; -} - -my $text = shift @ARGV; - -if (-z "$text") { - print STDERR "$0: ERROR: file '$text' is empty or does not exist\n"; - exit 1; -} - -if(!open(FILE, "<$text")) { - print STDERR "$0: ERROR: failed to open $text\n"; - exit 1; -} - -check_allowed_whitespace(\*FILE, $text) or exit 1; -close(FILE); diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/wav2dur.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/wav2dur.py deleted file mode 100644 index 1bcc1b693458b66c0e341e5d6b375cc81e6db8b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/wav2dur.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 - -import sys - -import torchaudio -torchaudio.set_audio_backend("sox_io") - -scp = sys.argv[1] -dur_scp = sys.argv[2] - -with open(scp, 'r') as f, open(dur_scp, 'w') as fout: - cnt = 0 - total_duration = 0 - for l in f: - items = l.strip().split() - wav_id = items[0] - fname = items[1] - cnt += 1 - waveform, rate = torchaudio.load(fname) - frames = len(waveform[0]) - duration = frames / float(rate) - total_duration += duration - fout.write('{} {}\n'.format(wav_id, duration)) - print('process {} utts'.format(cnt)) - print('total {} s'.format(total_duration)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/wav_to_duration.sh b/models/audio/speech_recognition/conformer/igie/wenet/tools/wav_to_duration.sh deleted file mode 100644 index 51b055c633ac809b6b8d702925dc47875973403d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/wav_to_duration.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# split the wav scp, calculate duration and merge -nj=4 -. 
tools/parse_options.sh || exit 1; - -inscp=$1 -outscp=$2 -data=$(dirname ${inscp}) -if [ $# -eq 3 ]; then - logdir=$3 -else - logdir=${data}/log -fi -mkdir -p ${logdir} - -rm -f $logdir/wav_*.slice -rm -f $logdir/wav_*.shape -split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ - -for slice in `ls $logdir/wav_*.slice`; do -{ - name=`basename -s .slice $slice` - tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log -} & -done -wait -cat $logdir/wav_*.shape > $outscp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/tools/websocket/performance-ws.py b/models/audio/speech_recognition/conformer/igie/wenet/tools/websocket/performance-ws.py deleted file mode 100644 index af77dea06bb41297b674b5b6dbfd0266bcff5d53..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/tools/websocket/performance-ws.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# coding:utf-8 - -# Copyright (c) 2022 SDCI Co. Ltd (author: veelion) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import time -import asyncio -import argparse -import websockets -import soundfile as sf -import statistics - - -WS_START = json.dumps({ - 'signal': 'start', - 'nbest': 1, - 'continuous_decoding': False, -}) -WS_END = json.dumps({ - 'signal': 'end' -}) - - -async def ws_rec(data, ws_uri): - begin = time.time() - conn = await websockets.connect(ws_uri, ping_timeout=200) - # step 1: send start - await conn.send(WS_START) - ret = await conn.recv() - # step 2: send audio data - await conn.send(data) - # step 3: send end - await conn.send(WS_END) - # step 4: receive result - texts = [] - while 1: - ret = await conn.recv() - ret = json.loads(ret) - if ret['type'] == 'final_result': - nbest = json.loads(ret['nbest']) - text = nbest[0]['sentence'] - texts.append(text) - elif ret['type'] == 'speech_end': - break - # step 5: close - try: - await conn.close() - except Exception as e: - # this except has no effect, just log as debug - # it seems the server does not send close info, maybe - print(e) - time_cost = time.time() - begin - return { - 'text': ''.join(texts), - 'time': time_cost, - } - - -def get_args(): - parser = argparse.ArgumentParser(description='') - parser.add_argument( - '-u', '--ws_uri', required=True, - help="websocket_server_main's uri, e.g. 
ws://127.0.0.1:10086") - parser.add_argument( - '-w', '--wav_scp', required=True, - help='path to wav_scp_file') - parser.add_argument( - '-t', '--trans', required=True, - help='path to trans_text_file of wavs') - parser.add_argument( - '-s', '--save_to', required=True, - help='path to save transcription') - parser.add_argument( - '-n', '--num_concurrence', type=int, required=True, - help='num of concurrence for query') - args = parser.parse_args() - return args - - -def print_result(info): - length = max([len(k) for k in info]) - for k, v in info.items(): - print(f'\t{k: >{length}} : {v}') - - -async def main(args): - wav_scp = [] - total_duration = 0 - with open(args.wav_scp) as f: - for line in f: - zz = line.strip().split() - assert len(zz) == 2 - data, sr = sf.read(zz[1], dtype='int16') - assert sr == 16000 - duration = (len(data)) / 16000 - total_duration += duration - wav_scp.append((zz[0], data.tobytes())) - print(f'{len(wav_scp) = }, {total_duration = }') - - tasks = [] - failed = 0 - texts = [] - request_times = [] - begin = time.time() - for i, (_uttid, data) in enumerate(wav_scp): - task = asyncio.create_task(ws_rec(data, args.ws_uri)) - tasks.append((_uttid, task)) - if len(tasks) < args.num_concurrence: - continue - print((f'{i=}, start {args.num_concurrence} ' - f'queries @ {time.strftime("%m-%d %H:%M:%S")}')) - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - tasks = [] - print(f'\tdone @ {time.strftime("%m-%d %H:%M:%S")}') - if tasks: - for uttid, task in tasks: - result = await task - texts.append(f'{uttid}\t{result["text"]}\n') - request_times.append(result['time']) - request_time = time.time() - begin - rtf = request_time / total_duration - print('For all concurrence:') - print_result({ - 'failed': failed, - 'total_duration': total_duration, - 'request_time': request_time, - 'RTF': rtf, - }) - print('For one request:') - print_result({ - 'mean': statistics.mean(request_times), - 'median': statistics.median(request_times), - 'max_time': max(request_times), - 'min_time': min(request_times), - }) - with open(args.save_to, 'w', encoding='utf8') as fsave: - fsave.write(''.join(texts)) - # caculate CER - cmd = (f'python ../compute-wer.py --char=1 --v=1 ' - f'{args.trans} {args.save_to} > ' - f'{args.save_to}-test-{args.num_concurrence}.cer.txt') - print(cmd) - os.system(cmd) - print('done') - - -if __name__ == '__main__': - args = get_args() - asyncio.run(main(args)) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/alignment.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/alignment.py deleted file mode 100644 index 071691183e5af227e60fe06e4f8d4bf0f33b7f71..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/alignment.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Di Wu) -# 2022 Tinnove Inc (authors: Wei Ren) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader -from textgrid import TextGrid, IntervalTier - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.ctc_util import forced_align -from wenet.utils.common import get_subsample -from wenet.utils.init_model import init_model - - -def generator_textgrid(maxtime, lines, output): - # Download Praat: https://www.fon.hum.uva.nl/praat/ - interval = maxtime / (len(lines) + 1) - margin = 0.0001 - - tg = TextGrid(maxTime=maxtime) - linetier = IntervalTier(name="line", maxTime=maxtime) - - i = 0 - for l in lines: - s, e, w = l.split() - linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w) - - tg.append(linetier) - print("successfully generator {}".format(output)) - tg.write(output) - - -def get_frames_timestamp(alignment): - # convert alignment to a praat format, which is a doing phonetics - # by computer and helps analyzing alignment - timestamp = [] - # get frames level duration for each token - start = 0 - end = 0 - while end < len(alignment): - while end < len(alignment) and alignment[end] == 0: - end += 1 - if end == len(alignment): - timestamp[-1] += alignment[start:] - break - end += 1 - while end < len(alignment) and alignment[end - 1] == alignment[end]: - end += 1 - timestamp.append(alignment[start:end]) - start = end - return timestamp - - -def get_labformat(timestamp, subsample): - begin = 0 - duration = 0 - labformat = [] - for idx, t in enumerate(timestamp): - # 25ms frame_length,10ms hop_length, 1/subsample - subsample = get_subsample(configs) - # time duration - duration = len(t) * 0.01 * subsample - if idx < len(timestamp) - 1: - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[t[-1]])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[t[-1]])) - else: - non_blank = 0 - for i in t: - if i != 0: - token = i - break - print("{:.2f} {:.2f} {}".format(begin, begin + duration, - char_dict[token])) - labformat.append("{:.2f} {:.2f} {}\n".format( - begin, begin + duration, char_dict[token])) - begin = begin + duration - return labformat - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='use ctc to generate alignment') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--input_file', required=True, help='format data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--non_lang_syms', - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--result_file', - required=True, - help='alignment result file') - parser.add_argument('--batch_size', type=int, default=1, help='batch size') - parser.add_argument('--gen_praat', - action='store_true', - help='convert alignment to a praat format') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - - args = parser.parse_args() - print(args) - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.batch_size > 1: - logging.fatal('alignment mode must be running with batch_size == 1') - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - # Load dict - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 - - symbol_table = read_symbol_table(args.dict) - - # Init dataset and data loader - ali_conf = copy.deepcopy(configs['dataset_conf']) - - ali_conf['filter_conf']['max_length'] = 102400 - ali_conf['filter_conf']['min_length'] = 0 - ali_conf['filter_conf']['token_max_length'] = 102400 - ali_conf['filter_conf']['token_min_length'] = 0 - ali_conf['filter_conf']['max_output_input_ratio'] = 102400 - ali_conf['filter_conf']['min_output_input_ratio'] = 0 - ali_conf['speed_perturb'] = False - ali_conf['spec_aug'] = False - ali_conf['shuffle'] = False - ali_conf['sort'] = False - ali_conf['fbank_conf']['dither'] = 0.0 - ali_conf['batch_conf']['batch_type'] = "static" - ali_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - ali_dataset = Dataset(args.data_type, - args.input_file, - symbol_table, - ali_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - ali_data_loader = DataLoader(ali_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w', - encoding='utf-8') as fout: - for batch_idx, batch in enumerate(ali_data_loader): - print("#" * 80) - key, feat, target, feats_length, target_length = batch - print(key) - - feat = feat.to(device) - target = target.to(device) - feats_length = feats_length.to(device) - target_length = target_length.to(device) - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = model._forward_encoder( - feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = model.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - # print(ctc_probs.size(1)) - ctc_probs = ctc_probs.squeeze(0) - target = target.squeeze(0) - alignment = forced_align(ctc_probs, target) - print(alignment) - fout.write('{} {}\n'.format(key[0], alignment)) - - if args.gen_praat: - timestamp = get_frames_timestamp(alignment) - print(timestamp) - subsample = get_subsample(configs) - labformat = get_labformat(timestamp, subsample) - - lab_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".lab") - with open(lab_path, 'w', encoding='utf-8') as f: - f.writelines(labformat) - - textgrid_path = os.path.join(os.path.dirname(args.result_file), - key[0] + ".TextGrid") - generator_textgrid(maxtime=(len(alignment) + 1) * 0.01 * - subsample, - lines=labformat, - output=textgrid_path) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/average_model.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/average_model.py deleted file mode 100644 index 01efa64b4b458bc931a86a9a304b9f330ce4aaa2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/average_model.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import argparse -import glob - -import yaml -import numpy as np -import torch - - -def get_args(): - parser = argparse.ArgumentParser(description='average model') - parser.add_argument('--dst_model', required=True, help='averaged model') - parser.add_argument('--src_path', - required=True, - help='src model path for average') - parser.add_argument('--val_best', - action="store_true", - help='averaged model') - parser.add_argument('--num', - default=5, - type=int, - help='nums for averaged model') - parser.add_argument('--min_epoch', - default=0, - type=int, - help='min epoch used for averaging model') - parser.add_argument('--max_epoch', - default=65536, - type=int, - help='max epoch used for averaging model') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - checkpoints = [] - val_scores = [] - if args.val_best: - yamls = glob.glob('{}/[!train]*.yaml'.format(args.src_path)) - for y in yamls: - with open(y, 'r') as f: - dic_yaml = yaml.load(f, Loader=yaml.FullLoader) - loss = dic_yaml['cv_loss'] - epoch = dic_yaml['epoch'] - if epoch >= args.min_epoch and epoch <= args.max_epoch: - val_scores += [[epoch, loss]] - val_scores = np.array(val_scores) - sort_idx = np.argsort(val_scores[:, -1]) - sorted_val_scores = val_scores[sort_idx][::1] - print("best val scores = " + str(sorted_val_scores[:args.num, 1])) - print("selected epochs = " + - str(sorted_val_scores[:args.num, 0].astype(np.int64))) - path_list = [ - args.src_path + '/{}.pt'.format(int(epoch)) - for epoch in sorted_val_scores[:args.num, 0] - ] - else: - path_list = glob.glob('{}/[0-9]*.pt'.format(args.src_path)) - path_list = sorted(path_list, key=os.path.getmtime) - path_list = path_list[-args.num:] - print(path_list) - avg = None - num = args.num - assert num == len(path_list) - for path in path_list: - print('Processing {}'.format(path)) - states = torch.load(path, map_location=torch.device('cpu')) - if avg is None: - avg = states - else: - for k in avg.keys(): - avg[k] += states[k] - # average - for k in avg.keys(): - if avg[k] is not None: - # pytorch 1.6 use true_divide instead of /= - avg[k] = torch.true_divide(avg[k], num) - print('Saving to {}'.format(args.dst_model)) - torch.save(avg, args.dst_model) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_jit.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_jit.py deleted file mode 100644 index b2e5864e8382235c1cc800484ba5031ae22f3bd9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_jit.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import argparse -import os - -import torch -import yaml - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_file', default=None, help='output file') - parser.add_argument('--output_quant_file', - default=None, - help='output quantized model file') - args = parser.parse_args() - return args - - -def main(): - args = get_args() - # No need gpu for model export - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - model = init_model(configs) - print(model) - - load_checkpoint(model, args.checkpoint) - # Export jit torch script model - - if args.output_file: - script_model = torch.jit.script(model) - script_model.save(args.output_file) - print('Export model successfully, see {}'.format(args.output_file)) - - # Export quantized jit torch script model - if args.output_quant_file: - quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 - ) - print(quantized_model) - script_quant_model = torch.jit.script(quantized_model) - script_quant_model.save(args.output_quant_file) - print('Export quantized model successfully, ' - 'see {}'.format(args.output_quant_file)) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_onnx_bpu.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_onnx_bpu.py deleted file mode 100644 index 6462a69506f10778d08faae5fcf3067ad43d38bd..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_onnx_bpu.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright (c) 2022, Horizon Inc. Xingchen Song (sxc19@tsinghua.org.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NOTE(xcsong): Currently, we only support -1. specific conformer encoder architecture, see: - encoder: conformer - encoder_conf: - activation_type: **must be** relu - attention_heads: 2 or 4 or 8 or any number divisible by output_size - causal: **must be** true - cnn_module_kernel: 1 ~ 7 - cnn_module_norm: **must be** batch_norm - input_layer: **must be** conv2d8 - linear_units: 1 ~ 2048 - normalize_before: **must be** true - num_blocks: 1 ~ 12 - output_size: 1 ~ 512 - pos_enc_layer_type: **must be** no_pos - selfattention_layer_type: **must be** selfattn - use_cnn_module: **must be** true - use_dynamic_chunk: **must be** true - use_dynamic_left_chunk: **must be** true - -2. 
specific decoding method: ctc_greedy_search -""" - - -from __future__ import print_function - -import os -import sys -import copy -import math -import yaml -import logging -from typing import Tuple - -import torch -import numpy as np - -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model -from wenet.bin.export_onnx_cpu import (get_args, to_numpy, - print_input_output_info) - - -try: - import onnx - import onnxruntime -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class BPULayerNorm(torch.nn.Module): - """Refactor torch.nn.LayerNorm to meet 4-D dataflow.""" - def __init__(self, module, chunk_size=8, run_on_bpu=False): - super().__init__() - original = copy.deepcopy(module) - self.hidden = module.weight.size(0) - self.chunk_size = chunk_size - self.run_on_bpu = run_on_bpu - - if self.run_on_bpu: - self.weight = torch.nn.Parameter( - module.weight.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.bias = torch.nn.Parameter( - module.bias.reshape(1, self.hidden, 1, 1).repeat( - 1, 1, 1, chunk_size)) - self.negtive = torch.nn.Parameter( - torch.ones((1, self.hidden, 1, chunk_size)) * -1.0) - self.eps = torch.nn.Parameter( - torch.zeros((1, self.hidden, 1, chunk_size)) + module.eps) - self.mean_conv_1 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_1.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - self.mean_conv_2 = torch.nn.Conv2d(self.hidden, 1, 1, bias=False) - self.mean_conv_2.weight = torch.nn.Parameter( - torch.ones(self.hidden, self.hidden, 1, 1) / (1.0 * self.hidden)) - else: - self.norm = module - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.hidden) - orig_out = module(random_data) - new_out = self.forward(random_data.transpose(1, 2).unsqueeze(2)) - np.testing.assert_allclose( - to_numpy(orig_out), to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.run_on_bpu: - u = self.mean_conv_1(x) # (1, h, 1, c) - numerator = x + u * self.negtive # (1, h, 1, c) - s = torch.pow(numerator, 2) # (1, h, 1, c) - s = self.mean_conv_2(s) # (1, h, 1, c) - denominator = torch.sqrt(s + self.eps) # (1, h, 1, c) - x = torch.div(numerator, denominator) # (1, h, 1, c) - x = x * self.weight + self.bias - else: - x = x.squeeze(2).transpose(1, 2).contiguous() - x = self.norm(x) - x = x.transpose(1, 2).contiguous().unsqueeze(2) - return x - - -class BPUIdentity(torch.nn.Module): - """Refactor torch.nn.Identity(). - For inserting BPU node whose input == output. - """ - def __init__(self, channels): - super().__init__() - self.channels = channels - self.identity_conv = torch.nn.Conv2d( - channels, channels, 1, groups=channels, bias=False) - torch.nn.init.dirac_( - self.identity_conv.weight.data, groups=channels) - - self.check_equal() - - def check_equal(self): - random_data = torch.randn(1, self.channels, 1, 10) - result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(random_data), to_numpy(result), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Identity with 4-D dataflow, input == output. 
- Args: - x (torch.Tensor): (batch, in_channel, 1, time) - - Returns: - (torch.Tensor): (batch, in_channel, 1, time). - """ - return self.identity_conv(x) - - -class BPULinear(torch.nn.Module): - """Refactor torch.nn.Linear or pointwise_conv""" - def __init__(self, module, is_pointwise_conv=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.weight.size(1) - self.odim = module.weight.size(0) - self.is_pointwise_conv = is_pointwise_conv - - # Modify weight & bias - self.linear = torch.nn.Conv2d(self.idim, self.odim, 1, 1) - if is_pointwise_conv: - # (odim, idim, kernel=1) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(-1)) - else: - # (odim, idim) -> (odim, idim, 1, 1) - self.linear.weight = torch.nn.Parameter( - module.weight.unsqueeze(2).unsqueeze(3)) - self.linear.bias = module.bias - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.idim) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = module(random_data) - if self.is_pointwise_conv: - random_data = random_data.transpose(1, 2) - original_result = original_result.transpose(1, 2) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Linear with 4-D dataflow. - Args: - x (torch.Tensor): (batch, in_channel, 1, time) - Returns: - (torch.Tensor): (batch, out_channel, 1, time). - """ - return self.linear(x) - - -class BPUGlobalCMVN(torch.nn.Module): - """Refactor wenet/transformer/cmvn.py::GlobalCMVN""" - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - self.norm_var = module.norm_var - - # NOTE(xcsong): Expand to 4-D tensor, (mel_dim) -> (1, 1, mel_dim, 1) - self.mean = module.mean.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - self.istd = module.istd.unsqueeze(-1).unsqueeze(0).unsqueeze(0) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """CMVN with 4-D dataflow. - Args: - x (torch.Tensor): (batch, 1, mel_dim, time) - Returns: - (torch.Tensor): normalized feature with same shape. - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x - - -class BPUConv2dSubsampling8(torch.nn.Module): - """Refactor wenet/transformer/subsampling.py::Conv2dSubsampling8 - - NOTE(xcsong): Only support pos_enc_class == NoPositionalEncoding - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.right_context = module.right_context - self.subsampling_rate = module.subsampling_rate - assert isinstance(module.pos_enc, NoPositionalEncoding) - - # 1. Modify self.conv - # NOTE(xcsong): We change input shape from (1, 1, frames, mel_dim) - # to (1, 1, mel_dim, frames) for more efficient computation. - self.conv = module.conv - for idx in [0, 2, 4]: - self.conv[idx].weight = torch.nn.Parameter( - module.conv[idx].weight.transpose(2, 3) - ) - - # 2. 
Modify self.linear - # NOTE(xcsong): Split final projection to meet the requirment of - # maximum kernel_size (7 for XJ3) - self.linear = torch.nn.ModuleList() - odim = module.linear.weight.size(0) # 512, in this case - freq = module.linear.weight.size(1) // odim # 4608 // 512 == 9 - self.odim, self.freq = odim, freq - weight = module.linear.weight.reshape( - odim, odim, freq, 1) # (odim, odim * freq) -> (odim, odim, freq, 1) - self.split_size = [] - num_split = (freq - 1) // 7 + 1 # XJ3 requires kernel_size <= 7 - slice_begin = 0 - for idx in range(num_split): - kernel_size = min(freq, (idx + 1) * 7) - idx * 7 - conv_ele = torch.nn.Conv2d( - odim, odim, (kernel_size, 1), (kernel_size, 1)) - conv_ele.weight = torch.nn.Parameter( - weight[:, :, slice_begin:slice_begin + kernel_size, :] - ) - conv_ele.bias = torch.nn.Parameter( - torch.zeros_like(conv_ele.bias) - ) - self.linear.append(conv_ele) - self.split_size.append(kernel_size) - slice_begin += kernel_size - self.linear[0].bias = torch.nn.Parameter(module.linear.bias) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 67, 80) - mask = torch.zeros(1, 1, 67) - original_result, _, _ = module(random_data, mask) # (1, 8, 512) - random_data = random_data.transpose(1, 2).unsqueeze(0) # (1, 1, 80, 67) - new_result = self.forward(random_data) # (1, 512, 1, 8) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Subsample x with 4-D dataflow. - Args: - x (torch.Tensor): Input tensor (#batch, 1, mel_dim, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, odim, 1, time'), - where time' = time // 8. - """ - x = self.conv(x) # (1, odim, freq, time') - x_out = torch.zeros(x.size(0), self.odim, 1, x.size(3)) - x = torch.split(x, self.split_size, dim=2) - for idx, (x_part, layer) in enumerate(zip(x, self.linear)): - x_out += layer(x_part) - return x_out - - -class BPUMultiHeadedAttention(torch.nn.Module): - """Refactor wenet/transformer/attention.py::MultiHeadedAttention - - NOTE(xcsong): Only support attention_class == MultiHeadedAttention, - we do not consider RelPositionMultiHeadedAttention currently. - """ - def __init__(self, module, chunk_size, left_chunks): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.d_k = module.d_k - self.h = module.h - n_feat = self.d_k * self.h - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.time = chunk_size * (left_chunks + 1) - self.activation = torch.nn.Softmax(dim=-1) - - # 1. Modify self.linear_x - self.linear_q = BPULinear(module.linear_q) - self.linear_k = BPULinear(module.linear_k) - self.linear_v = BPULinear(module.linear_v) - self.linear_out = BPULinear(module.linear_out) - # 2. 
denom - self.register_buffer( - "denom", torch.full((1, self.h, 1, 1), 1.0 / math.sqrt(self.d_k))) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, self.chunk_size, self.d_k * self.h) - mask = torch.ones((1, self.h, self.chunk_size, self.time), - dtype=torch.bool) - cache = torch.zeros(1, self.h, self.chunk_size * self.left_chunks, - self.d_k * 2) - original_out, original_cache = module( - random_data, random_data, random_data, - mask[:, 0, :, :], torch.empty(0), cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.reshape(1, self.h, self.d_k * 2, - self.chunk_size * self.left_chunks) - new_out, new_cache = self.forward( - random_data, random_data, random_data, mask, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - - def forward( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - mask: torch.Tensor, cache: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. - - Args: - q (torch.Tensor): Query tensor (#batch, size, 1, chunk_size). - k (torch.Tensor): Key tensor (#batch, size, 1, chunk_size). - v (torch.Tensor): Value tensor (#batch, size, 1, chunk_size). - mask (torch.Tensor): Mask tensor, - (#batch, head, chunk_size, cache_t + chunk_size). - cache (torch.Tensor): Cache tensor - (1, head, d_k * 2, cache_t), - where `cache_t == chunk_size * left_chunks`. - - - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: Cache tensor - (1, head, d_k * 2, cache_t + chunk_size) - where `cache_t == chunk_size * left_chunks` - """ - # 1. Forward QKV - q = self.linear_q(q) # (1, d, 1, c) d == size, c == chunk_size - k = self.linear_k(k) # (1, d, 1, c) - v = self.linear_v(v) # (1, d, 1, c) - q = q.view(1, self.h, self.d_k, self.chunk_size) - k = k.view(1, self.h, self.d_k, self.chunk_size) - v = v.view(1, self.h, self.d_k, self.chunk_size) - q = q.transpose(2, 3) # (batch, head, time1, d_k) - k_cache, v_cache = torch.split(cache, cache.size(2) // 2, dim=2) - k = torch.cat((k_cache, k), dim=3) - v = torch.cat((v_cache, v), dim=3) - new_cache = torch.cat((k, v), dim=2) - # 2. (Q^T)K - scores = torch.matmul(q, k) * self.denom # (#b, n_head, time1, time2) - # 3. Forward attention - mask = mask.eq(0) - scores = scores.masked_fill(mask, -float('inf')) - attn = self.activation(scores).masked_fill(mask, 0.0) - attn = attn.transpose(2, 3) - x = torch.matmul(v, attn) - x = x.view(1, self.d_k * self.h, 1, self.chunk_size) - x_out = self.linear_out(x) - return x_out, new_cache - - -class BPUConvolution(torch.nn.Module): - """Refactor wenet/transformer/convolution.py::ConvolutionModule - - NOTE(xcsong): Only suport use_layer_norm == False - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.lorder = module.lorder - self.use_layer_norm = False - self.activation = module.activation - channels = module.pointwise_conv1.weight.size(1) - self.channels = channels - kernel_size = module.depthwise_conv.weight.size(2) - assert module.use_layer_norm is False - - # 1. Modify self.pointwise_conv1 - self.pointwise_conv1 = BPULinear(module.pointwise_conv1, True) - - # 2. 
Modify self.depthwise_conv - self.depthwise_conv = torch.nn.Conv2d( - channels, channels, (1, kernel_size), - stride=1, groups=channels) - self.depthwise_conv.weight = torch.nn.Parameter( - module.depthwise_conv.weight.unsqueeze(-2)) - self.depthwise_conv.bias = torch.nn.Parameter( - module.depthwise_conv.bias) - - # 3. Modify self.norm, Only support batchnorm2d - self.norm = torch.nn.BatchNorm2d(channels) - self.norm.training = False - self.norm.num_features = module.norm.num_features - self.norm.eps = module.norm.eps - self.norm.momentum = module.norm.momentum - self.norm.weight = torch.nn.Parameter(module.norm.weight) - self.norm.bias = torch.nn.Parameter(module.norm.bias) - self.norm.running_mean = module.norm.running_mean - self.norm.running_var = module.norm.running_var - - # 4. Modify self.pointwise_conv2 - self.pointwise_conv2 = BPULinear(module.pointwise_conv2, True) - - # 5. Identity conv, for running `concat` on BPU - self.identity = BPUIdentity(channels) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.channels) - cache = torch.zeros((1, self.channels, self.lorder)) - original_out, original_cache = module(random_data, cache=cache) - random_data = random_data.transpose(1, 2).unsqueeze(2) - cache = cache.unsqueeze(2) - new_out, new_cache = self.forward(random_data, cache) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cache), - to_numpy(new_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, channels, 1, chunk_size). - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, 1, cache_t). - Returns: - torch.Tensor: Output tensor (#batch, channels, 1, chunk_size). - torch.Tensor: Cache tensor (#batch, channels, 1, cache_t). - """ - # Concat cache - x = torch.cat((self.identity(cache), self.identity(x)), dim=3) - new_cache = x[:, :, :, -self.lorder:] - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, 1, dim) - x = torch.nn.functional.glu(x, dim=1) # (b, channel, 1, dim) - - # Depthwise Conv - x = self.depthwise_conv(x) - x = self.activation(self.norm(x)) - x = self.pointwise_conv2(x) - return x, new_cache - - -class BPUFFN(torch.nn.Module): - """Refactor wenet/transformer/positionwise_feed_forward.py::PositionwiseFeedForward - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.activation = module.activation - - # 1. Modify self.w_x - self.w_1 = BPULinear(module.w_1) - self.w_2 = BPULinear(module.w_2) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 8, self.w_1.idim) - original_out = module(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_out = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_out), - to_numpy(new_out.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function. 
- - Args: - xs: input tensor (B, D, 1, L) - Returns: - output tensor, (B, D, 1, L) - """ - return self.w_2(self.activation(self.w_1(x))) - - -class BPUConformerEncoderLayer(torch.nn.Module): - """Refactor wenet/transformer/encoder_layer.py::ConformerEncoderLayer - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.size = module.size - assert module.normalize_before is True - assert module.concat_after is False - - # 1. Modify submodules - self.feed_forward_macaron = BPUFFN(module.feed_forward_macaron) - self.self_attn = BPUMultiHeadedAttention( - module.self_attn, chunk_size, left_chunks) - self.conv_module = BPUConvolution(module.conv_module) - self.feed_forward = BPUFFN(module.feed_forward) - - # 2. Modify norms - self.norm_ff = BPULayerNorm(module.norm_ff, chunk_size, ln_run_on_bpu) - self.norm_mha = BPULayerNorm(module.norm_mha, chunk_size, ln_run_on_bpu) - self.norm_ff_macron = BPULayerNorm(module.norm_ff_macaron, - chunk_size, ln_run_on_bpu) - self.norm_conv = BPULayerNorm(module.norm_conv, - chunk_size, ln_run_on_bpu) - self.norm_final = BPULayerNorm(module.norm_final, - chunk_size, ln_run_on_bpu) - - # 3. 4-D ff_scale - self.register_buffer( - "ff_scale", torch.full((1, self.size, 1, 1), module.ff_scale)) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.self_attn.chunk_size - time2 = self.self_attn.time - h, d_k = self.self_attn.h, self.self_attn.d_k - random_x = torch.randn(1, time1, self.size) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(1, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(1, self.size, self.conv_module.lorder) - original_x, _, original_att_cache, original_cnn_cache = module( - random_x, att_mask[:, 0, :, :], torch.empty(0), - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.transpose(1, 2).unsqueeze(2) - att_cache = att_cache.reshape(1, h, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.unsqueeze(2) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_mask, att_cache, cnn_cache - ) - np.testing.assert_allclose( - to_numpy(original_att_cache), - to_numpy(new_att_cache.transpose(2, 3)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(original_cnn_cache), - to_numpy(new_cnn_cache.squeeze(2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, x: torch.Tensor, att_mask: torch.Tensor, - att_cache: torch.Tensor, cnn_cache: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, size, 1, chunk_size) - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, d_k * 2, cache_t1), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, 1, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, size, 1, chunk_size). - torch.Tensor: att_cache tensor, - (1, head, d_k * 2, cache_t1 + chunk_size). - torch.Tensor: cnn_cahce tensor (#batch, size, 1, cache_t2). - """ - # 1. ffn_macaron - residual = x - x = self.norm_ff_macron(x) - x = residual + self.ff_scale * self.feed_forward_macaron(x) - - # 2. 
attention - residual = x - x = self.norm_mha(x) - x_att, new_att_cache = self.self_attn( - x, x, x, att_mask, att_cache) - x = residual + x_att - - # 3. convolution - residual = x - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, cnn_cache) - x = residual + x - - # 4. ffn - residual = x - x = self.norm_ff(x) - x = residual + self.ff_scale * self.feed_forward(x) - - # 5. final post-norm - x = self.norm_final(x) - - return x, new_att_cache, new_cnn_cache - - -class BPUConformerEncoder(torch.nn.Module): - """Refactor wenet/transformer/encoder.py::ConformerEncoder - """ - def __init__(self, module, chunk_size, left_chunks, ln_run_on_bpu=False): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - output_size = module.output_size() - self._output_size = module.output_size() - self.after_norm = module.after_norm - self.chunk_size = chunk_size - self.left_chunks = left_chunks - self.head = module.encoders[0].self_attn.h - self.layers = len(module.encoders) - - # 1. Modify submodules - self.global_cmvn = BPUGlobalCMVN(module.global_cmvn) - self.embed = BPUConv2dSubsampling8(module.embed) - self.encoders = torch.nn.ModuleList() - for layer in module.encoders: - self.encoders.append(BPUConformerEncoderLayer( - layer, chunk_size, left_chunks, ln_run_on_bpu)) - - # 2. Auxiliary conv - self.identity_cnncache = BPUIdentity(output_size) - - self.check_equal(original) - - def check_equal(self, module): - time1 = self.encoders[0].self_attn.chunk_size - time2 = self.encoders[0].self_attn.time - layers = self.layers - h, d_k = self.head, self.encoders[0].self_attn.d_k - decoding_window = (self.chunk_size - 1) * \ - module.embed.subsampling_rate + \ - module.embed.right_context + 1 - lorder = self.encoders[0].conv_module.lorder - random_x = torch.randn(1, decoding_window, 80) - att_mask = torch.ones(1, h, time1, time2) - att_cache = torch.zeros(layers, h, time2 - time1, d_k * 2) - cnn_cache = torch.zeros(layers, 1, self._output_size, lorder) - orig_x, orig_att_cache, orig_cnn_cache = module.forward_chunk( - random_x, 0, time2 - time1, att_mask=att_mask[:, 0, :, :], - att_cache=att_cache, cnn_cache=cnn_cache - ) - random_x = random_x.unsqueeze(0) - att_cache = att_cache.reshape(1, h * layers, d_k * 2, time2 - time1) - cnn_cache = cnn_cache.reshape(1, self._output_size, layers, lorder) - new_x, new_att_cache, new_cnn_cache = self.forward( - random_x, att_cache, cnn_cache, att_mask - ) - caches = torch.split(new_att_cache, h, dim=1) - caches = [c.transpose(2, 3) for c in caches] - np.testing.assert_allclose( - to_numpy(orig_att_cache), - to_numpy(torch.cat(caches, dim=0)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_x), - to_numpy(new_x.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - np.testing.assert_allclose( - to_numpy(orig_cnn_cache), - to_numpy(new_cnn_cache.transpose(0, 2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward( - self, xs: torch.Tensor, att_cache: torch.Tensor, - cnn_cache: torch.Tensor, att_mask: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, 1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (1, head * elayers, d_k * 2, cache_t1), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * left_chunks`. 
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (1, hidden-dim, elayers, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask (torch.Tensor): Mask tensor for the input - (#batch, head, chunk_size, cache_t1 + chunk_size), - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, hidden-dim, 1, chunk_size). - torch.Tensor: new attention cache required for next chunk, with - same shape as the original att_cache. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - """ - # xs: (B, 1, time, mel_dim) -> (B, 1, mel_dim, time) - xs = xs.transpose(2, 3) - xs = self.global_cmvn(xs) - # xs: (B, 1, mel_dim, time) -> (B, hidden_dim, 1, chunk_size) - xs = self.embed(xs) - - att_cache = torch.split(att_cache, self.head, dim=1) - cnn_cache = self.identity_cnncache(cnn_cache) - cnn_cache = torch.split(cnn_cache, 1, dim=2) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - xs, new_att_cache, new_cnn_cache = layer( - xs, att_mask, att_cache=att_cache[i], cnn_cache=cnn_cache[i]) - r_att_cache.append(new_att_cache[:, :, :, self.chunk_size:]) - r_cnn_cache.append(new_cnn_cache) - r_att_cache = torch.cat(r_att_cache, dim=1) - r_cnn_cache = self.identity_cnncache( - torch.cat(r_cnn_cache, dim=2)) - - xs = xs.squeeze(2).transpose(1, 2).contiguous() - xs = self.after_norm(xs) - # NOTE(xcsong): 4D in, 4D out to meet the requirment of CTC input. - xs = xs.transpose(1, 2).contiguous().unsqueeze(2) # (B, C, 1, T) - - return (xs, r_att_cache, r_cnn_cache) - - -class BPUCTC(torch.nn.Module): - """Refactor wenet/transformer/ctc.py::CTC - """ - def __init__(self, module): - super().__init__() - # Unchanged submodules and attributes - original = copy.deepcopy(module) - self.idim = module.ctc_lo.weight.size(1) - num_class = module.ctc_lo.weight.size(0) - - # 1. Modify self.ctc_lo, Split final projection to meet the - # requirment of maximum in/out channels (2048 for XJ3) - self.ctc_lo = torch.nn.ModuleList() - self.split_size = [] - num_split = (num_class - 1) // 2048 + 1 - for idx in range(num_split): - out_channel = min(num_class, (idx + 1) * 2048) - idx * 2048 - conv_ele = torch.nn.Conv2d(self.idim, out_channel, 1, 1) - self.ctc_lo.append(conv_ele) - self.split_size.append(out_channel) - orig_weight = torch.split(module.ctc_lo.weight, self.split_size, dim=0) - orig_bias = torch.split(module.ctc_lo.bias, self.split_size, dim=0) - for i, (w, b) in enumerate(zip(orig_weight, orig_bias)): - w = w.unsqueeze(2).unsqueeze(3) - self.ctc_lo[i].weight = torch.nn.Parameter(w) - self.ctc_lo[i].bias = torch.nn.Parameter(b) - - self.check_equal(original) - - def check_equal(self, module): - random_data = torch.randn(1, 100, self.idim) - original_result = module.ctc_lo(random_data) - random_data = random_data.transpose(1, 2).unsqueeze(2) - new_result = self.forward(random_data) - np.testing.assert_allclose( - to_numpy(original_result), - to_numpy(new_result.squeeze(2).transpose(1, 2)), - rtol=1e-02, atol=1e-03) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """frame activations, without softmax. 
- - Args: - Tensor x: 4d tensor (B, hidden_dim, 1, chunk_size) - Returns: - torch.Tensor: (B, num_class, 1, chunk_size) - """ - out = [] - for i, layer in enumerate(self.ctc_lo): - out.append(layer(x)) - out = torch.cat(out, dim=1) - return out - - -def export_encoder(asr_model, args): - logger.info("Stage-1: export encoder") - decode_window, mel_dim = args.decoding_window, args.feature_size - encoder = BPUConformerEncoder( - asr_model.encoder, args.chunk_size, args.num_decoding_left_chunks, - args.ln_run_on_bpu) - encoder.eval() - encoder_outpath = os.path.join(args.output_dir, 'encoder.onnx') - - logger.info("Stage-1.1: prepare inputs for encoder") - chunk = torch.randn((1, 1, decode_window, mel_dim)) - required_cache_size = encoder.chunk_size * encoder.left_chunks - kv_time = required_cache_size + encoder.chunk_size - hidden, layers = encoder._output_size, len(encoder.encoders) - head = encoder.encoders[0].self_attn.h - d_k = hidden // head - lorder = encoder.encoders[0].conv_module.lorder - att_cache = torch.zeros(1, layers * head, d_k * 2, required_cache_size) - att_mask = torch.ones((1, head, encoder.chunk_size, kv_time)) - att_mask[:, :, :, :required_cache_size] = 0 - cnn_cache = torch.zeros((1, hidden, layers, lorder)) - inputs = (chunk, att_cache, cnn_cache, att_mask) - logger.info("chunk.size(): {} att_cache.size(): {} " - "cnn_cache.size(): {} att_mask.size(): {}".format( - list(chunk.size()), list(att_cache.size()), - list(cnn_cache.size()), list(att_mask.size()))) - - logger.info("Stage-1.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'] = "chunk;att_cache;cnn_cache;att_mask" - attributes['output_name'] = "output;r_att_cache;r_cnn_cache" - attributes['input_type'] = "featuremap;featuremap;featuremap;featuremap" - attributes['norm_type'] = \ - "no_preprocess;no_preprocess;no_preprocess;no_preprocess" - attributes['input_layout_train'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_layout_rt'] = "NCHW;NCHW;NCHW;NCHW" - attributes['input_shape'] = \ - "{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{};{}x{}x{}x{}".format( - chunk.size(0), chunk.size(1), chunk.size(2), chunk.size(3), - att_cache.size(0), att_cache.size(1), att_cache.size(2), - att_cache.size(3), cnn_cache.size(0), cnn_cache.size(1), - cnn_cache.size(2), cnn_cache.size(3), att_mask.size(0), - att_mask.size(1), att_mask.size(2), att_mask.size(3) - ) - torch.onnx.export( # NOTE(xcsong): only support opset==11 - encoder, inputs, encoder_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=attributes['input_name'].split(';'), - output_names=attributes['output_name'].split(';'), - dynamic_axes=None, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for k in vars(args): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - logger.info('Export onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - logger.info("Stage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk, torch_att_mask = copy.deepcopy(chunk), copy.deepcopy(att_mask) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - for i in range(10): - logger.info("torch chunk-{}: {}, att_cache: {}, cnn_cache: {}" - ", att_mask: {}".format( - i, list(torch_chunk.size()), - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), - list(torch_att_mask.size()))) - torch_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_output = torch.cat(torch_output, dim=-1) - - onnx_output = [] - onnx_chunk, onnx_att_mask = to_numpy(chunk), to_numpy(att_mask) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - logger.info("onnx chunk-{}: {}, att_cache: {}, cnn_cache: {}," - " att_mask: {}".format( - i, onnx_chunk.shape, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - onnx_att_mask[:, :, :, -(encoder.chunk_size * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'att_cache': onnx_att_cache, - 'cnn_cache': onnx_cnn_cache, 'att_mask': onnx_att_mask, - } - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_output = np.concatenate(onnx_output, axis=-1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_encoder, pass!") - return encoder, ort_session - - -def export_ctc(asr_model, args): - logger.info("Stage-2: export ctc") - ctc = BPUCTC(asr_model.ctc).eval() - ctc_outpath = os.path.join(args.output_dir, 'ctc.onnx') - - logger.info("Stage-2.1: prepare inputs for ctc") - hidden = torch.randn((1, args.output_size, 1, args.chunk_size)) - - logger.info("Stage-2.2: torch.onnx.export") - # NOTE(xcsong): Below attributes will be used in - # onnx2horizonbin.py::generate_config() - attributes = {} - attributes['input_name'], attributes['input_type'] = "hidden", "featuremap" - attributes['norm_type'] = "no_preprocess" - attributes['input_layout_train'] = "NCHW" - attributes['input_layout_rt'] = "NCHW" - attributes['input_shape'] = "{}x{}x{}x{}".format( - hidden.size(0), hidden.size(1), hidden.size(2), hidden.size(3), - ) - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=11, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=None, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for k in vars(args): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(getattr(args, k)) - for k in attributes: - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(attributes[k]) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - logger.info('Export onnx_ctc, done! 
see {}'.format(ctc_outpath)) - - logger.info("Stage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-04) - meta = ort_session.get_modelmeta() - logger.info("custom_metadata_map={}".format(meta.custom_metadata_map)) - logger.info("Check onnx_ctc, pass!") - return ctc, ort_session - - -def export_decoder(asr_model, args): - logger.info("Currently, Decoder is not supported.") - - -if __name__ == '__main__': - torch.manual_seed(777) - args = get_args() - args.ln_run_on_bpu = False - # NOTE(xcsong): XJ3 BPU only support static shapes - assert args.chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - os.system("mkdir -p " + args.output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - args.feature_size = configs['input_dim'] - args.output_size = model.encoder.output_size() - args.decoding_window = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 - - export_encoder(model, args) - export_ctc(model, args) - export_decoder(model, args) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_onnx_cpu.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_onnx_cpu.py deleted file mode 100644 index a8009d2f606f753a5870eb754235d8d55e756b5d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_onnx_cpu.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) 2022, Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
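A brief aside before the body of this deleted script: both the BPU exporter above and the CPU exporter whose removal starts here size their streaming inputs from the same decoding-window formula, `(chunk_size - 1) * subsampling_rate + right_context + 1`. A minimal numeric sketch follows; the `subsampling_rate` and `right_context` values are illustrative assumptions, not taken from any shipped config.

```python
# Quick numeric sketch of the chunk bookkeeping shared by these export scripts.
# subsampling_rate and right_context are assumed values; the real ones come
# from the subsampling front-end configured in train.yaml.
chunk_size = 16              # decoding chunk size (output frames per chunk)
left_chunks = 4              # number of cached left chunks ("16/4" mode)
subsampling_rate = 4         # assumed 4x Conv2d subsampling
right_context = 6            # assumed right context of that subsampling layer

decoding_window = (chunk_size - 1) * subsampling_rate + right_context + 1
required_cache_size = chunk_size * left_chunks

print(decoding_window)       # 67 input feature frames fed per chunk
print(required_cache_size)   # 64 cached key/value frames kept between chunks
```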
- -from __future__ import print_function - -import argparse -import os -import copy -import sys - -import torch -import yaml -import numpy as np - -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.init_model import init_model - -try: - import onnx - import onnxruntime - from onnxruntime.quantization import quantize_dynamic, QuantType -except ImportError: - print('Please install onnx and onnxruntime!') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='export your script model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--output_dir', required=True, help='output directory') - parser.add_argument('--chunk_size', required=True, - type=int, help='decoding chunk size') - parser.add_argument('--num_decoding_left_chunks', required=True, - type=int, help='cache chunks') - parser.add_argument('--reverse_weight', default=0.5, - type=float, help='reverse_weight in attention_rescoing') - args = parser.parse_args() - return args - - -def to_numpy(tensor): - if tensor.requires_grad: - return tensor.detach().cpu().numpy() - else: - return tensor.cpu().numpy() - - -def print_input_output_info(onnx_model, name, prefix="\t\t"): - input_names = [node.name for node in onnx_model.graph.input] - input_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.input] - output_names = [node.name for node in onnx_model.graph.output] - output_shapes = [[d.dim_value for d in node.type.tensor_type.shape.dim] - for node in onnx_model.graph.output] - print("{}{} inputs : {}".format(prefix, name, input_names)) - print("{}{} input shapes : {}".format(prefix, name, input_shapes)) - print("{}{} outputs: {}".format(prefix, name, output_names)) - print("{}{} output shapes : {}".format(prefix, name, output_shapes)) - - -def export_encoder(asr_model, args): - print("Stage-1: export encoder") - encoder = asr_model.encoder - encoder.forward = encoder.forward_chunk - encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx') - - print("\tStage-1.1: prepare inputs for encoder") - chunk = torch.randn( - (args['batch'], args['decoding_window'], args['feature_size'])) - offset = 0 - # NOTE(xcsong): The uncertainty of `next_cache_start` only appears - # in the first few chunks, this is caused by dynamic att_cache shape, i,e - # (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent - # chunks. One way to ease the ONNX export is to keep `next_cache_start` - # as a fixed value. To do this, for the **first** chunk, if - # left_chunks > 0, we feed real cache & real mask to the model, otherwise - # fake cache & fake mask. In this way, we get: - # 1. 16/-1 mode: next_cache_start == 0 for all chunks - # 2. 16/4 mode: next_cache_start == chunk_size for all chunks - # 3. 16/0 mode: next_cache_start == chunk_size for all chunks - # 4. -1/-1 mode: next_cache_start == 0 for all chunks - # NO MORE DYNAMIC CHANGES!! - # - # NOTE(Mddct): We retain the current design for the convenience of supporting some - # inference frameworks without dynamic shapes. 
If you're interested in all-in-one - # model that supports different chunks please see: - # https://github.com/wenet-e2e/wenet/pull/1174 - - if args['left_chunks'] > 0: # 16/4 - required_cache_size = args['chunk_size'] * args['left_chunks'] - offset = required_cache_size - # Real cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], required_cache_size, - args['output_size'] // args['head'] * 2)) - # Real mask - att_mask = torch.ones( - (args['batch'], 1, required_cache_size + args['chunk_size']), - dtype=torch.bool) - att_mask[:, :, :required_cache_size] = 0 - elif args['left_chunks'] <= 0: # 16/-1, -1/-1, 16/0 - required_cache_size = -1 if args['left_chunks'] < 0 else 0 - # Fake cache - att_cache = torch.zeros( - (args['num_blocks'], args['head'], 0, - args['output_size'] // args['head'] * 2)) - # Fake mask - att_mask = torch.ones((0, 0, 0), dtype=torch.bool) - cnn_cache = torch.zeros( - (args['num_blocks'], args['batch'], - args['output_size'], args['cnn_module_kernel'] - 1)) - inputs = (chunk, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - print("\t\tchunk.size(): {}\n".format(chunk.size()), - "\t\toffset: {}\n".format(offset), - "\t\trequired_cache: {}\n".format(required_cache_size), - "\t\tatt_cache.size(): {}\n".format(att_cache.size()), - "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()), - "\t\tatt_mask.size(): {}\n".format(att_mask.size())) - - print("\tStage-1.2: torch.onnx.export") - dynamic_axes = { - 'chunk': {1: 'T'}, - 'att_cache': {2: 'T_CACHE'}, - 'att_mask': {2: 'T_ADD_T_CACHE'}, - 'output': {1: 'T'}, - 'r_att_cache': {2: 'T_CACHE'}, - } - # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is - # to avoid padding the last chunk (which usually contains less - # frames than required). For users who want static axes, just pop - # out specific axis. - # if args['chunk_size'] > 0: # 16/4, 16/-1, 16/0 - # dynamic_axes.pop('chunk') - # dynamic_axes.pop('output') - # if args['left_chunks'] >= 0: # 16/4, 16/0 - # # NOTE(xsong): since we feed real cache & real mask into the - # # model when left_chunks > 0, the shape of cache will never - # # be changed. - # dynamic_axes.pop('att_cache') - # dynamic_axes.pop('r_att_cache') - torch.onnx.export( - encoder, inputs, encoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=[ - 'chunk', 'offset', 'required_cache_size', - 'att_cache', 'cnn_cache', 'att_mask' - ], - output_names=['output', 'r_att_cache', 'r_cnn_cache'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_encoder = onnx.load(encoder_outpath) - for (k, v) in args.items(): - meta = onnx_encoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_encoder) - onnx.helper.printable_graph(onnx_encoder.graph) - # NOTE(xcsong): to add those metadatas we need to reopen - # the file and resave it. - onnx.save(onnx_encoder, encoder_outpath) - print_input_output_info(onnx_encoder, "onnx_encoder") - # Dynamic quantization - model_fp32 = encoder_outpath - model_quant = os.path.join(args['output_dir'], 'encoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_encoder, done! 
see {}'.format(encoder_outpath)) - - print("\tStage-1.3: check onnx_encoder and torch_encoder") - torch_output = [] - torch_chunk = copy.deepcopy(chunk) - torch_offset = copy.deepcopy(offset) - torch_required_cache_size = copy.deepcopy(required_cache_size) - torch_att_cache = copy.deepcopy(att_cache) - torch_cnn_cache = copy.deepcopy(cnn_cache) - torch_att_mask = copy.deepcopy(att_mask) - for i in range(10): - print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, list(torch_chunk.size()), torch_offset, - list(torch_att_cache.size()), - list(torch_cnn_cache.size()), list(torch_att_mask.size()))) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - out, torch_att_cache, torch_cnn_cache = encoder( - torch_chunk, torch_offset, torch_required_cache_size, - torch_att_cache, torch_cnn_cache, torch_att_mask) - torch_output.append(out) - torch_offset += out.size(1) - torch_output = torch.cat(torch_output, dim=1) - - onnx_output = [] - onnx_chunk = to_numpy(chunk) - onnx_offset = np.array((offset)).astype(np.int64) - onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64) - onnx_att_cache = to_numpy(att_cache) - onnx_cnn_cache = to_numpy(cnn_cache) - onnx_att_mask = to_numpy(att_mask) - ort_session = onnxruntime.InferenceSession(encoder_outpath) - input_names = [node.name for node in onnx_encoder.graph.input] - for i in range(10): - print("\t\tonnx chunk-{}: {}, offset: {}, att_cache: {}," - " cnn_cache: {}, att_mask: {}".format( - i, onnx_chunk.shape, onnx_offset, onnx_att_cache.shape, - onnx_cnn_cache.shape, onnx_att_mask.shape)) - # NOTE(xsong): att_mask of the first few batches need changes if - # we use 16/4 mode. - if args['left_chunks'] > 0: # 16/4 - onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1 - ort_inputs = { - 'chunk': onnx_chunk, 'offset': onnx_offset, - 'required_cache_size': onnx_required_cache_size, - 'att_cache': onnx_att_cache, 'cnn_cache': onnx_cnn_cache, - 'att_mask': onnx_att_mask - } - # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start` - # will be hardcoded to 0 or chunk_size by ONNX, thus - # required_cache_size and att_mask are no more needed and they will - # be removed by ONNX automatically. 
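As the note above points out, constant folding can remove graph inputs that the 16/-1, -1/-1, and 16/0 modes no longer need, which is why the feed dict is filtered before `session.run`. A minimal sketch of the same idea driven from the session itself; `encoder.onnx` is a placeholder path, not a claim about where the file actually lives.

```python
import onnxruntime as ort

# When torch.onnx.export folds an unused input (e.g. required_cache_size)
# into constants, that input vanishes from the graph, and onnxruntime will
# reject a feed dict that still contains it. Query the live inputs instead.
session = ort.InferenceSession("encoder.onnx")          # placeholder path
live_inputs = {i.name for i in session.get_inputs()}

def trim_feed(feed: dict) -> dict:
    """Drop feed entries the exported graph no longer accepts."""
    return {k: v for k, v in feed.items() if k in live_inputs}
```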
- for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - ort_outs = ort_session.run(None, ort_inputs) - onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2] - onnx_output.append(ort_outs[0]) - onnx_offset += ort_outs[0].shape[1] - onnx_output = np.concatenate(onnx_output, axis=1) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output, - rtol=1e-03, atol=1e-05) - meta = ort_session.get_modelmeta() - print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map)) - print("\t\tCheck onnx_encoder, pass!") - - -def export_ctc(asr_model, args): - print("Stage-2: export ctc") - ctc = asr_model.ctc - ctc.forward = ctc.log_softmax - ctc_outpath = os.path.join(args['output_dir'], 'ctc.onnx') - - print("\tStage-2.1: prepare inputs for ctc") - hidden = torch.randn( - (args['batch'], args['chunk_size'] if args['chunk_size'] > 0 else 16, - args['output_size'])) - - print("\tStage-2.2: torch.onnx.export") - dynamic_axes = {'hidden': {1: 'T'}, 'probs': {1: 'T'}} - torch.onnx.export( - ctc, hidden, ctc_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hidden'], output_names=['probs'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_ctc = onnx.load(ctc_outpath) - for (k, v) in args.items(): - meta = onnx_ctc.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_ctc) - onnx.helper.printable_graph(onnx_ctc.graph) - onnx.save(onnx_ctc, ctc_outpath) - print_input_output_info(onnx_ctc, "onnx_ctc") - # Dynamic quantization - model_fp32 = ctc_outpath - model_quant = os.path.join(args['output_dir'], 'ctc.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_ctc, done! see {}'.format(ctc_outpath)) - - print("\tStage-2.3: check onnx_ctc and torch_ctc") - torch_output = ctc(hidden) - ort_session = onnxruntime.InferenceSession(ctc_outpath) - onnx_output = ort_session.run(None, {'hidden': to_numpy(hidden)}) - - np.testing.assert_allclose(to_numpy(torch_output), onnx_output[0], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_ctc, pass!") - - -def export_decoder(asr_model, args): - print("Stage-3: export decoder") - decoder = asr_model - # NOTE(lzhin): parameters of encoder will be automatically removed - # since they are not used during rescoring. - decoder.forward = decoder.forward_attention_decoder - decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx') - - print("\tStage-3.1: prepare inputs for decoder") - # hardcode time->200 nbest->10 len->20, they are dynamic axes. 
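One pattern used throughout this script is easy to miss: `forward` is re-bound to the method that should actually be traced (`forward_chunk`, `log_softmax`, `forward_attention_decoder`) before calling `torch.onnx.export`. A self-contained toy illustration of that trick is below; the `Toy` module is hypothetical and stands in for the wenet modules.

```python
import torch

class Toy(torch.nn.Module):
    """Hypothetical module with a training path and an export path."""
    def forward(self, x):                    # default entry point
        return x * 2

    def export_path(self, x):                # what we actually want in ONNX
        return torch.nn.functional.log_softmax(x, dim=-1)

model = Toy().eval()
# Same trick as `ctc.forward = ctc.log_softmax` above: torch.onnx.export
# traces whatever `model(...)` dispatches to, and an instance attribute
# shadows the class-level forward method.
model.forward = model.export_path
torch.onnx.export(model, torch.randn(1, 8), "toy.onnx",
                  input_names=["hidden"], output_names=["probs"],
                  opset_version=13)
```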
- encoder_out = torch.randn((1, 200, args['output_size'])) - hyps = torch.randint(low=0, high=args['vocab_size'], - size=[10, 20]) - hyps[:, 0] = args['vocab_size'] - 1 # - hyps_lens = torch.randint(low=15, high=21, size=[10]) - - print("\tStage-3.2: torch.onnx.export") - dynamic_axes = { - 'hyps': {0: 'NBEST', 1: 'L'}, 'hyps_lens': {0: 'NBEST'}, - 'encoder_out': {1: 'T'}, - 'score': {0: 'NBEST', 1: 'L'}, 'r_score': {0: 'NBEST', 1: 'L'} - } - inputs = (hyps, hyps_lens, encoder_out, args['reverse_weight']) - torch.onnx.export( - decoder, inputs, decoder_outpath, opset_version=13, - export_params=True, do_constant_folding=True, - input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'], - output_names=['score', 'r_score'], - dynamic_axes=dynamic_axes, verbose=False) - onnx_decoder = onnx.load(decoder_outpath) - for (k, v) in args.items(): - meta = onnx_decoder.metadata_props.add() - meta.key, meta.value = str(k), str(v) - onnx.checker.check_model(onnx_decoder) - onnx.helper.printable_graph(onnx_decoder.graph) - onnx.save(onnx_decoder, decoder_outpath) - print_input_output_info(onnx_decoder, "onnx_decoder") - model_fp32 = decoder_outpath - model_quant = os.path.join(args['output_dir'], 'decoder.quant.onnx') - quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8) - print('\t\tExport onnx_decoder, done! see {}'.format( - decoder_outpath)) - - print("\tStage-3.3: check onnx_decoder and torch_decoder") - torch_score, torch_r_score = decoder( - hyps, hyps_lens, encoder_out, args['reverse_weight']) - ort_session = onnxruntime.InferenceSession(decoder_outpath) - input_names = [node.name for node in onnx_decoder.graph.input] - ort_inputs = { - 'hyps': to_numpy(hyps), - 'hyps_lens': to_numpy(hyps_lens), - 'encoder_out': to_numpy(encoder_out), - 'reverse_weight': np.array((args['reverse_weight'])), - } - for k in list(ort_inputs): - if k not in input_names: - ort_inputs.pop(k) - onnx_output = ort_session.run(None, ort_inputs) - - np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0], - rtol=1e-03, atol=1e-05) - if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0: - np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1], - rtol=1e-03, atol=1e-05) - print("\t\tCheck onnx_decoder, pass!") - - -def main(): - torch.manual_seed(777) - args = get_args() - output_dir = args.output_dir - os.system("mkdir -p " + output_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - print(model) - - arguments = {} - arguments['output_dir'] = output_dir - arguments['batch'] = 1 - arguments['chunk_size'] = args.chunk_size - arguments['left_chunks'] = args.num_decoding_left_chunks - arguments['reverse_weight'] = args.reverse_weight - arguments['output_size'] = configs['encoder_conf']['output_size'] - arguments['num_blocks'] = configs['encoder_conf']['num_blocks'] - arguments['cnn_module_kernel'] = configs['encoder_conf'].get('cnn_module_kernel', 1) - arguments['head'] = configs['encoder_conf']['attention_heads'] - arguments['feature_size'] = configs['input_dim'] - arguments['vocab_size'] = configs['output_dim'] - # NOTE(xcsong): if chunk_size == -1, hardcode to 67 - arguments['decoding_window'] = (args.chunk_size - 1) * \ - model.encoder.embed.subsampling_rate + \ - model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67 - arguments['encoder'] = configs['encoder'] - 
arguments['decoder'] = configs['decoder'] - arguments['subsampling_rate'] = model.subsampling_rate() - arguments['right_context'] = model.right_context() - arguments['sos_symbol'] = model.sos_symbol() - arguments['eos_symbol'] = model.eos_symbol() - arguments['is_bidirectional_decoder'] = 1 \ - if model.is_bidirectional_decoder() else 0 - - # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is - # not a [16/4 16/-1 16/0] all-in-one model and it should not be used in - # streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you - # want to use 16/-1 or any other streaming mode in `decoder_main`, - # please export onnx in the same config. - if arguments['left_chunks'] > 0: - assert arguments['chunk_size'] > 0 # -1/4 not supported - - export_encoder(model, arguments) - export_ctc(model, arguments) - export_decoder(model, arguments) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_onnx_gpu.py deleted file mode 100644 index 19f810c2804efdf74ff369f780fa3102e2e389fa..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/export_onnx_gpu.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import sys - -import torch -import yaml -import logging - -import torch.nn.functional as F -from wenet.utils.checkpoint import load_checkpoint -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import BaseEncoder -from wenet.utils.init_model import init_model -from wenet.utils.mask import make_pad_mask - -try: - import onnxruntime -except ImportError: - print('Please install onnxruntime-gpu!') - sys.exit(1) - -logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) - - -class Encoder(torch.nn.Module): - def __init__(self, - encoder: BaseEncoder, - ctc: CTC, - beam_size: int = 10): - super().__init__() - self.encoder = encoder - self.ctc = ctc - self.beam_size = beam_size - - def forward(self, speech: torch.Tensor, - speech_lengths: torch.Tensor,): - """Encoder - Args: - speech: (Batch, Length, ...) 
- speech_lengths: (Batch, ) - Returns: - encoder_out: B x T x F - encoder_out_lens: B - ctc_log_probs: B x T x V - beam_log_probs: B x T x beam_size - beam_log_probs_idx: B x T x beam_size - """ - encoder_out, encoder_mask = self.encoder(speech, - speech_lengths, - -1, -1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_log_probs = self.ctc.log_softmax(encoder_out) - encoder_out_lens = encoder_out_lens.int() - beam_log_probs, beam_log_probs_idx = torch.topk( - ctc_log_probs, self.beam_size, dim=2) - return encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx - - -class StreamingEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size, transformer=False): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.transformer = transformer - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoder.encoders): - xs, _, new_att_cache, new_cnn_cache = layer( - xs, masks, pos_emb, - att_cache=att_cache[i], - cnn_cache=cnn_cache[i]) - # shape(new_att_cache) is (B, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (B, hidden-dim, cache_t2) - r_att_cache.append( - new_att_cache[:, :, next_cache_start:, :].unsqueeze(1)) - if not self.transformer: - r_cnn_cache.append(new_cnn_cache.unsqueeze(1)) - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - if not self.transformer: - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingSqueezeformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - self.reduce_idx = model.encoder.reduce_idx - self.recover_idx = model.encoder.recover_idx - if self.reduce_idx is None: - self.time_reduce = None - else: - if self.recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - 
recover_exp)) - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - required_cache_size (int): cache size required for next chunk - compuation - > 0: actual cache size - <= 0: not allowed in streaming gpu encoder ` - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, b, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - elayers, cache_size = att_cache.size(0), att_cache.size(3) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = att_mask[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.encoder.preln(xs) - for i, layer in enumerate(self.encoder.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append( - (xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.encoder.time_reduction_layer( - xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - if self.encoder.pos_enc_layer_type == "rel_pos_repaired": - pos_emb = 
pos_emb[:, :xs.size(1) * 2 - 1, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.encoder.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(1) - cached_att = cached_att.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :].unsqueeze(1)) - r_cnn_cache.append(cached_cnn) - - chunk_out = xs - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class StreamingEfficientConformerEncoder(torch.nn.Module): - def __init__(self, model, required_cache_size, beam_size): - super().__init__() - self.ctc = model.ctc - self.subsampling_rate = model.encoder.embed.subsampling_rate - self.embed = model.encoder.embed - self.global_cmvn = model.encoder.global_cmvn - self.required_cache_size = required_cache_size - self.beam_size = beam_size - self.encoder = model.encoder - - # Efficient Conformer - self.stride_layer_idx = model.encoder.stride_layer_idx - self.stride = model.encoder.stride - self.num_blocks = model.encoder.num_blocks - self.cnn_module_kernel = model.encoder.cnn_module_kernel - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask): - """Streaming Encoder - Args: - chunk_xs (torch.Tensor): chunk input, with shape (b, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - chunk_lens (torch.Tensor): - offset (torch.Tensor): offset with shape (b, 1) - 1 is retained for triton deployment - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (b, elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * 
num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (b, elayers, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - cache_mask: (torch.Tensor): cache mask with shape (b, required_cache_size) - in a batch of request, each request may have different - history cache. Cache mask is used to indidate the effective - cache for each request - Returns: - torch.Tensor: log probabilities of ctc output and cutoff by beam size - with shape (b, chunk_size, beam) - torch.Tensor: index of top beam size probabilities for each timestep - with shape (b, chunk_size, beam) - torch.Tensor: output of current input xs, - with shape (b, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - same shape (b, elayers, head, cache_t1, d_k * 2) - as the original att_cache - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - torch.Tensor: new cache mask, with same shape as the original - cache mask - """ - offset = offset.squeeze(1) # (b, ) - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - T = chunk_xs.size(1) - chunk_mask = ~make_pad_mask(chunk_lens, T).unsqueeze(1) # (b, 1, T) - # B X 1 X T - chunk_mask = chunk_mask.to(chunk_xs.dtype) - # transpose batch & num_layers dim - # Shape(att_cache): (elayers, b, head, cache_t1, d_k * 2) - # Shape(cnn_cache): (elayers, b, outsize, cnn_kernel) - att_cache = torch.transpose(att_cache, 0, 1) - cnn_cache = torch.transpose(cnn_cache, 0, 1) - - # rewrite encoder.forward_chunk - # <---------forward_chunk START---------> - xs = self.global_cmvn(chunk_xs) - # chunk mask is important for batch inferencing since - # different sequence in a batch has different length - xs, pos_emb, chunk_mask = self.embed(xs, chunk_mask, offset) - cache_size = att_cache.size(3) # required cache size - masks = torch.cat((cache_mask, chunk_mask), dim=2) - att_mask = torch.cat((cache_mask, chunk_mask), dim=2) - index = offset - cache_size - - pos_emb = self.embed.position_encoding(index, cache_size + xs.size(1)) - pos_emb = pos_emb.to(dtype=xs.dtype) - - next_cache_start = -self.required_cache_size - r_cache_mask = masks[:, :, next_cache_start:] - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = chunk_mask.to(torch.bool) - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoder.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (b, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(3) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - # We propose to double the chunk_size. 
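Since the cache slicing that follows hinges on `calculate_downsampling_factor`, a tiny standalone illustration may help; the `stride_layer_idx` and `stride` values are made up for the example, not taken from any particular efficient-conformer config.

```python
# Standalone version of calculate_downsampling_factor from the class above.
stride_layer_idx = [3, 7]   # assumed: layers after which the time axis is strided
stride = [2, 2]             # assumed: stride applied at each of those layers

def downsampling_factor(i: int) -> int:
    factor = 1
    for idx, stride_idx in enumerate(stride_layer_idx):
        if i > stride_idx:
            factor *= stride[idx]
    return factor

print([downsampling_factor(i) for i in range(10)])
# [1, 1, 1, 1, 2, 2, 2, 2, 4, 4]
# Deeper layers cover the same time span with `factor` times fewer frames,
# which is why the cached attention keys/values are sliced with `::factor`
# and later re-expanded with repeat(...).flatten(...) further down.
```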
- att_cache_trunc = xs.size(1) + \ - att_cache.size(3) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i][:, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [batch, 1, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(1) # shape(1):layerID - - # use repeat_interleave to new_att_cache - # new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - new_att_cache = new_att_cache.unsqueeze(3). \ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :].unsqueeze(1)) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.encoder.normalize_before: - chunk_out = self.encoder.after_norm(xs) - else: - chunk_out = xs - - # shape of r_att_cache: (b, elayers, head, time2, outdim) - r_att_cache = torch.cat(r_att_cache, dim=1) # concat on layers idx - # shape of r_cnn_cache: (b, elayers, outdim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=1) # concat on layers - - # <---------forward_chunk END---------> - - log_ctc_probs = self.ctc.log_softmax(chunk_out) - log_probs, log_probs_idx = torch.topk(log_ctc_probs, - self.beam_size, - dim=2) - log_probs = log_probs.to(chunk_xs.dtype) - - r_offset = offset + chunk_out.shape[1] - # the below ops not supported in Tensorrt - # chunk_out_lens = torch.div(chunk_lens, subsampling_rate, - # rounding_mode='floor') - chunk_out_lens = chunk_lens // self.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - chunk_out_lens += 1 - r_offset = r_offset.unsqueeze(1) - - return log_probs, log_probs_idx, chunk_out, chunk_out_lens, \ - r_offset, r_att_cache, r_cnn_cache, r_cache_mask - - -class Decoder(torch.nn.Module): - def __init__(self, - decoder: TransformerDecoder, - ctc_weight: float = 0.5, - reverse_weight: float = 0.0, - beam_size: int = 10, - decoder_fastertransformer: bool = False): - super().__init__() - self.decoder = decoder - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - self.beam_size = beam_size - self.decoder_fastertransformer = decoder_fastertransformer - - def forward(self, - encoder_out: torch.Tensor, - encoder_lens: torch.Tensor, - hyps_pad_sos_eos: torch.Tensor, - hyps_lens_sos: torch.Tensor, - r_hyps_pad_sos_eos: torch.Tensor, - ctc_score: torch.Tensor): - """Encoder - Args: - encoder_out: B x T x F - encoder_lens: B - hyps_pad_sos_eos: B x beam x (T2+1), - hyps with sos & eos and padded by ignore id - hyps_lens_sos: B x beam, length for each hyp with sos - r_hyps_pad_sos_eos: B 
x beam x (T2+1), - reversed hyps with sos & eos and padded by ignore id - ctc_score: B x beam, ctc score for each hyp - Returns: - decoder_out: B x beam x T2 x V - r_decoder_out: B x beam x T2 x V - best_index: B - """ - B, T, F = encoder_out.shape - bz = self.beam_size - B2 = B * bz - encoder_out = encoder_out.repeat(1, bz, 1).view(B2, T, F) - encoder_mask = ~make_pad_mask(encoder_lens, T).unsqueeze(1) - encoder_mask = encoder_mask.repeat(1, bz, 1).view(B2, 1, T) - T2 = hyps_pad_sos_eos.shape[2] - 1 - hyps_pad = hyps_pad_sos_eos.view(B2, T2 + 1) - hyps_lens = hyps_lens_sos.view(B2,) - hyps_pad_sos = hyps_pad[:, :-1].contiguous() - hyps_pad_eos = hyps_pad[:, 1:].contiguous() - - r_hyps_pad = r_hyps_pad_sos_eos.view(B2, T2 + 1) - r_hyps_pad_sos = r_hyps_pad[:, :-1].contiguous() - r_hyps_pad_eos = r_hyps_pad[:, 1:].contiguous() - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad_sos, hyps_lens, r_hyps_pad_sos, - self.reverse_weight) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - V = decoder_out.shape[-1] - decoder_out = decoder_out.view(B2, T2, V) - mask = ~make_pad_mask(hyps_lens, T2) # B2 x T2 - # mask index, remove ignore id - index = torch.unsqueeze(hyps_pad_eos * mask, 2) - score = decoder_out.gather(2, index).squeeze(2) # B2 X T2 - # mask padded part - score = score * mask - decoder_out = decoder_out.view(B, bz, T2, V) - if self.reverse_weight > 0: - r_decoder_out = torch.nn.functional.log_softmax( - r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.view(B2, T2, V) - index = torch.unsqueeze(r_hyps_pad_eos * mask, 2) - r_score = r_decoder_out.gather(2, index).squeeze(2) - r_score = r_score * mask - score = score * (1 - self.reverse_weight) + \ - self.reverse_weight * r_score - r_decoder_out = r_decoder_out.view(B, bz, T2, V) - score = torch.sum(score, axis=1) # B2 - score = torch.reshape(score, (B, bz)) + self.ctc_weight * ctc_score - best_index = torch.argmax(score, dim=1) - if self.decoder_fastertransformer: - return decoder_out, best_index - else: - return best_index - - -def to_numpy(tensors): - out = [] - if type(tensors) == torch.tensor: - tensors = [tensors] - for tensor in tensors: - if tensor.requires_grad: - tensor = tensor.detach().cpu().numpy() - else: - tensor = tensor.cpu().numpy() - out.append(tensor) - return out - - -def test(xlist, blist, rtol=1e-3, atol=1e-5, tolerate_small_mismatch=True): - for a, b in zip(xlist, blist): - try: - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) - except AssertionError as error: - if tolerate_small_mismatch: - print(error) - else: - raise - - -def export_offline_encoder(model, configs, args, logger, encoder_onnx_path): - bz = 32 - seq_len = 100 - beam_size = args.beam_size - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint( - low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - dynamic_axes={ - 'speech': {0: 'B', 1: 'T'}, - 'speech_lengths': {0: 'B'}, - 'encoder_out': {0: 'B', 1: 'T_OUT'}, - 'encoder_out_lens': {0: 'B'}, - 'ctc_log_probs': {0: 'B', 1: 'T_OUT'}, - 'beam_log_probs': {0: 'B', 1: 
'T_OUT'}, - 'beam_log_probs_idx': {0: 'B', 1: 'T_OUT'}, - }, - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - -def export_offline_encoder_static(model, configs, args, logger, encoder_onnx_path): - bz = args.batch_size - seq_len = args.seq_len - beam_size = args.beam_size - - feature_size = configs["input_dim"] - - speech = torch.randn(bz, seq_len, feature_size, dtype=torch.float32) - speech_lens = torch.randint(low=10, high=seq_len, size=(bz,), dtype=torch.int32) - encoder = Encoder(model.encoder, model.ctc, beam_size) - encoder.eval() - import os - file_name, file_ext = os.path.splitext(encoder_onnx_path) - encoder_onnx_path = file_name + "_bs" + str(bz) + "_seq" + str(seq_len) + "_static.onnx" - - torch.onnx.export(encoder, - (speech, speech_lens), - encoder_onnx_path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names=['speech', 'speech_lengths'], - output_names=['encoder_out', 'encoder_out_lens', - 'ctc_log_probs', - 'beam_log_probs', 'beam_log_probs_idx'], - verbose=False - ) - - with torch.no_grad(): - o0, o1, o2, o3, o4 = encoder(speech, speech_lens) - - providers = ["CPUExecutionProvider"] - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=providers) - ort_inputs = {'speech': to_numpy(speech), - 'speech_lengths': to_numpy(speech_lens)} - ort_outs = ort_session.run(None, ort_inputs) - - # check encoder output - test(to_numpy([o0, o1, o2, o3, o4]), ort_outs) - logger.info("export offline onnx encoder succeed!") - onnx_config = {"beam_size": args.beam_size, - "reverse_weight": args.reverse_weight, - "ctc_weight": args.ctc_weight, - "fp16": args.fp16} - return onnx_config - - -def export_online_encoder(model, configs, args, logger, encoder_onnx_path): - decoding_chunk_size = args.decoding_chunk_size - subsampling = model.encoder.embed.subsampling_rate - context = model.encoder.embed.right_context + 1 - decoding_window = (decoding_chunk_size - 1) * subsampling + context - batch_size = 32 - audio_len = decoding_window - feature_size = configs["input_dim"] - output_size = configs["encoder_conf"]["output_size"] - num_layers = configs["encoder_conf"]["num_blocks"] - # in transformer the cnn module will not be available - transformer = False - cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1) - 1 - if not cnn_module_kernel: - transformer = True - num_decoding_left_chunks = args.num_decoding_left_chunks - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if configs['encoder'] == 'squeezeformer': - encoder = StreamingSqueezeformerEncoder( - model, required_cache_size, args.beam_size) - elif configs['encoder'] == 'efficientConformer': - encoder = StreamingEfficientConformerEncoder( - model, required_cache_size, args.beam_size) - else: - encoder = StreamingEncoder( - model, required_cache_size, args.beam_size, transformer) - encoder.eval() - - # begin to export encoder - chunk_xs = 
torch.randn(batch_size, audio_len, - feature_size, dtype=torch.float32) - chunk_lens = torch.ones(batch_size, dtype=torch.int32) * audio_len - - offset = torch.arange(0, batch_size).unsqueeze(1) - # (elayers, b, head, cache_t1, d_k * 2) - head = configs["encoder_conf"]["attention_heads"] - d_k = configs["encoder_conf"]["output_size"] // head - att_cache = torch.randn(batch_size, num_layers, head, - required_cache_size, d_k * 2, - dtype=torch.float32) - cnn_cache = torch.randn(batch_size, num_layers, output_size, - cnn_module_kernel, dtype=torch.float32) - - cache_mask = torch.ones( - batch_size, 1, required_cache_size, dtype=torch.float32) - input_names = ['chunk_xs', 'chunk_lens', 'offset', - 'att_cache', 'cnn_cache', 'cache_mask'] - output_names = ['log_probs', 'log_probs_idx', 'chunk_out', - 'chunk_out_lens', 'r_offset', 'r_att_cache', - 'r_cnn_cache', 'r_cache_mask'] - input_tensors = (chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - output_names.pop(6) - - all_names = input_names + output_names - dynamic_axes = {} - for name in all_names: - # only the first dimension is dynamic - # all other dimension is fixed - dynamic_axes[name] = {0: 'B'} - - torch.onnx.export(encoder, - input_tensors, - encoder_onnx_path, - export_params=True, - opset_version=14, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - verbose=False) - - with torch.no_grad(): - torch_outs = encoder(chunk_xs, chunk_lens, offset, - att_cache, cnn_cache, cache_mask) - if transformer: - torch_outs = list(torch_outs).pop(6) - ort_session = onnxruntime.InferenceSession(encoder_onnx_path, - providers=["CUDAExecutionProvider"]) - ort_inputs = {} - - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - if transformer: - del ort_inputs['cnn_cache'] - ort_outs = ort_session.run(None, ort_inputs) - test(to_numpy(torch_outs), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx streaming encoder succeed!") - onnx_config = { - "subsampling_rate": subsampling, - "context": context, - "decoding_chunk_size": decoding_chunk_size, - "num_decoding_left_chunks": num_decoding_left_chunks, - "beam_size": args.beam_size, - "fp16": args.fp16, - "feat_size": feature_size, - "decoding_window": decoding_window, - "cnn_module_kernel_cache": cnn_module_kernel - } - return onnx_config - - -def export_rescoring_decoder(model, configs, args, - logger, decoder_onnx_path, decoder_fastertransformer): - bz, seq_len = 32, 100 - beam_size = args.beam_size - decoder = Decoder(model.decoder, - model.ctc_weight, - model.reverse_weight, - beam_size, - decoder_fastertransformer) - decoder.eval() - - hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - hyps_lens_sos = torch.randint(low=3, high=seq_len, size=(bz, beam_size), - dtype=torch.int32) - r_hyps_pad_sos_eos = torch.randint( - low=3, high=1000, size=(bz, beam_size, seq_len)) - - output_size = configs["encoder_conf"]["output_size"] - encoder_out = torch.randn(bz, seq_len, output_size, dtype=torch.float32) - encoder_out_lens = torch.randint( - low=3, high=seq_len, size=(bz,), dtype=torch.int32) - ctc_score = torch.randn(bz, beam_size, dtype=torch.float32) - - input_names = ['encoder_out', 'encoder_out_lens', - 'hyps_pad_sos_eos', 'hyps_lens_sos', - 'r_hyps_pad_sos_eos', 'ctc_score'] - output_names = ['best_index'] - if decoder_fastertransformer: - output_names.insert(0, 'decoder_out') - - 
torch.onnx.export(decoder, - (encoder_out, encoder_out_lens, - hyps_pad_sos_eos, hyps_lens_sos, - r_hyps_pad_sos_eos, ctc_score), - decoder_onnx_path, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names=input_names, - output_names=output_names, - dynamic_axes={'encoder_out': {0: 'B', 1: 'T'}, - 'encoder_out_lens': {0: 'B'}, - 'hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'hyps_lens_sos': {0: 'B'}, - 'r_hyps_pad_sos_eos': {0: 'B', 2: 'T2'}, - 'ctc_score': {0: 'B'}, - 'best_index': {0: 'B'}, - }, - verbose=False - ) - with torch.no_grad(): - o0 = decoder(encoder_out, - encoder_out_lens, - hyps_pad_sos_eos, - hyps_lens_sos, - r_hyps_pad_sos_eos, - ctc_score) - providers = ["CUDAExecutionProvider"] - ort_session = onnxruntime.InferenceSession(decoder_onnx_path, - providers=providers) - - input_tensors = [encoder_out, encoder_out_lens, hyps_pad_sos_eos, - hyps_lens_sos, r_hyps_pad_sos_eos, ctc_score] - ort_inputs = {} - input_tensors = to_numpy(input_tensors) - for idx, name in enumerate(input_names): - ort_inputs[name] = input_tensors[idx] - - # if model.reverse weight == 0, - # the r_hyps_pad will be removed - # from the onnx decoder since it doen't play any role - if model.reverse_weight == 0: - del ort_inputs['r_hyps_pad_sos_eos'] - ort_outs = ort_session.run(None, ort_inputs) - - # check decoder output - if decoder_fastertransformer: - test(to_numpy(o0), ort_outs, rtol=1e-03, atol=1e-05) - else: - test(to_numpy([o0]), ort_outs, rtol=1e-03, atol=1e-05) - logger.info("export to onnx decoder succeed!") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='export x86_gpu model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--cmvn_file', required=False, default='', type=str, - help='global_cmvn file, default path is in config file') - parser.add_argument('--reverse_weight', default=-1.0, type=float, - required=False, - help='reverse weight for bitransformer,' + - 'default value is in config file') - parser.add_argument('--ctc_weight', default=-1.0, type=float, - required=False, - help='ctc weight, default value is in config file') - parser.add_argument('--batch_size', type=int, default=24, help='encoder batch size') - parser.add_argument('--seq_len', default=512, type=int, required=False, - help="Encoder seq_len") - parser.add_argument('--beam_size', default=10, type=int, required=False, - help="beam size would be ctc output size") - parser.add_argument('--output_onnx_dir', - default="onnx_model", - help='output onnx encoder and decoder directory') - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - # arguments for streaming encoder - parser.add_argument('--streaming', - action='store_true', - help="whether to export streaming encoder, default false") - parser.add_argument('--decoding_chunk_size', - default=16, - type=int, - required=False, - help='the decoding chunk size, <=0 is not supported') - parser.add_argument('--num_decoding_left_chunks', - default=5, - type=int, - required=False, - help="number of left chunks, <= 0 is not supported") - parser.add_argument('--decoder_fastertransformer', - action='store_true', - help='return decoder_out and best_index for ft') - args = parser.parse_args() - - torch.manual_seed(0) - torch.set_printoptions(precision=10) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if 
args.cmvn_file and os.path.exists(args.cmvn_file): - configs['cmvn_file'] = args.cmvn_file - if args.reverse_weight != -1.0 and 'reverse_weight' in configs['model_conf']: - configs['model_conf']['reverse_weight'] = args.reverse_weight - print("Update reverse weight to", args.reverse_weight) - if args.ctc_weight != -1: - print("Update ctc weight to ", args.ctc_weight) - configs['model_conf']['ctc_weight'] = args.ctc_weight - configs["encoder_conf"]["use_dynamic_chunk"] = False - - model = init_model(configs) - load_checkpoint(model, args.checkpoint) - model.eval() - - if not os.path.exists(args.output_onnx_dir): - os.mkdir(args.output_onnx_dir) - encoder_onnx_path = os.path.join(args.output_onnx_dir, 'encoder.onnx') - export_enc_func = None - if args.streaming: - assert args.decoding_chunk_size > 0 - assert args.num_decoding_left_chunks > 0 - export_enc_func = export_online_encoder - else: - export_enc_func = export_offline_encoder_static - - onnx_config = export_enc_func( - model, configs, args, logger, encoder_onnx_path) - - decoder_onnx_path = os.path.join(args.output_onnx_dir, 'decoder.onnx') - export_rescoring_decoder(model, configs, args, logger, - decoder_onnx_path, args.decoder_fastertransformer) - - if args.fp16: - try: - import onnxmltools - from onnxmltools.utils.float16_converter import convert_float_to_float16 - except ImportError: - print('Please install onnxmltools!') - sys.exit(1) - encoder_onnx_model = onnxmltools.utils.load_model(encoder_onnx_path) - encoder_onnx_model = convert_float_to_float16(encoder_onnx_model) - encoder_onnx_path = os.path.join( - args.output_onnx_dir, 'encoder_fp16.onnx') - onnxmltools.utils.save_model(encoder_onnx_model, encoder_onnx_path) - decoder_onnx_model = onnxmltools.utils.load_model(decoder_onnx_path) - decoder_onnx_model = convert_float_to_float16(decoder_onnx_model) - decoder_onnx_path = os.path.join( - args.output_onnx_dir, 'decoder_fp16.onnx') - onnxmltools.utils.save_model(decoder_onnx_model, decoder_onnx_path) - # dump configurations - - config_dir = os.path.join(args.output_onnx_dir, "config.yaml") - with open(config_dir, "w") as out: - yaml.dump(onnx_config, out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/recognize.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/recognize.py deleted file mode 100644 index 03b5dfd42cc098efacd20e08756a5300f6477cc1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/recognize.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
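The FP16 branch of the export script above hands the job to onnxmltools' float16 converter. As a rough standalone sketch of that step only (the `.onnx` paths below are placeholders for illustration, not files shipped with this patch), the same conversion can be run on any exported encoder/decoder pair:

```python
# Minimal sketch of the onnxmltools FP16 conversion used in the export script above.
# Paths are placeholders; adjust them to wherever export_onnx_gpu.py wrote its outputs.
import onnxmltools
from onnxmltools.utils.float16_converter import convert_float_to_float16


def to_fp16(src_path: str, dst_path: str) -> None:
    """Load an FP32 ONNX graph, cast its weights/activations to FP16, and save it."""
    model = onnxmltools.utils.load_model(src_path)
    model_fp16 = convert_float_to_float16(model)
    onnxmltools.utils.save_model(model_fp16, dst_path)


if __name__ == "__main__":
    to_fp16("onnx_model/encoder.onnx", "onnx_model/encoder_fp16.onnx")
    to_fp16("onnx_model/decoder.onnx", "onnx_model/decoder_fp16.onnx")
```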
- -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import load_checkpoint -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--checkpoint', required=True, help='checkpoint model') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. One symbol per line.") - parser.add_argument('--beam_size', - type=int, - default=10, - help='beam size for search') - parser.add_argument('--penalty', - type=float, - default=0.0, - help='length penalty') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=16, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'attention', 'ctc_greedy_search', - 'ctc_prefix_beam_search', 'attention_rescoring', - 'rnnt_greedy_search', 'rnnt_beam_search', - 'rnnt_beam_attn_rescoring', 'ctc_beam_td_attn_rescoring', - 'hlg_onebest', 'hlg_rescore' - ], - default='attention', - help='decoding mode') - - parser.add_argument('--search_ctc_weight', - type=float, - default=1.0, - help='ctc weight for nbest generation') - parser.add_argument('--search_transducer_weight', - type=float, - default=0.0, - help='transducer weight for nbest generation') - parser.add_argument('--ctc_weight', - type=float, - default=0.0, - help='ctc weight for rescoring weight in \ - attention rescoring decode mode \ - ctc weight for rescoring weight in \ - transducer attention rescore decode mode') - - parser.add_argument('--transducer_weight', - type=float, - default=0.0, - help='transducer weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--attn_weight', - type=float, - default=0.0, - help='attention weight for rescoring weight in transducer \ - attention rescore mode') - parser.add_argument('--decoding_chunk_size', - type=int, - default=-1, - help='''decoding chunk size, - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here''') - parser.add_argument('--num_decoding_left_chunks', - type=int, - default=-1, - help='number of left chunks for decoding') - parser.add_argument('--simulate_streaming', - action='store_true', - help='simulate streaming inference') - parser.add_argument('--reverse_weight', - type=float, - default=0.0, - help='''right to left weight for attention rescoring - decode mode''') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--connect_symbol', - default='', - type=str, - help='used to connect the output characters') - - parser.add_argument('--word', - default='', - type=str, - help='word file, only used for hlg decode') - parser.add_argument('--hlg', - default='', - type=str, - help='hlg file, only used for hlg decode') - parser.add_argument('--lm_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - parser.add_argument('--r_decoder_scale', - type=float, - default=0.0, - help='lm scale for hlg attention rescore decode') - - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring' - ] and args.batch_size > 1: - logging.fatal( - 'decoding mode {} must be running with batch_size == 1'.format( - args.mode)) - sys.exit(1) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_sub'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - if 'fbank_conf' in test_conf: - test_conf['fbank_conf']['dither'] = 0.0 - elif 'mfcc_conf' in test_conf: - test_conf['mfcc_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - - # Init asr model from configs - model = init_model(configs) - - # Load dict - char_dict = {v: k for k, v in symbol_table.items()} - eos = len(char_dict) - 1 - - load_checkpoint(model, args.checkpoint) - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - model.eval() - with torch.no_grad(), open(args.result_file, 'w') as fout: - for batch_idx, 
batch in enumerate(test_data_loader): - keys, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - if args.mode == 'attention': - hyps, _ = model.recognize( - feats, - feats_lengths, - beam_size=args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp.tolist() for hyp in hyps] - elif args.mode == 'ctc_greedy_search': - hyps, _ = model.ctc_greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_greedy_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.greedy_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - elif args.mode == 'rnnt_beam_search': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.beam_search( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.search_ctc_weight, - transducer_weight=args.search_transducer_weight) - elif args.mode == 'rnnt_beam_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight) - elif args.mode == 'ctc_beam_td_attn_rescoring': - assert (feats.size(0) == 1) - assert 'predictor' in configs - hyps = model.transducer_attention_rescoring( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - beam_size=args.beam_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - ctc_weight=args.ctc_weight, - transducer_weight=args.transducer_weight, - attn_weight=args.attn_weight, - reverse_weight=args.reverse_weight, - search_ctc_weight=args.search_ctc_weight, - search_transducer_weight=args.search_transducer_weight, - beam_search_type='ctc') - # ctc_prefix_beam_search and attention_rescoring only return one - # result in List[int], change it to List[List[int]] for compatible - # with other batch decoding mode - elif args.mode == 'ctc_prefix_beam_search': - assert (feats.size(0) == 1) - hyp, _ = model.ctc_prefix_beam_search( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming) - hyps = [hyp] - elif args.mode == 'attention_rescoring': - assert (feats.size(0) == 1) - hyp, _ = model.attention_rescoring( - feats, - feats_lengths, - args.beam_size, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - 
ctc_weight=args.ctc_weight, - simulate_streaming=args.simulate_streaming, - reverse_weight=args.reverse_weight) - hyps = [hyp] - elif args.mode == 'hlg_onebest': - hyps = model.hlg_onebest( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - elif args.mode == 'hlg_rescore': - hyps = model.hlg_rescore( - feats, - feats_lengths, - decoding_chunk_size=args.decoding_chunk_size, - num_decoding_left_chunks=args.num_decoding_left_chunks, - simulate_streaming=args.simulate_streaming, - lm_scale=args.lm_scale, - decoder_scale=args.decoder_scale, - r_decoder_scale=args.r_decoder_scale, - hlg=args.hlg, - word=args.word, - symbol_table=symbol_table) - for i, key in enumerate(keys): - content = [] - for w in hyps[i]: - if w == eos: - break - content.append(char_dict[w]) - logging.info('{} {}'.format(key, args.connect_symbol.join(content))) - fout.write('{} {}\n'.format(key, args.connect_symbol.join(content))) - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/recognize_onnx_gpu.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/recognize_onnx_gpu.py deleted file mode 100644 index 42f403bf55ac0bc51d9c754574d3479345948122..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/recognize_onnx_gpu.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is for testing exported onnx encoder and decoder from -export_onnx_gpu.py. The exported onnx models only support batch offline ASR inference. -It requires a python wrapped c++ ctc decoder. 
-Please install it by following: -https://github.com/Slyne/ctc_decoder.git -""" -from __future__ import print_function - -import argparse -import copy -import logging -import os -import sys - -import torch -import yaml -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.common import IGNORE_ID -from wenet.utils.file_utils import read_symbol_table -from wenet.utils.config import override_config - -import onnxruntime as rt -import multiprocessing -import numpy as np - -try: - from swig_decoders import map_batch, \ - ctc_beam_search_decoder_batch, \ - TrieVector, PathTrie -except ImportError: - print('Please install ctc decoders first by refering to\n' + - 'https://github.com/Slyne/ctc_decoder.git') - sys.exit(1) - - -def get_args(): - parser = argparse.ArgumentParser(description='recognize with your model') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--test_data', required=True, help='test data file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this rank, -1 for cpu') - parser.add_argument('--dict', required=True, help='dict file') - parser.add_argument('--encoder_onnx', required=True, help='encoder onnx file') - parser.add_argument('--decoder_onnx', required=True, help='decoder onnx file') - parser.add_argument('--result_file', required=True, help='asr result file') - parser.add_argument('--batch_size', - type=int, - default=32, - help='asr result file') - parser.add_argument('--mode', - choices=[ - 'ctc_greedy_search', 'ctc_prefix_beam_search', - 'attention_rescoring'], - default='attention_rescoring', - help='decoding mode') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument('--fp16', - action='store_true', - help='whether to export fp16 model, default false') - args = parser.parse_args() - print(args) - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - reverse_weight = configs["model_conf"].get("reverse_weight", 0.0) - symbol_table = read_symbol_table(args.dict) - test_conf = copy.deepcopy(configs['dataset_conf']) - test_conf['filter_conf']['max_length'] = 102400 - test_conf['filter_conf']['min_length'] = 0 - test_conf['filter_conf']['token_max_length'] = 102400 - test_conf['filter_conf']['token_min_length'] = 0 - test_conf['filter_conf']['max_output_input_ratio'] = 102400 - test_conf['filter_conf']['min_output_input_ratio'] = 0 - test_conf['speed_perturb'] = False - test_conf['spec_aug'] = False - test_conf['spec_trim'] = False - test_conf['shuffle'] = False - test_conf['sort'] = False - test_conf['fbank_conf']['dither'] = 0.0 - test_conf['batch_conf']['batch_type'] = "static" - test_conf['batch_conf']['batch_size'] = args.batch_size - - test_dataset = Dataset(args.data_type, - args.test_data, - symbol_table, - test_conf, - args.bpe_model, - partition=False) - - test_data_loader = DataLoader(test_dataset, batch_size=None, num_workers=0) - 
- # Init asr model from configs - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - if use_cuda: - EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - else: - EP_list = ['CPUExecutionProvider'] - - encoder_ort_session = rt.InferenceSession(args.encoder_onnx, providers=EP_list) - decoder_ort_session = None - if args.mode == "attention_rescoring": - decoder_ort_session = rt.InferenceSession(args.decoder_onnx, providers=EP_list) - - # Load dict - vocabulary = [] - char_dict = {} - with open(args.dict, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - char_dict[int(arr[1])] = arr[0] - vocabulary.append(arr[0]) - eos = sos = len(char_dict) - 1 - with torch.no_grad(), open(args.result_file, 'w') as fout: - for _, batch in enumerate(test_data_loader): - keys, feats, _, feats_lengths, _ = batch - feats, feats_lengths = feats.numpy(), feats_lengths.numpy() - if args.fp16: - feats = feats.astype(np.float16) - ort_inputs = { - encoder_ort_session.get_inputs()[0].name: feats, - encoder_ort_session.get_inputs()[1].name: feats_lengths} - ort_outs = encoder_ort_session.run(None, ort_inputs) - encoder_out, encoder_out_lens, ctc_log_probs, \ - beam_log_probs, beam_log_probs_idx = ort_outs - beam_size = beam_log_probs.shape[-1] - batch_size = beam_log_probs.shape[0] - num_processes = min(multiprocessing.cpu_count(), batch_size) - if args.mode == 'ctc_greedy_search': - if beam_size != 1: - log_probs_idx = beam_log_probs_idx[:, :, 0] - batch_sents = [] - for idx, seq in enumerate(log_probs_idx): - batch_sents.append(seq[0:encoder_out_lens[idx]].tolist()) - hyps = map_batch(batch_sents, vocabulary, num_processes, - True, 0) - elif args.mode in ('ctc_prefix_beam_search', "attention_rescoring"): - batch_log_probs_seq_list = beam_log_probs.tolist() - batch_log_probs_idx_list = beam_log_probs_idx.tolist() - batch_len_list = encoder_out_lens.tolist() - batch_log_probs_seq = [] - batch_log_probs_ids = [] - batch_start = [] # only effective in streaming deployment - batch_root = TrieVector() - root_dict = {} - for i in range(len(batch_len_list)): - num_sent = batch_len_list[i] - batch_log_probs_seq.append( - batch_log_probs_seq_list[i][0:num_sent]) - batch_log_probs_ids.append( - batch_log_probs_idx_list[i][0:num_sent]) - root_dict[i] = PathTrie() - batch_root.append(root_dict[i]) - batch_start.append(True) - score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq, - batch_log_probs_ids, - batch_root, - batch_start, - beam_size, - num_processes, - 0, -2, 0.99999) - if args.mode == 'ctc_prefix_beam_search': - hyps = [] - for cand_hyps in score_hyps: - hyps.append(cand_hyps[0][1]) - hyps = map_batch(hyps, vocabulary, num_processes, False, 0) - if args.mode == 'attention_rescoring': - ctc_score, all_hyps = [], [] - max_len = 0 - for hyps in score_hyps: - cur_len = len(hyps) - if len(hyps) < beam_size: - hyps += (beam_size - cur_len) * [(-float("INF"), (0,))] - cur_ctc_score = [] - for hyp in hyps: - cur_ctc_score.append(hyp[0]) - all_hyps.append(list(hyp[1])) - if len(hyp[1]) > max_len: - max_len = len(hyp[1]) - ctc_score.append(cur_ctc_score) - if args.fp16: - ctc_score = np.array(ctc_score, dtype=np.float16) - else: - ctc_score = np.array(ctc_score, dtype=np.float32) - hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - r_hyps_pad_sos_eos = np.ones( - (batch_size, beam_size, max_len + 2), dtype=np.int64) * IGNORE_ID - hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32) - k = 0 - for i in 
range(batch_size): - for j in range(beam_size): - cand = all_hyps[k] - l = len(cand) + 2 - hyps_pad_sos_eos[i][j][0:l] = [sos] + cand + [eos] - r_hyps_pad_sos_eos[i][j][0:l] = [sos] + cand[::-1] + [eos] - hyps_lens_sos[i][j] = len(cand) + 1 - k += 1 - decoder_ort_inputs = { - decoder_ort_session.get_inputs()[0].name: encoder_out, - decoder_ort_session.get_inputs()[1].name: encoder_out_lens, - decoder_ort_session.get_inputs()[2].name: hyps_pad_sos_eos, - decoder_ort_session.get_inputs()[3].name: hyps_lens_sos, - decoder_ort_session.get_inputs()[-1].name: ctc_score} - if reverse_weight > 0: - r_hyps_pad_sos_eos_name = decoder_ort_session.get_inputs()[4].name - decoder_ort_inputs[r_hyps_pad_sos_eos_name] = r_hyps_pad_sos_eos - best_index = decoder_ort_session.run(None, decoder_ort_inputs)[0] - best_sents = [] - k = 0 - for idx in best_index: - cur_best_sent = all_hyps[k: k + beam_size][idx] - best_sents.append(cur_best_sent) - k += beam_size - hyps = map_batch(best_sents, vocabulary, num_processes) - - for i, key in enumerate(keys): - content = hyps[i] - logging.info('{} {}'.format(key, content)) - fout.write('{} {}\n'.format(key, content)) - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/train.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/train.py deleted file mode 100644 index 70799b60790b31d73911770891f519f5473e2f4b..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/bin/train.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
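The attention-rescoring branch above packs the n-best CTC hypotheses into the fixed-shape decoder inputs `hyps_pad_sos_eos`, `r_hyps_pad_sos_eos`, and `hyps_lens_sos`. A condensed sketch of just that packing step is shown below, assuming wenet's `IGNORE_ID` of -1 and `sos == eos == vocab_size - 1` (in the real script these are read from the dict file):

```python
# Sketch of the hypothesis-packing step from the attention_rescoring path above.
import numpy as np

IGNORE_ID = -1  # assumed to match wenet.utils.common.IGNORE_ID


def pack_hyps(all_hyps, batch_size, beam_size, sos, eos):
    """Pack beam candidates (a flat list of batch_size * beam_size token lists)
    into the decoder.onnx inputs, mirroring the loop in the deleted script."""
    max_len = max(len(c) for c in all_hyps)
    hyps_pad_sos_eos = np.full((batch_size, beam_size, max_len + 2), IGNORE_ID, dtype=np.int64)
    r_hyps_pad_sos_eos = np.full_like(hyps_pad_sos_eos, IGNORE_ID)
    hyps_lens_sos = np.ones((batch_size, beam_size), dtype=np.int32)
    k = 0
    for i in range(batch_size):
        for j in range(beam_size):
            cand = list(all_hyps[k])
            l = len(cand) + 2
            hyps_pad_sos_eos[i, j, :l] = [sos] + cand + [eos]
            # Reversed token order feeds the right-to-left decoder when reverse_weight > 0.
            r_hyps_pad_sos_eos[i, j, :l] = [sos] + cand[::-1] + [eos]
            hyps_lens_sos[i, j] = len(cand) + 1
            k += 1
    return hyps_pad_sos_eos, r_hyps_pad_sos_eos, hyps_lens_sos


# Toy usage: one utterance with a beam of two candidate token sequences.
pads = pack_hyps([[12, 7, 7], [12, 7]], batch_size=1, beam_size=2, sos=4232, eos=4232)
```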
- -from __future__ import print_function - -import argparse -import copy -import logging -import os - -import torch -import torch.distributed as dist -import torch.optim as optim -import yaml -from tensorboardX import SummaryWriter -from torch.utils.data import DataLoader - -from wenet.dataset.dataset import Dataset -from wenet.utils.checkpoint import (load_checkpoint, save_checkpoint, - load_trained_modules) -from wenet.utils.executor import Executor -from wenet.utils.file_utils import read_symbol_table, read_non_lang_symbols -from wenet.utils.scheduler import WarmupLR, NoamHoldAnnealing -from wenet.utils.config import override_config -from wenet.utils.init_model import init_model - -def get_args(): - parser = argparse.ArgumentParser(description='training your network') - parser.add_argument('--config', required=True, help='config file') - parser.add_argument('--data_type', - default='raw', - choices=['raw', 'shard'], - help='train and cv data type') - parser.add_argument('--train_data', required=True, help='train data file') - parser.add_argument('--cv_data', required=True, help='cv data file') - parser.add_argument('--gpu', - type=int, - default=-1, - help='gpu id for this local rank, -1 for cpu') - parser.add_argument('--model_dir', required=True, help='save model dir') - parser.add_argument('--checkpoint', help='checkpoint model') - parser.add_argument('--tensorboard_dir', - default='tensorboard', - help='tensorboard log dir') - parser.add_argument('--ddp.rank', - dest='rank', - default=0, - type=int, - help='global rank for distributed training') - parser.add_argument('--ddp.world_size', - dest='world_size', - default=-1, - type=int, - help='''number of total processes/gpus for - distributed training''') - parser.add_argument('--ddp.dist_backend', - dest='dist_backend', - default='nccl', - choices=['nccl', 'gloo'], - help='distributed backend') - parser.add_argument('--ddp.init_method', - dest='init_method', - default=None, - help='ddp init method') - parser.add_argument('--num_workers', - default=0, - type=int, - help='num of subprocess workers for reading') - parser.add_argument('--pin_memory', - action='store_true', - default=False, - help='Use pinned memory buffers used for reading') - parser.add_argument('--use_amp', - action='store_true', - default=False, - help='Use automatic mixed precision training') - parser.add_argument('--fp16_grad_sync', - action='store_true', - default=False, - help='Use fp16 gradient sync for ddp') - parser.add_argument('--cmvn', default=None, help='global cmvn file') - parser.add_argument('--symbol_table', - required=True, - help='model unit symbol table for training') - parser.add_argument("--non_lang_syms", - help="non-linguistic symbol file. 
One symbol per line.") - parser.add_argument('--prefetch', - default=100, - type=int, - help='prefetch number') - parser.add_argument('--bpe_model', - default=None, - type=str, - help='bpe model for english part') - parser.add_argument('--override_config', - action='append', - default=[], - help="override yaml config") - parser.add_argument("--enc_init", - default=None, - type=str, - help="Pre-trained model to initialize encoder") - parser.add_argument("--enc_init_mods", - default="encoder.", - type=lambda s: [str(mod) for mod in s.split(",") if s != ""], - help="List of encoder modules \ - to initialize ,separated by a comma") - - - args = parser.parse_args() - return args - - -def main(): - args = get_args() - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # Set random seed - torch.manual_seed(777) - with open(args.config, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - if len(args.override_config) > 0: - configs = override_config(configs, args.override_config) - - distributed = args.world_size > 1 - if distributed: - logging.info('training on multiple gpus, this gpu {}'.format(args.gpu)) - dist.init_process_group(args.dist_backend, - init_method=args.init_method, - world_size=args.world_size, - rank=args.rank) - - symbol_table = read_symbol_table(args.symbol_table) - - train_conf = configs['dataset_conf'] - cv_conf = copy.deepcopy(train_conf) - cv_conf['speed_perturb'] = False - cv_conf['spec_aug'] = False - cv_conf['spec_sub'] = False - cv_conf['spec_trim'] = False - cv_conf['shuffle'] = False - non_lang_syms = read_non_lang_symbols(args.non_lang_syms) - - train_dataset = Dataset(args.data_type, args.train_data, symbol_table, - train_conf, args.bpe_model, non_lang_syms, True) - cv_dataset = Dataset(args.data_type, - args.cv_data, - symbol_table, - cv_conf, - args.bpe_model, - non_lang_syms, - partition=False) - - train_data_loader = DataLoader(train_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - cv_data_loader = DataLoader(cv_dataset, - batch_size=None, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - prefetch_factor=args.prefetch) - - if 'fbank_conf' in configs['dataset_conf']: - input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins'] - else: - input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins'] - vocab_size = len(symbol_table) - - # Save configs to model_dir/train.yaml for inference and export - configs['input_dim'] = input_dim - configs['output_dim'] = vocab_size - configs['cmvn_file'] = args.cmvn - configs['is_json_cmvn'] = True - if args.rank == 0: - saved_config_path = os.path.join(args.model_dir, 'train.yaml') - with open(saved_config_path, 'w') as fout: - data = yaml.dump(configs) - fout.write(data) - - # Init asr model from configs - model = init_model(configs) - print(model) - num_params = sum(p.numel() for p in model.parameters()) - print('the number of model params: {:,d}'.format(num_params)) - - # !!!IMPORTANT!!! 
- # Try to export the model by script, if fails, we should refine - # the code to satisfy the script export requirements - if args.rank == 0: - script_model = torch.jit.script(model) - script_model.save(os.path.join(args.model_dir, 'init.zip')) - executor = Executor() - # If specify checkpoint, load some info from checkpoint - if args.checkpoint is not None: - infos = load_checkpoint(model, args.checkpoint) - elif args.enc_init is not None: - logging.info('load pretrained encoders: {}'.format(args.enc_init)) - infos = load_trained_modules(model, args) - else: - infos = {} - start_epoch = infos.get('epoch', -1) + 1 - cv_loss = infos.get('cv_loss', 0.0) - step = infos.get('step', -1) - - num_epochs = configs.get('max_epoch', 100) - model_dir = args.model_dir - writer = None - if args.rank == 0: - os.makedirs(model_dir, exist_ok=True) - exp_id = os.path.basename(model_dir) - writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) - - if distributed: - assert (torch.cuda.is_available()) - # cuda model is required for nn.parallel.DistributedDataParallel - model.cuda() - model = torch.nn.parallel.DistributedDataParallel( - model, find_unused_parameters=True) - device = torch.device("cuda") - if args.fp16_grad_sync: - from torch.distributed.algorithms.ddp_comm_hooks import ( - default as comm_hooks, - ) - model.register_comm_hook( - state=None, hook=comm_hooks.fp16_compress_hook - ) - else: - use_cuda = args.gpu >= 0 and torch.cuda.is_available() - device = torch.device('cuda' if use_cuda else 'cpu') - model = model.to(device) - - if configs['optim'] == 'adam': - optimizer = optim.Adam(model.parameters(), **configs['optim_conf']) - elif configs['optim'] == 'adamw': - optimizer = optim.AdamW(model.parameters(), **configs['optim_conf']) - else: - raise ValueError("unknown optimizer: " + configs['optim']) - if configs['scheduler'] == 'warmuplr': - scheduler = WarmupLR(optimizer, **configs['scheduler_conf']) - elif configs['scheduler'] == 'NoamHoldAnnealing': - scheduler = NoamHoldAnnealing(optimizer, **configs['scheduler_conf']) - else: - raise ValueError("unknown scheduler: " + configs['scheduler']) - - final_epoch = None - configs['rank'] = args.rank - configs['is_distributed'] = distributed - configs['use_amp'] = args.use_amp - if start_epoch == 0 and args.rank == 0: - save_model_path = os.path.join(model_dir, 'init.pt') - save_checkpoint(model, save_model_path) - - # Start training loop - executor.step = step - scheduler.set_step(step) - # used for pytorch amp mixed precision training - scaler = None - if args.use_amp: - scaler = torch.cuda.amp.GradScaler() - - for epoch in range(start_epoch, num_epochs): - train_dataset.set_epoch(epoch) - configs['epoch'] = epoch - lr = optimizer.param_groups[0]['lr'] - logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr)) - executor.train(model, optimizer, scheduler, train_data_loader, device, - writer, configs, scaler) - total_loss, num_seen_utts = executor.cv(model, cv_data_loader, device, - configs) - cv_loss = total_loss / num_seen_utts - - logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss)) - if args.rank == 0: - save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch)) - save_checkpoint( - model, save_model_path, { - 'epoch': epoch, - 'lr': lr, - 'cv_loss': cv_loss, - 'step': executor.step - }) - writer.add_scalar('epoch/cv_loss', cv_loss, epoch) - writer.add_scalar('epoch/lr', lr, epoch) - final_epoch = epoch - - if final_epoch is not None and args.rank == 0: - final_model_path = os.path.join(model_dir, 'final.pt') 
- os.remove(final_model_path) if os.path.exists(final_model_path) else None - os.symlink('{}.pt'.format(final_epoch), final_model_path) - writer.close() - - -if __name__ == '__main__': - main() diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/dataset.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/dataset.py deleted file mode 100644 index 6d799b5b5aea2d34546484b3fed5d45e2d5b6aa6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/dataset.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset - -import wenet.dataset.processor as processor -from wenet.utils.file_utils import read_lists - - -class Processor(IterableDataset): - def __init__(self, source, f, *args, **kw): - assert callable(f) - self.source = source - self.f = f - self.args = args - self.kw = kw - - def set_epoch(self, epoch): - self.source.set_epoch(epoch) - - def __iter__(self): - """ Return an iterator over the source dataset processed by the - given processor. - """ - assert self.source is not None - assert callable(self.f) - return self.f(iter(self.source), *self.args, **self.kw) - - def apply(self, f): - assert callable(f) - return Processor(self, f, *self.args, **self.kw) - - -class DistributedSampler: - def __init__(self, shuffle=True, partition=True): - self.epoch = -1 - self.update() - self.shuffle = shuffle - self.partition = partition - - def update(self): - assert dist.is_available() - if dist.is_initialized(): - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() - else: - self.rank = 0 - self.world_size = 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - self.worker_id = 0 - self.num_workers = 1 - else: - self.worker_id = worker_info.id - self.num_workers = worker_info.num_workers - return dict(rank=self.rank, - world_size=self.world_size, - worker_id=self.worker_id, - num_workers=self.num_workers) - - def set_epoch(self, epoch): - self.epoch = epoch - - def sample(self, data): - """ Sample data according to rank/world_size/num_workers - - Args: - data(List): input data list - - Returns: - List: data list after sample - """ - data = list(range(len(data))) - # TODO(Binbin Zhang): fix this - # We can not handle uneven data for CV on DDP, so we don't - # sample data by rank, that means every GPU gets the same - # and all the CV data - if self.partition: - if self.shuffle: - random.Random(self.epoch).shuffle(data) - data = data[self.rank::self.world_size] - data = data[self.worker_id::self.num_workers] - return data - - -class DataList(IterableDataset): - def __init__(self, lists, shuffle=True, partition=True): - self.lists = lists - self.sampler = DistributedSampler(shuffle, partition) - - def set_epoch(self, epoch): - self.sampler.set_epoch(epoch) - - def 
__iter__(self): - sampler_info = self.sampler.update() - indexes = self.sampler.sample(self.lists) - for index in indexes: - # yield dict(src=src) - data = dict(src=self.lists[index]) - data.update(sampler_info) - yield data - - -def Dataset(data_type, - data_list_file, - symbol_table, - conf, - bpe_model=None, - non_lang_syms=None, - partition=True): - """ Construct dataset from arguments - - We have two shuffle stage in the Dataset. The first is global - shuffle at shards tar/raw file level. The second is global shuffle - at training samples level. - - Args: - data_type(str): raw/shard - bpe_model(str): model for english bpe part - partition(bool): whether to do data partition in terms of rank - """ - assert data_type in ['raw', 'shard'] - lists = read_lists(data_list_file) - shuffle = conf.get('shuffle', True) - dataset = DataList(lists, shuffle=shuffle, partition=partition) - if data_type == 'shard': - dataset = Processor(dataset, processor.url_opener) - dataset = Processor(dataset, processor.tar_file_and_group) - else: - dataset = Processor(dataset, processor.parse_raw) - - dataset = Processor(dataset, processor.tokenize, symbol_table, bpe_model, - non_lang_syms, conf.get('split_with_space', False)) - filter_conf = conf.get('filter_conf', {}) - dataset = Processor(dataset, processor.filter, **filter_conf) - - resample_conf = conf.get('resample_conf', {}) - dataset = Processor(dataset, processor.resample, **resample_conf) - - speed_perturb = conf.get('speed_perturb', False) - if speed_perturb: - dataset = Processor(dataset, processor.speed_perturb) - - feats_type = conf.get('feats_type', 'fbank') - assert feats_type in ['fbank', 'mfcc'] - if feats_type == 'fbank': - fbank_conf = conf.get('fbank_conf', {}) - dataset = Processor(dataset, processor.compute_fbank, **fbank_conf) - elif feats_type == 'mfcc': - mfcc_conf = conf.get('mfcc_conf', {}) - dataset = Processor(dataset, processor.compute_mfcc, **mfcc_conf) - - spec_aug = conf.get('spec_aug', True) - spec_sub = conf.get('spec_sub', False) - spec_trim = conf.get('spec_trim', False) - if spec_aug: - spec_aug_conf = conf.get('spec_aug_conf', {}) - dataset = Processor(dataset, processor.spec_aug, **spec_aug_conf) - if spec_sub: - spec_sub_conf = conf.get('spec_sub_conf', {}) - dataset = Processor(dataset, processor.spec_sub, **spec_sub_conf) - if spec_trim: - spec_trim_conf = conf.get('spec_trim_conf', {}) - dataset = Processor(dataset, processor.spec_trim, **spec_trim_conf) - - if shuffle: - shuffle_conf = conf.get('shuffle_conf', {}) - dataset = Processor(dataset, processor.shuffle, **shuffle_conf) - - sort = conf.get('sort', True) - if sort: - sort_conf = conf.get('sort_conf', {}) - dataset = Processor(dataset, processor.sort, **sort_conf) - - batch_conf = conf.get('batch_conf', {}) - dataset = Processor(dataset, processor.batch, **batch_conf) - dataset = Processor(dataset, processor.padding) - return dataset diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/kaldi_io.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/kaldi_io.py deleted file mode 100644 index c9bef293c93d882147bb5b738e1fc49a7a19a484..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/kaldi_io.py +++ /dev/null @@ -1,666 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2016 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -import numpy as np -import sys, os, re, 
gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! To change run python with 'export KALDI_ROOT=/some_dir python' - os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. - """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.4/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' 
% (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. - """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int_scp(file_or_fd): - """ generator(key,vec) = read_vec_int_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_int_scp(file): - ... 
- - Read scp to a 'dictionary': - d = { key:vec for key,mat in kaldi_io.read_vec_int_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_int(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! - # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. 
- - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - # Data type, - header = fd.read(3).decode() - if header == 'FV ': sample_size = 4 # floats - elif header == 'DV ': sample_size = 8 # doubles - else: raise UnknownVectorHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimension, - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. 
- - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! - if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Mapping for percentiles in col-headers, - def uint16_to_float(value, min, range): - return np.float32(min + range * 1.52590218966964e-05 * value) - - # Mapping for matrix elements, - def uint8_to_float_v2(vec, p0, p25, p75, p100): - # Split the vector by masks, - mask_0_64 = (vec <= 64); - mask_193_255 = (vec > 192); - mask_65_192 = (~(mask_0_64 | mask_193_255)); - # Sanity check (useful but slow...), - # assert(len(vec) == np.sum(np.hstack([mask_0_64,mask_65_192,mask_193_255]))) - # assert(len(vec) == np.sum(np.any([mask_0_64,mask_65_192,mask_193_255], axis=0))) - # Build the float vector, - ans = np.empty(len(vec), dtype='float32') - ans[mask_0_64] = p0 + (p25 - p0) / 64. * vec[mask_0_64] - ans[mask_65_192] = p25 + (p75 - p25) / 128. * (vec[mask_65_192] - 64) - ans[mask_193_255] = p75 + (p100 - p75) / 63. 
* (vec[mask_193_255] - 192) - return ans - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.empty((cols,rows), dtype='float32') - for i, col_header in enumerate(col_headers): - col_header_flt = [ uint16_to_float(percentile, globmin, globrange) for percentile in col_header ] - mat[i] = uint8_to_float_v2(data[i], *col_header_flt) - - return mat.T # transpose! col-major -> row-major, - -def write_ark_scp(key, mat, ark_fout, scp_out): - mat_offset = write_mat(ark_fout, mat, key) - scp_line = '{}\t{}:{}'.format(key, ark_fout.name, mat_offset) - scp_out.write(scp_line) - scp_out.write('\n') - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. - - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - mat_offset = 0 - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - mat_offset = fd.tell() - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - return mat_offset - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. 
- - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. - """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). -# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. 
- """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/processor.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/processor.py deleted file mode 100644 index b4bd07ce674eb3288cd1b13a09085eec48d40845..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/processor.py +++ /dev/null @@ -1,660 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import json -import random -import re -import tarfile -from subprocess import PIPE, Popen -from urllib.parse import urlparse - -import torch -import torchaudio -import torchaudio.compliance.kaldi as kaldi -from torch.nn.utils.rnn import pad_sequence - -AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) - - -def url_opener(data): - """ Give url or local file, return file descriptor - Inplace operation. 
- - Args: - data(Iterable[str]): url or local file list - - Returns: - Iterable[{src, stream}] - """ - for sample in data: - assert 'src' in sample - # TODO(Binbin Zhang): support HTTP - url = sample['src'] - try: - pr = urlparse(url) - # local file - if pr.scheme == '' or pr.scheme == 'file': - stream = open(url, 'rb') - # network file, such as HTTP(HDFS/OSS/S3)/HTTPS/SCP - else: - cmd = f'wget -q -O - {url}' - process = Popen(cmd, shell=True, stdout=PIPE) - sample.update(process=process) - stream = process.stdout - sample.update(stream=stream) - yield sample - except Exception as ex: - logging.warning('Failed to open {}'.format(url)) - - -def tar_file_and_group(data): - """ Expand a stream of open tar files into a stream of tar file contents. - And groups the file with same prefix - - Args: - data: Iterable[{src, stream}] - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'stream' in sample - stream = tarfile.open(fileobj=sample['stream'], mode="r|*") - prev_prefix = None - example = {} - valid = True - for tarinfo in stream: - name = tarinfo.name - pos = name.rfind('.') - assert pos > 0 - prefix, postfix = name[:pos], name[pos + 1:] - if prev_prefix is not None and prefix != prev_prefix: - example['key'] = prev_prefix - if valid: - yield example - example = {} - valid = True - with stream.extractfile(tarinfo) as file_obj: - try: - if postfix == 'txt': - example['txt'] = file_obj.read().decode('utf8').strip() - elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = torchaudio.load(file_obj) - example['wav'] = waveform - example['sample_rate'] = sample_rate - else: - example[postfix] = file_obj.read() - except Exception as ex: - valid = False - logging.warning('error to parse {}'.format(name)) - prev_prefix = prefix - if prev_prefix is not None: - example['key'] = prev_prefix - yield example - stream.close() - if 'process' in sample: - sample['process'].communicate() - sample['stream'].close() - - -def parse_raw(data): - """ Parse key/wav/txt from json line - - Args: - data: Iterable[str], str is a json line has key/wav/txt - - Returns: - Iterable[{key, wav, txt, sample_rate}] - """ - for sample in data: - assert 'src' in sample - json_line = sample['src'] - obj = json.loads(json_line) - assert 'key' in obj - assert 'wav' in obj - assert 'txt' in obj - key = obj['key'] - wav_file = obj['wav'] - txt = obj['txt'] - try: - if 'start' in obj: - assert 'end' in obj - sample_rate = torchaudio.backend.sox_io_backend.info( - wav_file).sample_rate - start_frame = int(obj['start'] * sample_rate) - end_frame = int(obj['end'] * sample_rate) - waveform, _ = torchaudio.backend.sox_io_backend.load( - filepath=wav_file, - num_frames=end_frame - start_frame, - frame_offset=start_frame) - else: - waveform, sample_rate = torchaudio.load(wav_file) - example = dict(key=key, - txt=txt, - wav=waveform, - sample_rate=sample_rate) - yield example - except Exception as ex: - logging.warning('Failed to read {}'.format(wav_file)) - - -def filter(data, - max_length=10240, - min_length=10, - token_max_length=200, - token_min_length=1, - min_output_input_ratio=0.0005, - max_output_input_ratio=1): - """ Filter sample according to feature and label length - Inplace operation. 
- - Args:: - data: Iterable[{key, wav, label, sample_rate}] - max_length: drop utterance which is greater than max_length(10ms) - min_length: drop utterance which is less than min_length(10ms) - token_max_length: drop utterance which is greater than - token_max_length, especially when use char unit for - english modeling - token_min_length: drop utterance which is - less than token_max_length - min_output_input_ratio: minimal ration of - token_length / feats_length(10ms) - max_output_input_ratio: maximum ration of - token_length / feats_length(10ms) - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'label' in sample - # sample['wav'] is torch.Tensor, we have 100 frames every second - num_frames = sample['wav'].size(1) / sample['sample_rate'] * 100 - if num_frames < min_length: - continue - if num_frames > max_length: - continue - if len(sample['label']) < token_min_length: - continue - if len(sample['label']) > token_max_length: - continue - if num_frames != 0: - if len(sample['label']) / num_frames < min_output_input_ratio: - continue - if len(sample['label']) / num_frames > max_output_input_ratio: - continue - yield sample - - -def resample(data, resample_rate=16000): - """ Resample data. - Inplace operation. - - Args: - data: Iterable[{key, wav, label, sample_rate}] - resample_rate: target resample rate - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - if sample_rate != resample_rate: - sample['sample_rate'] = resample_rate - sample['wav'] = torchaudio.transforms.Resample( - orig_freq=sample_rate, new_freq=resample_rate)(waveform) - yield sample - - -def speed_perturb(data, speeds=None): - """ Apply speed perturb to the data. - Inplace operation. 
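The `filter` stage above measures utterance length in 10 ms frames (`wav.size(1) / sample_rate * 100`) and compares the token/frame ratio against the thresholds listed in its docstring. A quick check of that arithmetic with illustrative numbers:

```python
# Illustrative values only: 2 s of 16 kHz audio and a hypothetical 10-token transcript.
import torch

wav = torch.zeros(1, 32000)
sample_rate = 16000
num_frames = wav.size(1) / sample_rate * 100   # 100 frames per second
label = [7] * 10

print(num_frames, len(label) / num_frames)     # 200.0 0.05 -> passes the default thresholds
```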
- - Args: - data: Iterable[{key, wav, label, sample_rate}] - speeds(List[float]): optional speed - - Returns: - Iterable[{key, wav, label, sample_rate}] - """ - if speeds is None: - speeds = [0.9, 1.0, 1.1] - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - speed = random.choice(speeds) - if speed != 1.0: - wav, _ = torchaudio.sox_effects.apply_effects_tensor( - waveform, sample_rate, - [['speed', str(speed)], ['rate', str(sample_rate)]]) - sample['wav'] = wav - - yield sample - - -def compute_fbank(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0): - """ Extract fbank - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.fbank(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - energy_floor=0.0, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def compute_mfcc(data, - num_mel_bins=23, - frame_length=25, - frame_shift=10, - dither=0.0, - num_ceps=40, - high_freq=0.0, - low_freq=20.0): - """ Extract mfcc - - Args: - data: Iterable[{key, wav, label, sample_rate}] - - Returns: - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'sample_rate' in sample - assert 'wav' in sample - assert 'key' in sample - assert 'label' in sample - sample_rate = sample['sample_rate'] - waveform = sample['wav'] - waveform = waveform * (1 << 15) - # Only keep key, feat, label - mat = kaldi.mfcc(waveform, - num_mel_bins=num_mel_bins, - frame_length=frame_length, - frame_shift=frame_shift, - dither=dither, - num_ceps=num_ceps, - high_freq=high_freq, - low_freq=low_freq, - sample_frequency=sample_rate) - yield dict(key=sample['key'], label=sample['label'], feat=mat) - - -def __tokenize_by_bpe_model(sp, txt): - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r'([\u4e00-\u9fff])') - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. 
- else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - - return tokens - - -def tokenize(data, - symbol_table, - bpe_model=None, - non_lang_syms=None, - split_with_space=False): - """ Decode text to chars or BPE - Inplace operation - - Args: - data: Iterable[{key, wav, txt, sample_rate}] - - Returns: - Iterable[{key, wav, txt, tokens, label, sample_rate}] - """ - if non_lang_syms is not None: - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - else: - non_lang_syms = {} - non_lang_syms_pattern = None - - if bpe_model is not None: - import sentencepiece as spm - sp = spm.SentencePieceProcessor() - sp.load(bpe_model) - else: - sp = None - - for sample in data: - assert 'txt' in sample - txt = sample['txt'].strip() - if non_lang_syms_pattern is not None: - parts = non_lang_syms_pattern.split(txt.upper()) - parts = [w for w in parts if len(w.strip()) > 0] - else: - parts = [txt] - - label = [] - tokens = [] - for part in parts: - if part in non_lang_syms: - tokens.append(part) - else: - if bpe_model is not None: - tokens.extend(__tokenize_by_bpe_model(sp, part)) - else: - if split_with_space: - part = part.split(" ") - for ch in part: - if ch == ' ': - ch = "▁" - tokens.append(ch) - - for ch in tokens: - if ch in symbol_table: - label.append(symbol_table[ch]) - elif '' in symbol_table: - label.append(symbol_table['']) - - sample['tokens'] = tokens - sample['label'] = label - yield sample - - -def spec_aug(data, num_t_mask=2, num_f_mask=2, max_t=50, max_f=10, max_w=80): - """ Do spec augmentation - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - num_t_mask: number of time mask to apply - num_f_mask: number of freq mask to apply - max_t: max width of time mask - max_f: max width of freq mask - max_w: max width of time warp - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - max_freq = y.size(1) - # time mask - for i in range(num_t_mask): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - y[start:end, :] = 0 - # freq mask - for i in range(num_f_mask): - start = random.randint(0, max_freq - 1) - length = random.randint(1, max_f) - end = min(max_freq, start + length) - y[:, start:end] = 0 - sample['feat'] = y - yield sample - - -def spec_sub(data, max_t=20, num_t_sub=3): - """ Do spec substitute - Inplace operation - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of time substitute - num_t_sub: number of time substitute to apply - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - y = x.clone().detach() - max_frames = y.size(0) - for i in range(num_t_sub): - start = random.randint(0, max_frames - 1) - length = random.randint(1, max_t) - end = min(max_frames, start + length) - # only substitute the earlier time chosen randomly for current time - pos = random.randint(0, start) - y[start:end, :] = x[start - pos:end - pos, :] - sample['feat'] = y - yield sample - - -def spec_trim(data, max_t=20): - """ Trim tailing frames. Inplace operation. 
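`spec_aug` above zeroes out random time and frequency stripes of the feature matrix (SpecAugment-style masking). A minimal standalone sketch of the same idea, with illustrative feature shape and mask widths:

```python
# Illustrative sketch of time/frequency masking on a (frames, mel bins) matrix.
import random
import torch

feat = torch.randn(200, 80)
y = feat.clone()

# one time mask (width <= 50 frames)
t0 = random.randint(0, y.size(0) - 1)
y[t0:min(y.size(0), t0 + random.randint(1, 50)), :] = 0

# one frequency mask (width <= 10 bins)
f0 = random.randint(0, y.size(1) - 1)
y[:, f0:min(y.size(1), f0 + random.randint(1, 10))] = 0

print((y == 0).any().item())   # True: some stripe was zeroed
```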
- ref: TrimTail [https://arxiv.org/abs/2211.00522] - - Args: - data: Iterable[{key, feat, label}] - max_t: max width of length trimming - - Returns - Iterable[{key, feat, label}] - """ - for sample in data: - assert 'feat' in sample - x = sample['feat'] - assert isinstance(x, torch.Tensor) - max_frames = x.size(0) - length = random.randint(1, max_t) - if length < max_frames / 2: - y = x.clone().detach()[:max_frames - length] - sample['feat'] = y - yield sample - - -def shuffle(data, shuffle_size=10000): - """ Local shuffle the data - - Args: - data: Iterable[{key, feat, label}] - shuffle_size: buffer size for shuffle - - Returns: - Iterable[{key, feat, label}] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= shuffle_size: - random.shuffle(buf) - for x in buf: - yield x - buf = [] - # The sample left over - random.shuffle(buf) - for x in buf: - yield x - - -def sort(data, sort_size=500): - """ Sort the data by feature length. - Sort is used after shuffle and before batch, so we can group - utts with similar lengths into a batch, and `sort_size` should - be less than `shuffle_size` - - Args: - data: Iterable[{key, feat, label}] - sort_size: buffer size for sort - - Returns: - Iterable[{key, feat, label}] - """ - - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= sort_size: - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - buf = [] - # The sample left over - buf.sort(key=lambda x: x['feat'].size(0)) - for x in buf: - yield x - - -def static_batch(data, batch_size=16): - """ Static batch the data by `batch_size` - - Args: - data: Iterable[{key, feat, label}] - batch_size: batch size - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - for sample in data: - buf.append(sample) - if len(buf) >= batch_size: - yield buf - buf = [] - if len(buf) > 0: - yield buf - - -def dynamic_batch(data, max_frames_in_batch=12000): - """ Dynamic batch the data until the total frames in batch - reach `max_frames_in_batch` - - Args: - data: Iterable[{key, feat, label}] - max_frames_in_batch: max_frames in one batch - - Returns: - Iterable[List[{key, feat, label}]] - """ - buf = [] - longest_frames = 0 - for sample in data: - assert 'feat' in sample - assert isinstance(sample['feat'], torch.Tensor) - new_sample_frames = sample['feat'].size(0) - longest_frames = max(longest_frames, new_sample_frames) - frames_after_padding = longest_frames * (len(buf) + 1) - if frames_after_padding > max_frames_in_batch: - yield buf - buf = [sample] - longest_frames = new_sample_frames - else: - buf.append(sample) - if len(buf) > 0: - yield buf - - -def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000): - """ Wrapper for static/dynamic batch - """ - if batch_type == 'static': - return static_batch(data, batch_size) - elif batch_type == 'dynamic': - return dynamic_batch(data, max_frames_in_batch) - else: - logging.fatal('Unsupported batch type {}'.format(batch_type)) - - -def padding(data): - """ Padding the data into training data - - Args: - data: Iterable[List[{key, feat, label}]] - - Returns: - Iterable[Tuple(keys, feats, labels, feats lengths, label lengths)] - """ - for sample in data: - assert isinstance(sample, list) - feats_length = torch.tensor([x['feat'].size(0) for x in sample], - dtype=torch.int32) - order = torch.argsort(feats_length, descending=True) - feats_lengths = torch.tensor( - [sample[i]['feat'].size(0) for i in order], dtype=torch.int32) - sorted_feats = [sample[i]['feat'] for i in order] - sorted_keys 
= [sample[i]['key'] for i in order] - sorted_labels = [ - torch.tensor(sample[i]['label'], dtype=torch.int64) for i in order - ] - label_lengths = torch.tensor([x.size(0) for x in sorted_labels], - dtype=torch.int32) - - padded_feats = pad_sequence(sorted_feats, - batch_first=True, - padding_value=0) - - pad = (0, 0, 0, 0) - seq_len= padded_feats.shape[1] - if seq_len < 384: - pad = (0, 0, 0, 384-seq_len) - elif seq_len < 512: - pad = (0, 0, 0, 512-seq_len) - elif seq_len < 640: - pad = (0, 0, 0, 640-seq_len) - elif seq_len < 768: - pad = (0, 0, 0, 768-seq_len) - elif seq_len < 896: - pad = (0, 0, 0, 896-seq_len) - elif seq_len < 1024: - pad = (0, 0, 0, 1024-seq_len) - elif seq_len < 1280: - pad = (0, 0, 0, 1280-seq_len) - padded_feats = torch.nn.functional.pad(padded_feats, pad, mode='constant', value=0) - padding_labels = pad_sequence(sorted_labels, - batch_first=True, - padding_value=-1) - - yield (sorted_keys, padded_feats, padding_labels, feats_lengths, - label_lengths) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/wav_distortion.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/wav_distortion.py deleted file mode 100644 index 2917d3cc6cfb801935cb0885d0c42cd88f1833b8..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/dataset/wav_distortion.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Chao Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import math - -import torchaudio -import torch -torchaudio.set_audio_backend("sox_io") - - -def db2amp(db): - return pow(10, db / 20) - -def amp2db(amp): - return 20 * math.log10(amp) - -def make_poly_distortion(conf): - """Generate a db-domain ploynomial distortion function - - f(x) = a * x^m * (1-x)^n + x - - Args: - conf: a dict {'a': #int, 'm': #int, 'n': #int} - - Returns: - The ploynomial function, which could be applied on - a float amplitude value - """ - a = conf['a'] - m = conf['m'] - n = conf['n'] - - def poly_distortion(x): - abs_x = abs(x) - if abs_x < 0.000001: - x = x - else: - db_norm = amp2db(abs_x) / 100 + 1 - if db_norm < 0: - db_norm = 0 - db_norm = a * pow(db_norm, m) * pow((1 - db_norm), n) + db_norm - if db_norm > 1: - db_norm = 1 - db = (db_norm - 1) * 100 - amp = db2amp(db) - if amp >= 0.9997: - amp = 0.9997 - if x > 0: - x = amp - else: - x = -amp - return x - return poly_distortion - -def make_quad_distortion(): - return make_poly_distortion({'a' : 1, 'm' : 1, 'n' : 1}) - -# the amplitude are set to max for all non-zero point -def make_max_distortion(conf): - """Generate a max distortion function - - Args: - conf: a dict {'max_db': float } - 'max_db': the maxium value. 
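The `padding` collate function above pads each batch's time axis up to the next bucket in {384, 512, 640, 768, 896, 1024, 1280}, so the padded features only take a small set of sequence lengths. A plain-function restatement of that bucketing (`pad_to_bucket` is a hypothetical name):

```python
def pad_to_bucket(seq_len, buckets=(384, 512, 640, 768, 896, 1024, 1280)):
    # Mirrors the elif chain in padding(); illustrative helper, hypothetical name.
    for b in buckets:
        if seq_len < b:
            return b
    return seq_len            # at or beyond the largest bucket: no extra padding

print([pad_to_bucket(n) for n in (123, 384, 400, 1500)])   # [384, 512, 512, 1500]
```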
- - Returns: - The max function, which could be applied on - a float amplitude value - """ - max_db = conf['max_db'] - if max_db: - max_amp = db2amp(max_db) # < 0.997 - else: - max_amp = 0.997 - - def max_distortion(x): - if x > 0: - x = max_amp - elif x < 0: - x = -max_amp - else: - x = 0.0 - return x - return max_distortion - - - -def make_amp_mask(db_mask=None): - """Get a amplitude domain mask from db domain mask - - Args: - db_mask: Optional. A list of tuple. if None, using default value. - - Returns: - A list of tuple. The amplitude domain mask - """ - if db_mask is None: - db_mask = [(-110, -95), (-90, -80), (-65, -60), (-50, -30), (-15, 0)] - amp_mask = [(db2amp(db[0]), db2amp(db[1])) for db in db_mask] - return amp_mask - -default_mask = make_amp_mask() - - -def generate_amp_mask(mask_num): - """Generate amplitude domain mask randomly in [-100db, 0db] - - Args: - mask_num: the slot number of the mask - - Returns: - A list of tuple. each tuple defines a slot. - e.g. [(-100, -80), (-65, -60), (-50, -30), (-15, 0)] - for #mask_num = 4 - """ - a = [0] * 2 * mask_num - a[0] = 0 - m = [] - for i in range(1, 2 * mask_num): - a[i] = a[i - 1] + random.uniform(0.5, 1) - max_val = a[2 * mask_num - 1] - for i in range(0, mask_num): - l = ((a[2 * i] - max_val) / max_val) * 100 - r = ((a[2 * i + 1] - max_val) / max_val) * 100 - m.append((l, r)) - return make_amp_mask(m) - - -def make_fence_distortion(conf): - """Generate a fence distortion function - - In this fence-like shape function, the values in mask slots are - set to maxium, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': int,'max_db': float } - 'mask_number': the slot number in mask. - 'max_db': the maxium value. - - Returns: - The fence function, which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - max_db = conf['max_db'] - max_amp = db2amp(max_db) # 0.997 - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def fence_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return max_amp - if not is_in_mask: - return 0.0 - return x - - return fence_distortion - -# -def make_jag_distortion(conf): - """Generate a jag distortion function - - In this jag-like shape function, the values in mask slots are - not changed, while the values not in mask slots are set to 0. - Use seperated masks for Positive and negetive amplitude. - - Args: - conf: a dict {'mask_number': #int} - 'mask_number': the slot number in mask. 
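`db2amp`/`amp2db` above convert between the dB domain and linear amplitude with the standard 20·log10 relation, so -20 dB corresponds to an amplitude factor of 0.1 and the two functions invert each other. A quick sanity check:

```python
# Sanity check of the dB <-> amplitude mapping; illustrative only.
import math

def db2amp(db):
    return pow(10, db / 20)

def amp2db(amp):
    return 20 * math.log10(amp)

print(db2amp(-20))            # 0.1
print(amp2db(db2amp(-20)))    # -20.0 (up to floating-point rounding)
```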
- - Returns: - The jag function,which could be applied on - a float amplitude value - """ - mask_number = conf['mask_number'] - if mask_number <= 0 : - positive_mask = default_mask - negative_mask = make_amp_mask([(-50, 0)]) - else: - positive_mask = generate_amp_mask(mask_number) - negative_mask = generate_amp_mask(mask_number) - - def jag_distortion(x): - is_in_mask = False - if x > 0: - for mask in positive_mask: - if x >= mask[0] and x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - elif x < 0: - abs_x = abs(x) - for mask in negative_mask: - if abs_x >= mask[0] and abs_x <= mask[1]: - is_in_mask = True - return x - if not is_in_mask: - return 0.0 - return x - - return jag_distortion - -# gaining 20db means amp = amp * 10 -# gaining -20db means amp = amp / 10 -def make_gain_db(conf): - """Generate a db domain gain function - - Args: - conf: a dict {'db': #float} - 'db': the gaining value - - Returns: - The db gain function, which could be applied on - a float amplitude value - """ - db = conf['db'] - - def gain_db(x): - return min(0.997, x * pow(10, db / 20)) - - return gain_db - - -def distort(x, func, rate=0.8): - """Distort a waveform in sample point level - - Args: - x: the origin wavefrom - func: the distort function - rate: sample point-level distort probability - - Returns: - the distorted waveform - """ - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - x[0][i] = func(float(x[0][i])) - return x - -def distort_chain(x, funcs, rate=0.8): - for i in range(0, x.shape[1]): - a = random.uniform(0, 1) - if a < rate: - for func in funcs: - x[0][i] = func(float(x[0][i])) - return x - -# x is numpy -def distort_wav_conf(x, distort_type, distort_conf, rate=0.1): - if distort_type == 'gain_db': - gain_db = make_gain_db(distort_conf) - x = distort(x, gain_db) - elif distort_type == 'max_distortion': - max_distortion = make_max_distortion(distort_conf) - x = distort(x, max_distortion, rate=rate) - elif distort_type == 'fence_distortion': - fence_distortion = make_fence_distortion(distort_conf) - x = distort(x, fence_distortion, rate=rate) - elif distort_type == 'jag_distortion': - jag_distortion = make_jag_distortion(distort_conf) - x = distort(x, jag_distortion, rate=rate) - elif distort_type == 'poly_distortion': - poly_distortion = make_poly_distortion(distort_conf) - x = distort(x, poly_distortion, rate=rate) - elif distort_type == 'quad_distortion': - quad_distortion = make_quad_distortion() - x = distort(x, quad_distortion, rate=rate) - elif distort_type == 'none_distortion': - pass - else: - print('unsupport type') - return x - -def distort_wav_conf_and_save(distort_type, distort_conf, rate, wav_in, wav_out): - x, sr = torchaudio.load(wav_in) - x = x.detach().numpy() - out = distort_wav_conf(x, distort_type, distort_conf, rate) - torchaudio.save(wav_out, torch.from_numpy(out), sr) - -if __name__ == "__main__": - distort_type = sys.argv[1] - wav_in = sys.argv[2] - wav_out = sys.argv[3] - conf = None - rate = 0.1 - if distort_type == 'new_jag_distortion': - conf = {'mask_number' : 4} - elif distort_type == 'new_fence_distortion': - conf = {'mask_number' : 1, 'max_db' : -30} - elif distort_type == 'poly_distortion': - conf = {'a' : 4, 'm' : 2, "n" : 2} - distort_wav_conf_and_save(distort_type, conf, rate, wav_in, wav_out) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/attention.py deleted file 
mode 100644 index 475131b15af92ffcaf91ad5e2e30d114d4d1a2a3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/attention.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple, Optional - -import torch -from torch import nn -import torch.nn.functional as F -from wenet.transformer.attention import MultiHeadedAttention - - -class GroupedRelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: - https://arxiv.org/abs/1901.02860 - https://arxiv.org/abs/2109.01163 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate, group_size=3): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - self.group_size = group_size - self.d_k = n_feat // n_head # for GroupedAttention - self.n_feat = n_feat - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k * self.group_size)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. 
- """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def pad4group(self, Q, K, V, P, mask, group_size: int = 3): - """ - q: (#batch, time1, size) -> (#batch, head, time1, size/head) - k,v: (#batch, time2, size) -> (#batch, head, time2, size/head) - p: (#batch, time2, size) - """ - # Compute Overflows - overflow_Q = Q.size(2) % group_size - overflow_KV = K.size(2) % group_size - - # if-else for ONNX export - # 0 // 0.00000000000000001 = 0 - # 1 // 1.00000000000000001 = 1 - padding_Q = (group_size - overflow_Q) * int( - overflow_Q // (overflow_Q + 0.00000000000000001)) - padding_KV = (group_size - overflow_KV) * int( - overflow_KV // (overflow_KV + 0.00000000000000001)) - - batch_size, _, seq_len_KV, _ = K.size() - - # Input Padding (B, T, D) -> (B, T + P, D) - Q = F.pad(Q, (0, 0, 0, padding_Q), value=0.0) - K = F.pad(K, (0, 0, 0, padding_KV), value=0.0) - V = F.pad(V, (0, 0, 0, padding_KV), value=0.0) - - if mask is not None and mask.size(2) > 0 : # time2 > 0: - mask = mask[:, ::group_size, ::group_size] - - Q = Q.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - K = K.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - V = V.transpose(1, 2).contiguous().view( - batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - # process pos_emb - P_batch_size = P.size(0) - overflow_P = P.size(1) % group_size - padding_P = group_size - overflow_P if overflow_P else 0 - P = F.pad(P, (0, 0, 0, padding_P), value=0.0) - P = P.view(P_batch_size, -1, self.h, self.d_k * group_size).transpose(1, 2) - - return Q, K, V, P, mask, padding_Q - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - padding_q: Optional[int] = None - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - padding_q : for GroupedAttention in efficent conformer - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. 
jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - - # n_feat!=h*d_k may be happened in GroupAttention - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, self.n_feat) - ) # (batch, time1, d_model) - if padding_q is not None: - # for GroupedAttention in efficent conformer - x = x[:, :x.size(1) - padding_q] - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q = self.linear_q(query) - k = self.linear_k(key) # (#batch, time2, size) - v = self.linear_v(value) - p = self.linear_pos(pos_emb) # (#batch, time2, size) - - batch_size, seq_len_KV, _ = k.size() # seq_len_KV = time2 - - # (#batch, time2, size) -> (#batch, head, time2, size/head) - q = q.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - k = k.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - v = v.view(batch_size, -1, self.h, self.d_k).transpose(1, 2) - if cache.size(0) > 0: - # use attention cache - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - new_cache = torch.cat((k, v), dim=-1) - - # May be k and p does not match. eg. time2=18+18/2=27 > mask=36/2=18 - if mask is not None and mask.size(2) > 0: - time2 = mask.size(2) - k = k[:, :, -time2:, :] - v = v[:, :, -time2:, :] - - # q k v p: (batch, head, time1, d_k) - q, k, v, p, mask, padding_q = self.pad4group(q, k, v, p, mask, self.group_size) - - # q_with_bias_u & q_with_bias_v = (batch, head, time1, d_k) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. 
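`pad4group` above pads the query/key/value time axes up to a multiple of `group_size` with a branch-free expression (the tiny epsilon stands in for an `if`/`else` that would complicate ONNX export). The padding amount it computes, written out as a standalone function with illustrative lengths:

```python
def grouped_padding(seq_len, group_size=3):
    # Same branch-free expression as pad4group: 0 when seq_len is already a
    # multiple of group_size, otherwise the amount needed to reach the next one.
    overflow = seq_len % group_size
    return (group_size - overflow) * int(overflow // (overflow + 1e-17))

for seq_len in (9, 10, 11, 12):
    print(seq_len, grouped_padding(seq_len))   # 9->0, 10->2, 11->1, 12->0
```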
- # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k * self.group_size) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask, padding_q), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/convolution.py deleted file mode 100644 index 52d6c1c14c0812ab3957a60a135f644833c2ad95..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/convolution.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - stride: int = 1): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - stride (int): Stride Convolution, for efficient Conformer - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=stride, # for depthwise_conv in StrideConv - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - self.stride = stride - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. 
- Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - # When export ONNX,the first cache is not None but all-zero, - # cause shape error in residual block, - # eg. cache14 + x9 = 23, 23-7+1=17 != 9 - cache = cache[:, :, -self.lorder:] - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is requried, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - if mask_pad.size(2) != x.size(2): - mask_pad = mask_pad[:, :, ::self.stride] - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/encoder.py deleted file mode 100644 index dbd37f53cac86be851e2bb194354fd07eb271f11..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/encoder.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
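For the causal case, the `ConvolutionModule` above left-pads the input with `lorder = kernel_size - 1` frames (or that many cached frames when streaming), so the depthwise convolution preserves the time dimension. A minimal shape check with illustrative channel and length values:

```python
# Illustrative shape check of causal left-padding for a depthwise Conv1d.
import torch
import torch.nn as nn

kernel_size, channels = 15, 8
lorder = kernel_size - 1
conv = nn.Conv1d(channels, channels, kernel_size, groups=channels, padding=0)

x = torch.randn(2, channels, 100)                        # (batch, channels, time)
x_padded = nn.functional.pad(x, (lorder, 0), 'constant', 0.0)
print(conv(x_padded).shape)                              # torch.Size([2, 8, 100])
```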
-# Modified from EfficientConformer(https://github.com/burchim/EfficientConformer) -# Paper(https://arxiv.org/abs/2109.01163) - -"""Encoder definition.""" -from typing import Tuple, Optional, List, Union - -import torch -import logging -from typeguard import check_argument_types -import torch.nn.functional as F - -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.encoder_layer import ConformerEncoderLayer - -from wenet.efficient_conformer.subsampling import Conv2dSubsampling2 -from wenet.efficient_conformer.convolution import ConvolutionModule -from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention -from wenet.efficient_conformer.encoder_layer import StrideConformerEncoderLayer - -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class EfficientConformerEncoder(torch.nn.Module): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - macaron_style: bool = True, - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - stride_layer_idx: Optional[Union[int, List[int]]] = 3, - stride: Optional[Union[int, List[int]]] = 2, - group_layer_idx: Optional[Union[int, List[int], tuple]] = (0, 1, 2, 3), - group_size: int = 3, - stride_kernel: bool = True, - **kwargs - ): - """Construct Efficient Conformer Encoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - macaron_style (bool): Whether to use macaron style for - positionwise layer. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. - stride_layer_idx (list): layer id with StrideConv, start from 0 - stride (list): stride size of each StrideConv in efficient conformer - group_layer_idx (list): layer id with GroupedAttention, start from 0 - group_size (int): group size of every GroupedAttention layer - stride_kernel (bool): default True. True: recompute cnn kernels with stride. 
- """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d2": - subsampling_class = Conv2dSubsampling2 - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - logging.info(f"input_layer = {input_layer}, " - f"subsampling_class = {subsampling_class}") - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - self.input_layer = input_layer - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - activation = get_activation(activation_type) - self.num_blocks = num_blocks - self.attention_heads = attention_heads - self.cnn_module_kernel = cnn_module_kernel - self.global_chunk_size = 0 - self.chunk_feature_map = 0 - - # efficient conformer configs - self.stride_layer_idx = [stride_layer_idx] \ - if type(stride_layer_idx) == int else stride_layer_idx - self.stride = [stride] \ - if type(stride) == int else stride - self.group_layer_idx = [group_layer_idx] \ - if type(group_layer_idx) == int else group_layer_idx - self.grouped_size = group_size # group size of every GroupedAttention layer - - assert len(self.stride) == len(self.stride_layer_idx) - self.cnn_module_kernels = [cnn_module_kernel] # kernel size of each StridedConv - for i in self.stride: - if stride_kernel: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1] // i) - else: - self.cnn_module_kernels.append(self.cnn_module_kernels[-1]) - - logging.info(f"stride_layer_idx= {self.stride_layer_idx}, " - f"stride = {self.stride}, " - f"cnn_module_kernel = {self.cnn_module_kernels}, " - f"group_layer_idx = {self.group_layer_idx}, " - f"grouped_size = {self.grouped_size}") - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - - # encoder definition - index = 0 - layers = [] - for i in range(num_blocks): - # self-attention module definition - if i in self.group_layer_idx: - encoder_selfattn_layer = GroupedRelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - self.grouped_size) - else: - if pos_enc_layer_type == "no_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate) - - # conformer module definition - if i in self.stride_layer_idx: - # conformer block with downsampling - convolution_layer_args_stride = ( - output_size, 
self.cnn_module_kernels[index], activation, - cnn_module_norm, causal, True, self.stride[index]) - layers.append(StrideConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_stride) if use_cnn_module else None, - torch.nn.AvgPool1d( - kernel_size=self.stride[index], stride=self.stride[index], - padding=0, ceil_mode=True, - count_include_pad=False), # pointwise_conv_layer - dropout_rate, - normalize_before, - concat_after, - )) - index = index + 1 - else: - # conformer block - convolution_layer_args_normal = ( - output_size, self.cnn_module_kernels[index], activation, - cnn_module_norm, causal) - layers.append(ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args_normal) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - )) - - self.encoders = torch.nn.ModuleList(layers) - - def set_global_chunk_size(self, chunk_size): - """Used in ONNX export. - """ - logging.info(f"set global chunk size: {chunk_size}, default is 0.") - self.global_chunk_size = chunk_size - if self.embed.subsampling_rate == 2: - self.chunk_feature_map = 2 * self.global_chunk_size + 1 - elif self.embed.subsampling_rate == 6: - self.chunk_feature_map = 6 * self.global_chunk_size + 5 - elif self.embed.subsampling_rate == 8: - self.chunk_feature_map = 8 * self.global_chunk_size + 7 - else: - self.chunk_feature_map = 4 * self.global_chunk_size + 3 - - def output_size(self) -> int: - return self._output_size - - def calculate_downsampling_factor(self, i: int) -> int: - factor = 1 - for idx, stride_idx in enumerate(self.stride_layer_idx): - if i > stride_idx: - factor *= self.stride[idx] - return factor - - def forward(self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - index = 0 # traverse stride - for i, layer in enumerate(self.encoders): - # layer return : x, mask, new_att_cache, new_cnn_cache - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if i in self.stride_layer_idx: - masks = masks[:, :, ::self.stride[index]] - chunk_masks = chunk_masks[:, ::self.stride[index], - ::self.stride[index]] - mask_pad = masks - pos_emb = pos_emb[:, ::self.stride[index], :] - index = index + 1 - - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. 
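The stride handling in `forward` above thins the padding mask and the positional embedding with the same step as the strided conformer blocks. A minimal standalone sketch of that slicing pattern, with placeholder batch/time/feature sizes:

```python
import torch

# Placeholder sizes for illustration; `stride` matches the StrideConv step.
B, T, D, stride = 2, 16, 8, 2

masks = torch.ones(B, 1, T, dtype=torch.bool)   # padding mask (B, 1, T)
pos_emb = torch.randn(1, T, D)                  # positional embedding (1, T, D)

# After a strided block the time axis shrinks by `stride`, so the mask and
# positional embedding are subsampled with the same step.
masks = masks[:, :, ::stride]                   # (B, 1, T // stride)
pos_emb = pos_emb[:, ::stride, :]               # (1, T // stride, D)

print(masks.shape, pos_emb.shape)               # (2, 1, 8) and (1, 8, 8)
```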
- cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - att_mask : mask matrix of self attention - - Returns: - torch.Tensor: output of current input xs - torch.Tensor: subsampling cache required for next chunk computation - List[torch.Tensor]: encoder layers output cache required for next - chunk computation - List[torch.Tensor]: conformer cnn cache - - """ - assert xs.size(0) == 1 - - # using downsampling factor to recover offset - offset *= self.calculate_downsampling_factor(self.num_blocks + 1) - - chunk_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - chunk_masks = chunk_masks.unsqueeze(1) # (1, 1, xs-time) - - real_len = 0 - if self.global_chunk_size > 0: - # for ONNX decode simulation, padding xs to chunk_size - real_len = xs.size(1) - pad_len = self.chunk_feature_map - real_len - xs = F.pad(xs, (0, 0, 0, pad_len), value=0.0) - chunk_masks = F.pad(chunk_masks, (0, pad_len), value=0.0) - - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, chunk_masks = self.embed(xs, chunk_masks, offset) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - # shape(pos_emb) = (b=1, chunk_size, emb_size=output_size=hidden-dim) - - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) # batchPad (b=1, 1, time=chunk_size) - - if self.global_chunk_size > 0: - # for ONNX decode simulation - pos_emb = self.embed.position_encoding( - offset=max(offset - cache_t1, 0), - size=cache_t1 + self.global_chunk_size) - att_mask[:, :, -self.global_chunk_size:] = chunk_masks - mask_pad = chunk_masks.to(torch.bool) - else: - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - - max_att_len, max_cnn_len = 0, 0 # for repeat_interleave of new_att_cache - for i, layer in enumerate(self.encoders): - factor = self.calculate_downsampling_factor(i) - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - # shape(new_att_cache) = [ batch, head, time2, outdim//head * 2 ] - att_cache_trunc = 0 - if xs.size(1) + att_cache.size(2) / factor > pos_emb.size(1): - # The time step is not divisible by the downsampling multiple - att_cache_trunc = xs.size(1) + \ - att_cache.size(2) // factor - pos_emb.size(1) + 1 - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - mask_pad=mask_pad, - att_cache=att_cache[i:i + 1, :, ::factor, :][:, :, att_cache_trunc:, :], - cnn_cache=cnn_cache[i, :, :, :] - if cnn_cache.size(0) > 0 else cnn_cache - ) - - if i in self.stride_layer_idx: - # compute time dimension for next block - efficient_index = self.stride_layer_idx.index(i) - att_mask = att_mask[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - mask_pad = mask_pad[:, ::self.stride[efficient_index], - ::self.stride[efficient_index]] - pos_emb = pos_emb[:, ::self.stride[efficient_index], :] - - # 
shape(new_att_cache) = [batch, head, time2, outdim] - new_att_cache = new_att_cache[:, :, next_cache_start // factor:, :] - # shape(new_cnn_cache) = [1, batch, outdim, cache_t2] - new_cnn_cache = new_cnn_cache.unsqueeze(0) - - # use repeat_interleave to new_att_cache - new_att_cache = new_att_cache.repeat_interleave(repeats=factor, dim=2) - # padding new_cnn_cache to cnn.lorder for casual convolution - new_cnn_cache = F.pad( - new_cnn_cache, - (self.cnn_module_kernel - 1 - new_cnn_cache.size(3), 0)) - - if i == 0: - # record length for the first block as max length - max_att_len = new_att_cache.size(2) - max_cnn_len = new_cnn_cache.size(3) - - # update real shape of att_cache and cnn_cache - r_att_cache.append(new_att_cache[:, :, -max_att_len:, :]) - r_cnn_cache.append(new_cnn_cache[:, :, :, -max_cnn_len:]) - - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.global_chunk_size > 0 and real_len: - chunk_real_len = real_len // self.embed.subsampling_rate // \ - self.calculate_downsampling_factor(self.num_blocks + 1) - # Keeping 1 more timestep can mitigate information leakage - # from the encoder caused by the padding - xs = xs[:, :chunk_real_len + 1, :] - - return xs, r_att_cache, r_cnn_cache - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - use_onnx=False - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. - Args: - xs (torch.Tensor): (1, max_len, dim) - decoding_chunk_size (int): decoding chunk size - num_decoding_left_chunks (int): - use_onnx (bool): True for simulating ONNX model inference. 
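A standalone restatement of the cumulative downsampling factor used above for cache slicing and `repeat_interleave`, assuming the constructor defaults shown earlier (`stride_layer_idx=3`, `stride=2`, six blocks); the values are examples only:

```python
# Mirrors calculate_downsampling_factor: layers after a strided block see a
# time axis reduced by the product of all preceding strides.
def downsampling_factor(layer_idx, stride_layer_idx=(3,), stride=(2,)):
    factor = 1
    for idx, s_idx in enumerate(stride_layer_idx):
        if layer_idx > s_idx:
            factor *= stride[idx]
    return factor

print([downsampling_factor(i) for i in range(6)])  # [1, 1, 1, 1, 2, 2]
```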
- """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - if use_onnx: - logging.info("Simulating for ONNX runtime ...") - att_cache: torch.Tensor = torch.zeros( - (self.num_blocks, self.attention_heads, required_cache_size, - self.output_size() // self.attention_heads * 2), - device=xs.device) - cnn_cache: torch.Tensor = torch.zeros( - (self.num_blocks, 1, self.output_size(), self.cnn_module_kernel - 1), - device=xs.device) - self.set_global_chunk_size(chunk_size=decoding_chunk_size) - else: - logging.info("Simulating for JIT runtime ...") - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - logging.info(f"-->> frame chunk msg: cur={cur}, " - f"end={end}, num_frames={end-cur}, " - f"decoding_window={decoding_window}") - if use_onnx: - att_mask: torch.Tensor = torch.ones( - (1, 1, required_cache_size + decoding_chunk_size), - dtype=torch.bool, device=xs.device) - if cur == 0: - att_mask[:, :, :required_cache_size] = 0 - else: - att_mask: torch.Tensor = torch.ones( - (0, 0, 0), dtype=torch.bool, device=xs.device) - - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache, att_mask) - outputs.append(y) - offset += y.size(1) - - ys = torch.cat(outputs, 1) - masks = torch.ones(1, 1, ys.size(1), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/encoder_layer.py deleted file mode 100644 index 3a88ec9fca9797664ce89566e6c1d28a8f0ad5f4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/encoder_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple -import torch -from torch import nn - - -class StrideConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. 
- `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - pointwise_conv_layer: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.pointwise_conv_layer = pointwise_conv_layer - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - self.concat_linear = nn.Linear(size + size, size) - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.tensor([0.0], dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - - # add pointwise_conv for efficient conformer - # pointwise_conv_layer does not change shape - if self.pointwise_conv_layer is not None: - residual = residual.transpose(1, 2) - residual = self.pointwise_conv_layer(residual) - residual = residual.transpose(1, 2) - assert residual.size(0) == x.size(0) - assert residual.size(1) == x.size(1) - assert residual.size(2) == x.size(2) - - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/subsampling.py deleted file mode 100644 index 98b2c2228eac8e77586110686c48a7b0141458c9..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/efficient_conformer/subsampling.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 58.com(Wuba) Inc AI Lab. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch -from wenet.transformer.subsampling import BaseSubsampling - - -class Conv2dSubsampling2(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
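The macaron-style branch at the top of the layer `forward` above adds a half-weighted feed-forward block before self-attention. A toy pre-norm sketch with placeholder modules (sizes and the activation are illustrative, not the wenet classes):

```python
import torch
import torch.nn as nn

size, ff_scale = 8, 0.5                      # ff_scale is 0.5 when macaron style is used
ffn = nn.Sequential(nn.Linear(size, 4 * size), nn.SiLU(), nn.Linear(4 * size, size))
norm = nn.LayerNorm(size)
dropout = nn.Dropout(0.1)

x = torch.randn(2, 10, size)
residual = x
x = residual + ff_scale * dropout(ffn(norm(x)))  # pre-norm, half-step FFN residual
print(x.shape)                                   # torch.Size([2, 10, 8])
```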
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU() - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * ((idim - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 2 - # 2 = (3 - 1) * 1 - self.right_context = 2 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 2. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 2. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, :-2:2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/attention.py deleted file mode 100644 index 97412badbe8e2c5caec81c0636d15be3f80d6b84..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/attention.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# 2022 Ximalaya Inc. (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -import torch -import torch.nn as nn -from wenet.transformer.attention import MultiHeadedAttention -from typing import Tuple - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
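A quick shape check for the `Conv2dSubsampling2` module above: despite the "1/4 length" wording in its docstring, the single stride-2 convolution halves the time axis, and the trimmed mask `x_mask[:, :, :-2:2]` ends up with the same length. The feature sizes below are placeholders:

```python
import torch

idim, odim, B, T = 80, 256, 2, 16              # example sizes only
conv = torch.nn.Sequential(torch.nn.Conv2d(1, odim, 3, 2), torch.nn.ReLU())
out = torch.nn.Linear(odim * ((idim - 1) // 2), odim)

x = torch.randn(B, T, idim).unsqueeze(1)       # (B, 1, T, idim)
x = conv(x)                                    # (B, odim, T', F')
b, c, t, f = x.size()
x = out(x.transpose(1, 2).contiguous().view(b, t, c * f))

mask = torch.ones(B, 1, T, dtype=torch.bool)
print(x.shape[1], mask[:, :, :-2:2].shape[-1])  # both equal (T - 1) // 2 == 7
```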
- """ - - def __init__(self, n_head, n_feat, dropout_rate, - do_rel_shift=False, adaptive_scale=False, init_weights=False): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.do_rel_shift = do_rel_shift - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - self.adaptive_scale = adaptive_scale - self.ada_scale = nn.Parameter( - torch.ones([1, 1, n_feat]), requires_grad=adaptive_scale) - self.ada_bias = nn.Parameter( - torch.zeros([1, 1, n_feat]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - input_max = (self.h * self.d_k) ** -0.5 - torch.nn.init.uniform_(self.linear_q.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_q.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_k.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_v.bias, -input_max, input_max) - torch.nn.init.uniform_(self.linear_pos.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.weight, -input_max, input_max) - torch.nn.init.uniform_(self.linear_out.bias, -input_max, input_max) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). - zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. 
pytorch training - if mask.size(2) > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - # (batch, head, time1, time2) - attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - if self.adaptive_scale: - query = self.ada_scale * query + self.ada_bias - key = self.ada_scale * key + self.ada_bias - value = self.ada_scale * value + self.ada_bias - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - if self.do_rel_shift: - matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/conv2d.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/conv2d.py deleted file mode 100644 index c230263396392d72f36c56d645338f2d576db898..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/conv2d.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Conv2d Module with Valid Padding""" - -import torch.nn.functional as F -from torch.nn.modules.conv import _ConvNd, _size_2_t, Union, _pair, Tensor, Optional - - -class Conv2dValid(_ConvNd): - """ - Conv2d operator for VALID mode padding. 
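The score computation at the end of the attention `forward` above combines a content term and a position term, each with its own learned bias, following Transformer-XL. A toy restatement with random tensors (all sizes are arbitrary examples):

```python
import math
import torch

batch, head, t1, t2, d_k = 1, 2, 4, 4, 8
q = torch.randn(batch, t1, head, d_k)          # query, (B, time1, head, d_k)
k = torch.randn(batch, head, t2, d_k)          # projected keys
p = torch.randn(batch, head, t2, d_k)          # projected positional embeddings
pos_bias_u = torch.randn(head, d_k)            # content bias
pos_bias_v = torch.randn(head, d_k)            # position bias

q_u = (q + pos_bias_u).transpose(1, 2)         # (B, head, time1, d_k)
q_v = (q + pos_bias_v).transpose(1, 2)

matrix_ac = torch.matmul(q_u, k.transpose(-2, -1))   # content term
matrix_bd = torch.matmul(q_v, p.transpose(-2, -1))   # position term
scores = (matrix_ac + matrix_bd) / math.sqrt(d_k)
print(scores.shape)                            # (1, 2, 4, 4) = (B, head, time1, time2)
```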
- """ - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: _size_2_t, - stride: _size_2_t = 1, - padding: Union[str, _size_2_t] = 0, - dilation: _size_2_t = 1, - groups: int = 1, - bias: bool = True, - padding_mode: str = 'zeros', # TODO: refine this type - device=None, - dtype=None, - valid_trigx: bool = False, - valid_trigy: bool = False - ) -> None: - factory_kwargs = {'device': device, 'dtype': dtype} - kernel_size_ = _pair(kernel_size) - stride_ = _pair(stride) - padding_ = padding if isinstance(padding, str) else _pair(padding) - dilation_ = _pair(dilation) - super(Conv2dValid, self).__init__( - in_channels, out_channels, kernel_size_, - stride_, padding_, dilation_, False, _pair(0), - groups, bias, padding_mode, **factory_kwargs) - self.valid_trigx = valid_trigx - self.valid_trigy = valid_trigy - - def _conv_forward( - self, input: Tensor, weight: Tensor, bias: Optional[Tensor]): - validx, validy = 0, 0 - if self.valid_trigx: - validx = (input.size(-2) * (self.stride[-2] - 1) - 1 - + self.kernel_size[-2]) // 2 - if self.valid_trigy: - validy = (input.size(-1) * (self.stride[-1] - 1) - 1 - + self.kernel_size[-1]) // 2 - return F.conv2d(input, weight, bias, self.stride, - (validx, validy), self.dilation, self.groups) - - def forward(self, input: Tensor) -> Tensor: - return self._conv_forward(input, self.weight, self.bias) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/convolution.py deleted file mode 100644 index 6da2ee8c98ed58fae66d66c892041037f0d6bc3a..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/convolution.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True, - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. 
- causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - self.bias = bias - self.channels = channels - self.kernel_size = kernel_size - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, channels]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, channels]), requires_grad=adaptive_scale) - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - if init_weights: - self.init_weights() - - def init_weights(self): - pw_max = self.channels ** -0.5 - dw_max = self.kernel_size ** -0.5 - torch.nn.init.uniform_(self.pointwise_conv1.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv1.bias.data, -pw_max, pw_max) - torch.nn.init.uniform_(self.depthwise_conv.weight.data, -dw_max, dw_max) - if self.bias: - torch.nn.init.uniform_(self.depthwise_conv.bias.data, -dw_max, dw_max) - torch.nn.init.uniform_(self.pointwise_conv2.weight.data, -pw_max, pw_max) - if self.bias: - torch.nn.init.uniform_(self.pointwise_conv2.bias.data, -pw_max, pw_max) - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). - """ - if self.adaptive_scale: - x = self.ada_scale * x + self.ada_bias - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. 
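The `lorder` logic above switches between left-only padding (causal) and symmetric padding for the depthwise convolution; both keep the time length unchanged. A small sketch with placeholder sizes and the default `kernel_size=15`:

```python
import torch
import torch.nn as nn

kernel_size, channels, T = 15, 4, 20           # example sizes
x = torch.randn(1, channels, T)

# Causal: no built-in padding; pad (kernel_size - 1) frames on the left only.
lorder = kernel_size - 1
causal_conv = nn.Conv1d(channels, channels, kernel_size, padding=0, groups=channels)
y_causal = causal_conv(nn.functional.pad(x, (lorder, 0)))

# Symmetric: pad (kernel_size - 1) // 2 on both sides inside the conv.
sym_conv = nn.Conv1d(channels, channels, kernel_size,
                     padding=(kernel_size - 1) // 2, groups=channels)
y_sym = sym_conv(x)

print(y_causal.shape, y_sym.shape)             # both (1, 4, 20): time length preserved
```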
- new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/encoder.py deleted file mode 100644 index f13038321ae6c07d484a617aee7d83ed07742510..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/encoder.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -import torch -import torch.nn as nn -from typing import Tuple, Union, Optional, List -from wenet.squeezeformer.subsampling \ - import DepthwiseConv2dSubsampling4, TimeReductionLayer1D, \ - TimeReductionLayer2D, TimeReductionLayerStream -from wenet.squeezeformer.encoder_layer import SqueezeformerEncoderLayer -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.attention import MultiHeadedAttention -from wenet.squeezeformer.attention import RelPositionMultiHeadedAttention -from wenet.squeezeformer.positionwise_feed_forward \ - import PositionwiseFeedForward -from wenet.squeezeformer.convolution import ConvolutionModule -from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask -from wenet.utils.common import get_activation - - -class SqueezeformerEncoder(nn.Module): - def __init__( - self, - input_size: int = 80, - encoder_dim: int = 256, - output_size: int = 256, - attention_heads: int = 4, - num_blocks: int = 12, - reduce_idx: Optional[Union[int, List[int]]] = 5, - recover_idx: Optional[Union[int, List[int]]] = 11, - feed_forward_expansion_factor: int = 4, - dw_stride: bool = False, - input_dropout_rate: float = 0.1, - pos_enc_layer_type: str = "rel_pos", - time_reduction_layer_type: str = "conv1d", - do_rel_shift: bool = True, - feed_forward_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.1, - cnn_module_kernel: int = 31, - cnn_norm_type: str = "batch_norm", - dropout: float = 0.1, - causal: bool = False, - adaptive_scale: bool = True, - activation_type: str = "swish", - init_weights: bool = True, - global_cmvn: torch.nn.Module = None, - normalize_before: bool = False, - use_dynamic_chunk: bool = False, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_left_chunk: bool = 
False - ): - """Construct SqueezeformerEncoder - - Args: - input_size to use_dynamic_chunk, see in Transformer BaseEncoder. - encoder_dim (int): The hidden dimension of encoder layer. - output_size (int): The output dimension of final projection layer. - attention_heads (int): Num of attention head in attention module. - num_blocks (int): Num of encoder layers. - reduce_idx Optional[Union[int, List[int]]]: - reduce layer index, from 40ms to 80ms per frame. - recover_idx Optional[Union[int, List[int]]]: - recover layer index, from 80ms to 40ms per frame. - feed_forward_expansion_factor (int): Enlarge coefficient of FFN. - dw_stride (bool): Whether do depthwise convolution - on subsampling module. - input_dropout_rate (float): Dropout rate of input projection layer. - pos_enc_layer_type (str): Self attention type. - time_reduction_layer_type (str): Conv1d or Conv2d reduction layer. - do_rel_shift (bool): Whether to do relative shift - operation on rel-attention module. - cnn_module_kernel (int): Kernel size of CNN module. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - adaptive_scale (bool): Whether to use adaptive scale. - init_weights (bool): Whether to initialize weights. - causal (bool): whether to use causal convolution or not. - """ - super(SqueezeformerEncoder, self).__init__() - self.global_cmvn = global_cmvn - self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \ - if type(reduce_idx) == int else reduce_idx - self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \ - if type(recover_idx) == int else recover_idx - self.check_ascending_list() - if reduce_idx is None: - self.time_reduce = None - else: - if recover_idx is None: - self.time_reduce = 'normal' # no recovery at the end - else: - self.time_reduce = 'recover' # recovery at the end - assert len(self.reduce_idx) == len(self.recover_idx) - self.reduce_stride = 2 - self._output_size = output_size - self.normalize_before = normalize_before - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - self.pos_enc_layer_type = pos_enc_layer_type - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - encoder_dim, - attention_dropout_rate, - do_rel_shift, - adaptive_scale, - init_weights - ) - - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - encoder_dim, - encoder_dim * feed_forward_expansion_factor, - feed_forward_dropout_rate, - activation, - adaptive_scale, - init_weights - ) - - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = ( - encoder_dim, cnn_module_kernel, activation, - cnn_norm_type, causal, True, adaptive_scale, init_weights) - - self.embed = DepthwiseConv2dSubsampling4( - 1, encoder_dim, - RelPositionalEncoding(encoder_dim, dropout_rate=0.1), - dw_stride, - input_size, - input_dropout_rate, - init_weights - ) - - self.preln = nn.LayerNorm(encoder_dim) - self.encoders = torch.nn.ModuleList([SqueezeformerEncoderLayer( - encoder_dim, - 
encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - convolution_layer(*convolution_layer_args), - positionwise_layer(*positionwise_layer_args), - normalize_before, - dropout, - concat_after) for _ in range(num_blocks) - ]) - if time_reduction_layer_type == 'conv1d': - time_reduction_layer = TimeReductionLayer1D - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - elif time_reduction_layer_type == 'stream': - time_reduction_layer = TimeReductionLayerStream - time_reduction_layer_args = { - 'channel': encoder_dim, - 'out_dim': encoder_dim, - } - else: - time_reduction_layer = TimeReductionLayer2D - time_reduction_layer_args = {'encoder_dim': encoder_dim} - - self.time_reduction_layer = time_reduction_layer(**time_reduction_layer_args) - self.time_recover_layer = nn.Linear(encoder_dim, encoder_dim) - self.final_proj = None - if output_size != encoder_dim: - self.final_proj = nn.Linear(encoder_dim, output_size) - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - xs_lens = mask_pad.squeeze(1).sum(1) - xs = self.preln(xs) - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - for i, layer in enumerate(self.encoders): - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, chunk_masks, pos_emb, mask_pad)) - xs, xs_lens, chunk_masks, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, chunk_masks, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_chunk_masks, - recover_pos_emb, recover_mask_pad) \ - = recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - chunk_masks = recover_chunk_masks - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - xs = xs.masked_fill(~mask_pad[:, 0, :].unsqueeze(-1), 0.0) - - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return xs, masks - - def check_ascending_list(self): - if self.reduce_idx is not None: - assert self.reduce_idx == sorted(self.reduce_idx), \ - "reduce_idx should be int or ascending list" - if self.recover_idx is not None: - assert self.recover_idx == sorted(self.recover_idx), \ - "recover_idx should be int or ascending list" - - def calculate_downsampling_factor(self, i: int) -> int: - if self.reduce_idx is None: - return 1 - else: - reduce_exp, recover_exp = 0, 0 - for exp, rd_idx in enumerate(self.reduce_idx): - if i >= rd_idx: - reduce_exp = exp + 1 - if self.recover_idx is not None: - for exp, rc_idx 
in enumerate(self.recover_idx): - if i >= rc_idx: - recover_exp = exp + 1 - return int(2 ** (reduce_exp - recover_exp)) - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. - - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - - r_att_cache = [] - r_cnn_cache = [] - - mask_pad = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - mask_pad = mask_pad.unsqueeze(1) - max_att_len: int = 0 - recover_activations: \ - List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]] = [] - index = 0 - xs_lens = torch.tensor([xs.size(1)], device=xs.device, dtype=torch.int) - xs = self.preln(xs) - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - if self.reduce_idx is not None: - if self.time_reduce is not None and i in self.reduce_idx: - recover_activations.append((xs, att_mask, pos_emb, mask_pad)) - xs, xs_lens, att_mask, mask_pad = \ - self.time_reduction_layer(xs, xs_lens, att_mask, mask_pad) - pos_emb = pos_emb[:, ::2, :] - index += 1 - - if self.recover_idx is not None: - if self.time_reduce == 'recover' and i in self.recover_idx: - index -= 1 - (recover_tensor, recover_att_mask, - recover_pos_emb, recover_mask_pad) \ - = 
recover_activations[index] - # recover output length for ctc decode - xs = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2) - xs = self.time_recover_layer(xs) - recoverd_t = recover_tensor.size(1) - xs = recover_tensor + xs[:, :recoverd_t, :].contiguous() - att_mask = recover_att_mask - pos_emb = recover_pos_emb - mask_pad = recover_mask_pad - if att_mask.size(1) != 0: - xs = xs.masked_fill(~att_mask[:, 0, :].unsqueeze(-1), 0.0) - - factor = self.calculate_downsampling_factor(i) - - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1][:, :, ::factor, :] - [:, :, :pos_emb.size(1) - xs.size(1), :] if - elayers > 0 else att_cache[:, :, ::factor, :], - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - cached_att \ - = new_att_cache[:, :, next_cache_start // factor:, :] - cached_cnn = new_cnn_cache.unsqueeze(0) - cached_att = cached_att.unsqueeze(3).\ - repeat(1, 1, 1, factor, 1).flatten(2, 3) - if i == 0: - # record length for the first block as max length - max_att_len = cached_att.size(2) - r_att_cache.append(cached_att[:, :, :max_att_len, :]) - r_cnn_cache.append(cached_cnn) - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - if self.final_proj is not None: - xs = self.final_proj(xs) - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
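The recover step above doubles the time axis by repeating each reduced frame, then trims back to the pre-reduction length before adding the saved activation. A toy tensor makes the `unsqueeze/repeat/flatten` trick explicit (sizes are placeholders):

```python
import torch

B, T_reduced, D = 1, 5, 3
xs = torch.arange(B * T_reduced * D, dtype=torch.float).view(B, T_reduced, D)

upsampled = xs.unsqueeze(2).repeat(1, 1, 2, 1).flatten(1, 2)   # (B, 2 * T_reduced, D)
print(upsampled.shape)                                         # torch.Size([1, 10, 3])
print(torch.equal(upsampled[:, 0], upsampled[:, 1]))           # True: each frame is duplicated
```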
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = \ - self.forward_chunk( - chunk_xs, offset, required_cache_size, - att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/encoder_layer.py deleted file mode 100644 index 3c6bdd44a20447cea91c0f965c666b844f4264be..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/encoder_layer.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""SqueezeformerEncoderLayer definition.""" - -import torch -import torch.nn as nn -from typing import Optional, Tuple - - -class SqueezeformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward1 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - feed_forward2 (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. 
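The `normalize_before` switch documented above is the usual pre-norm/post-norm residual pattern. A minimal sketch of that pattern (illustrative only, not the deleted implementation):

```python
import torch
import torch.nn as nn

def sublayer(x: torch.Tensor,
             module: nn.Module,
             norm: nn.LayerNorm,
             dropout: nn.Dropout,
             normalize_before: bool) -> torch.Tensor:
    # Pre-norm: normalize the input, then add the residual.
    # Post-norm: add the residual first, then normalize the sum.
    residual = x
    if normalize_before:
        x = norm(x)
    x = residual + dropout(module(x))
    if not normalize_before:
        x = norm(x)
    return x
```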
- """ - - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward1: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - feed_forward2: Optional[nn.Module] = None, - normalize_before: bool = False, - dropout_rate: float = 0.1, - concat_after: bool = False, - ): - super(SqueezeformerEncoderLayer, self).__init__() - self.size = size - self.self_attn = self_attn - self.layer_norm1 = nn.LayerNorm(size) - self.ffn1 = feed_forward1 - self.layer_norm2 = nn.LayerNorm(size) - self.conv_module = conv_module - self.layer_norm3 = nn.LayerNorm(size) - self.ffn2 = feed_forward2 - self.layer_norm4 = nn.LayerNorm(size) - self.normalize_before = normalize_before - self.dropout = nn.Dropout(dropout_rate) - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # self attention module - residual = x - if self.normalize_before: - x = self.layer_norm1(x) - x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.layer_norm1(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm2(x) - x = self.ffn1(x) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm2(x) - - # conv module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - residual = x - if self.normalize_before: - x = self.layer_norm3(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm3(x) - - # ffn module - residual = x - if self.normalize_before: - x = self.layer_norm4(x) - x = self.ffn2(x) - # we do not use dropout here since it is inside feed forward function - x = residual + self.dropout(x) - if not self.normalize_before: - x = self.layer_norm4(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/positionwise_feed_forward.py deleted file mode 100644 index 289062dcf3189f79a5ebb206990160d8665c613c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/positionwise_feed_forward.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. - activation (torch.nn.Module): Activation function - """ - - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU(), - adaptive_scale: bool = False, - init_weights: bool = False - ): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.idim = idim - self.hidden_units = hidden_units - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - self.ada_scale = None - self.ada_bias = None - self.adaptive_scale = adaptive_scale - self.ada_scale = torch.nn.Parameter( - torch.ones([1, 1, idim]), requires_grad=adaptive_scale) - self.ada_bias = torch.nn.Parameter( - torch.zeros([1, 1, idim]), requires_grad=adaptive_scale) - if init_weights: - self.init_weights() - - def init_weights(self): - ffn1_max = self.idim ** -0.5 - ffn2_max = self.hidden_units ** -0.5 - torch.nn.init.uniform_(self.w_1.weight.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_1.bias.data, -ffn1_max, ffn1_max) - torch.nn.init.uniform_(self.w_2.weight.data, -ffn2_max, ffn2_max) - torch.nn.init.uniform_(self.w_2.bias.data, -ffn2_max, ffn2_max) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - if self.adaptive_scale: - xs = self.ada_scale * xs + self.ada_bias - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/subsampling.py deleted file mode 100644 index fdb0101d6ebb54c42e710bbb0f35a6f7615ca567..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/squeezeformer/subsampling.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2022 Ximalaya Inc. (authors: Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
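As a quick illustration of the position-wise feed-forward module defined above: it preserves the feature dimension, and with `adaptive_scale=True` the input is first rescaled by learnable per-feature parameters before the two linear layers. A hedged usage sketch, assuming the class shown above is available in scope and using made-up shapes:

```python
import torch

ffn = PositionwiseFeedForward(idim=256, hidden_units=1024,
                              dropout_rate=0.1, adaptive_scale=True)
xs = torch.randn(2, 50, 256)   # (batch, length, idim)
ys = ffn(xs)
assert ys.shape == xs.shape    # output keeps the input dimension
```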
-# Modified from Squeezeformer(https://github.com/kssteven418/Squeezeformer) -# Squeezeformer(https://github.com/upskyy/Squeezeformer) -# NeMo(https://github.com/NVIDIA/NeMo) - -"""DepthwiseConv2dSubsampling4 and TimeReductionLayer definition.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from wenet.transformer.subsampling import BaseSubsampling -from typing import Tuple -from wenet.squeezeformer.conv2d import Conv2dValid - - -class DepthwiseConv2dSubsampling4(BaseSubsampling): - """Depthwise Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - pos_enc_class (nn.Module): position encoding class. - dw_stride (int): Whether do depthwise convolution. - input_size (int): filter bank dimension. - - """ - - def __init__( - self, idim: int, odim: int, - pos_enc_class: torch.nn.Module, - dw_stride: bool = False, - input_size: int = 80, - input_dropout_rate: float = 0.1, - init_weights: bool = True - ): - super(DepthwiseConv2dSubsampling4, self).__init__() - self.idim = idim - self.odim = odim - self.pw_conv = nn.Conv2d( - in_channels=idim, out_channels=odim, kernel_size=3, stride=2) - self.act1 = nn.ReLU() - self.dw_conv = nn.Conv2d( - in_channels=odim, out_channels=odim, kernel_size=3, stride=2, - groups=odim if dw_stride else 1 - ) - self.act2 = nn.ReLU() - self.pos_enc = pos_enc_class - self.input_proj = nn.Sequential( - nn.Linear( - odim * (((input_size - 1) // 2 - 1) // 2), odim), - nn.Dropout(p=input_dropout_rate), - ) - if init_weights: - linear_max = (odim * input_size / 4) ** -0.5 - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.weight'], -linear_max, linear_max) - torch.nn.init.uniform_( - self.input_proj.state_dict()['0.bias'], -linear_max, linear_max) - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: int = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.pw_conv(x) - x = self.act1(x) - x = self.dw_conv(x) - x = self.act2(x) - b, c, t, f = x.size() - x = x.permute(0, 2, 1, 3) - x = x.contiguous().view(b, t, c * f) - x, pos_emb = self.pos_enc(x, offset) - x = self.input_proj(x) - return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2] - - -class TimeReductionLayer1D(nn.Module): - """ - Modified NeMo, - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. 
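The stride-2 time reduction described above halves the time axis of the features and masks and updates the lengths as floor((len + 1) / 2). A small sketch of that bookkeeping (sizes are illustrative):

```python
import torch

stride = 2
xs_lens = torch.tensor([100, 57, 64])
mask_pad = torch.ones(3, 1, 100, dtype=torch.bool)   # (B, 1, T)

new_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc')
new_mask_pad = mask_pad[:, :, ::stride]

print(new_lens.tolist())       # [50, 29, 32]
print(new_mask_pad.shape)      # torch.Size([3, 1, 50])
```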
- """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 5, stride: int = 2): - super(TimeReductionLayer1D, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - self.padding = max(0, self.kernel_size - self.stride) - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=self.padding, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. - if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayer2D(nn.Module): - def __init__( - self, kernel_size: int = 5, stride: int = 2, encoder_dim: int = 256): - super(TimeReductionLayer2D, self).__init__() - self.encoder_dim = encoder_dim - self.kernel_size = kernel_size - self.dw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=(kernel_size, 1), - stride=stride, - valid_trigy=True - ) - self.pw_conv = Conv2dValid( - in_channels=encoder_dim, - out_channels=encoder_dim, - kernel_size=1, - stride=1, - valid_trigx=False, - valid_trigy=False, - ) - - self.kernel_size = kernel_size - self.stride = stride - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.encoder_dim ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward( - self, xs: torch.Tensor, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - xs = xs.masked_fill(mask_pad.transpose(1, 2).eq(0), 0.0) - xs = xs.unsqueeze(2) - padding1 = self.kernel_size - self.stride - xs = F.pad(xs, (0, 0, 0, 0, 0, padding1, 0, 0), - mode='constant', value=0.) 
- xs = self.dw_conv(xs.permute(0, 3, 1, 2)) - xs = self.pw_conv(xs).permute(0, 3, 2, 1).squeeze(1).contiguous() - tmp_length = xs.size(1) - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - padding2 = max(0, (xs_lens.max() - tmp_length).data.item()) - batch_size, hidden = xs.size(0), xs.size(-1) - dummy_pad = torch.zeros(batch_size, padding2, hidden, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - mask = mask[:, ::2, ::2] - mask_pad = mask_pad[:, :, ::2] - return xs, xs_lens, mask, mask_pad - - -class TimeReductionLayerStream(nn.Module): - """ - Squeezeformer Time Reduction procedure. - Downsamples the audio by `stride` in the time dimension. - Args: - channel (int): input dimension of - MultiheadAttentionMechanism and PositionwiseFeedForward - out_dim (int): Output dimension of the module. - kernel_size (int): Conv kernel size for - depthwise convolution in convolution module - stride (int): Downsampling factor in time dimension. - """ - - def __init__(self, channel: int, out_dim: int, - kernel_size: int = 1, stride: int = 2): - super(TimeReductionLayerStream, self).__init__() - - self.channel = channel - self.out_dim = out_dim - self.kernel_size = kernel_size - self.stride = stride - - self.dw_conv = nn.Conv1d( - in_channels=channel, - out_channels=channel, - kernel_size=kernel_size, - stride=stride, - padding=0, - groups=channel, - ) - - self.pw_conv = nn.Conv1d( - in_channels=channel, out_channels=out_dim, - kernel_size=1, stride=1, padding=0, groups=1, - ) - - self.init_weights() - - def init_weights(self): - dw_max = self.kernel_size ** -0.5 - pw_max = self.channel ** -0.5 - torch.nn.init.uniform_(self.dw_conv.weight, -dw_max, dw_max) - torch.nn.init.uniform_(self.dw_conv.bias, -dw_max, dw_max) - torch.nn.init.uniform_(self.pw_conv.weight, -pw_max, pw_max) - torch.nn.init.uniform_(self.pw_conv.bias, -pw_max, pw_max) - - def forward(self, xs, xs_lens: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ): - xs = xs.transpose(1, 2) # [B, C, T] - xs = xs.masked_fill(mask_pad.eq(0), 0.0) - - xs = self.dw_conv(xs) - xs = self.pw_conv(xs) - - xs = xs.transpose(1, 2) # [B, T, C] - - B, T, D = xs.size() - mask = mask[:, ::self.stride, ::self.stride] - mask_pad = mask_pad[:, :, ::self.stride] - L = mask_pad.size(-1) - # For JIT exporting, we remove F.pad operator. 
- if L - T < 0: - xs = xs[:, :L - T, :].contiguous() - else: - dummy_pad = torch.zeros(B, L - T, D, device=xs.device) - xs = torch.cat([xs, dummy_pad], dim=1) - - xs_lens = torch.div(xs_lens + 1, 2, rounding_mode='trunc') - return xs, xs_lens, mask, mask_pad diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/joint.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/joint.py deleted file mode 100644 index f7cbaf62ee0bf4ffa127e5bbf4a49a64c2378495..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/joint.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Optional - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation - - -class TransducerJoint(torch.nn.Module): - - def __init__(self, - voca_size: int, - enc_output_size: int, - pred_output_size: int, - join_dim: int, - prejoin_linear: bool = True, - postjoin_linear: bool = False, - joint_mode: str = 'add', - activation: str = "tanh"): - assert check_argument_types() - # TODO(Mddct): concat in future - assert joint_mode in ['add'] - super().__init__() - - self.activatoin = get_activation(activation) - self.prejoin_linear = prejoin_linear - self.postjoin_linear = postjoin_linear - self.joint_mode = joint_mode - - if not self.prejoin_linear and not self.postjoin_linear: - assert enc_output_size == pred_output_size == join_dim - # torchscript compatibility - self.enc_ffn: Optional[nn.Linear] = None - self.pred_ffn: Optional[nn.Linear] = None - if self.prejoin_linear: - self.enc_ffn = nn.Linear(enc_output_size, join_dim) - self.pred_ffn = nn.Linear(pred_output_size, join_dim) - # torchscript compatibility - self.post_ffn: Optional[nn.Linear] = None - if self.postjoin_linear: - self.post_ffn = nn.Linear(join_dim, join_dim) - - self.ffn_out = nn.Linear(join_dim, voca_size) - - def forward(self, enc_out: torch.Tensor, pred_out: torch.Tensor): - """ - Args: - enc_out (torch.Tensor): [B, T, E] - pred_out (torch.Tensor): [B, T, P] - Return: - [B,T,U,V] - """ - if (self.prejoin_linear and self.enc_ffn is not None - and self.pred_ffn is not None): - enc_out = self.enc_ffn(enc_out) # [B,T,E] -> [B,T,V] - pred_out = self.pred_ffn(pred_out) - - enc_out = enc_out.unsqueeze(2) # [B,T,V] -> [B,T,1,V] - pred_out = pred_out.unsqueeze(1) # [B,U,V] -> [B,1 U, V] - - # TODO(Mddct): concat joint - _ = self.joint_mode - out = enc_out + pred_out # [B,T,U,V] - - if self.postjoin_linear and self.post_ffn is not None: - out = self.post_ffn(out) - - out = self.activatoin(out) - out = self.ffn_out(out) - return out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/predictor.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/predictor.py deleted file mode 100644 index 600e97a9d83646047ec3fc14f3087bd4df761c68..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/predictor.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import List, Optional, Tuple - -import torch -from torch import nn -from typeguard import check_argument_types -from wenet.utils.common import get_activation, get_rnn - - -def ApplyPadding(input, padding, pad_value) -> torch.Tensor: - """ - Args: - input: [bs, max_time_step, dim] - padding: [bs, max_time_step] - """ - return padding * pad_value + input * (1 - padding) - - -class PredictorBase(torch.nn.Module): - - # NOTE(Mddct): We can use ABC 
abstract here, but - # keep this class simple enough for now - def __init__(self) -> None: - super().__init__() - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - _, _, _ = batch_size, method, device - raise NotImplementedError("this is a base precictor") - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - _ = cache - raise NotImplementedError("this is a base precictor") - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ): - _, _, = input, cache - raise NotImplementedError("this is a base precictor") - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - _, _, _, = input, padding, cache - raise NotImplementedError("this is a base precictor") - - -class RNNPredictor(PredictorBase): - - def __init__(self, - voca_size: int, - embed_size: int, - output_size: int, - embed_dropout: float, - hidden_size: int, - num_layers: int, - bias: bool = True, - rnn_type: str = "lstm", - dropout: float = 0.1) -> None: - assert check_argument_types() - super().__init__() - self.n_layers = num_layers - self.hidden_size = hidden_size - # disable rnn base out projection - self.embed = nn.Embedding(voca_size, embed_size) - self.dropout = nn.Dropout(embed_dropout) - # NOTE(Mddct): rnn base from torch not support layer norm - # will add layer norm and prune value in cell and layer - # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py - self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size, - hidden_size=hidden_size, - num_layers=num_layers, - bias=bias, - batch_first=True, - dropout=dropout) - self.projection = nn.Linear(hidden_size, output_size) - - def forward( - self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): [batch, max_time). - padding (torch.Tensor): [batch, max_time] - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - Returns: - output: [batch, max_time, output_size] - """ - - # NOTE(Mddct): we don't use pack input format - embed = self.embed(input) # [batch, max_time, emb_size] - embed = self.dropout(embed) - states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - if cache is None: - state = self.init_state(batch_size=input.size(0), - device=input.device) - states = (state[0], state[1]) - else: - assert len(cache) == 2 - states = (cache[0], cache[1]) - out, (m, c) = self.rnn(embed, states) - out = self.projection(out) - - # NOTE(Mddct): Although we don't use staate in transducer - # training forward, we need make it right for padding value - # so we create forward_step for infering, forward for training - _, _ = m, c - return out - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache: [state_m, state_c] - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - Returns: - new_cache: [[state_m_1, state_c_1], [state_m_2, state_c_2]...] 
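A small sketch of the cache layout this docstring describes (sizes are assumed for the example): the LSTM states are stacked as (num_layers, batch, hidden) and split along the batch dimension into one [state_m, state_c] pair per hypothesis.

```python
import torch

n_layers, batch, hidden = 2, 3, 8
state_ms = torch.zeros(n_layers, batch, hidden)
state_cs = torch.zeros(n_layers, batch, hidden)

# One [state_m, state_c] pair per utterance/hypothesis.
per_hyp = [[m, c] for m, c in zip(torch.split(state_ms, 1, dim=1),
                                  torch.split(state_cs, 1, dim=1))]
print(len(per_hyp), per_hyp[0][0].shape)   # 3 torch.Size([2, 1, 8])
```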
- """ - assert len(cache) == 2 - state_ms = cache[0] - state_cs = cache[1] - - assert state_ms.size(1) == state_cs.size(1) - - new_cache: List[List[torch.Tensor]] = [] - for state_m, state_c in zip(torch.split(state_ms, 1, dim=1), - torch.split(state_cs, 1, dim=1)): - new_cache.append([state_m, state_c]) - return new_cache - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[state_m_1, state_c_1], [state_m_1, state_c_1]...] - - Returns: - new_caceh: [state_ms, state_cs], - state_ms: [1*n_layers, bs, ...] - state_cs: [1*n_layers, bs, ...] - """ - state_ms = torch.cat([states[0] for states in cache], dim=1) - state_cs = torch.cat([states[1] for states in cache], dim=1) - return [state_ms, state_cs] - - def init_state( - self, - batch_size: int, - device: torch.device, - method: str = "zero", - ) -> List[torch.Tensor]: - assert batch_size > 0 - # TODO(Mddct): xavier init method - _ = method - return [ - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device), - torch.zeros(1 * self.n_layers, - batch_size, - self.hidden_size, - device=device) - ] - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache : rnn predictor cache[0] == state_m - cache[1] == state_c - """ - assert len(cache) == 2 - state_m, state_c = cache[0], cache[1] - embed = self.embed(input) # [batch, 1, emb_size] - embed = self.dropout(embed) - out, (m, c) = self.rnn(embed, (state_m, state_c)) - - out = self.projection(out) - m = ApplyPadding(m, padding.unsqueeze(0), state_m) - c = ApplyPadding(c, padding.unsqueeze(0), state_c) - - return (out, [m, c]) - - -class EmbeddingPredictor(PredictorBase): - """Embedding predictor - - Described in: - https://arxiv.org/pdf/2109.07513.pdf - - embed-> proj -> layer norm -> swish - """ - - def __init__(self, - voca_size: int, - embed_size: int, - embed_dropout: float, - n_head: int, - history_size: int = 2, - activation: str = "swish", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - - assert check_argument_types() - super().__init__() - # multi head - self.num_heads = n_head - self.embed_size = embed_size - self.context_size = history_size + 1 - self.pos_embed = torch.nn.Linear(embed_size * self.context_size, - self.num_heads, - bias=bias) - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.ffn = nn.Linear(self.embed_size, self.embed_size) - self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - _ = method - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device), - ] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] 
- """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - - input = input.unfold(1, self.context_size, 1).permute( - 0, 1, 3, 2) # [bs, seq_len, context_size, embed] - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - # broadcast dot attenton - input_expand = input.unsqueeze( - 2) # [bs, seq_len, 1, context_size, embed] - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - - # [bs, seq_len, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, seq_len, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, seq_len, num_heads, embed] - output = output.sum(dim=2) # [bs, seq_len, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - return output - - def forward_step( - self, - input: torch.Tensor, - padding: torch.Tensor, - cache: List[torch.Tensor], - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input_expand = context_input.unsqueeze(1).unsqueeze( - 2) # [bs, 1, 1, context_size, embed] - - # multi head pos: [n_head, embed, context_size] - multi_head_pos = self.pos_embed.weight.view(self.num_heads, - self.embed_size, - self.context_size) - - multi_head_pos = multi_head_pos.permute( - 0, 2, 1) # [num_heads, context_size, embed] - # [bs, 1, num_heads, context_size, embed] - weight = input_expand * multi_head_pos - weight = weight.sum(dim=-1, keepdim=False).unsqueeze( - 3) # [bs, 1, num_heads, 1, context_size] - output = weight.matmul(input_expand).squeeze( - dim=3) # [bs, 1, num_heads, embed] - output = output.sum(dim=2) # [bs, 1, embed] - output = output / (self.num_heads * self.context_size) - - output = self.ffn(output) - output = self.norm(output) - output = self.activatoin(output) - new_cache = context_input[:, 1:, :] - # TODO(Mddct): we need padding new_cache in future - # new_cache = ApplyPadding(history, padding, new_cache) - return (output, [new_cache]) - - -class ConvPredictor(PredictorBase): - - def __init__(self, - voca_size: 
int, - embed_size: int, - embed_dropout: float, - history_size: int = 2, - activation: str = "relu", - bias: bool = False, - layer_norm_epsilon: float = 1e-5) -> None: - assert check_argument_types() - super().__init__() - - assert history_size >= 0 - self.embed_size = embed_size - self.context_size = history_size + 1 - self.embed = nn.Embedding(voca_size, self.embed_size) - self.embed_dropout = nn.Dropout(p=embed_dropout) - self.conv = nn.Conv1d(in_channels=embed_size, - out_channels=embed_size, - kernel_size=self.context_size, - padding=0, - groups=embed_size, - bias=bias) - self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon) - self.activatoin = get_activation(activation) - - def init_state(self, - batch_size: int, - device: torch.device, - method: str = "zero") -> List[torch.Tensor]: - assert batch_size > 0 - assert method == "zero" - return [ - torch.zeros(batch_size, - self.context_size - 1, - self.embed_size, - device=device) - ] - - def cache_to_batch(self, - cache: List[List[torch.Tensor]]) -> List[torch.Tensor]: - """ - Args: - cache : [[history_1], [history_2], [history3]...] - - Returns: - new_caceh: [history], - history: [bs, ...] - """ - history = torch.cat([h[0] for h in cache], dim=0) - return [history] - - def batch_to_cache(self, - cache: List[torch.Tensor]) -> List[List[torch.Tensor]]: - """ - Args: - cache : [history] - history: [bs, ...] - Returns: - new_ache : [[history_1], [history_2], [history_3]...] - """ - assert len(cache) == 1 - cache_0 = cache[0] - history: List[List[torch.Tensor]] = [] - for h in torch.split(cache_0, 1, dim=0): - history.append([h]) - return history - - def forward(self, - input: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None): - """ forward for training - """ - input = self.embed(input) # [bs, seq_len, embed] - input = self.embed_dropout(input) - if cache is None: - zeros = self.init_state(input.size(0), device=input.device)[0] - else: - assert len(cache) == 1 - zeros = cache[0] - - input = torch.cat((zeros, input), - dim=1) # [bs, context_size-1 + seq_len, embed] - input = input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - return out - - def forward_step( - self, input: torch.Tensor, padding: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """ forward step for inference - Args: - input (torch.Tensor): [batch_size, time_step=1] - padding (torch.Tensor): [batch_size,1], 1 is padding value - cache: for embedding predictor, cache[0] == history - """ - assert input.size(1) == 1 - assert len(cache) == 1 - history = cache[0] - assert history.size(1) == self.context_size - 1 - input = self.embed(input) # [bs, 1, embed] - input = self.embed_dropout(input) - context_input = torch.cat((history, input), dim=1) - input = context_input.permute(0, 2, 1) - out = self.conv(input).permute(0, 2, 1) - out = self.activatoin(self.norm(out)) - - new_cache = context_input[:, 1:, :] - # TODO(Mddct): apply padding in future - return (out, [new_cache]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/search/greedy_search.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/search/greedy_search.py deleted file mode 100644 index ef7354562b6617b7be33bf32d673117eb1d3d547..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/search/greedy_search.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import List - -import torch - - -def 
basic_greedy_search( - model: torch.nn.Module, - encoder_out: torch.Tensor, - encoder_out_lens: torch.Tensor, - n_steps: int = 64, -) -> List[List[int]]: - # fake padding - padding = torch.zeros(1, 1).to(encoder_out.device) - # sos - pred_input_step = torch.tensor([model.blank]).reshape(1, 1) - cache = model.predictor.init_state(1, - method="zero", - device=encoder_out.device) - new_cache: List[torch.Tensor] = [] - t = 0 - hyps = [] - prev_out_nblk = True - pred_out_step = None - per_frame_max_noblk = n_steps - per_frame_noblk = 0 - while t < encoder_out_lens: - encoder_out_step = encoder_out[:, t:t + 1, :] # [1, 1, E] - if prev_out_nblk: - step_outs = model.predictor.forward_step(pred_input_step, padding, - cache) # [1, 1, P] - pred_out_step, new_cache = step_outs[0], step_outs[1] - - joint_out_step = model.joint(encoder_out_step, - pred_out_step) # [1,1,v] - joint_out_probs = joint_out_step.log_softmax(dim=-1) - - joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # [] - if joint_out_max != model.blank: - hyps.append(joint_out_max.item()) - prev_out_nblk = True - per_frame_noblk = per_frame_noblk + 1 - pred_input_step = joint_out_max.reshape(1, 1) - # state_m, state_c = clstate_out_m, state_out_c - cache = new_cache - - if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk: - if joint_out_max == model.blank: - prev_out_nblk = False - # TODO(Mddct): make t in chunk for streamming - # or t should't be too lang to predict none blank - t = t + 1 - per_frame_noblk = 0 - - return [hyps] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/search/prefix_beam_search.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/search/prefix_beam_search.py deleted file mode 100644 index f00917717c16a73916586708ebfede54fa02a21f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/search/prefix_beam_search.py +++ /dev/null @@ -1,148 +0,0 @@ -from typing import List, Tuple - -import torch -from wenet.utils.common import log_add - - -class Sequence(): - - __slots__ = {'hyp', 'score', 'cache'} - - def __init__( - self, - hyp: List[torch.Tensor], - score, - cache: List[torch.Tensor], - ): - self.hyp = hyp - self.score = score - self.cache = cache - - -class PrefixBeamSearch(): - - def __init__(self, encoder, predictor, joint, ctc, blank): - self.encoder = encoder - self.predictor = predictor - self.joint = joint - self.ctc = ctc - self.blank = blank - - def forward_decoder_one_step( - self, encoder_x: torch.Tensor, pre_t: torch.Tensor, - cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - padding = torch.zeros(pre_t.size(0), 1, device=encoder_x.device) - pre_t, new_cache = self.predictor.forward_step(pre_t.unsqueeze(-1), - padding, cache) - x = self.joint(encoder_x, pre_t) # [beam, 1, 1, vocab] - x = x.log_softmax(dim=-1) - return x, new_cache - - def prefix_beam_search(self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7): - """prefix beam search - also see wenet.transducer.transducer.beam_search - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - assert batch_size == 1 - - # 1. 
Encoder - encoder_out, _ = self.encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - - ctc_probs = self.ctc.log_softmax(encoder_out).squeeze(0) - beam_init: List[Sequence] = [] - - # 2. init beam using Sequence to save beam unit - cache = self.predictor.init_state(1, method="zero", device=device) - beam_init.append(Sequence(hyp=[self.blank], score=0.0, cache=cache)) - # 3. start decoding (notice: we use breathwise first searching) - # !!!! In this decoding method: one frame do not output multi units. !!!! - # !!!! Experiments show that this strategy has little impact !!!! - for i in range(maxlen): - # 3.1 building input - # decoder taking the last token to predict the next token - input_hyp = [s.hyp[-1] for s in beam_init] - input_hyp_tensor = torch.tensor(input_hyp, - dtype=torch.int, - device=device) - # building statement from beam - cache_batch = self.predictor.cache_to_batch( - [s.cache for s in beam_init]) - # build score tensor to do torch.add() function - scores = torch.tensor([s.score for s in beam_init]).to(device) - - # 3.2 forward decoder - logp, new_cache = self.forward_decoder_one_step( - encoder_out[:, i, :].unsqueeze(1), - input_hyp_tensor, - cache_batch, - ) # logp: (N, 1, 1, vocab_size) - logp = logp.squeeze(1).squeeze(1) # logp: (N, vocab_size) - new_cache = self.predictor.batch_to_cache(new_cache) - - # 3.3 shallow fusion for transducer score - # and ctc score where we can also add the LM score - logp = torch.log( - torch.add(transducer_weight * torch.exp(logp), - ctc_weight * torch.exp(ctc_probs[i].unsqueeze(0)))) - - # 3.4 first beam prune - top_k_logp, top_k_index = logp.topk(beam_size) # (N, N) - scores = torch.add(scores.unsqueeze(1), top_k_logp) - - # 3.5 generate new beam (N*N) - beam_A = [] - for j in range(len(beam_init)): - # update seq - base_seq = beam_init[j] - for t in range(beam_size): - # blank: only update the score - if top_k_index[j, t] == self.blank: - new_seq = Sequence(hyp=base_seq.hyp.copy(), - score=scores[j, t].item(), - cache=base_seq.cache) - - beam_A.append(new_seq) - # other unit: update hyp score statement and last - else: - hyp_new = base_seq.hyp.copy() - hyp_new.append(top_k_index[j, t].item()) - new_seq = Sequence(hyp=hyp_new, - score=scores[j, t].item(), - cache=new_cache[j]) - beam_A.append(new_seq) - - # 3.6 prefix fusion - fusion_A = [beam_A[0]] - for j in range(1, len(beam_A)): - s1 = beam_A[j] - if_do_append = True - for t in range(len(fusion_A)): - # notice: A_ can not fusion with A - if s1.hyp == fusion_A[t].hyp: - fusion_A[t].score = log_add( - [fusion_A[t].score, s1.score]) - if_do_append = False - break - if if_do_append: - fusion_A.append(s1) - - # 4. 
second pruned - fusion_A.sort(key=lambda x: x.score, reverse=True) - beam_init = fusion_A[:beam_size] - - return beam_init, encoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/transducer.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/transducer.py deleted file mode 100644 index 821a0946e621353a18bededbd93a658e83b0e0e2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transducer/transducer.py +++ /dev/null @@ -1,453 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torchaudio -from torch import nn -from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types -from wenet.transducer.predictor import PredictorBase -from wenet.transducer.search.greedy_search import basic_greedy_search -from wenet.transducer.search.prefix_beam_search import PrefixBeamSearch -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_blank, add_sos_eos, - reverse_pad_list) - - -class Transducer(ASRModel): - """Transducer-ctc-attention hybrid Encoder-Predictor-Decoder model""" - - def __init__( - self, - vocab_size: int, - blank: int, - encoder: nn.Module, - predictor: PredictorBase, - joint: nn.Module, - attention_decoder: Optional[Union[TransformerDecoder, - BiTransformerDecoder]] = None, - ctc: Optional[CTC] = None, - ctc_weight: float = 0, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - transducer_weight: float = 1.0, - attention_weight: float = 0.0, - ) -> None: - assert check_argument_types() - assert attention_weight + ctc_weight + transducer_weight == 1.0 - super().__init__(vocab_size, encoder, attention_decoder, ctc, - ctc_weight, ignore_id, reverse_weight, lsm_weight, - length_normalized_loss) - - self.blank = blank - self.transducer_weight = transducer_weight - self.attention_decoder_weight = 1 - self.transducer_weight - self.ctc_weight - - self.predictor = predictor - self.joint = joint - self.bs = None - - # Note(Mddct): decoder also means predictor in transducer, - # but here decoder is attention decoder - del self.criterion_att - if attention_decoder is not None: - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + predictor + joint + loss - - Args: - speech: (Batch, Length, ...) 
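The shallow fusion used in the prefix beam search above combines the transducer and CTC distributions in probability space and then returns to log space. A minimal numeric sketch (the weights and vocabulary size are example values):

```python
import torch

transducer_weight, ctc_weight = 0.7, 0.3
logp_rnnt = torch.log_softmax(torch.randn(1, 10), dim=-1)  # (beam, vocab)
logp_ctc = torch.log_softmax(torch.randn(1, 10), dim=-1)   # (1, vocab)

fused = torch.log(transducer_weight * torch.exp(logp_rnnt)
                  + ctc_weight * torch.exp(logp_ctc))
print(fused.shape)   # torch.Size([1, 10])
```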
- speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - - # Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - # predictor - ys_in_pad = add_blank(text, self.blank, self.ignore_id) - predictor_out = self.predictor(ys_in_pad) - # joint - joint_out = self.joint(encoder_out, predictor_out) - # NOTE(Mddct): some loss implementation require pad valid is zero - # torch.int32 rnnt_loss required - rnnt_text = text.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - rnnt_text_lengths = text_lengths.to(torch.int32) - encoder_out_lens = encoder_out_lens.to(torch.int32) - loss = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - encoder_out_lens, - rnnt_text_lengths, - blank=self.blank, - reduction="mean") - loss_rnnt = loss - - loss = self.transducer_weight * loss - # optional attention decoder - loss_att: Optional[torch.Tensor] = None - if self.attention_decoder_weight != 0.0 and self.decoder is not None: - loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text, - text_lengths) - - # optional ctc - loss_ctc: Optional[torch.Tensor] = None - if self.ctc_weight != 0.0 and self.ctc is not None: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is not None: - loss = loss + self.ctc_weight * loss_ctc.sum() - if loss_att is not None: - loss = loss + self.attention_decoder_weight * loss_att.sum() - # NOTE: 'loss' must be in dict - return { - 'loss': loss, - 'loss_att': loss_att, - 'loss_ctc': loss_ctc, - 'loss_rnnt': loss_rnnt, - } - - def init_bs(self): - if self.bs is None: - self.bs = PrefixBeamSearch(self.encoder, self.predictor, - self.joint, self.ctc, self.blank) - - def _cal_transducer_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_lens: torch.Tensor, - hyps_pad: torch.Tensor, - ): - # ignore id -> blank, add blank at head - hyps_pad_blank = add_blank(hyps_pad, self.blank, self.ignore_id) - xs_in_lens = encoder_mask.squeeze(1).sum(1).int() - - # 1. Forward predictor - predictor_out = self.predictor(hyps_pad_blank) - # 2. Forward joint - joint_out = self.joint(encoder_out, predictor_out) - rnnt_text = hyps_pad.to(torch.int64) - rnnt_text = torch.where(rnnt_text == self.ignore_id, 0, - rnnt_text).to(torch.int32) - # 3. 
Compute transducer loss - loss_td = torchaudio.functional.rnnt_loss(joint_out, - rnnt_text, - xs_in_lens, - hyps_lens.int(), - blank=self.blank, - reduction='none') - return loss_td * -1 - - def _cal_attn_score( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - hyps_pad: torch.Tensor, - hyps_lens: torch.Tensor, - ): - # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - - # td_score = loss_td * -1 - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - return decoder_out, r_decoder_out - - def beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - beam_size: int = 5, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ctc_weight: float = 0.3, - transducer_weight: float = 0.7, - ): - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight in transducer - prefix beam search. - final_prob = ctc_weight * ctc_prob + transducer_weight * transducer_prob - transducer_weight (float): transducer probability weight in - prefix beam search - Returns: - List[List[int]]: best path result - - """ - self.init_bs() - beam, _ = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size, - beam_size, - num_decoding_left_chunks, - simulate_streaming, - ctc_weight, - transducer_weight, - ) - return beam[0].hyp[1:], beam[0].score - - def transducer_attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ctc_weight: float = 0.0, - attn_weight: float = 0.0, - transducer_weight: float = 0.0, - search_ctc_weight: float = 1.0, - search_transducer_weight: float = 0.0, - beam_search_type: str = 'transducer') -> List[List[int]]: - """beam search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
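Rescoring, as described here, is a weighted sum of three per-hypothesis scores (attention decoder, search score, negated transducer loss), with the argmax kept. A toy sketch of that combination; all weights and scores below are made up for illustration:

```python
# Toy rescoring: combine attention, search and transducer scores per
# hypothesis and keep the best one. Numbers are illustrative only.
attn_weight, ctc_weight, transducer_weight = 0.5, 0.3, 0.2
hyps = [[12, 7, 33], [12, 7, 35]]
attn_scores = [-4.1, -5.0]
beam_scores = [-3.2, -2.9]
td_scores = [-6.0, -7.5]

totals = [attn_weight * a + ctc_weight * b + transducer_weight * t
          for a, b, t in zip(attn_scores, beam_scores, td_scores)]
best = max(range(len(hyps)), key=lambda i: totals[i])
print(hyps[best], totals[best])   # best hypothesis and its combined score
```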
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - ctc_weight (float): ctc probability weight using in rescoring. - rescore_prob = ctc_weight * ctc_prob + - transducer_weight * (transducer_loss * -1) + - attn_weight * attn_prob - attn_weight (float): attn probability weight using in rescoring. - transducer_weight (float): transducer probability weight using in - rescoring - search_ctc_weight (float): ctc weight using - in rnnt beam search (seeing in self.beam_search) - search_transducer_weight (float): transducer weight using - in rnnt beam search (seeing in self.beam_search) - Returns: - List[List[int]]: best path result - - """ - - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - self.init_bs() - if beam_search_type == 'transducer': - beam, encoder_out = self.bs.prefix_beam_search( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - beam_size=beam_size, - num_decoding_left_chunks=num_decoding_left_chunks, - ctc_weight=search_ctc_weight, - transducer_weight=search_transducer_weight, - ) - beam_score = [s.score for s in beam] - hyps = [s.hyp[1:] for s in beam] - - elif beam_search_type == 'ctc': - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, - speech_lengths, - beam_size=beam_size, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks, - simulate_streaming=simulate_streaming) - beam_score = [hyp[1] for hyp in hyps] - hyps = [hyp[0] for hyp in hyps] - assert len(hyps) == beam_size - - # build hyps and encoder output - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - - # 2.1 calculate transducer score - td_score = self._cal_transducer_score( - encoder_out, - encoder_mask, - hyps_lens, - hyps_pad, - ) - # 2.2 calculate attention score - decoder_out, r_decoder_out = self._cal_attn_score( - encoder_out, - encoder_mask, - hyps_pad, - hyps_lens, - ) - - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp)][self.eos] - td_s = td_score[i] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp): - r_score += r_decoder_out[i][len(hyp) - j - 1][w] - r_score += r_decoder_out[i][len(hyp)][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score = score * attn_weight + \ - beam_score[i] * ctc_weight + \ - td_s * transducer_weight - if score > best_score: - best_score = score - best_index = i - - return hyps[best_index], best_score - - def greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, 
- num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - n_steps: int = 64, - ) -> List[List[int]]: - """ greedy search - - Args: - speech (torch.Tensor): (batch=1, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - # TODO(Mddct): batch decode - assert speech.size(0) == 1 - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - # TODO(Mddct): forward chunk by chunk - _ = simulate_streaming - # Let's assume B = batch_size - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size, - num_decoding_left_chunks, - ) - encoder_out_lens = encoder_mask.squeeze(1).sum() - hyps = basic_greedy_search(self, - encoder_out, - encoder_out_lens, - n_steps=n_steps) - - return hyps - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def forward_predictor_step( - self, xs: torch.Tensor, cache: List[torch.Tensor] - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - assert len(cache) == 2 - # fake padding - padding = torch.zeros(1, 1) - return self.predictor.forward_step(xs, padding, cache) - - @torch.jit.export - def forward_joint_step(self, enc_out: torch.Tensor, - pred_out: torch.Tensor) -> torch.Tensor: - return self.joint(enc_out, pred_out) - - @torch.jit.export - def forward_predictor_init_state(self) -> List[torch.Tensor]: - return self.predictor.init_state(1, device=torch.device("cpu")) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/asr_model.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/asr_model.py deleted file mode 100644 index 4288f68472d63ce4bf270c5f377d62fa7408713e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/asr_model.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
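The exported step functions above support a frame-synchronous greedy decode; the core blank/emit rule can be sketched in isolation. This is a simplified illustration, not the deleted `basic_greedy_search` (a real transducer re-runs the predictor after every emission and may emit several tokens per frame):

```python
import torch

def greedy_decode(joint_logits_per_frame, blank_id: int = 0):
    """joint_logits_per_frame: iterable of (vocab,) tensors, one per frame."""
    hyp = []
    for logits in joint_logits_per_frame:
        token = int(logits.argmax())
        if token != blank_id:      # emit non-blank tokens, skip blanks
            hyp.append(token)
    return hyp

frames = [torch.tensor([2.0, 0.1, 0.3]),   # argmax 0 -> blank, skip
          torch.tensor([0.1, 3.0, 0.2])]   # argmax 1 -> emit
print(greedy_decode(frames))               # [1]
```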
-# Modified from ESPnet(https://github.com/espnet/espnet) - -from collections import defaultdict -from typing import Dict, List, Optional, Tuple - -import torch - -from torch.nn.utils.rnn import pad_sequence - -try: - import k2 - from icefall.utils import get_texts - from icefall.decode import get_lattice, Nbest, one_best_decoding -except ImportError: - print('Failed to import k2 and icefall. \ - Notice that they are necessary for hlg_onebest and hlg_rescore') - -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import TransformerDecoder -from wenet.transformer.encoder import TransformerEncoder -from wenet.transformer.label_smoothing_loss import LabelSmoothingLoss -from wenet.utils.common import (IGNORE_ID, add_sos_eos, log_add, - remove_duplicates_and_blank, th_accuracy, - reverse_pad_list) -from wenet.utils.mask import (make_pad_mask, mask_finished_preds, - mask_finished_scores, subsequent_mask) - - -class ASRModel(torch.nn.Module): - """CTC-attention hybrid Encoder-Decoder model""" - def __init__( - self, - vocab_size: int, - encoder: TransformerEncoder, - decoder: TransformerDecoder, - ctc: CTC, - ctc_weight: float = 0.5, - ignore_id: int = IGNORE_ID, - reverse_weight: float = 0.0, - lsm_weight: float = 0.0, - length_normalized_loss: bool = False, - ): - assert 0.0 <= ctc_weight <= 1.0, ctc_weight - - super().__init__() - # note that eos is the same as sos (equivalent ID) - self.sos = vocab_size - 1 - self.eos = vocab_size - 1 - self.vocab_size = vocab_size - self.ignore_id = ignore_id - self.ctc_weight = ctc_weight - self.reverse_weight = reverse_weight - - self.encoder = encoder - self.decoder = decoder - self.ctc = ctc - self.criterion_att = LabelSmoothingLoss( - size=vocab_size, - padding_idx=ignore_id, - smoothing=lsm_weight, - normalize_length=length_normalized_loss, - ) - - def forward( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - text: torch.Tensor, - text_lengths: torch.Tensor, - ) -> Dict[str, Optional[torch.Tensor]]: - """Frontend + Encoder + Decoder + Calc loss - - Args: - speech: (Batch, Length, ...) - speech_lengths: (Batch, ) - text: (Batch, Length) - text_lengths: (Batch,) - """ - assert text_lengths.dim() == 1, text_lengths.shape - # Check that batch_size is unified - assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] == - text_lengths.shape[0]), (speech.shape, speech_lengths.shape, - text.shape, text_lengths.shape) - # 1. Encoder - encoder_out, encoder_mask = self.encoder(speech, speech_lengths) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - - # 2a. Attention-decoder branch - if self.ctc_weight != 1.0: - loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask, - text, text_lengths) - else: - loss_att = None - - # 2b. 
CTC branch - if self.ctc_weight != 0.0: - loss_ctc = self.ctc(encoder_out, encoder_out_lens, text, - text_lengths) - else: - loss_ctc = None - - if loss_ctc is None: - loss = loss_att - elif loss_att is None: - loss = loss_ctc - else: - loss = self.ctc_weight * loss_ctc + (1 - - self.ctc_weight) * loss_att - return {"loss": loss, "loss_att": loss_att, "loss_ctc": loss_ctc} - - def _calc_att_loss( - self, - encoder_out: torch.Tensor, - encoder_mask: torch.Tensor, - ys_pad: torch.Tensor, - ys_pad_lens: torch.Tensor, - ) -> Tuple[torch.Tensor, float]: - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, - self.ignore_id) - ys_in_lens = ys_pad_lens + 1 - - # reverse the seq, used for right to left decoder - r_ys_pad = reverse_pad_list(ys_pad, ys_pad_lens, float(self.ignore_id)) - r_ys_in_pad, r_ys_out_pad = add_sos_eos(r_ys_pad, self.sos, self.eos, - self.ignore_id) - # 1. Forward decoder - decoder_out, r_decoder_out, _ = self.decoder(encoder_out, encoder_mask, - ys_in_pad, ys_in_lens, - r_ys_in_pad, - self.reverse_weight) - # 2. Compute attention loss - loss_att = self.criterion_att(decoder_out, ys_out_pad) - r_loss_att = torch.tensor(0.0) - if self.reverse_weight > 0.0: - r_loss_att = self.criterion_att(r_decoder_out, r_ys_out_pad) - loss_att = loss_att * ( - 1 - self.reverse_weight) + r_loss_att * self.reverse_weight - acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), - ys_out_pad, - ignore_label=self.ignore_id, - ) - return loss_att, acc_att - - def _forward_encoder( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Let's assume B = batch_size - # 1. Encoder - if simulate_streaming and decoding_chunk_size > 0: - encoder_out, encoder_mask = self.encoder.forward_chunk_by_chunk( - speech, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - else: - encoder_out, encoder_mask = self.encoder( - speech, - speech_lengths, - decoding_chunk_size=decoding_chunk_size, - num_decoding_left_chunks=num_decoding_left_chunks - ) # (B, maxlen, encoder_dim) - return encoder_out, encoder_mask - - def recognize( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int = 10, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> torch.Tensor: - """ Apply beam search on attention decoder - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - torch.Tensor: decoding result, (batch, max_result_len) - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - device = speech.device - batch_size = speech.shape[0] - - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) - running_size = batch_size * beam_size - encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( - running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) - encoder_mask = encoder_mask.unsqueeze(1).repeat( - 1, beam_size, 1, 1).view(running_size, 1, - maxlen) # (B*N, 1, max_len) - - hyps = torch.ones([running_size, 1], dtype=torch.long, - device=device).fill_(self.sos) # (B*N, 1) - scores = torch.tensor([0.0] + [-float('inf')] * (beam_size - 1), - dtype=torch.float) - scores = scores.to(device).repeat([batch_size]).unsqueeze(1).to( - device) # (B*N, 1) - end_flag = torch.zeros_like(scores, dtype=torch.bool, device=device) - cache: Optional[List[torch.Tensor]] = None - # 2. Decoder forward step by step - for i in range(1, maxlen + 1): - # Stop if all batch and all beam produce eos - if end_flag.sum() == running_size: - break - # 2.1 Forward decoder step - hyps_mask = subsequent_mask(i).unsqueeze(0).repeat( - running_size, 1, 1).to(device) # (B*N, i, i) - # logp: (B*N, vocab) - logp, cache = self.decoder.forward_one_step( - encoder_out, encoder_mask, hyps, hyps_mask, cache) - # 2.2 First beam prune: select topk best prob at current time - top_k_logp, top_k_index = logp.topk(beam_size) # (B*N, N) - top_k_logp = mask_finished_scores(top_k_logp, end_flag) - top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos) - # 2.3 Second beam prune: select topk score with history - scores = scores + top_k_logp # (B*N, N), broadcast add - scores = scores.view(batch_size, beam_size * beam_size) # (B, N*N) - scores, offset_k_index = scores.topk(k=beam_size) # (B, N) - # Update cache to be consistent with new topk scores / hyps - cache_index = (offset_k_index // beam_size).view(-1) # (B*N) - base_cache_index = (torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) * beam_size).view(-1) # (B*N) - cache_index = base_cache_index + cache_index - cache = [torch.index_select(c, dim=0, index=cache_index) for c in cache] - scores = scores.view(-1, 1) # (B*N, 1) - # 2.4. Compute base index in top_k_index, - # regard top_k_index as (B*N*N),regard offset_k_index as (B*N), - # then find offset_k_index in top_k_index - base_k_index = torch.arange(batch_size, device=device).view( - -1, 1).repeat([1, beam_size]) # (B, N) - base_k_index = base_k_index * beam_size * beam_size - best_k_index = base_k_index.view(-1) + offset_k_index.view( - -1) # (B*N) - - # 2.5 Update best hyps - best_k_pred = torch.index_select(top_k_index.view(-1), - dim=-1, - index=best_k_index) # (B*N) - best_hyps_index = best_k_index // beam_size - last_best_k_hyps = torch.index_select( - hyps, dim=0, index=best_hyps_index) # (B*N, i) - hyps = torch.cat((last_best_k_hyps, best_k_pred.view(-1, 1)), - dim=1) # (B*N, i+1) - - # 2.6 Update end flag - end_flag = torch.eq(hyps[:, -1], self.eos).view(-1, 1) - - # 3. 
Select best of best - scores = scores.view(batch_size, beam_size) - # TODO: length normalization - best_scores, best_index = scores.max(dim=-1) - best_hyps_index = best_index + torch.arange( - batch_size, dtype=torch.long, device=device) * beam_size - best_hyps = torch.index_select(hyps, dim=0, index=best_hyps_index) - best_hyps = best_hyps[:, 1:] - return best_hyps, best_scores - - def ctc_greedy_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[List[int]]: - """ Apply CTC greedy search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - Returns: - List[List[int]]: best path result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # Let's assume B = batch_size - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_out_lens = encoder_mask.squeeze(1).sum(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (B, maxlen, vocab_size) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - mask = make_pad_mask(encoder_out_lens, maxlen) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, self.eos) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] - return hyps, scores - - def _ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> Tuple[List[List[int]], torch.Tensor]: - """ CTC prefix beam search inner implementation - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[List[int]]: nbest results - torch.Tensor: encoder output, (1, max_len, encoder_dim), - it will be used for rescoring in attention rescoring mode - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - batch_size = speech.shape[0] - # For CTC prefix beam search, we only support batch_size=1 - assert batch_size == 1 - # Let's assume B = batch_size and N = beam_size - # 1. 
Encoder forward and get CTC score - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - ctc_probs = ctc_probs.squeeze(0) - # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) - cur_hyps = [(tuple(), (0.0, -float('inf')))] - # 2. CTC beam search step by step - for t in range(0, maxlen): - logp = ctc_probs[t] # (vocab_size,) - # key: prefix, value (pb, pnb), default value(-inf, -inf) - next_hyps = defaultdict(lambda: (-float('inf'), -float('inf'))) - # 2.1 First beam prune: select topk best - top_k_logp, top_k_index = logp.topk(beam_size) # (beam_size,) - for s in top_k_index: - s = s.item() - ps = logp[s].item() - for prefix, (pb, pnb) in cur_hyps: - last = prefix[-1] if len(prefix) > 0 else None - if s == 0: # blank - n_pb, n_pnb = next_hyps[prefix] - n_pb = log_add([n_pb, pb + ps, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - elif s == last: - # Update *ss -> *s; - n_pb, n_pnb = next_hyps[prefix] - n_pnb = log_add([n_pnb, pnb + ps]) - next_hyps[prefix] = (n_pb, n_pnb) - # Update *s-s -> *ss, - is for blank - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - else: - n_prefix = prefix + (s, ) - n_pb, n_pnb = next_hyps[n_prefix] - n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb) - - # 2.2 Second beam prune - next_hyps = sorted(next_hyps.items(), - key=lambda x: log_add(list(x[1])), - reverse=True) - cur_hyps = next_hyps[:beam_size] - hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps] - return hyps, encoder_out - - def ctc_prefix_beam_search( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - ) -> List[int]: - """ Apply CTC prefix beam search - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - - Returns: - List[int]: CTC prefix beam search nbest results - """ - hyps, _ = self._ctc_prefix_beam_search(speech, speech_lengths, - beam_size, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) - return hyps[0] - - def attention_rescoring( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - beam_size: int, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - ctc_weight: float = 0.0, - simulate_streaming: bool = False, - reverse_weight: float = 0.0, - ) -> List[int]: - """ Apply attention rescoring decoding, CTC prefix beam search - is applied first to get nbest, then we resoring the nbest on - attention decoder with corresponding encoder out - - Args: - speech (torch.Tensor): (batch, max_len, feat_dim) - speech_length (torch.Tensor): (batch, ) - beam_size (int): beam size for beam search - decoding_chunk_size (int): decoding chunk for dynamic chunk - trained model. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. 
- 0: used for training, it's prohibited here - simulate_streaming (bool): whether do encoder forward in a - streaming fashion - reverse_weight (float): right to left decoder weight - ctc_weight (float): ctc score weight - - Returns: - List[int]: Attention rescoring result - """ - assert speech.shape[0] == speech_lengths.shape[0] - assert decoding_chunk_size != 0 - if reverse_weight > 0.0: - # decoder should be a bitransformer decoder if reverse_weight > 0.0 - assert hasattr(self.decoder, 'right_decoder') - device = speech.device - batch_size = speech.shape[0] - # For attention rescoring we only support batch_size=1 - assert batch_size == 1 - # encoder_out: (1, maxlen, encoder_dim), len(hyps) = beam_size - hyps, encoder_out = self._ctc_prefix_beam_search( - speech, speech_lengths, beam_size, decoding_chunk_size, - num_decoding_left_chunks, simulate_streaming) - - assert len(hyps) == beam_size - hyps_pad = pad_sequence([ - torch.tensor(hyp[0], device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp[0]) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = encoder_out.repeat(beam_size, 1, 1) - encoder_mask = torch.ones(beam_size, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out.cpu().numpy() - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
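`_ctc_prefix_beam_search` above maintains, for every prefix, separate log-probabilities for ending in blank versus ending in a non-blank token. A minimal self-contained sketch of that bookkeeping over a toy `(T, V)` log-prob matrix (blank id 0; the numbers are made up for illustration):

```python
# Minimal CTC prefix beam search over a toy (T, V) log-prob matrix,
# following the blank / non-blank bookkeeping used above (id 0 = blank).
import math
from collections import defaultdict

def log_add(args):
    if all(a == -float('inf') for a in args):
        return -float('inf')
    a_max = max(args)
    return a_max + math.log(sum(math.exp(a - a_max) for a in args))

def ctc_prefix_beam_search(log_probs, beam_size=3):
    # cur_hyps: prefix -> (p_blank_ending, p_nonblank_ending), in log domain
    cur_hyps = [(tuple(), (0.0, -float('inf')))]
    for logp in log_probs:                                   # one frame at a time
        next_hyps = defaultdict(lambda: (-float('inf'), -float('inf')))
        for s, ps in sorted(enumerate(logp), key=lambda x: -x[1])[:beam_size]:
            for prefix, (pb, pnb) in cur_hyps:
                last = prefix[-1] if prefix else None
                if s == 0:                                   # blank keeps the prefix
                    n_pb, n_pnb = next_hyps[prefix]
                    next_hyps[prefix] = (log_add([n_pb, pb + ps, pnb + ps]), n_pnb)
                elif s == last:                              # repeat: *a -> *a, or *a- -> *aa
                    n_pb, n_pnb = next_hyps[prefix]
                    next_hyps[prefix] = (n_pb, log_add([n_pnb, pnb + ps]))
                    n_prefix = prefix + (s,)
                    n_pb, n_pnb = next_hyps[n_prefix]
                    next_hyps[n_prefix] = (n_pb, log_add([n_pnb, pb + ps]))
                else:                                        # new token extends the prefix
                    n_prefix = prefix + (s,)
                    n_pb, n_pnb = next_hyps[n_prefix]
                    next_hyps[n_prefix] = (n_pb, log_add([n_pnb, pb + ps, pnb + ps]))
        cur_hyps = sorted(next_hyps.items(),
                          key=lambda x: log_add(list(x[1])), reverse=True)[:beam_size]
    return [(p, log_add(list(s))) for p, s in cur_hyps]

# toy 3-frame, 3-symbol example (values are already log-probs)
toy = [[-0.1, -2.5, -3.0], [-2.3, -0.2, -2.8], [-0.2, -2.0, -3.1]]
print(ctc_prefix_beam_search(toy, beam_size=2))
```

The returned list is the n-best prefixes with their combined log-probability, which is what the attention rescoring pass above consumes.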
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out.cpu().numpy() - # Only use decoder score for rescoring - best_score = -float('inf') - best_index = 0 - for i, hyp in enumerate(hyps): - score = 0.0 - for j, w in enumerate(hyp[0]): - score += decoder_out[i][j][w] - score += decoder_out[i][len(hyp[0])][self.eos] - # add right to left decoder score - if reverse_weight > 0: - r_score = 0.0 - for j, w in enumerate(hyp[0]): - r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] - r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight - # add ctc score - score += hyp[1] * ctc_weight - if score > best_score: - best_score = score - best_index = i - return hyps[best_index][0], best_score - - def load_hlg_resource_if_necessary(self, hlg, word): - if not hasattr(self, 'hlg'): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.hlg = k2.Fsa.from_dict(torch.load(hlg, map_location=device)) - if not hasattr(self.hlg, "lm_scores"): - self.hlg.lm_scores = self.hlg.scores.clone() - if not hasattr(self, 'word_table'): - self.word_table = {} - with open(word, 'r') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - self.word_table[int(arr[1])] = arr[0] - - @torch.no_grad() - def hlg_onebest( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - best_path = one_best_decoding(lattice=lattice, use_double_scores=True) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.no_grad() - def hlg_rescore( - self, - speech: torch.Tensor, - speech_lengths: torch.Tensor, - decoding_chunk_size: int = -1, - num_decoding_left_chunks: int = -1, - simulate_streaming: bool = False, - lm_scale: float = 0, - decoder_scale: float = 0, - r_decoder_scale: float = 0, - hlg: str = '', - word: str = '', - symbol_table: Dict[str, int] = None, - ) -> List[int]: - self.load_hlg_resource_if_necessary(hlg, word) - device = speech.device - encoder_out, encoder_mask = self._forward_encoder( - speech, speech_lengths, decoding_chunk_size, - num_decoding_left_chunks, - simulate_streaming) # (B, maxlen, encoder_dim) - ctc_probs = self.ctc.log_softmax( - encoder_out) # (1, maxlen, vocab_size) - supervision_segments = torch.stack( - (torch.arange(len(encoder_mask)), - torch.zeros(len(encoder_mask)), - encoder_mask.squeeze(dim=1).sum(dim=1).cpu()), 1,).to(torch.int32) - lattice = get_lattice( - nnet_output=ctc_probs, - decoding_graph=self.hlg, - supervision_segments=supervision_segments, - 
search_beam=20, - output_beam=7, - min_active_states=30, - max_active_states=10000, - subsampling_factor=4) - nbest = Nbest.from_lattice( - lattice=lattice, - num_paths=100, - use_double_scores=True, - nbest_scale=0.5,) - nbest = nbest.intersect(lattice) - assert hasattr(nbest.fsa, "lm_scores") - assert hasattr(nbest.fsa, "tokens") - assert isinstance(nbest.fsa.tokens, torch.Tensor) - - tokens_shape = nbest.fsa.arcs.shape().remove_axis(1) - tokens = k2.RaggedTensor(tokens_shape, nbest.fsa.tokens) - tokens = tokens.remove_values_leq(0) - hyps = tokens.tolist() - - # cal attention_score - hyps_pad = pad_sequence([ - torch.tensor(hyp, device=device, dtype=torch.long) - for hyp in hyps - ], True, self.ignore_id) # (beam_size, max_hyps_len) - ori_hyps_pad = hyps_pad - hyps_lens = torch.tensor([len(hyp) for hyp in hyps], - device=device, - dtype=torch.long) # (beam_size,) - hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining - encoder_out_repeat = [] - tot_scores = nbest.tot_scores() - repeats = [tot_scores[i].shape[0] for i in range(tot_scores.dim0)] - for i in range(len(encoder_out)): - encoder_out_repeat.append(encoder_out[i: i + 1].repeat(repeats[i], 1, 1)) - encoder_out = torch.concat(encoder_out_repeat, dim=0) - encoder_mask = torch.ones(encoder_out.size(0), - 1, - encoder_out.size(1), - dtype=torch.bool, - device=device) - # used for right to left decoder - r_hyps_pad = reverse_pad_list(ori_hyps_pad, hyps_lens, self.ignore_id) - r_hyps_pad, _ = add_sos_eos(r_hyps_pad, self.sos, self.eos, - self.ignore_id) - reverse_weight = 0.5 - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - decoder_out = decoder_out - # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a - # conventional transformer decoder. 
- r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - r_decoder_out = r_decoder_out - - decoder_scores = torch.tensor([sum([decoder_out[i, j, hyps[i][j]] - for j in range(len(hyps[i]))]) - for i in range(len(hyps))], device=device) - r_decoder_scores = [] - for i in range(len(hyps)): - score = 0 - for j in range(len(hyps[i])): - score += r_decoder_out[i, len(hyps[i]) - j - 1, hyps[i][j]] - score += r_decoder_out[i, len(hyps[i]), self.eos] - r_decoder_scores.append(score) - r_decoder_scores = torch.tensor(r_decoder_scores, device=device) - - am_scores = nbest.compute_am_scores() - ngram_lm_scores = nbest.compute_lm_scores() - tot_scores = am_scores.values + lm_scale * ngram_lm_scores.values + \ - decoder_scale * decoder_scores + r_decoder_scale * r_decoder_scores - ragged_tot_scores = k2.RaggedTensor(nbest.shape, tot_scores) - max_indexes = ragged_tot_scores.argmax() - best_path = k2.index_fsa(nbest.fsa, max_indexes) - hyps = get_texts(best_path) - hyps = [[symbol_table[k] for j in i for k in self.word_table[j]] for i in hyps] - return hyps - - @torch.jit.export - def subsampling_rate(self) -> int: - """ Export interface for c++ call, return subsampling_rate of the - model - """ - return self.encoder.embed.subsampling_rate - - @torch.jit.export - def right_context(self) -> int: - """ Export interface for c++ call, return right_context of the model - """ - return self.encoder.embed.right_context - - @torch.jit.export - def sos_symbol(self) -> int: - """ Export interface for c++ call, return sos symbol id of the model - """ - return self.sos - - @torch.jit.export - def eos_symbol(self) -> int: - """ Export interface for c++ call, return eos symbol id of the model - """ - return self.eos - - @torch.jit.export - def forward_encoder_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, give input chunk xs, and return - output from time 0 to current chunk. - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
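Based on the `forward_encoder_chunk` docstring above, a caller drives streaming recognition by feeding one chunk at a time and threading the attention and CNN caches through. A hedged sketch of that loop; `model` and `chunk_feats` are assumed to exist, and the cache size is illustrative:

```python
# Sketch of a streaming caller for forward_encoder_chunk (assumed model, illustrative sizes).
import torch

def streaming_encode(model, chunk_feats, required_cache_size=16 * 4):
    offset = 0
    att_cache = torch.zeros(0, 0, 0, 0)   # empty caches for the first chunk
    cnn_cache = torch.zeros(0, 0, 0, 0)
    outputs = []
    for xs in chunk_feats:                 # xs: (1, time, mel-dim)
        ys, att_cache, cnn_cache = model.forward_encoder_chunk(
            xs, offset, required_cache_size, att_cache, cnn_cache)
        outputs.append(ys)
        offset += ys.size(1)               # advance by the emitted encoder frames
    return torch.cat(outputs, dim=1)
```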
- - """ - return self.encoder.forward_chunk(xs, offset, required_cache_size, - att_cache, cnn_cache) - - @torch.jit.export - def ctc_activation(self, xs: torch.Tensor) -> torch.Tensor: - """ Export interface for c++ call, apply linear transform and log - softmax before ctc - Args: - xs (torch.Tensor): encoder output - - Returns: - torch.Tensor: activation before ctc - - """ - return self.ctc.log_softmax(xs) - - @torch.jit.export - def is_bidirectional_decoder(self) -> bool: - """ - Returns: - torch.Tensor: decoder output - """ - if hasattr(self.decoder, 'right_decoder'): - return True - else: - return False - - @torch.jit.export - def forward_attention_decoder( - self, - hyps: torch.Tensor, - hyps_lens: torch.Tensor, - encoder_out: torch.Tensor, - reverse_weight: float = 0, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Export interface for c++ call, forward decoder with multiple - hypothesis from ctc prefix beam search and one encoder output - Args: - hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining - hyps_lens (torch.Tensor): length of each hyp in hyps - encoder_out (torch.Tensor): corresponding encoder output - r_hyps (torch.Tensor): hyps from ctc prefix beam search, already - pad eos at the begining which is used fo right to left decoder - reverse_weight: used for verfing whether used right to left decoder, - > 0 will use. - - Returns: - torch.Tensor: decoder output - """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps - encoder_out = encoder_out.repeat(num_hyps, 1, 1) - encoder_mask = torch.ones(num_hyps, - 1, - encoder_out.size(1), - dtype=torch.bool, - device=encoder_out.device) - - # input for right to left decoder - # this hyps_lens has count token, we need minus it. - r_hyps_lens = hyps_lens - 1 - # this hyps has included token, so it should be - # convert the original hyps. - r_hyps = hyps[:, 1:] - # >>> r_hyps - # >>> tensor([[ 1, 2, 3], - # >>> [ 9, 8, 4], - # >>> [ 2, -1, -1]]) - # >>> r_hyps_lens - # >>> tensor([3, 3, 1]) - - # NOTE(Mddct): `pad_sequence` is not supported by ONNX, it is used - # in `reverse_pad_list` thus we have to refine the below code. 
- # Issue: https://github.com/wenet-e2e/wenet/issues/1113 - # Equal to: - # >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id)) - # >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id) - max_len = torch.max(r_hyps_lens) - index_range = torch.arange(0, max_len, 1).to(encoder_out.device) - seq_len_expand = r_hyps_lens.unsqueeze(1) - seq_mask = seq_len_expand > index_range # (beam, max_len) - # >>> seq_mask - # >>> tensor([[ True, True, True], - # >>> [ True, True, True], - # >>> [ True, False, False]]) - index = (seq_len_expand - 1) - index_range # (beam, max_len) - # >>> index - # >>> tensor([[ 2, 1, 0], - # >>> [ 2, 1, 0], - # >>> [ 0, -1, -2]]) - index = index * seq_mask - # >>> index - # >>> tensor([[2, 1, 0], - # >>> [2, 1, 0], - # >>> [0, 0, 0]]) - r_hyps = torch.gather(r_hyps, 1, index) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, 2, 2]]) - r_hyps = torch.where(seq_mask, r_hyps, self.eos) - # >>> r_hyps - # >>> tensor([[3, 2, 1], - # >>> [4, 8, 9], - # >>> [2, eos, eos]]) - r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1) - # >>> r_hyps - # >>> tensor([[sos, 3, 2, 1], - # >>> [sos, 4, 8, 9], - # >>> [sos, 2, eos, eos]]) - - decoder_out, r_decoder_out, _ = self.decoder( - encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, - reverse_weight) # (num_hyps, max_hyps_len, vocab_size) - decoder_out = torch.nn.functional.log_softmax(decoder_out, dim=-1) - - # right to left decoder may be not used during decoding process, - # which depends on reverse_weight param. - # r_dccoder_out will be 0.0, if reverse_weight is 0.0 - r_decoder_out = torch.nn.functional.log_softmax(r_decoder_out, dim=-1) - return decoder_out, r_decoder_out diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/attention.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/attention.py deleted file mode 100644 index 6ee5e313edf2e88a844ce004c0f819b0bd3260f6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/attention.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Multi-Head Attention layer definition.""" - -import math -from typing import Tuple - -import torch -from torch import nn - - -class MultiHeadedAttention(nn.Module): - """Multi-Head Attention layer. - - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. 
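The gather-based reversal in `forward_attention_decoder` above (the ONNX-friendly replacement for `reverse_pad_list` + `add_sos_eos`) can be checked in isolation. A small sketch with toy hypotheses, assuming `sos=10` and `eos=11` purely for illustration:

```python
# Hedged sketch of the gather-based reversal above (toy ids; sos=10, eos=11 assumed).
import torch

hyps = torch.tensor([[10, 1, 2, 3], [10, 9, 8, 4], [10, 2, 11, 11]])  # sos-padded hyps
hyps_lens = torch.tensor([4, 4, 2])          # lengths include the sos token
r_hyps = hyps[:, 1:]                         # drop sos
r_hyps_lens = hyps_lens - 1
max_len = torch.max(r_hyps_lens)
index_range = torch.arange(0, max_len, 1)
seq_len_expand = r_hyps_lens.unsqueeze(1)
seq_mask = seq_len_expand > index_range      # True on valid positions
index = (seq_len_expand - 1) - index_range   # reversed indices (negative on padding)
index = index * seq_mask                     # clamp padding indices to 0
r_hyps = torch.gather(r_hyps, 1, index)
r_hyps = torch.where(seq_mask, r_hyps, torch.tensor(11))   # pad with eos
r_hyps = torch.cat([hyps[:, 0:1], r_hyps], dim=1)          # prepend sos
print(r_hyps)  # tensor([[10, 3, 2, 1], [10, 4, 8, 9], [10, 2, 11, 11]])
```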
- - """ - def __init__(self, n_head: int, n_feat: int, dropout_rate: float): - """Construct an MultiHeadedAttention object.""" - super().__init__() - assert n_feat % n_head == 0 - # We assume d_v always equals d_k - self.d_k = n_feat // n_head - self.h = n_head - self.linear_q = nn.Linear(n_feat, n_feat) - self.linear_k = nn.Linear(n_feat, n_feat) - self.linear_v = nn.Linear(n_feat, n_feat) - self.linear_out = nn.Linear(n_feat, n_feat) - self.dropout = nn.Dropout(p=dropout_rate) - - def forward_qkv( - self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Transform query, key and value. - - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - - Returns: - torch.Tensor: Transformed query tensor, size - (#batch, n_head, time1, d_k). - torch.Tensor: Transformed key tensor, size - (#batch, n_head, time2, d_k). - torch.Tensor: Transformed value tensor, size - (#batch, n_head, time2, d_k). - - """ - n_batch = query.size(0) - q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) - k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) - v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) - q = q.transpose(1, 2) # (batch, head, time1, d_k) - k = k.transpose(1, 2) # (batch, head, time2, d_k) - v = v.transpose(1, 2) # (batch, head, time2, d_k) - - return q, k, v - - def forward_attention( - self, value: torch.Tensor, scores: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) - ) -> torch.Tensor: - """Compute attention context vector. - - Args: - value (torch.Tensor): Transformed value, size - (#batch, n_head, time2, d_k). - scores (torch.Tensor): Attention score, size - (#batch, n_head, time1, time2). - mask (torch.Tensor): Mask, size (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - - Returns: - torch.Tensor: Transformed value (#batch, time1, d_model) - weighted by the attention score (#batch, time1, time2). - - """ - n_batch = value.size(0) - # NOTE(xcsong): When will `if mask.size(2) > 0` be True? - # 1. onnx(16/4) [WHY? Because we feed real cache & real mask for the - # 1st chunk to ease the onnx export.] - # 2. pytorch training - if mask.size(2) > 0 : # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) - # For last chunk, time2 might be larger than scores.size(-1) - mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) - scores = scores.masked_fill(mask, -float('inf')) - attn = torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0) # (batch, head, time1, time2) - # NOTE(xcsong): When will `if mask.size(2) > 0` be False? - # 1. onnx(16/-1, -1/-1, 16/0) - # 2. jit (16/-1, -1/-1, 16/0, 16/4) - else: - attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) - - p_attn = self.dropout(attn) - x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) - x = (x.transpose(1, 2).contiguous().view(n_batch, -1, - self.h * self.d_k) - ) # (batch, time1, d_model) - - return self.linear_out(x) # (batch, time1, d_model) - - def forward(self, query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute scaled dot product attention. 
- - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2). - 1.When applying cross attention between decoder and encoder, - the batch padding mask for input is in (#batch, 1, T) shape. - 2.When applying self attention of encoder, - the mask is in (#batch, T, T) shape. - 3.When applying self attention of decoder, - the mask is in (#batch, L, L) shape. - 4.If the different position in decoder see different block - of the encoder, such as Mocha, the passed in mask could be - in (#batch, L, T) shape. But there is no such case in current - Wenet. - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - - """ - q, k, v = self.forward_qkv(query, key, value) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. - new_cache = torch.cat((k, v), dim=-1) - - scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - return self.forward_attention(v, scores, mask), new_cache - - -class RelPositionMultiHeadedAttention(MultiHeadedAttention): - """Multi-Head Attention layer with relative position encoding. - Paper: https://arxiv.org/abs/1901.02860 - Args: - n_head (int): The number of heads. - n_feat (int): The number of features. - dropout_rate (float): Dropout rate. - """ - def __init__(self, n_head, n_feat, dropout_rate): - """Construct an RelPositionMultiHeadedAttention object.""" - super().__init__(n_head, n_feat, dropout_rate) - # linear transformation for positional encoding - self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) - # these two learnable bias are used in matrix c and matrix d - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) - self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) - torch.nn.init.xavier_uniform_(self.pos_bias_u) - torch.nn.init.xavier_uniform_(self.pos_bias_v) - - def rel_shift(self, x, zero_triu: bool = False): - """Compute relative positinal encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, size). 
- zero_triu (bool): If true, return the lower triangular part of - the matrix. - Returns: - torch.Tensor: Output tensor. - """ - - zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), - device=x.device, - dtype=x.dtype) - x_padded = torch.cat([zero_pad, x], dim=-1) - - x_padded = x_padded.view(x.size()[0], - x.size()[1], - x.size(3) + 1, x.size(2)) - x = x_padded[:, :, 1:].view_as(x) - - if zero_triu: - ones = torch.ones((x.size(2), x.size(3))) - x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] - - return x - - def forward(self, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - pos_emb: torch.Tensor = torch.empty(0), - cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute 'Scaled Dot Product Attention' with rel. positional encoding. - Args: - query (torch.Tensor): Query tensor (#batch, time1, size). - key (torch.Tensor): Key tensor (#batch, time2, size). - value (torch.Tensor): Value tensor (#batch, time2, size). - mask (torch.Tensor): Mask tensor (#batch, 1, time2) or - (#batch, time1, time2), (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): Positional embedding tensor - (#batch, time2, size). - cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - Returns: - torch.Tensor: Output tensor (#batch, time1, d_model). - torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) - where `cache_t == chunk_size * num_decoding_left_chunks` - and `head * d_k == size` - """ - q, k, v = self.forward_qkv(query, key, value) - q = q.transpose(1, 2) # (batch, time1, head, d_k) - - # NOTE(xcsong): - # when export onnx model, for 1st chunk, we feed - # cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode) - # or cache(1, head, real_cache_t, d_k * 2) (16/4 mode). - # In all modes, `if cache.size(0) > 0` will alwayse be `True` - # and we will always do splitting and - # concatnation(this will simplify onnx export). Note that - # it's OK to concat & split zero-shaped tensors(see code below). - # when export jit model, for 1st chunk, we always feed - # cache(0, 0, 0, 0) since jit supports dynamic if-branch. - # >>> a = torch.ones((1, 2, 0, 4)) - # >>> b = torch.ones((1, 2, 3, 4)) - # >>> c = torch.cat((a, b), dim=2) - # >>> torch.equal(b, c) # True - # >>> d = torch.split(a, 2, dim=-1) - # >>> torch.equal(d[0], d[1]) # True - if cache.size(0) > 0: - key_cache, value_cache = torch.split( - cache, cache.size(-1) // 2, dim=-1) - k = torch.cat([key_cache, k], dim=2) - v = torch.cat([value_cache, v], dim=2) - # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's - # non-trivial to calculate `next_cache_start` here. 
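The NOTEs above describe the key/value cache trick used for chunked attention: the incoming cache holds concatenated keys and values, which are split, prepended to the current chunk's `k`/`v`, and re-concatenated as the new cache. A minimal sketch with toy shapes:

```python
# Sketch of the KV-cache concat pattern described above (toy shapes).
import torch

head, d_k = 4, 64
k_new = torch.randn(1, head, 16, d_k)          # keys for the current chunk
v_new = torch.randn(1, head, 16, d_k)
cache = torch.zeros(1, head, 0, 2 * d_k)       # empty cache for the first chunk

key_cache, value_cache = torch.split(cache, cache.size(-1) // 2, dim=-1)
k = torch.cat([key_cache, k_new], dim=2)       # concat with a zero-length cache is a no-op
v = torch.cat([value_cache, v_new], dim=2)
new_cache = torch.cat((k, v), dim=-1)          # (1, head, cache_t + time1, d_k * 2)
print(new_cache.shape)                          # torch.Size([1, 4, 16, 128])
```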
- new_cache = torch.cat((k, v), dim=-1) - - n_batch_pos = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) - p = p.transpose(1, 2) # (batch, head, time1, d_k) - - # (batch, head, time1, d_k) - q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) - # (batch, head, time1, d_k) - q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) - - # compute attention score - # first compute matrix a and matrix c - # as described in https://arxiv.org/abs/1901.02860 Section 3.3 - # (batch, head, time1, time2) - matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) - - # compute matrix b and matrix d - # (batch, head, time1, time2) - matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) - # Remove rel_shift since it is useless in speech recognition, - # and it requires special attention for streaming. - # matrix_bd = self.rel_shift(matrix_bd) - - scores = (matrix_ac + matrix_bd) / math.sqrt( - self.d_k) # (batch, head, time1, time2) - - return self.forward_attention(v, scores, mask), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/cmvn.py deleted file mode 100644 index 3a1e7457fd3788d9a7e031e96517505a65925102..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/cmvn.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class GlobalCMVN(torch.nn.Module): - def __init__(self, - mean: torch.Tensor, - istd: torch.Tensor, - norm_var: bool = True): - """ - Args: - mean (torch.Tensor): mean stats - istd (torch.Tensor): inverse std, std which is 1.0 / std - """ - super().__init__() - assert mean.shape == istd.shape - self.norm_var = norm_var - # The buffer can be accessed from this module using self.mean - self.register_buffer("mean", mean) - self.register_buffer("istd", istd) - - def forward(self, x: torch.Tensor): - """ - Args: - x (torch.Tensor): (batch, max_len, feat_dim) - - Returns: - (torch.Tensor): normalized feature - """ - x = x - self.mean - if self.norm_var: - x = x * self.istd - return x diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/convolution.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/convolution.py deleted file mode 100644 index 2cf9794e14ea7441ccd30ab52202ac02fb25c2b6..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/convolution.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
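The removed `GlobalCMVN` above is a simple affine normalization, `x_norm = (x - mean) * istd`, where `istd` is the precomputed inverse standard deviation. A toy check with illustrative statistics:

```python
# Toy check of the GlobalCMVN transform above (illustrative statistics only).
import torch

mean = torch.tensor([1.0, 2.0, 3.0])
istd = torch.tensor([0.5, 0.25, 1.0])
x = torch.tensor([[[3.0, 6.0, 3.0]]])          # (batch=1, max_len=1, feat_dim=3)
x_norm = (x - mean) * istd
print(x_norm)                                   # tensor([[[1., 1., 0.]]])
```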
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""ConvolutionModule definition.""" - -from typing import Tuple - -import torch -from torch import nn -from typeguard import check_argument_types - - -class ConvolutionModule(nn.Module): - """ConvolutionModule in Conformer model.""" - def __init__(self, - channels: int, - kernel_size: int = 15, - activation: nn.Module = nn.ReLU(), - norm: str = "batch_norm", - causal: bool = False, - bias: bool = True): - """Construct an ConvolutionModule object. - Args: - channels (int): The number of channels of conv layers. - kernel_size (int): Kernel size of conv layers. - causal (int): Whether use causal convolution or not - """ - assert check_argument_types() - super().__init__() - - self.pointwise_conv1 = nn.Conv1d( - channels, - 2 * channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - # self.lorder is used to distinguish if it's a causal convolution, - # if self.lorder > 0: it's a causal convolution, the input will be - # padded with self.lorder frames on the left in forward. - # else: it's a symmetrical convolution - if causal: - padding = 0 - self.lorder = kernel_size - 1 - else: - # kernel_size should be an odd number for none causal convolution - assert (kernel_size - 1) % 2 == 0 - padding = (kernel_size - 1) // 2 - self.lorder = 0 - self.depthwise_conv = nn.Conv1d( - channels, - channels, - kernel_size, - stride=1, - padding=padding, - groups=channels, - bias=bias, - ) - - assert norm in ['batch_norm', 'layer_norm'] - if norm == "batch_norm": - self.use_layer_norm = False - self.norm = nn.BatchNorm1d(channels) - else: - self.use_layer_norm = True - self.norm = nn.LayerNorm(channels) - - self.pointwise_conv2 = nn.Conv1d( - channels, - channels, - kernel_size=1, - stride=1, - padding=0, - bias=bias, - ) - self.activation = activation - - def forward( - self, - x: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - cache: torch.Tensor = torch.zeros((0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Compute convolution module. - Args: - x (torch.Tensor): Input tensor (#batch, time, channels). - mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), - (0, 0, 0) means fake mask. - cache (torch.Tensor): left context cache, it is only - used in causal convolution (#batch, channels, cache_t), - (0, 0, 0) meas fake cache. - Returns: - torch.Tensor: Output tensor (#batch, time, channels). 
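The removed `ConvolutionModule` above stacks a pointwise conv, a GLU gate, a depthwise conv, a norm plus activation, and a final pointwise conv. A minimal sketch of that structure in plain `torch.nn` (non-causal, batch norm, illustrative sizes):

```python
# Minimal sketch of the conv block structure described above:
# pointwise conv -> GLU -> depthwise conv -> norm -> activation -> pointwise conv.
import torch
from torch import nn

channels, kernel_size = 8, 15
pointwise_conv1 = nn.Conv1d(channels, 2 * channels, kernel_size=1)
depthwise_conv = nn.Conv1d(channels, channels, kernel_size,
                           padding=(kernel_size - 1) // 2, groups=channels)
norm = nn.BatchNorm1d(channels)
pointwise_conv2 = nn.Conv1d(channels, channels, kernel_size=1)

x = torch.randn(2, 10, channels)        # (batch, time, channels)
x = x.transpose(1, 2)                   # conv layers expect (batch, channels, time)
x = nn.functional.glu(pointwise_conv1(x), dim=1)
x = pointwise_conv2(nn.ReLU()(norm(depthwise_conv(x))))
print(x.transpose(1, 2).shape)          # back to (batch, time, channels): (2, 10, 8)
```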
- """ - # exchange the temporal dimension and the feature dimension - x = x.transpose(1, 2) # (#batch, channels, time) - - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - if self.lorder > 0: - if cache.size(2) == 0: # cache_t == 0 - x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) - else: - assert cache.size(0) == x.size(0) # equal batch - assert cache.size(1) == x.size(1) # equal channel - x = torch.cat((cache, x), dim=2) - assert (x.size(2) > self.lorder) - new_cache = x[:, :, -self.lorder:] - else: - # It's better we just return None if no cache is required, - # However, for JIT export, here we just fake one tensor instead of - # None. - new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - - # GLU mechanism - x = self.pointwise_conv1(x) # (batch, 2*channel, dim) - x = nn.functional.glu(x, dim=1) # (batch, channel, dim) - - # 1D Depthwise Conv - x = self.depthwise_conv(x) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.activation(self.norm(x)) - if self.use_layer_norm: - x = x.transpose(1, 2) - x = self.pointwise_conv2(x) - # mask batch padding - if mask_pad.size(2) > 0: # time > 0 - x.masked_fill_(~mask_pad, 0.0) - - return x.transpose(1, 2), new_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/ctc.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/ctc.py deleted file mode 100644 index 3dfcbaa324ffc26afa9ceaeb75007eb312546326..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/ctc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -import torch -import torch.nn.functional as F -from typeguard import check_argument_types - - -class CTC(torch.nn.Module): - """CTC module""" - def __init__( - self, - odim: int, - encoder_output_size: int, - dropout_rate: float = 0.0, - reduce: bool = True, - ): - """ Construct CTC module - Args: - odim: dimension of outputs - encoder_output_size: number of encoder projection units - dropout_rate: dropout rate (0.0 ~ 1.0) - reduce: reduce the CTC loss into a scalar - """ - assert check_argument_types() - super().__init__() - eprojs = encoder_output_size - self.dropout_rate = dropout_rate - self.ctc_lo = torch.nn.Linear(eprojs, odim) - - reduction_type = "sum" if reduce else "none" - self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) - - def forward(self, hs_pad: torch.Tensor, hlens: torch.Tensor, - ys_pad: torch.Tensor, ys_lens: torch.Tensor) -> torch.Tensor: - """Calculate CTC loss. 
- - Args: - hs_pad: batch of padded hidden state sequences (B, Tmax, D) - hlens: batch of lengths of hidden state sequences (B) - ys_pad: batch of padded character id sequence tensor (B, Lmax) - ys_lens: batch of lengths of character sequence (B) - """ - # hs_pad: (B, L, NProj) -> ys_hat: (B, L, Nvocab) - ys_hat = self.ctc_lo(F.dropout(hs_pad, p=self.dropout_rate)) - # ys_hat: (B, L, D) -> (L, B, D) - ys_hat = ys_hat.transpose(0, 1) - ys_hat = ys_hat.log_softmax(2) - loss = self.ctc_loss(ys_hat, ys_pad, hlens, ys_lens) - # Batch-size average - loss = loss / ys_hat.size(1) - return loss - - def log_softmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """log_softmax of frame activations - - Args: - Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim) - """ - return F.log_softmax(self.ctc_lo(hs_pad), dim=2) - - def argmax(self, hs_pad: torch.Tensor) -> torch.Tensor: - """argmax of frame activations - - Args: - torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) - Returns: - torch.Tensor: argmax applied 2d tensor (B, Tmax) - """ - return torch.argmax(self.ctc_lo(hs_pad), dim=2) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/decoder.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/decoder.py deleted file mode 100644 index c31853d9e868c99290b8d597f53d9a680202c82c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/decoder.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Decoder definition.""" -from typing import Tuple, List, Optional - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.decoder_layer import DecoderLayer -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.utils.mask import (subsequent_mask, make_pad_mask) - - -class TransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. 
- concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - assert check_argument_types() - super().__init__() - attention_dim = encoder_output_size - - if input_layer == "embed": - self.embed = torch.nn.Sequential( - torch.nn.Embedding(vocab_size, attention_dim), - PositionalEncoding(attention_dim, positional_dropout_rate), - ) - else: - raise ValueError(f"only 'embed' is supported: {input_layer}") - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(attention_dim, eps=1e-5) - self.use_output_layer = use_output_layer - self.output_layer = torch.nn.Linear(attention_dim, vocab_size) - self.num_blocks = num_blocks - self.decoders = torch.nn.ModuleList([ - DecoderLayer( - attention_dim, - MultiHeadedAttention(attention_heads, attention_dim, - self_attention_dropout_rate), - MultiHeadedAttention(attention_heads, attention_dim, - src_attention_dropout_rate), - PositionwiseFeedForward(attention_dim, linear_units, - dropout_rate), - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(self.num_blocks) - ]) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor = torch.empty(0), - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: not used in transformer decoder, in order to unify api - with bidirectional decoder - reverse_weight: not used in transformer decoder, in order to unify - api with bidirectional decode - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - torch.tensor(0.0), in order to unify api with bidirectional decoder - olens: (batch, ) - """ - tgt = ys_in_pad - maxlen = tgt.size(1) - # tgt_mask: (B, 1, L) - tgt_mask = ~make_pad_mask(ys_in_lens, maxlen).unsqueeze(1) - tgt_mask = tgt_mask.to(tgt.device) - # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1), - device=tgt_mask.device).unsqueeze(0) - # tgt_mask: (B, L, L) - tgt_mask = tgt_mask & m - x, _ = self.embed(tgt) - for layer in self.decoders: - x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory, - memory_mask) - if self.normalize_before: - x = self.after_norm(x) - if self.use_output_layer: - x = self.output_layer(x) - olens = tgt_mask.sum(1) - return x, torch.tensor(0.0), olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - x, _ = self.embed(tgt) - new_cache = [] - for i, decoder in enumerate(self.decoders): - if cache is None: - c = None - else: - c = cache[i] - x, tgt_mask, memory, memory_mask = decoder(x, - tgt_mask, - memory, - memory_mask, - cache=c) - new_cache.append(x) - if self.normalize_before: - y = self.after_norm(x[:, -1]) - else: - y = x[:, -1] - if self.use_output_layer: - y = torch.log_softmax(self.output_layer(y), dim=-1) - return y, new_cache - - -class BiTransformerDecoder(torch.nn.Module): - """Base class of Transfomer decoder module. - Args: - vocab_size: output dim - encoder_output_size: dimension of attention - attention_heads: the number of heads of multi head attention - linear_units: the hidden units number of position-wise feedforward - num_blocks: the number of decoder blocks - r_num_blocks: the number of right to left decoder blocks - dropout_rate: dropout rate - self_attention_dropout_rate: dropout rate for attention - input_layer: input layer type - use_output_layer: whether to use output layer - pos_enc_class: PositionalEncoding or ScaledPositionalEncoding - normalize_before: - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after: whether to concat attention layer's input and output - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - vocab_size: int, - encoder_output_size: int, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - r_num_blocks: int = 0, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - self_attention_dropout_rate: float = 0.0, - src_attention_dropout_rate: float = 0.0, - input_layer: str = "embed", - use_output_layer: bool = True, - normalize_before: bool = True, - concat_after: bool = False, - ): - - assert check_argument_types() - super().__init__() - self.left_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - self.right_decoder = TransformerDecoder( - vocab_size, encoder_output_size, attention_heads, linear_units, - r_num_blocks, dropout_rate, positional_dropout_rate, - self_attention_dropout_rate, src_attention_dropout_rate, - input_layer, use_output_layer, normalize_before, concat_after) - - def forward( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - ys_in_pad: torch.Tensor, - ys_in_lens: torch.Tensor, - r_ys_in_pad: torch.Tensor, - reverse_weight: float = 0.0, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Forward decoder. 
- Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoder memory mask, (batch, 1, maxlen_in) - ys_in_pad: padded input token ids, int64 (batch, maxlen_out) - ys_in_lens: input lengths of this batch (batch) - r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out), - used for right to left decoder - reverse_weight: used for right to left decoder - Returns: - (tuple): tuple containing: - x: decoded token score before softmax (batch, maxlen_out, - vocab_size) if use_output_layer is True, - r_x: x: decoded token score (right to left decoder) - before softmax (batch, maxlen_out, vocab_size) - if use_output_layer is True, - olens: (batch, ) - """ - l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad, - ys_in_lens) - r_x = torch.tensor(0.0) - if reverse_weight > 0.0: - r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad, - ys_in_lens) - return l_x, r_x, olens - - def forward_one_step( - self, - memory: torch.Tensor, - memory_mask: torch.Tensor, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - cache: Optional[List[torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, List[torch.Tensor]]: - """Forward one step. - This is only used for decoding. - Args: - memory: encoded memory, float32 (batch, maxlen_in, feat) - memory_mask: encoded memory mask, (batch, 1, maxlen_in) - tgt: input token ids, int64 (batch, maxlen_out) - tgt_mask: input token mask, (batch, maxlen_out) - dtype=torch.uint8 in PyTorch 1.2- - dtype=torch.bool in PyTorch 1.2+ (include 1.2) - cache: cached output list of (batch, max_time_out-1, size) - Returns: - y, cache: NN output value and cache per `self.decoders`. - y.shape` is (batch, maxlen_out, token) - """ - return self.left_decoder.forward_one_step(memory, memory_mask, tgt, - tgt_mask, cache) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/decoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/decoder_layer.py deleted file mode 100644 index 6b52aa6ab730dc51b18f0787e8236ab10c1e9cad..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/decoder_layer.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Decoder self-attention layer definition.""" -from typing import Optional, Tuple - -import torch -from torch import nn - - -class DecoderLayer(nn.Module): - """Single decoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - src_attn (torch.nn.Module): Inter-attention module instance. - `MultiHeadedAttention` instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - dropout_rate (float): Dropout rate. 
- normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's inpu - and output. - True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: nn.Module, - src_attn: nn.Module, - feed_forward: nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an DecoderLayer object.""" - super().__init__() - self.size = size - self.self_attn = self_attn - self.src_attn = src_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.norm3 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear1 = nn.Linear(size + size, size) - self.concat_linear2 = nn.Linear(size + size, size) - else: - self.concat_linear1 = nn.Identity() - self.concat_linear2 = nn.Identity() - - def forward( - self, - tgt: torch.Tensor, - tgt_mask: torch.Tensor, - memory: torch.Tensor, - memory_mask: torch.Tensor, - cache: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute decoded features. - - Args: - tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size). - tgt_mask (torch.Tensor): Mask for input tensor - (#batch, maxlen_out). - memory (torch.Tensor): Encoded memory - (#batch, maxlen_in, size). - memory_mask (torch.Tensor): Encoded memory mask - (#batch, maxlen_in). - cache (torch.Tensor): cached tensors. - (#batch, maxlen_out - 1, size). - - Returns: - torch.Tensor: Output tensor (#batch, maxlen_out, size). - torch.Tensor: Mask for output tensor (#batch, maxlen_out). - torch.Tensor: Encoded memory (#batch, maxlen_in, size). - torch.Tensor: Encoded memory mask (#batch, maxlen_in). 
- - """ - residual = tgt - if self.normalize_before: - tgt = self.norm1(tgt) - - if cache is None: - tgt_q = tgt - tgt_q_mask = tgt_mask - else: - # compute only the last frame query keeping dim: max_time_out -> 1 - assert cache.shape == ( - tgt.shape[0], - tgt.shape[1] - 1, - self.size, - ), "{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" - tgt_q = tgt[:, -1:, :] - residual = residual[:, -1:, :] - tgt_q_mask = tgt_mask[:, -1:, :] - - if self.concat_after: - tgt_concat = torch.cat( - (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1) - x = residual + self.concat_linear1(tgt_concat) - else: - x = residual + self.dropout( - self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - if self.concat_after: - x_concat = torch.cat( - (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1) - x = residual + self.concat_linear2(x_concat) - else: - x = residual + self.dropout( - self.src_attn(x, memory, memory, memory_mask)[0]) - if not self.normalize_before: - x = self.norm2(x) - - residual = x - if self.normalize_before: - x = self.norm3(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm3(x) - - if cache is not None: - x = torch.cat([cache, x], dim=1) - - return x, tgt_mask, memory, memory_mask diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/embedding.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/embedding.py deleted file mode 100644 index 611a927864d93c3ad8357f66c780bf537b2a4d67..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/embedding.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. 
- - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - self.pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - self.pe[:, 0::2] = torch.sin(position * div_term) - self.pe[:, 1::2] = torch.cos(position * div_term) - self.pe = self.pe.unsqueeze(0) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) - torch.Tensor: for compatibility to RelPositionalEncoding - """ - - self.pe = self.pe.to(x.device) - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, offset: Union[int, torch.Tensor], size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - if isinstance(offset, int): - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size < self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size < self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). 
- Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - self.pe = self.pe.to(x.device) - x = x * self.xscale - pos_emb = self.position_encoding(offset, x.size(1), False) - return self.dropout(x), self.dropout(pos_emb) - - -class NoPositionalEncoding(torch.nn.Module): - """ No position encoding - """ - def __init__(self, d_model: int, dropout_rate: float): - super().__init__() - self.d_model = d_model - self.dropout = torch.nn.Dropout(p=dropout_rate) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Just return zero vector for interface compatibility - """ - pos_emb = torch.zeros(1, x.size(1), self.d_model).to(x.device) - return self.dropout(x), pos_emb - - def position_encoding( - self, offset: Union[int, torch.Tensor], size: int) -> torch.Tensor: - return torch.zeros(1, size, self.d_model) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/encoder.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/encoder.py deleted file mode 100644 index bb2ec65827548bd1242cb3b367cb3983c2de6119..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/encoder.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
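The positional-encoding classes above precompute a single sinusoidal table following the formula quoted in their docstring. A minimal standalone sketch of that table (sizes are illustrative):

```python
import math
import torch

def sinusoidal_table(max_len: int, d_model: int) -> torch.Tensor:
    """PE(pos, 2i) = sin(pos / 10000^(2i/d_model)), PE(pos, 2i+1) = cos(...)."""
    pe = torch.zeros(max_len, d_model)
    position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
    div_term = torch.exp(
        torch.arange(0, d_model, 2, dtype=torch.float32)
        * -(math.log(10000.0) / d_model))
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    return pe.unsqueeze(0)          # (1, max_len, d_model), same layout as self.pe

print(sinusoidal_table(5000, 256).shape)   # torch.Size([1, 5000, 256])
```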
-# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder definition.""" -from typing import Tuple - -import torch -from typeguard import check_argument_types - -from wenet.transformer.attention import MultiHeadedAttention -from wenet.transformer.attention import RelPositionMultiHeadedAttention -from wenet.transformer.convolution import ConvolutionModule -from wenet.transformer.embedding import PositionalEncoding -from wenet.transformer.embedding import RelPositionalEncoding -from wenet.transformer.embedding import NoPositionalEncoding -from wenet.transformer.encoder_layer import TransformerEncoderLayer -from wenet.transformer.encoder_layer import ConformerEncoderLayer -from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward -from wenet.transformer.subsampling import Conv2dSubsampling4 -from wenet.transformer.subsampling import Conv2dSubsampling6 -from wenet.transformer.subsampling import Conv2dSubsampling8 -from wenet.transformer.subsampling import LinearNoSubsampling -from wenet.utils.common import get_activation -from wenet.utils.mask import make_pad_mask -from wenet.utils.mask import add_optional_chunk_mask - - -class BaseEncoder(torch.nn.Module): - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ - Args: - input_size (int): input dim - output_size (int): dimension of attention - attention_heads (int): the number of heads of multi head attention - linear_units (int): the hidden units number of position-wise feed - forward - num_blocks (int): the number of decoder blocks - dropout_rate (float): dropout rate - attention_dropout_rate (float): dropout rate in attention - positional_dropout_rate (float): dropout rate after adding - positional encoding - input_layer (str): input layer type. - optional [linear, conv2d, conv2d6, conv2d8] - pos_enc_layer_type (str): Encoder positional encoding layer type. - opitonal [abs_pos, scaled_abs_pos, rel_pos, no_pos] - normalize_before (bool): - True: use layer_norm before each sub-block of a layer. - False: use layer_norm after each sub-block of a layer. - concat_after (bool): whether to concat attention layer's input - and output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - static_chunk_size (int): chunk size for static chunk training and - decoding - use_dynamic_chunk (bool): whether use dynamic chunk size for - training or not, You can only use fixed chunk(chunk_size > 0) - or dyanmic chunk size(use_dynamic_chunk = True) - global_cmvn (Optional[torch.nn.Module]): Optional GlobalCMVN module - use_dynamic_left_chunk (bool): whether use dynamic left chunk in - dynamic chunk training - """ - assert check_argument_types() - super().__init__() - self._output_size = output_size - - if pos_enc_layer_type == "abs_pos": - pos_enc_class = PositionalEncoding - elif pos_enc_layer_type == "rel_pos": - pos_enc_class = RelPositionalEncoding - elif pos_enc_layer_type == "no_pos": - pos_enc_class = NoPositionalEncoding - else: - raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type) - - if input_layer == "linear": - subsampling_class = LinearNoSubsampling - elif input_layer == "conv2d": - subsampling_class = Conv2dSubsampling4 - elif input_layer == "conv2d6": - subsampling_class = Conv2dSubsampling6 - elif input_layer == "conv2d8": - subsampling_class = Conv2dSubsampling8 - else: - raise ValueError("unknown input_layer: " + input_layer) - - self.global_cmvn = global_cmvn - self.embed = subsampling_class( - input_size, - output_size, - dropout_rate, - pos_enc_class(output_size, positional_dropout_rate), - ) - - self.normalize_before = normalize_before - self.after_norm = torch.nn.LayerNorm(output_size, eps=1e-5) - self.static_chunk_size = static_chunk_size - self.use_dynamic_chunk = use_dynamic_chunk - self.use_dynamic_left_chunk = use_dynamic_left_chunk - - def output_size(self) -> int: - return self._output_size - - def forward( - self, - xs: torch.Tensor, - xs_lens: torch.Tensor, - decoding_chunk_size: int = 0, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """Embed positions in tensor. - - Args: - xs: padded input tensor (B, T, D) - xs_lens: input length (B) - decoding_chunk_size: decoding chunk size for dynamic chunk - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. 
- >=0: use num_decoding_left_chunks - <0: use all left chunks - Returns: - encoder output tensor xs, and subsampled masks - xs: padded output tensor (B, T' ~= T/subsample_rate, D) - masks: torch.Tensor batch padding mask after subsample - (B, 1, T' ~= T/subsample_rate) - """ - T = xs.size(1) - masks = ~make_pad_mask(xs_lens, T).unsqueeze(1) # (B, 1, T) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - xs, pos_emb, masks = self.embed(xs, masks) - mask_pad = masks # (B, 1, T/subsample_rate) - chunk_masks = add_optional_chunk_mask(xs, masks, - self.use_dynamic_chunk, - self.use_dynamic_left_chunk, - decoding_chunk_size, - self.static_chunk_size, - num_decoding_left_chunks) - for layer in self.encoders: - xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) - if self.normalize_before: - xs = self.after_norm(xs) - # Here we assume the mask is not changed in encoder layers, so just - # return the masks before encoder layers, and the masks will be used - # for cross attention with decoder later - return xs, masks - - def forward_chunk( - self, - xs: torch.Tensor, - offset: int, - required_cache_size: int, - att_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - cnn_cache: torch.Tensor = torch.zeros(0, 0, 0, 0), - att_mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ Forward just one chunk - - Args: - xs (torch.Tensor): chunk input, with shape (b=1, time, mel-dim), - where `time == (chunk_size - 1) * subsample_rate + \ - subsample.right_context + 1` - offset (int): current offset in encoder output time stamp - required_cache_size (int): cache size required for next chunk - compuation - >=0: actual cache size - <0: means all history cache is required - att_cache (torch.Tensor): cache tensor for KEY & VALUE in - transformer/conformer attention, with shape - (elayers, head, cache_t1, d_k * 2), where - `head * d_k == hidden-dim` and - `cache_t1 == chunk_size * num_decoding_left_chunks`. - cnn_cache (torch.Tensor): cache tensor for cnn_module in conformer, - (elayers, b=1, hidden-dim, cache_t2), where - `cache_t2 == cnn.lorder - 1` - - Returns: - torch.Tensor: output of current input xs, - with shape (b=1, chunk_size, hidden-dim). - torch.Tensor: new attention cache required for next chunk, with - dynamic shape (elayers, head, ?, d_k * 2) - depending on required_cache_size. - torch.Tensor: new conformer cnn cache required for next chunk, with - same shape as the original cnn_cache. 
- - """ - assert xs.size(0) == 1 - # tmp_masks is just for interface compatibility - tmp_masks = torch.ones(1, - xs.size(1), - device=xs.device, - dtype=torch.bool) - tmp_masks = tmp_masks.unsqueeze(1) - if self.global_cmvn is not None: - xs = self.global_cmvn(xs) - # NOTE(xcsong): Before embed, shape(xs) is (b=1, time, mel-dim) - xs, pos_emb, _ = self.embed(xs, tmp_masks, offset) - # NOTE(xcsong): After embed, shape(xs) is (b=1, chunk_size, hidden-dim) - elayers, cache_t1 = att_cache.size(0), att_cache.size(2) - chunk_size = xs.size(1) - attention_key_size = cache_t1 + chunk_size - pos_emb = self.embed.position_encoding( - offset=offset - cache_t1, size=attention_key_size) - if required_cache_size < 0: - next_cache_start = 0 - elif required_cache_size == 0: - next_cache_start = attention_key_size - else: - next_cache_start = max(attention_key_size - required_cache_size, 0) - r_att_cache = [] - r_cnn_cache = [] - for i, layer in enumerate(self.encoders): - # NOTE(xcsong): Before layer.forward - # shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2), - # shape(cnn_cache[i]) is (b=1, hidden-dim, cache_t2) - xs, _, new_att_cache, new_cnn_cache = layer( - xs, att_mask, pos_emb, - att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache, - cnn_cache=cnn_cache[i] if cnn_cache.size(0) > 0 else cnn_cache - ) - # NOTE(xcsong): After layer.forward - # shape(new_att_cache) is (1, head, attention_key_size, d_k * 2), - # shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2) - r_att_cache.append(new_att_cache[:, :, next_cache_start:, :]) - r_cnn_cache.append(new_cnn_cache.unsqueeze(0)) - if self.normalize_before: - xs = self.after_norm(xs) - - # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2), - # ? may be larger than cache_t1, it depends on required_cache_size - r_att_cache = torch.cat(r_att_cache, dim=0) - # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2) - r_cnn_cache = torch.cat(r_cnn_cache, dim=0) - - return (xs, r_att_cache, r_cnn_cache) - - def forward_chunk_by_chunk( - self, - xs: torch.Tensor, - decoding_chunk_size: int, - num_decoding_left_chunks: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ Forward input chunk by chunk with chunk_size like a streaming - fashion - - Here we should pay special attention to computation cache in the - streaming style forward chunk by chunk. Three things should be taken - into account for computation in the current network: - 1. transformer/conformer encoder layers output cache - 2. convolution in conformer - 3. convolution in subsampling - - However, we don't implement subsampling cache for: - 1. We can control subsampling module to output the right result by - overlapping input instead of cache left context, even though it - wastes some computation, but subsampling only takes a very - small fraction of computation in the whole model. - 2. Typically, there are several covolution layers with subsampling - in subsampling module, it is tricky and complicated to do cache - with different convolution layers with different subsampling - rate. - 3. Currently, nn.Sequential is used to stack all the convolution - layers in subsampling, we need to rewrite it to make it work - with cache, which is not prefered. 
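The chunk loop in `forward_chunk_by_chunk` feeds overlapping windows of `decoding_window` input frames and advances by `subsampling * decoding_chunk_size` frames per step. A quick illustration of that arithmetic, assuming the Conv2dSubsampling4 front end (`subsampling_rate=4`, `right_context=6`) and an arbitrary chunk size of 16:

```python
# Windowing arithmetic only; no model is run here.
subsampling = 4                      # Conv2dSubsampling4
context = 6 + 1                      # right_context + current frame
decoding_chunk_size = 16

stride = subsampling * decoding_chunk_size                            # 64 input frames per step
decoding_window = (decoding_chunk_size - 1) * subsampling + context   # 67 frames fed per step

num_frames = 200
starts = list(range(0, num_frames - context + 1, stride))
print(stride, decoding_window, starts)   # 64 67 [0, 64, 128, 192]
```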
- Args: - xs (torch.Tensor): (1, max_len, dim) - chunk_size (int): decoding chunk size - """ - assert decoding_chunk_size > 0 - # The model is trained by static or dynamic chunk - assert self.static_chunk_size > 0 or self.use_dynamic_chunk - subsampling = self.embed.subsampling_rate - context = self.embed.right_context + 1 # Add current frame - stride = subsampling * decoding_chunk_size - decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0), device=xs.device) - outputs = [] - offset = 0 - required_cache_size = decoding_chunk_size * num_decoding_left_chunks - - # Feed forward overlap input step by step - for cur in range(0, num_frames - context + 1, stride): - end = min(cur + decoding_window, num_frames) - chunk_xs = xs[:, cur:end, :] - (y, att_cache, cnn_cache) = self.forward_chunk( - chunk_xs, offset, required_cache_size, att_cache, cnn_cache) - outputs.append(y) - offset += y.size(1) - ys = torch.cat(outputs, 1) - masks = torch.ones((1, 1, ys.size(1)), device=ys.device, dtype=torch.bool) - return ys, masks - - -class TransformerEncoder(BaseEncoder): - """Transformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "abs_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - ): - """ Construct TransformerEncoder - - See Encoder for the meaning of each parameter. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - self.encoders = torch.nn.ModuleList([ - TransformerEncoderLayer( - output_size, - MultiHeadedAttention(attention_heads, output_size, - attention_dropout_rate), - PositionwiseFeedForward(output_size, linear_units, - dropout_rate), dropout_rate, - normalize_before, concat_after) for _ in range(num_blocks) - ]) - - -class ConformerEncoder(BaseEncoder): - """Conformer encoder module.""" - def __init__( - self, - input_size: int, - output_size: int = 256, - attention_heads: int = 4, - linear_units: int = 2048, - num_blocks: int = 6, - dropout_rate: float = 0.1, - positional_dropout_rate: float = 0.1, - attention_dropout_rate: float = 0.0, - input_layer: str = "conv2d", - pos_enc_layer_type: str = "rel_pos", - normalize_before: bool = True, - concat_after: bool = False, - static_chunk_size: int = 0, - use_dynamic_chunk: bool = False, - global_cmvn: torch.nn.Module = None, - use_dynamic_left_chunk: bool = False, - positionwise_conv_kernel_size: int = 1, - macaron_style: bool = True, - selfattention_layer_type: str = "rel_selfattn", - activation_type: str = "swish", - use_cnn_module: bool = True, - cnn_module_kernel: int = 15, - causal: bool = False, - cnn_module_norm: str = "batch_norm", - ): - """Construct ConformerEncoder - - Args: - input_size to use_dynamic_chunk, see in BaseEncoder - positionwise_conv_kernel_size (int): Kernel size of positionwise - conv1d layer. - macaron_style (bool): Whether to use macaron style for - positionwise layer. - selfattention_layer_type (str): Encoder attention layer type, - the parameter has no effect now, it's just for configure - compatibility. - activation_type (str): Encoder activation function type. - use_cnn_module (bool): Whether to use convolution module. - cnn_module_kernel (int): Kernel size of convolution module. - causal (bool): whether to use causal convolution or not. 
- """ - assert check_argument_types() - super().__init__(input_size, output_size, attention_heads, - linear_units, num_blocks, dropout_rate, - positional_dropout_rate, attention_dropout_rate, - input_layer, pos_enc_layer_type, normalize_before, - concat_after, static_chunk_size, use_dynamic_chunk, - global_cmvn, use_dynamic_left_chunk) - activation = get_activation(activation_type) - - # self-attention module definition - if pos_enc_layer_type != "rel_pos": - encoder_selfattn_layer = MultiHeadedAttention - else: - encoder_selfattn_layer = RelPositionMultiHeadedAttention - encoder_selfattn_layer_args = ( - attention_heads, - output_size, - attention_dropout_rate, - ) - # feed-forward module definition - positionwise_layer = PositionwiseFeedForward - positionwise_layer_args = ( - output_size, - linear_units, - dropout_rate, - activation, - ) - # convolution module definition - convolution_layer = ConvolutionModule - convolution_layer_args = (output_size, cnn_module_kernel, activation, - cnn_module_norm, causal) - - self.encoders = torch.nn.ModuleList([ - ConformerEncoderLayer( - output_size, - encoder_selfattn_layer(*encoder_selfattn_layer_args), - positionwise_layer(*positionwise_layer_args), - positionwise_layer( - *positionwise_layer_args) if macaron_style else None, - convolution_layer( - *convolution_layer_args) if use_cnn_module else None, - dropout_rate, - normalize_before, - concat_after, - ) for _ in range(num_blocks) - ]) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/encoder_layer.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/encoder_layer.py deleted file mode 100644 index 6b4629a6802a90422fa1494f82f46488f2553c16..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/encoder_layer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# 2022 Xingchen Song (sxc19@mails.tsinghua.edu.cn) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - -"""Encoder self-attention layer definition.""" - -from typing import Optional, Tuple - -import torch -from torch import nn - - -class TransformerEncoderLayer(nn.Module): - """Encoder layer module. - - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward`, instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: to use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: torch.nn.Module, - dropout_rate: float, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.norm1 = nn.LayerNorm(size, eps=1e-5) - self.norm2 = nn.LayerNorm(size, eps=1e-5) - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): just for interface compatibility - to ConformerEncoderLayer - mask_pad (torch.Tensor): does not used in transformer layer, - just for unified api with conformer. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2), not used here, it's for interface - compatibility to ConformerEncoderLayer. - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch=1, size, cache_t2). - - """ - residual = x - if self.normalize_before: - x = self.norm1(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, cache=att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm1(x) - - residual = x - if self.normalize_before: - x = self.norm2(x) - x = residual + self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm2(x) - - fake_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - return x, mask, new_att_cache, fake_cnn_cache - - -class ConformerEncoderLayer(nn.Module): - """Encoder layer module. - Args: - size (int): Input dimension. - self_attn (torch.nn.Module): Self-attention module instance. - `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` - instance can be used as the argument. - feed_forward (torch.nn.Module): Feed-forward module instance. - `PositionwiseFeedForward` instance can be used as the argument. - feed_forward_macaron (torch.nn.Module): Additional feed-forward module - instance. - `PositionwiseFeedForward` instance can be used as the argument. - conv_module (torch.nn.Module): Convolution module instance. - `ConvlutionModule` instance can be used as the argument. - dropout_rate (float): Dropout rate. - normalize_before (bool): - True: use layer_norm before each sub-block. - False: use layer_norm after each sub-block. - concat_after (bool): Whether to concat attention layer's input and - output. 
- True: x -> x + linear(concat(x, att(x))) - False: x -> x + att(x) - """ - def __init__( - self, - size: int, - self_attn: torch.nn.Module, - feed_forward: Optional[nn.Module] = None, - feed_forward_macaron: Optional[nn.Module] = None, - conv_module: Optional[nn.Module] = None, - dropout_rate: float = 0.1, - normalize_before: bool = True, - concat_after: bool = False, - ): - """Construct an EncoderLayer object.""" - super().__init__() - self.self_attn = self_attn - self.feed_forward = feed_forward - self.feed_forward_macaron = feed_forward_macaron - self.conv_module = conv_module - self.norm_ff = nn.LayerNorm(size, eps=1e-5) # for the FNN module - self.norm_mha = nn.LayerNorm(size, eps=1e-5) # for the MHA module - if feed_forward_macaron is not None: - self.norm_ff_macaron = nn.LayerNorm(size, eps=1e-5) - self.ff_scale = 0.5 - else: - self.ff_scale = 1.0 - if self.conv_module is not None: - self.norm_conv = nn.LayerNorm(size, - eps=1e-5) # for the CNN module - self.norm_final = nn.LayerNorm( - size, eps=1e-5) # for the final output of the block - self.dropout = nn.Dropout(dropout_rate) - self.size = size - self.normalize_before = normalize_before - self.concat_after = concat_after - if self.concat_after: - self.concat_linear = nn.Linear(size + size, size) - else: - self.concat_linear = nn.Identity() - - - def forward( - self, - x: torch.Tensor, - mask: torch.Tensor, - pos_emb: torch.Tensor, - mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), - att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute encoded features. - - Args: - x (torch.Tensor): (#batch, time, size) - mask (torch.Tensor): Mask tensor for the input (#batch, time,time), - (0, 0, 0) means fake mask. - pos_emb (torch.Tensor): positional encoding, must not be None - for ConformerEncoderLayer. - mask_pad (torch.Tensor): batch padding mask used for conv module. - (#batch, 1,time), (0, 0, 0) means fake mask. - att_cache (torch.Tensor): Cache tensor of the KEY & VALUE - (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. - cnn_cache (torch.Tensor): Convolution cache in conformer layer - (#batch=1, size, cache_t2) - Returns: - torch.Tensor: Output tensor (#batch, time, size). - torch.Tensor: Mask tensor (#batch, time, time). - torch.Tensor: att_cache tensor, - (#batch=1, head, cache_t1 + time, d_k * 2). - torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
- """ - - # whether to use macaron style - if self.feed_forward_macaron is not None: - residual = x - if self.normalize_before: - x = self.norm_ff_macaron(x) - x = residual + self.ff_scale * self.dropout( - self.feed_forward_macaron(x)) - if not self.normalize_before: - x = self.norm_ff_macaron(x) - - # multi-headed self-attention module - residual = x - if self.normalize_before: - x = self.norm_mha(x) - - x_att, new_att_cache = self.self_attn( - x, x, x, mask, pos_emb, att_cache) - if self.concat_after: - x_concat = torch.cat((x, x_att), dim=-1) - x = residual + self.concat_linear(x_concat) - else: - x = residual + self.dropout(x_att) - if not self.normalize_before: - x = self.norm_mha(x) - - # convolution module - # Fake new cnn cache here, and then change it in conv_module - new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) - if self.conv_module is not None: - residual = x - if self.normalize_before: - x = self.norm_conv(x) - x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache) - x = residual + self.dropout(x) - - if not self.normalize_before: - x = self.norm_conv(x) - - # feed forward module - residual = x - if self.normalize_before: - x = self.norm_ff(x) - - x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) - if not self.normalize_before: - x = self.norm_ff(x) - - if self.conv_module is not None: - x = self.norm_final(x) - - return x, mask, new_att_cache, new_cnn_cache diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/label_smoothing_loss.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/label_smoothing_loss.py deleted file mode 100644 index 428fedcb0eb4345cd1361c97008a9afcd94ac171..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/label_smoothing_loss.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Label smoothing module.""" - -import torch -from torch import nn - - -class LabelSmoothingLoss(nn.Module): - """Label-smoothing loss. - - In a standard CE loss, the label's data distribution is: - [0,1,2] -> - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - - In the smoothing version CE Loss,some probabilities - are taken from the true label prob (1.0) and are divided - among other labels. - - e.g. 
- smoothing=0.1 - [0,1,2] -> - [ - [0.9, 0.05, 0.05], - [0.05, 0.9, 0.05], - [0.05, 0.05, 0.9], - ] - - Args: - size (int): the number of class - padding_idx (int): padding class id which will be ignored for loss - smoothing (float): smoothing rate (0.0 means the conventional CE) - normalize_length (bool): - normalize loss by sequence length if True - normalize loss by batch size if False - """ - def __init__(self, - size: int, - padding_idx: int, - smoothing: float, - normalize_length: bool = False): - """Construct an LabelSmoothingLoss object.""" - super(LabelSmoothingLoss, self).__init__() - self.criterion = nn.KLDivLoss(reduction="none") - self.padding_idx = padding_idx - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.size = size - self.normalize_length = normalize_length - - def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute loss between x and target. - - The model outputs and data labels tensors are flatten to - (batch*seqlen, class) shape and a mask is applied to the - padding part which should not be calculated for loss. - - Args: - x (torch.Tensor): prediction (batch, seqlen, class) - target (torch.Tensor): - target signal masked with self.padding_id (batch, seqlen) - Returns: - loss (torch.Tensor) : The KL loss, scalar float value - """ - assert x.size(2) == self.size - batch_size = x.size(0) - x = x.view(-1, self.size) - target = target.view(-1) - # use zeros_like instead of torch.no_grad() for true_dist, - # since no_grad() can not be exported by JIT - true_dist = torch.zeros_like(x) - true_dist.fill_(self.smoothing / (self.size - 1)) - ignore = target == self.padding_idx # (B,) - total = len(target) - ignore.sum().item() - target = target.masked_fill(ignore, 0) # avoid -1 index - true_dist.scatter_(1, target.unsqueeze(1), self.confidence) - kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) - denom = total if self.normalize_length else batch_size - return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/positionwise_feed_forward.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/positionwise_feed_forward.py deleted file mode 100644 index 73ba239e3f1e68f65650961f2c4ee6758729a06e..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/positionwise_feed_forward.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Positionwise feed forward layer definition.""" - -import torch - - -class PositionwiseFeedForward(torch.nn.Module): - """Positionwise feed forward layer. - - FeedForward are appied on each position of the sequence. - The output dim is same with the input dim. - - Args: - idim (int): Input dimenstion. - hidden_units (int): The number of hidden units. - dropout_rate (float): Dropout rate. 
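The `LabelSmoothingLoss` above keeps `1 - smoothing` of the probability mass on the true class and spreads `smoothing / (size - 1)` over the remaining classes. A minimal reproduction of the 3-class example from its docstring (padding handling omitted):

```python
import torch

size, smoothing = 3, 0.1
target = torch.tensor([0, 1, 2])

true_dist = torch.full((target.size(0), size), smoothing / (size - 1))
true_dist.scatter_(1, target.unsqueeze(1), 1.0 - smoothing)
print(true_dist)
# tensor([[0.9000, 0.0500, 0.0500],
#         [0.0500, 0.9000, 0.0500],
#         [0.0500, 0.0500, 0.9000]])
```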
- activation (torch.nn.Module): Activation function - """ - def __init__(self, - idim: int, - hidden_units: int, - dropout_rate: float, - activation: torch.nn.Module = torch.nn.ReLU()): - """Construct a PositionwiseFeedForward object.""" - super(PositionwiseFeedForward, self).__init__() - self.w_1 = torch.nn.Linear(idim, hidden_units) - self.activation = activation - self.dropout = torch.nn.Dropout(dropout_rate) - self.w_2 = torch.nn.Linear(hidden_units, idim) - - def forward(self, xs: torch.Tensor) -> torch.Tensor: - """Forward function. - - Args: - xs: input tensor (B, L, D) - Returns: - output tensor, (B, L, D) - """ - return self.w_2(self.dropout(self.activation(self.w_1(xs)))) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/subsampling.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/subsampling.py deleted file mode 100644 index 5f2823eedf0e623188d6af6680fa50ca44b47877..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/subsampling.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) - - -"""Subsampling layer definition.""" - -from typing import Tuple, Union - -import torch - - -class BaseSubsampling(torch.nn.Module): - def __init__(self): - super().__init__() - self.right_context = 0 - self.subsampling_rate = 1 - - def position_encoding(self, offset: Union[int, torch.Tensor], - size: int) -> torch.Tensor: - return self.pos_enc.position_encoding(offset, size) - - -class LinearNoSubsampling(BaseSubsampling): - """Linear transform the input without subsampling - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an linear object.""" - super().__init__() - self.out = torch.nn.Sequential( - torch.nn.Linear(idim, odim), - torch.nn.LayerNorm(odim, eps=1e-5), - torch.nn.Dropout(dropout_rate), - ) - self.pos_enc = pos_enc_class - self.right_context = 0 - self.subsampling_rate = 1 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Input x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: linear input tensor (#batch, time', odim), - where time' = time . - torch.Tensor: linear input mask (#batch, 1, time'), - where time' = time . - - """ - x = self.out(x) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask - - -class Conv2dSubsampling4(BaseSubsampling): - """Convolutional 2D subsampling (to 1/4 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling4 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.out = torch.nn.Sequential( - torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) - self.pos_enc = pos_enc_class - # The right context for every conv layer is computed by: - # (kernel_size - 1) * frame_rate_of_this_layer - self.subsampling_rate = 4 - # 6 = (3 - 1) * 1 + (3 - 1) * 2 - self.right_context = 6 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 4. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 4. - torch.Tensor: positional encoding - - """ - x = x.unsqueeze(1) # (b, c=1, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2] - - -class Conv2dSubsampling6(BaseSubsampling): - """Convolutional 2D subsampling (to 1/6 length). - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. - pos_enc (torch.nn.Module): Custom position encoding layer. - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling6 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 5, 3), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), - odim) - self.pos_enc = pos_enc_class - # 10 = (3 - 1) * 1 + (5 - 1) * 2 - self.subsampling_rate = 6 - self.right_context = 10 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 6. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 6. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 4::3] - - -class Conv2dSubsampling8(BaseSubsampling): - """Convolutional 2D subsampling (to 1/8 length). - - Args: - idim (int): Input dimension. - odim (int): Output dimension. - dropout_rate (float): Dropout rate. 
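`Conv2dSubsampling4` above reduces the time axis roughly 4x with two stride-2 3x3 convolutions, and its final `Linear` consumes `odim * (((idim - 1) // 2 - 1) // 2)` features. A quick shape check under assumed sizes (80-dim fbank input, `odim=256`, 100 frames):

```python
import torch

idim, odim, T = 80, 256, 100
conv = torch.nn.Sequential(
    torch.nn.Conv2d(1, odim, 3, 2), torch.nn.ReLU(),
    torch.nn.Conv2d(odim, odim, 3, 2), torch.nn.ReLU(),
)
x = torch.randn(1, 1, T, idim)               # (b, c=1, t, f)
b, c, t, f = conv(x).size()
print(t, f)                                  # 24 19  -> time ~ T // 4
print(odim * (((idim - 1) // 2 - 1) // 2))   # 4864, input width of the Linear layer
```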
- - """ - def __init__(self, idim: int, odim: int, dropout_rate: float, - pos_enc_class: torch.nn.Module): - """Construct an Conv2dSubsampling8 object.""" - super().__init__() - self.conv = torch.nn.Sequential( - torch.nn.Conv2d(1, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - torch.nn.Conv2d(odim, odim, 3, 2), - torch.nn.ReLU(), - ) - self.linear = torch.nn.Linear( - odim * ((((idim - 1) // 2 - 1) // 2 - 1) // 2), odim) - self.pos_enc = pos_enc_class - self.subsampling_rate = 8 - # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4 - self.right_context = 14 - - def forward( - self, - x: torch.Tensor, - x_mask: torch.Tensor, - offset: Union[int, torch.Tensor] = 0 - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Subsample x. - - Args: - x (torch.Tensor): Input tensor (#batch, time, idim). - x_mask (torch.Tensor): Input mask (#batch, 1, time). - - Returns: - torch.Tensor: Subsampled tensor (#batch, time', odim), - where time' = time // 8. - torch.Tensor: Subsampled mask (#batch, 1, time'), - where time' = time // 8. - torch.Tensor: positional encoding - """ - x = x.unsqueeze(1) # (b, c, t, f) - x = self.conv(x) - b, c, t, f = x.size() - x = self.linear(x.transpose(1, 2).contiguous().view(b, t, c * f)) - x, pos_emb = self.pos_enc(x, offset) - return x, pos_emb, x_mask[:, :, 2::2][:, :, 2::2][:, :, 2::2] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/swish.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/swish.py deleted file mode 100644 index b4250f5c93104f38958d145572e363256e03fcb0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/transformer/swish.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe) -# 2020 Northwestern Polytechnical University (Pengcheng Guo) -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Swish() activation function for Conformer.""" - -import torch - - -class Swish(torch.nn.Module): - """Construct an Swish object.""" - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Return Swish activation function.""" - return x * torch.sigmoid(x) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/checkpoint.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/checkpoint.py deleted file mode 100644 index 8e0c413c79c34cd667240357d7ef9eab816a885c..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/checkpoint.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
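The `Swish` module above computes `x * sigmoid(x)`, the same function PyTorch ships as SiLU (exposed as `torch.nn.functional.silu` in recent releases); a one-line numerical check:

```python
import torch

x = torch.randn(4)
print(torch.allclose(x * torch.sigmoid(x), torch.nn.functional.silu(x)))   # True
```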
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re - -import yaml -import torch -from collections import OrderedDict - -import datetime - - -def load_checkpoint(model: torch.nn.Module, path: str) -> dict: - if torch.cuda.is_available(): - logging.info('Checkpoint: loading from checkpoint %s for GPU' % path) - checkpoint = torch.load(path) - else: - logging.info('Checkpoint: loading from checkpoint %s for CPU' % path) - checkpoint = torch.load(path, map_location='cpu') - model.load_state_dict(checkpoint, strict=False) - info_path = re.sub('.pt$', '.yaml', path) - configs = {} - if os.path.exists(info_path): - with open(info_path, 'r') as fin: - configs = yaml.load(fin, Loader=yaml.FullLoader) - return configs - - -def save_checkpoint(model: torch.nn.Module, path: str, infos=None): - ''' - Args: - infos (dict or None): any info you want to save. - ''' - logging.info('Checkpoint: save to checkpoint %s' % path) - if isinstance(model, torch.nn.DataParallel): - state_dict = model.module.state_dict() - elif isinstance(model, torch.nn.parallel.DistributedDataParallel): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save(state_dict, path) - info_path = re.sub('.pt$', '.yaml', path) - if infos is None: - infos = {} - infos['save_time'] = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S') - with open(info_path, 'w') as fout: - data = yaml.dump(infos) - fout.write(data) - - -def filter_modules(model_state_dict, modules): - new_mods = [] - incorrect_mods = [] - mods_model = model_state_dict.keys() - for mod in modules: - if any(key.startswith(mod) for key in mods_model): - new_mods += [mod] - else: - incorrect_mods += [mod] - if incorrect_mods: - logging.warning( - "module(s) %s don't match or (partially match) " - "available modules in model.", - incorrect_mods, - ) - logging.warning("for information, the existing modules in model are:") - logging.warning("%s", mods_model) - - return new_mods - - -def load_trained_modules(model: torch.nn.Module, args: None): - # Load encoder modules with pre-trained model(s). 
- enc_model_path = args.enc_init - enc_modules = args.enc_init_mods - main_state_dict = model.state_dict() - logging.warning("model(s) found for pre-initialization") - if os.path.isfile(enc_model_path): - logging.info('Checkpoint: loading from checkpoint %s for CPU' % - enc_model_path) - model_state_dict = torch.load(enc_model_path, map_location='cpu') - modules = filter_modules(model_state_dict, enc_modules) - partial_state_dict = OrderedDict() - for key, value in model_state_dict.items(): - if any(key.startswith(m) for m in modules): - partial_state_dict[key] = value - main_state_dict.update(partial_state_dict) - else: - logging.warning("model was not found : %s", enc_model_path) - - model.load_state_dict(main_state_dict) - configs = {} - return configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/cmvn.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/cmvn.py deleted file mode 100644 index 3101c619f54991c947124f393f3459c317356a2f..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/cmvn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import math - -import numpy as np - - -def _load_json_cmvn(json_cmvn_file): - """ Load the json format cmvn stats file and calculate cmvn - - Args: - json_cmvn_file: cmvn stats file in json format - - Returns: - a numpy array of [means, vars] - """ - with open(json_cmvn_file) as f: - cmvn_stats = json.load(f) - - means = cmvn_stats['mean_stat'] - variance = cmvn_stats['var_stat'] - count = cmvn_stats['frame_num'] - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def _load_kaldi_cmvn(kaldi_cmvn_file): - """ Load the kaldi format cmvn stats file and calculate cmvn - - Args: - kaldi_cmvn_file: kaldi text style global cmvn file, which - is generated by: - compute-cmvn-stats --binary=false scp:feats.scp global_cmvn - - Returns: - a numpy array of [means, vars] - """ - means = [] - variance = [] - with open(kaldi_cmvn_file, 'r') as fid: - # kaldi binary file start with '\0B' - if fid.read(2) == '\0B': - logging.error('kaldi cmvn binary file is not supported, please ' - 'recompute it by: compute-cmvn-stats --binary=false ' - ' scp:feats.scp global_cmvn') - sys.exit(1) - fid.seek(0) - arr = fid.read().split() - assert (arr[0] == '[') - assert (arr[-2] == '0') - assert (arr[-1] == ']') - feat_dim = int((len(arr) - 2 - 2) / 2) - for i in range(1, feat_dim + 1): - means.append(float(arr[i])) - count = float(arr[feat_dim + 1]) - for i in range(feat_dim + 2, 2 * feat_dim + 2): - variance.append(float(arr[i])) - - for i in range(len(means)): - means[i] /= count - variance[i] = variance[i] / count - means[i] * means[i] - if variance[i] < 1.0e-20: - variance[i] = 
1.0e-20 - variance[i] = 1.0 / math.sqrt(variance[i]) - cmvn = np.array([means, variance]) - return cmvn - - -def load_cmvn(cmvn_file, is_json): - if is_json: - cmvn = _load_json_cmvn(cmvn_file) - else: - cmvn = _load_kaldi_cmvn(cmvn_file) - return cmvn[0], cmvn[1] diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/common.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/common.py deleted file mode 100644 index 74238d59aefbf227fe6b811703af17550bc7f8f0..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/common.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -"""Unility functions for Transformer.""" - -import math -from typing import List, Tuple - -import torch -from torch.nn.utils.rnn import pad_sequence - -IGNORE_ID = -1 - - -def pad_list(xs: List[torch.Tensor], pad_value: int): - """Perform padding for the list of tensors. - - Args: - xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. - pad_value (float): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tmax, `*`). - - Examples: - >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] - >>> x - [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] - >>> pad_list(x, 0) - tensor([[1., 1., 1., 1.], - [1., 1., 0., 0.], - [1., 0., 0., 0.]]) - - """ - n_batch = len(xs) - max_len = max([x.size(0) for x in xs]) - pad = torch.zeros(n_batch, max_len, dtype=xs[0].dtype, device=xs[0].device) - pad = pad.fill_(pad_value) - for i in range(n_batch): - pad[i, :xs[i].size(0)] = xs[i] - - return pad - - -def add_blank(ys_pad: torch.Tensor, blank: int, - ignore_id: int) -> torch.Tensor: - """ Prepad blank for transducer predictor - - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - blank (int): index of - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> blank = 0 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in = add_blank(ys_pad, 0, -1) - >>> ys_in - tensor([[0, 1, 2, 3, 4, 5], - [0, 4, 5, 6, 0, 0], - [0, 7, 8, 9, 0, 0]]) - """ - bs = ys_pad.size(0) - _blank = torch.tensor([blank], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _blank = _blank.repeat(bs).unsqueeze(1) # [bs,1] - out = torch.cat([_blank, ys_pad], dim=1) # [bs, Lmax+1] - return torch.where(out == ignore_id, blank, out) - - -def add_sos_eos(ys_pad: torch.Tensor, sos: int, eos: int, - ignore_id: int) -> Tuple[torch.Tensor, torch.Tensor]: - """Add and labels. 
- - Args: - ys_pad (torch.Tensor): batch of padded target sequences (B, Lmax) - sos (int): index of - eos (int): index of - ignore_id (int): index of padding - - Returns: - ys_in (torch.Tensor) : (B, Lmax + 1) - ys_out (torch.Tensor) : (B, Lmax + 1) - - Examples: - >>> sos_id = 10 - >>> eos_id = 11 - >>> ignore_id = -1 - >>> ys_pad - tensor([[ 1, 2, 3, 4, 5], - [ 4, 5, 6, -1, -1], - [ 7, 8, 9, -1, -1]], dtype=torch.int32) - >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id) - >>> ys_in - tensor([[10, 1, 2, 3, 4, 5], - [10, 4, 5, 6, 11, 11], - [10, 7, 8, 9, 11, 11]]) - >>> ys_out - tensor([[ 1, 2, 3, 4, 5, 11], - [ 4, 5, 6, 11, -1, -1], - [ 7, 8, 9, 11, -1, -1]]) - """ - _sos = torch.tensor([sos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - _eos = torch.tensor([eos], - dtype=torch.long, - requires_grad=False, - device=ys_pad.device) - ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys - ys_in = [torch.cat([_sos, y], dim=0) for y in ys] - ys_out = [torch.cat([y, _eos], dim=0) for y in ys] - return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) - - -def reverse_pad_list(ys_pad: torch.Tensor, - ys_lens: torch.Tensor, - pad_value: float = -1.0) -> torch.Tensor: - """Reverse padding for the list of tensors. - - Args: - ys_pad (tensor): The padded tensor (B, Tokenmax). - ys_lens (tensor): The lens of token seqs (B) - pad_value (int): Value for padding. - - Returns: - Tensor: Padded tensor (B, Tokenmax). - - Examples: - >>> x - tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]]) - >>> pad_list(x, 0) - tensor([[4, 3, 2, 1], - [7, 6, 5, 0], - [9, 8, 0, 0]]) - - """ - r_ys_pad = pad_sequence([(torch.flip(y.int()[:i], [0])) - for y, i in zip(ys_pad, ys_lens)], True, - pad_value) - return r_ys_pad - - -def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, - ignore_label: int) -> float: - """Calculate accuracy. - - Args: - pad_outputs (Tensor): Prediction tensors (B * Lmax, D). - pad_targets (LongTensor): Target label tensors (B, Lmax). - ignore_label (int): Ignore label id. - - Returns: - float: Accuracy value (0.0 - 1.0). 
- - """ - pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), - pad_outputs.size(1)).argmax(2) - mask = pad_targets != ignore_label - numerator = torch.sum( - pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) - denominator = torch.sum(mask) - return float(numerator) / float(denominator) - - -def get_rnn(rnn_type: str) -> torch.nn.Module: - assert rnn_type in ["rnn", "lstm", "gru"] - if rnn_type == "rnn": - return torch.nn.RNN - elif rnn_type == "lstm": - return torch.nn.LSTM - else: - return torch.nn.GRU - - -def get_activation(act): - """Return activation function.""" - # Lazy load to avoid unused import - from wenet.transformer.swish import Swish - - activation_funcs = { - "hardtanh": torch.nn.Hardtanh, - "tanh": torch.nn.Tanh, - "relu": torch.nn.ReLU, - "selu": torch.nn.SELU, - "swish": getattr(torch.nn, "SiLU", Swish), - "gelu": torch.nn.GELU - } - - return activation_funcs[act]() - - -def get_subsample(config): - input_layer = config["encoder_conf"]["input_layer"] - assert input_layer in ["conv2d", "conv2d6", "conv2d8"] - if input_layer == "conv2d": - return 4 - elif input_layer == "conv2d6": - return 6 - elif input_layer == "conv2d8": - return 8 - - -def remove_duplicates_and_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != 0: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def replace_duplicates_with_blank(hyp: List[int]) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - new_hyp.append(hyp[cur]) - prev = cur - cur += 1 - while cur < len(hyp) and hyp[cur] == hyp[prev] and hyp[cur] != 0: - new_hyp.append(0) - cur += 1 - return new_hyp - - -def log_add(args: List[int]) -> float: - """ - Stable log add - """ - if all(a == -float('inf') for a in args): - return -float('inf') - a_max = max(args) - lsp = math.log(sum(math.exp(a - a_max) for a in args)) - return a_max + lsp diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/config.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/config.py deleted file mode 100644 index 50170ced44534d3ee6532a2f87fcd78c5148f7e7..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021 Shaoshang Qi -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import copy - -def override_config(configs, override_list): - new_configs = copy.deepcopy(configs) - for item in override_list: - arr = item.split() - if len(arr) != 2: - print(f"the overrive {item} format not correct, skip it") - continue - keys = arr[0].split('.') - s_configs = new_configs - for i, key in enumerate(keys): - if key not in s_configs: - print(f"the overrive {item} format not correct, skip it") - if i == len(keys) - 1: - param_type = type(s_configs[key]) - if param_type != bool: - s_configs[key] = param_type(arr[1]) - else: - s_configs[key] = arr[1] in ['true', 'True'] - print(f"override {arr[0]} with {arr[1]}") - else: - s_configs = s_configs[key] - return new_configs diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/ctc_util.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/ctc_util.py deleted file mode 100644 index 73b8fb272ac153dd6d05207f352ebcf1ad14890d..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/ctc_util.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc (Binbin Zhang, Di Wu) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch - -def insert_blank(label, blank_id=0): - """Insert blank token between every two label token.""" - label = np.expand_dims(label, 1) - blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id - label = np.concatenate([blanks, label], axis=1) - label = label.reshape(-1) - label = np.append(label, label[0]) - return label - -def forced_align(ctc_probs: torch.Tensor, - y: torch.Tensor, - blank_id=0) -> list: - """ctc forced alignment. 
- - Args: - torch.Tensor ctc_probs: hidden state sequence, 2d tensor (T, D) - torch.Tensor y: id sequence tensor 1d tensor (L) - int blank_id: blank symbol index - Returns: - torch.Tensor: alignment result - """ - y_insert_blank = insert_blank(y, blank_id) - - log_alpha = torch.zeros((ctc_probs.size(0), len(y_insert_blank))) - log_alpha = log_alpha - float('inf') # log of zero - state_path = (torch.zeros( - (ctc_probs.size(0), len(y_insert_blank)), dtype=torch.int16) - 1 - ) # state path - - # init start state - log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]] - log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]] - - for t in range(1, ctc_probs.size(0)): - for s in range(len(y_insert_blank)): - if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ - s] == y_insert_blank[s - 2]: - candidates = torch.tensor( - [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]]) - prev_state = [s, s - 1] - else: - candidates = torch.tensor([ - log_alpha[t - 1, s], - log_alpha[t - 1, s - 1], - log_alpha[t - 1, s - 2], - ]) - prev_state = [s, s - 1, s - 2] - log_alpha[t, s] = torch.max(candidates) + ctc_probs[t][y_insert_blank[s]] - state_path[t, s] = prev_state[torch.argmax(candidates)] - - state_seq = -1 * torch.ones((ctc_probs.size(0), 1), dtype=torch.int16) - - candidates = torch.tensor([ - log_alpha[-1, len(y_insert_blank) - 1], - log_alpha[-1, len(y_insert_blank) - 2] - ]) - prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] - state_seq[-1] = prev_state[torch.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): - state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] - - output_alignment = [] - for t in range(0, ctc_probs.size(0)): - output_alignment.append(y_insert_blank[state_seq[t, 0]]) - - return output_alignment diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/executor.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/executor.py deleted file mode 100644 index dc0b69e6e32055566a0e8c41945f6979276e5672..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/executor.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from contextlib import nullcontext - -# if your python version < 3.7 use the below one -# from contextlib import suppress as nullcontext -import torch -from torch.nn.utils import clip_grad_norm_ - - -class Executor: - - def __init__(self): - self.step = 0 - - def train(self, model, optimizer, scheduler, data_loader, device, writer, - args, scaler): - ''' Train one epoch - ''' - model.train() - clip = args.get('grad_clip', 50.0) - log_interval = args.get('log_interval', 10) - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - accum_grad = args.get('accum_grad', 1) - is_distributed = args.get('is_distributed', True) - use_amp = args.get('use_amp', False) - logging.info('using accumulate grad, new batch size is {} times' - ' larger than before'.format(accum_grad)) - if use_amp: - assert scaler is not None - # A context manager to be used in conjunction with an instance of - # torch.nn.parallel.DistributedDataParallel to be able to train - # with uneven inputs across participating processes. - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_context = model.join - else: - model_context = nullcontext - num_seen_utts = 0 - with model_context(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - context = None - # Disable gradient synchronizations across DDP processes. - # Within this context, gradients will be accumulated on module - # variables, which will later be synchronized. - if is_distributed and batch_idx % accum_grad != 0: - context = model.no_sync - # Used for single gpu training and DDP gradient synchronization - # processes. - else: - context = nullcontext - with context(): - # autocast context - # The more details about amp can be found in - # https://pytorch.org/docs/stable/notes/amp_examples.html - with torch.cuda.amp.autocast(scaler is not None): - loss_dict = model(feats, feats_lengths, target, - target_lengths) - loss = loss_dict['loss'] / accum_grad - if use_amp: - scaler.scale(loss).backward() - else: - loss.backward() - - num_seen_utts += num_utts - if batch_idx % accum_grad == 0: - if rank == 0 and writer is not None: - writer.add_scalar('train_loss', loss, self.step) - # Use mixed precision training - if use_amp: - scaler.unscale_(optimizer) - grad_norm = clip_grad_norm_(model.parameters(), clip) - # Must invoke scaler.update() if unscale_() is used in - # the iteration to avoid the following error: - # RuntimeError: unscale_() has already been called - # on this optimizer since the last update(). - # We don't check grad here since that if the gradient - # has inf/nan values, scaler.step will skip - # optimizer.step(). 
- scaler.step(optimizer) - scaler.update() - else: - grad_norm = clip_grad_norm_(model.parameters(), clip) - if torch.isfinite(grad_norm): - optimizer.step() - optimizer.zero_grad() - scheduler.step() - self.step += 1 - if batch_idx % log_interval == 0: - lr = optimizer.param_groups[0]['lr'] - log_str = 'TRAIN Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, - loss.item() * accum_grad) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'lr {:.8f} rank {}'.format(lr, rank) - logging.debug(log_str) - - def cv(self, model, data_loader, device, args): - ''' Cross validation on - ''' - model.eval() - rank = args.get('rank', 0) - epoch = args.get('epoch', 0) - log_interval = args.get('log_interval', 10) - # in order to avoid division by 0 - num_seen_utts = 1 - total_loss = 0.0 - with torch.no_grad(): - for batch_idx, batch in enumerate(data_loader): - key, feats, target, feats_lengths, target_lengths = batch - feats = feats.to(device) - target = target.to(device) - feats_lengths = feats_lengths.to(device) - target_lengths = target_lengths.to(device) - num_utts = target_lengths.size(0) - if num_utts == 0: - continue - loss_dict = model(feats, feats_lengths, target, target_lengths) - loss = loss_dict['loss'] - if torch.isfinite(loss): - num_seen_utts += num_utts - total_loss += loss.item() * num_utts - if batch_idx % log_interval == 0: - log_str = 'CV Batch {}/{} loss {:.6f} '.format( - epoch, batch_idx, loss.item()) - for name, value in loss_dict.items(): - if name != 'loss' and value is not None: - log_str += '{} {:.6f} '.format(name, value.item()) - log_str += 'history loss {:.6f}'.format(total_loss / - num_seen_utts) - log_str += ' rank {}'.format(rank) - logging.debug(log_str) - return total_loss, num_seen_utts diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/file_utils.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/file_utils.py deleted file mode 100644 index 7b7e516cc61f759267f4ef09309ff0b45110a0c1..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/file_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - - -def read_lists(list_file): - lists = [] - with open(list_file, 'r', encoding='utf8') as fin: - for line in fin: - lists.append(line.strip()) - return lists - - -def read_non_lang_symbols(non_lang_sym_path): - """read non-linguistic symbol from file. - - The file format is like below: - - {NOISE}\n - {BRK}\n - ... - - - Args: - non_lang_sym_path: non-linguistic symbol file path, None means no any - syms. 
- - """ - if non_lang_sym_path is None: - return None - else: - syms = read_lists(non_lang_sym_path) - non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})") - for sym in syms: - if non_lang_syms_pattern.fullmatch(sym) is None: - class BadSymbolFormat(Exception): - pass - raise BadSymbolFormat( - "Non-linguistic symbols should be " - "formatted in {xxx}//[xxx], consider" - " modify '%s' to meet the requirment. " - "More details can be found in discussions here : " - "https://github.com/wenet-e2e/wenet/pull/819" % (sym)) - return syms - - -def read_symbol_table(symbol_table_file): - symbol_table = {} - with open(symbol_table_file, 'r', encoding='utf8') as fin: - for line in fin: - arr = line.strip().split() - assert len(arr) == 2 - symbol_table[arr[0]] = int(arr[1]) - return symbol_table diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py deleted file mode 100644 index 377e110b36cc140a55edc9dcc1b20dc5f91387a2..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from wenet.transducer.joint import TransducerJoint -from wenet.transducer.predictor import (ConvPredictor, EmbeddingPredictor, - RNNPredictor) -from wenet.transducer.transducer import Transducer -from wenet.transformer.asr_model import ASRModel -from wenet.transformer.cmvn import GlobalCMVN -from wenet.transformer.ctc import CTC -from wenet.transformer.decoder import BiTransformerDecoder, TransformerDecoder -from wenet.transformer.encoder import ConformerEncoder, TransformerEncoder -from wenet.squeezeformer.encoder import SqueezeformerEncoder -from wenet.efficient_conformer.encoder import EfficientConformerEncoder -from wenet.utils.cmvn import load_cmvn - - -def init_model(configs): - if configs['cmvn_file'] is not None: - mean, istd = load_cmvn(configs['cmvn_file'], configs['cmvn_conf']['is_json_cmvn']) - global_cmvn = GlobalCMVN( - torch.from_numpy(mean).float(), - torch.from_numpy(istd).float()) - else: - global_cmvn = None - - input_dim = configs['input_dim'] - vocab_size = configs['output_dim'] - - encoder_type = configs.get('encoder', 'conformer') - decoder_type = configs.get('decoder', 'bitransformer') - - if encoder_type == 'conformer': - encoder = ConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'squeezeformer': - encoder = SqueezeformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - elif encoder_type == 'efficientConformer': - encoder = EfficientConformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf'], - **configs['encoder_conf']['efficient_conf'] - if 'efficient_conf' in - configs['encoder_conf'] else {}) - else: - encoder = TransformerEncoder(input_dim, - global_cmvn=global_cmvn, - **configs['encoder_conf']) - if decoder_type == 'transformer': - decoder = TransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - else: - assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0 - assert configs['decoder_conf']['r_num_blocks'] > 0 - decoder = BiTransformerDecoder(vocab_size, encoder.output_size(), - **configs['decoder_conf']) - ctc = CTC(vocab_size, encoder.output_size()) - - # Init joint CTC/Attention or Transducer model - if 'predictor' in configs: - predictor_type = configs.get('predictor', 'rnn') - if predictor_type == 'rnn': - predictor = RNNPredictor(vocab_size, **configs['predictor_conf']) - elif predictor_type == 'embedding': - predictor = EmbeddingPredictor(vocab_size, - **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - elif predictor_type == 'conv': - predictor = ConvPredictor(vocab_size, **configs['predictor_conf']) - configs['predictor_conf']['output_size'] = configs[ - 'predictor_conf']['embed_size'] - else: - raise NotImplementedError( - "only rnn, embedding and conv type support now") - configs['joint_conf']['enc_output_size'] = configs['encoder_conf'][ - 'output_size'] - configs['joint_conf']['pred_output_size'] = configs['predictor_conf'][ - 'output_size'] - joint = TransducerJoint(vocab_size, **configs['joint_conf']) - model = Transducer(vocab_size=vocab_size, - blank=0, - predictor=predictor, - encoder=encoder, - attention_decoder=decoder, - joint=joint, - ctc=ctc, - **configs['model_conf']) - else: - model = ASRModel(vocab_size=vocab_size, - encoder=encoder, - decoder=decoder, - ctc=ctc, - **configs['model_conf']) - return model diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/mask.py 
b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/mask.py deleted file mode 100644 index 2985006ab2bc2d27a9b8adaeb863cc44ca6a0d24..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/mask.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 Shigeki Karita -# 2020 Mobvoi Inc (Binbin Zhang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch - -''' -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. - - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - ret = torch.ones(size, size, device=device, dtype=torch.bool) - return torch.tril(ret) -''' - -def subsequent_mask( - size: int, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size). - - This mask is used only in decoder which works in an auto-regressive mode. - This means the current step could only do attention with its left steps. - - In encoder, fully attention is used when streaming is not necessary and - the sequence is not long. In this case, no attention mask is needed. - - When streaming is need, chunk-based attention is used in encoder. See - subsequent_chunk_mask for the chunk-based attention mask. 
- - Args: - size (int): size of mask - str device (str): "cpu" or "cuda" or torch.Tensor.device - dtype (torch.device): result dtype - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_mask(3) - [[1, 0, 0], - [1, 1, 0], - [1, 1, 1]] - """ - arange = torch.arange(size, device=device) - mask = arange.expand(size, size) - arange = arange.unsqueeze(-1) - mask = mask <= arange - return mask - - -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - - Returns: - torch.Tensor: mask - - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def add_optional_chunk_mask(xs: torch.Tensor, masks: torch.Tensor, - use_dynamic_chunk: bool, - use_dynamic_left_chunk: bool, - decoding_chunk_size: int, static_chunk_size: int, - num_decoding_left_chunks: int): - """ Apply optional mask for encoder. - - Args: - xs (torch.Tensor): padded input, (B, L, D), L for max length - mask (torch.Tensor): mask for xs, (B, 1, L) - use_dynamic_chunk (bool): whether to use dynamic chunk or not - use_dynamic_left_chunk (bool): whether to use dynamic left chunk for - training. - decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's - 0: default for training, use random dynamic chunk. - <0: for decoding, use full chunk. - >0: for decoding, use fixed chunk size as set. - static_chunk_size (int): chunk size for static chunk training/decoding - if it's greater than 0, if use_dynamic_chunk is true, - this parameter will be ignored - num_decoding_left_chunks: number of left chunks, this is for decoding, - the chunk size is decoding_chunk_size. - >=0: use num_decoding_left_chunks - <0: use all left chunks - - Returns: - torch.Tensor: chunk mask of the input xs. - """ - # Whether to use chunk mask or not - if use_dynamic_chunk: - max_len = xs.size(1) - if decoding_chunk_size < 0: - chunk_size = max_len - num_left_chunks = -1 - elif decoding_chunk_size > 0: - chunk_size = decoding_chunk_size - num_left_chunks = num_decoding_left_chunks - else: - # chunk size is either [1, 25] or full context(max_len). - # Since we use 4 times subsampling and allow up to 1s(100 frames) - # delay, the maximum frame is 100 / 4 = 25. 
- chunk_size = torch.randint(1, max_len, (1, )).item() - num_left_chunks = -1 - if chunk_size > max_len // 2: - chunk_size = max_len - else: - chunk_size = chunk_size % 25 + 1 - if use_dynamic_left_chunk: - max_left_chunks = (max_len - 1) // chunk_size - num_left_chunks = torch.randint(0, max_left_chunks, - (1, )).item() - chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - elif static_chunk_size > 0: - num_left_chunks = num_decoding_left_chunks - chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, - num_left_chunks, - xs.device) # (L, L) - chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) - chunk_masks = masks & chunk_masks # (B, L, L) - else: - chunk_masks = masks - return chunk_masks - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - return mask - - -def make_non_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """Make mask tensor containing indices of non-padded part. - - The sequences in a batch may have different lengths. To enable - batch computing, padding is need to make all sequence in same - size. To avoid the padding part pass value to context dependent - block such as attention or convolution , this padding part is - masked. - - This pad_mask is used in both encoder and decoder. - - 1 for non-padded part and 0 for padded part. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_non_pad_mask(lengths) - masks = [[1, 1, 1, 1 ,1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0]] - """ - return ~make_pad_mask(lengths) - - -def mask_finished_scores(score: torch.Tensor, - flag: torch.Tensor) -> torch.Tensor: - """ - If a sequence is finished, we only allow one alive branch. This function - aims to give one branch a zero score and the rest -inf score. - - Args: - score (torch.Tensor): A real value array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size, beam_size). 
- """ - beam_size = score.size(-1) - zero_mask = torch.zeros_like(flag, dtype=torch.bool) - if beam_size > 1: - unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])), - dim=1) - finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])), - dim=1) - else: - unfinished = zero_mask - finished = flag - score.masked_fill_(unfinished, -float('inf')) - score.masked_fill_(finished, 0) - return score - - -def mask_finished_preds(pred: torch.Tensor, flag: torch.Tensor, - eos: int) -> torch.Tensor: - """ - If a sequence is finished, all of its branch should be - - Args: - pred (torch.Tensor): A int array with shape - (batch_size * beam_size, beam_size). - flag (torch.Tensor): A bool array with shape - (batch_size * beam_size, 1). - - Returns: - torch.Tensor: (batch_size * beam_size). - """ - beam_size = pred.size(-1) - finished = flag.repeat([1, beam_size]) - return pred.masked_fill_(finished, eos) diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/scheduler.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/scheduler.py deleted file mode 100644 index c418a731dec0041a238787bbba23102dba8db5e5..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/scheduler.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) -# 2022 Ximalaya Inc (Yuguang Yang) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from ESPnet(https://github.com/espnet/espnet) -# NeMo(https://github.com/NVIDIA/NeMo) - -from typing import Union - -import math -import warnings -import torch -from torch.optim.lr_scheduler import _LRScheduler - -from typeguard import check_argument_types - - -class WarmupLR(_LRScheduler): - """The WarmupLR scheduler - - This scheduler is almost same as NoamLR Scheduler except for following - difference: - - NoamLR: - lr = optimizer.lr * model_size ** -0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - WarmupLR: - lr = optimizer.lr * warmup_step ** 0.5 - * min(step ** -0.5, step * warmup_step ** -1.5) - - Note that the maximum lr equals to optimizer.lr in this scheduler. 
- - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - warmup_steps: Union[int, float] = 25000, - last_epoch: int = -1, - ): - assert check_argument_types() - self.warmup_steps = warmup_steps - - # __init__() must be invoked before setting field - # because step() is also invoked in __init__() - super().__init__(optimizer, last_epoch) - - def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" - - def get_lr(self): - step_num = self.last_epoch + 1 - if self.warmup_steps == 0: - return [ - lr * step_num ** -0.5 - for lr in self.base_lrs - ] - else: - return [ - lr - * self.warmup_steps ** 0.5 - * min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5) - for lr in self.base_lrs - ] - - def set_step(self, step: int): - self.last_epoch = step - - -class WarmupPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__(self, optimizer, *, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1): - assert not (warmup_steps is not None and warmup_ratio is not None),\ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class SquareRootConstantPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, optimizer, *, constant_steps=None, constant_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use particular number of step or ratio" - assert constant_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. 
- self.max_steps = max_steps - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.constant_lr = 1 / (constant_steps ** 0.5) - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - if step <= self.constant_steps: - return [self.constant_lr for _ in self.base_lrs] - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -class WarmupHoldPolicy(WarmupPolicy): - """Variant of WarmupPolicy which maintains high - learning rate for a defined number of steps. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - hold_steps=None, - hold_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (hold_steps is not None and hold_ratio is not None), \ - "Either use particular number of step or ratio" - assert hold_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - self.min_lr = min_lr - self._last_warmup_lr = 0.0 - - # Necessary to duplicate as class attributes are hidden in inner class - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if hold_steps is not None: - self.hold_steps = hold_steps + self.warmup_steps - elif hold_ratio is not None: - self.hold_steps = int(hold_ratio * max_steps) + self.warmup_steps - else: - self.hold_steps = 0 - - super().__init__( - optimizer, - warmup_steps=warmup_steps, - warmup_ratio=warmup_ratio, - max_steps=max_steps, - last_epoch=last_epoch, - min_lr=min_lr, - ) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed by the scheduler," - " " "please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup phase - if step <= self.warmup_steps and self.warmup_steps > 0: - return self._get_warmup_lr(step) - - # Hold phase - if (step >= self.warmup_steps) and (step < self.hold_steps): - return self.base_lrs - - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - -class WarmupAnnealHoldPolicy(_LRScheduler): - """Adds warmup kwargs and warmup logic to lr policy. - All arguments should be passed as kwargs for clarity, - Args: - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - min_lr: Minimum lr to hold the learning rate after decay at. - constant_steps: Number of steps to keep lr constant at. 
- constant_ratio: Ratio of steps to keep lr constant. - """ - - def __init__( - self, - optimizer, - *, - warmup_steps=None, - warmup_ratio=None, - constant_steps=None, - constant_ratio=None, - max_steps=None, - min_lr=0.0, - last_epoch=-1, - ): - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert not (constant_steps is not None - and constant_ratio is not None), \ - "Either use constant_steps or constant_ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - if constant_steps is not None: - self.constant_steps = constant_steps - elif constant_ratio is not None: - self.constant_steps = int(constant_ratio * max_steps) - else: - self.constant_steps = 0 - - self.decay_steps = max_steps - (self.constant_steps + self.warmup_steps) - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = self.last_epoch - - # Warmup steps - if self.warmup_steps > 0 and step <= self.warmup_steps: - return self._get_warmup_lr(step) - - # Constant steps after warmup and decay - if self.constant_steps > 0 and ( - self.warmup_steps + self.decay_steps) < step <= self.max_steps: - return self._get_constant_lr(step) - - # Min lr after max steps of updates - if step > self.max_steps: - return [self.min_lr for _ in self.base_lrs] - - return self._get_lr(step) - - def _get_warmup_lr(self, step): - lr_val = (step + 1) / (self.warmup_steps + 1) - return [initial_lr * lr_val for initial_lr in self.base_lrs] - - def _get_constant_lr(self, step): - return [self.min_lr for _ in self.base_lrs] - - def _get_lr(self, step): - """Simple const lr policy""" - return self.base_lrs - - -def _squareroot_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 0.5 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _square_annealing(initial_lr, step, max_steps, min_lr): - mult = ((max_steps - step) / max_steps) ** 2 - out_lr = initial_lr * mult - out_lr = max(out_lr, min_lr) - return out_lr - - -def _cosine_annealing(initial_lr, step, max_steps, min_lr): - mult = 0.5 * (1 + math.cos(math.pi * step / max_steps)) - out_lr = (initial_lr - min_lr) * mult + min_lr - return out_lr - - -def _linear_warmup_with_cosine_annealing(max_lr, warmup_steps, step, - decay_steps, min_lr): - assert max_lr > min_lr - # Use linear warmup for the initial part. - if warmup_steps > 0 and step <= warmup_steps: - return max_lr * float(step) / float(warmup_steps) - - # For any steps larger than `decay_steps`, use `min_lr`. - if step > warmup_steps + decay_steps: - return min_lr - - # If we are done with the warmup period, use the decay style. 
- num_steps_ = step - warmup_steps - decay_steps_ = decay_steps - decay_ratio = float(num_steps_) / float(decay_steps_) - assert decay_ratio >= 0.0 - assert decay_ratio <= 1.0 - delta_lr = max_lr - min_lr - - coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0) - - return min_lr + coeff * delta_lr - - -def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): - if cycle: - multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) - decay_steps *= multiplier - else: - step = min(step, decay_steps) - p = step / decay_steps - lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) - lr += min_lr - return lr - - -def _noam_hold_annealing(initial_lr, step, warmup_steps, - hold_steps, decay_rate, min_lr): - # hold_steps = total number of steps - # to hold the LR, not the warmup + hold steps. - T_warmup_decay = max(1, warmup_steps ** decay_rate) - T_hold_decay = max(1, (step - hold_steps) ** decay_rate) - lr = (initial_lr * T_warmup_decay) / T_hold_decay - lr = max(lr, min_lr) - return lr - - -class SquareAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=1e-5, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _square_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class SquareRootAnnealing(WarmupPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - new_lrs = [ - _squareroot_annealing(initial_lr=initial_lr, step=step, - max_steps=self.max_steps, min_lr=self.min_lr) - for initial_lr in self.base_lrs - ] - return new_lrs - - -class CosineAnnealing(WarmupAnnealHoldPolicy): - def __init__(self, optimizer, *, max_steps, min_lr=0, last_epoch=-1, - **kwargs): - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - if self.constant_steps is None or self.constant_steps == 0: - new_lrs = [ - _cosine_annealing( - initial_lr=initial_lr, - step=step - self.warmup_steps, - max_steps=self.max_steps - self.warmup_steps, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - else: - new_lrs = self._get_linear_warmup_with_cosine_annealing_lr(step) - return new_lrs - - def _get_warmup_lr(self, step): - if self.constant_steps is None or self.constant_steps == 0: - return super()._get_warmup_lr(step) - else: - # Use linear warmup for the initial part. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_constant_lr(self, step): - # Only called when `constant_steps` > 0. - return self._get_linear_warmup_with_cosine_annealing_lr(step) - - def _get_linear_warmup_with_cosine_annealing_lr(self, step): - # Cosine Schedule for Megatron LM, - # slightly different warmup schedule + constant LR at the end. 
- new_lrs = [ - _linear_warmup_with_cosine_annealing( - max_lr=self.base_lrs[0], - warmup_steps=self.warmup_steps, - step=step, - decay_steps=self.decay_steps, - min_lr=self.min_lr, - ) - for _ in self.base_lrs - ] - return new_lrs - - -class NoamAnnealing(_LRScheduler): - def __init__( - self, optimizer, *, d_model, warmup_steps=None, warmup_ratio=None, - max_steps=None, min_lr=0.0, last_epoch=-1 - ): - self._normalize = d_model ** (-0.5) - assert not (warmup_steps is not None - and warmup_ratio is not None), \ - "Either use particular number of step or ratio" - assert warmup_ratio is None or max_steps is not None, \ - "If there is a ratio, there should be a total steps" - - # It is necessary to assign all attributes *before* __init__, - # as class is wrapped by an inner class. - self.max_steps = max_steps - if warmup_steps is not None: - self.warmup_steps = warmup_steps - elif warmup_ratio is not None: - self.warmup_steps = int(warmup_ratio * max_steps) - else: - self.warmup_steps = 0 - - self.min_lr = min_lr - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if not self._get_lr_called_within_step: - warnings.warn( - "To get the last learning rate computed " - "by the scheduler, please use `get_last_lr()`.", - UserWarning, stacklevel=2 - ) - - step = max(1, self.last_epoch) - - for initial_lr in self.base_lrs: - if initial_lr < self.min_lr: - raise ValueError( - f"{self} received an initial learning rate " - f"that was lower than the minimum learning rate." - ) - - new_lrs = [self._noam_annealing(initial_lr=initial_lr, step=step) for - initial_lr in self.base_lrs] - return new_lrs - - def _noam_annealing(self, initial_lr, step): - if self.warmup_steps > 0: - mult = self._normalize * min(step ** (-0.5), - step * (self.warmup_steps ** (-1.5))) - else: - mult = self._normalize * step ** (-0.5) - - out_lr = initial_lr * mult - if step > self.warmup_steps: - out_lr = max(out_lr, self.min_lr) - return out_lr - - -class NoamHoldAnnealing(WarmupHoldPolicy): - def __init__(self, optimizer, *, max_steps, decay_rate=0.5, min_lr=0.0, - last_epoch=-1, **kwargs): - """ - From Nemo: - Implementation of the Noam Hold Annealing policy - from the SqueezeFormer paper. - - Unlike NoamAnnealing, the peak learning rate - can be explicitly set for this scheduler. - The schedule first performs linear warmup, - then holds the peak LR, then decays with some schedule for - the remainder of the steps. - Therefore the min-lr is still dependent - on the hyper parameters selected. - - It's schedule is determined by three factors- - - Warmup Steps: Initial stage, where linear warmup - occurs uptil the peak LR is reached. Unlike NoamAnnealing, - the peak LR is explicitly stated here instead of a scaling factor. - - Hold Steps: Intermediate stage, where the peak LR - is maintained for some number of steps. In this region, - the high peak LR allows the model to converge faster - if training is stable. However the high LR - may also cause instability during training. - Should usually be a significant fraction of training - steps (around 30-40% of the entire training steps). - - Decay Steps: Final stage, where the LR rapidly decays - with some scaling rate (set by decay rate). - To attain Noam decay, use 0.5, - for Squeezeformer recommended decay, use 1.0. - The fast decay after prolonged high LR during - hold phase allows for rapid convergence. 
- - References: - - [Squeezeformer: - An Efficient Transformer for Automatic Speech Recognition] - (https://arxiv.org/abs/2206.00888) - - Args: - optimizer: Pytorch compatible Optimizer object. - warmup_steps: Number of training steps in warmup stage - warmup_ratio: Ratio of warmup steps to total steps - hold_steps: Number of training steps to - hold the learning rate after warm up - hold_ratio: Ratio of hold steps to total steps - max_steps: Total number of steps while training or `None` for - infinite training - decay_rate: Float value describing the polynomial decay - after the hold period. Default value - of 0.5 corresponds to Noam decay. - min_lr: Minimum learning rate. - """ - self.decay_rate = decay_rate - super().__init__(optimizer=optimizer, max_steps=max_steps, - last_epoch=last_epoch, min_lr=min_lr, **kwargs) - - def _get_lr(self, step): - if self.warmup_steps is None or self.warmup_steps == 0: - raise ValueError( - "Noam scheduler cannot be used without warmup steps") - - if self.hold_steps > 0: - hold_steps = self.hold_steps - self.warmup_steps - else: - hold_steps = 0 - - new_lrs = [ - _noam_hold_annealing( - initial_lr, - step=step, - warmup_steps=self.warmup_steps, - hold_steps=hold_steps, - decay_rate=self.decay_rate, - min_lr=self.min_lr, - ) - for initial_lr in self.base_lrs - ] - return new_lrs - - def set_step(self, step: int): - self.last_epoch = step diff --git a/models/audio/speech_recognition/conformer/ixrt/README.md b/models/audio/speech_recognition/conformer/ixrt/README.md index ca8125825585929d15da5e8ec6b8610f88341d5b..e8ae1e058ec6852cd6c514118f7dff50811def01 100644 --- a/models/audio/speech_recognition/conformer/ixrt/README.md +++ b/models/audio/speech_recognition/conformer/ixrt/README.md @@ -21,8 +21,8 @@ Dataset: to download the Aishell dataset. 
```bash # Download and put model in conformer_checkpoints -wget http://files.deepspark.org.cn:880/deepspark/conformer_checkpoints.tar.gz -tar xf conformer_checkpoints.tar.gz +wget http://files.deepspark.org.cn:880/deepspark/conformer_checkpoints.tar +tar xf conformer_checkpoints.tar # Prepare AISHELL Data DATA_DIR=/PATH/to/aishell_test_data @@ -35,9 +35,9 @@ bash scripts/aishell_data_prepare.sh ${DATA_DIR} ${TOOL_DIR} ```bash # Install libGL ## CentOS -yum install -y mesa-libGL +yum install sox sox-devel -y ## Ubuntu -apt install -y libgl1-mesa-glx +apt install sox libsox-fmt-all -y pip3 install -r requirements.txt ``` @@ -57,4 +57,4 @@ bash scripts/infer_conformer_fp16_performance.sh | Model | BatchSize | Precision | QPS | CER | | --------- | --------- | --------- | ------- | ------ | -| Conformer | 24 | FP16 | 387.821 | 0.0517 | +| Conformer | 24 | FP16 | 1408.352 | 0.0497 | diff --git a/models/audio/speech_recognition/conformer/ixrt/ci/prepare.sh b/models/audio/speech_recognition/conformer/ixrt/ci/prepare.sh index 7944a1fc2c0053e967917904cf94f2f5200a90c3..40fd32af79cc255f1dbd8f7588402d02c3f8bece 100644 --- a/models/audio/speech_recognition/conformer/ixrt/ci/prepare.sh +++ b/models/audio/speech_recognition/conformer/ixrt/ci/prepare.sh @@ -18,17 +18,17 @@ set -x ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx + apt install sox libsox-fmt-all -y elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL + yum install sox sox-devel -y else echo "Not Support Os" fi pip3 install -r requirements.txt -ln -s /root/data/checkpoints/conformer_checkpoints.tar.gz ./ -tar xf conformer_checkpoints.tar.gz +ln -s /root/data/checkpoints/conformer_checkpoints.tar ./ +tar xf conformer_checkpoints.tar cp /root/data/datasets/aishell_test_data.tar ./ tar xf aishell_test_data.tar bash scripts/aishell_data_prepare.sh ./aishell_test_data ./tools \ No newline at end of file
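
As a quick cross-check of the updated ixrt instructions, the following is a minimal end-to-end sketch assembled only from commands that already appear in the hunks above; the working directory (`models/audio/speech_recognition/conformer/ixrt`) and the local AISHELL path are assumptions, not part of the patch.

```bash
# Minimal sketch of the updated ixrt workflow.
# Assumed cwd: models/audio/speech_recognition/conformer/ixrt; paths are placeholders.

# Fetch and unpack the checkpoint archive (now a plain .tar, as changed above).
wget http://files.deepspark.org.cn:880/deepspark/conformer_checkpoints.tar
tar xf conformer_checkpoints.tar

# Prepare the AISHELL test data with the repo's helper script.
DATA_DIR=/PATH/to/aishell_test_data   # placeholder: point this at your local AISHELL test set
TOOL_DIR="$(pwd)/tools"
bash scripts/aishell_data_prepare.sh ${DATA_DIR} ${TOOL_DIR}

# Run the FP16 performance benchmark referenced in the README hunk above.
bash scripts/infer_conformer_fp16_performance.sh
```

Note that the sox packages installed by `ci/prepare.sh` (`sox`/`sox-devel` on CentOS, `sox`/`libsox-fmt-all` on Ubuntu) are expected to be present before the data-preparation step.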